## How I created my model.

1. I created two datasets based on the [PP2021 - Ultimate Preprocessing](https://www.kaggle.com/nickuzmenkov/pp2021-ultimate-preprocessing) notebook. Each dataset was divided into an augmented training set and a non-augmented validation set. Unlike the original notebook, I replaced the rough resize of each image with a resize of the shorter axis to a fixed size (224 for the cassava model and 640 for the Inception model), followed by a random crop to the desired image size.
2. Both models were trained on the augmented sets. On the validation sets, the F1 scores were over 90% and over 85% for the cassava and Inception models, respectively.
3. To combine the results, I used the original, non-augmented full dataset and built a multilabel tree model with 12 input variables and 6 outputs.
4. Later I noticed that, because of the random crop, I was getting different results on every run. So I calculate results on 3 different crops and average them. Later I found that multiplying the model outputs by a coefficient gives better results on the LB (leaderboard).
5. Because of this approach, the results of my model differ with every run; the best result I obtained in a previous run was 0.761.

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
from tensorflow.keras.applications import InceptionResNetV2
import tensorflow_addons as tfa
import albumentations
import PIL
import matplotlib.pyplot as plt
from multiprocessing import Pool
import pickle
import shutil

In [None]:
# Side length, in pixels, of the square crops written to disk.
image_size = 640

# Resize so the shorter side equals image_size, then take a random
# image_size x image_size crop; every call therefore yields a different view.
transform = albumentations.Compose([
    albumentations.SmallestMaxSize(max_size=image_size),
    albumentations.RandomCrop(image_size, image_size),
])

# One output folder per random crop. exist_ok=True keeps the cell re-runnable:
# os.mkdir would raise FileExistsError if a folder is left from an earlier run.
for crop_dir in ('./Pictures1', './Pictures2', './Pictures3'):
    os.makedirs(crop_dir, exist_ok=True)

# First column of the sample submission; its values are used as the test-image
# file names.
df = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv').iloc[:, 0]

def save_img(df):
    """Write three random crops of every listed test image to disk.

    Each image is read from the competition's ``test_images`` folder, passed
    through the module-level ``transform`` three times, and the results are
    saved under ``./Pictures1``, ``./Pictures2`` and ``./Pictures3`` using the
    original file name.

    Args:
        df: Iterable of image file names.
    """
    for p in df:
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array. Converting to RGB guarantees the
        # three-channel layout that ``fromarray(..., 'RGB')`` below requires.
        with PIL.Image.open('../input/plant-pathology-2021-fgvc8/test_images/' + p) as src:
            img = np.asarray(src.convert('RGB'))
        for n in ['1', '2', '3']:
            # A fresh call per folder, so each folder gets its own transformed copy.
            img1 = transform(image=img)['image']
            img1 = PIL.Image.fromarray(img1.astype('uint8'), 'RGB')
            img1.save('./Pictures' + n + '/' + p)

In [None]:
# Split the file names into four contiguous chunks (the last chunk also takes
# the remainder) and let one worker process handle each chunk.
l = df.shape[0] // 4
bounds = [0, l, 2 * l, 3 * l, None]
chunks = [df.iloc[start:stop] for start, stop in zip(bounds, bounds[1:])]
with Pool(4) as pool:
    pool.map(save_img, chunks)

In [None]:
# Show the three saved crops of the first image side by side.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 15))
crop_previews = [
    PIL.Image.open('./Pictures' + folder + '/' + df[0])
    for folder in ('1', '2', '3')
]
ax1.imshow(crop_previews[0])
ax2.imshow(crop_previews[1])
ax3.imshow(crop_previews[2])

In [None]:
def load_image(folder):
    """Build a mapping function that reads images from one crop folder.

    Args:
        folder: Suffix of the crop directory, e.g. ``'1'`` for ``./Pictures1``.

    Returns:
        A function that takes an image file name and returns the decoded
        3-channel image as a float32 tensor divided by 255.
    """
    def _read_and_scale(image_path):
        raw_bytes = tf.io.read_file('./Pictures' + folder + '/' + image_path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
        return tf.cast(pixels, tf.float32) / 255.

    return _read_and_scale

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Dataset of image file names taken from `df`; each crop folder gets its own
# decoding pipeline built on top of it.
training_data = tf.data.Dataset.from_tensor_slices(df)


def _batched(dataset):
    """Group a dataset into batches of 16 and prefetch with an autotuned buffer."""
    return dataset.batch(16).prefetch(buffer_size=AUTOTUNE)


training_data1 = training_data.map(load_image('1'), num_parallel_calls=AUTOTUNE)
training_data2 = training_data.map(load_image('2'), num_parallel_calls=AUTOTUNE)
training_data3 = training_data.map(load_image('3'), num_parallel_calls=AUTOTUNE)

training_data_batches1 = _batched(training_data1)
training_data_batches2 = _batched(training_data2)
training_data_batches3 = _batched(training_data3)

In [None]:
# Copy the cassava-layer input dataset into the working directory and point
# TensorFlow Hub's cache directory at the copy.
# NOTE(review): presumably this lets the hub URL below resolve from the local
# cache without network access — confirm.
!cp -R ../input/cassava-layer/ /kaggle/working/cassava-layer/
os.environ["TFHUB_CACHE_DIR"] = "/kaggle/working/cassava-layer/"

# Non-trainable CropNet cassava-disease classifier layer from TensorFlow Hub,
# wrapped in a Sequential model that takes 224x224 3-channel inputs.
cassava = hub.KerasLayer('https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2', 
                         trainable=False)
model = tf.keras.Sequential([tf.keras.Input(shape=(224,224,3)),
                            cassava])
# Restore the weights stored in the apple-train-cassava input dataset.
model.load_weights('../input/apple-train-cassava/cassava_model_weights.h5')
# model_2 accepts image_size x image_size inputs and resizes them to 224x224
# before calling the cassava model above.
inputs = keras.Input(shape=(image_size,image_size,3))
resize = keras.layers.experimental.preprocessing.Resizing(224, 224)(inputs)
output = model(resize)
model_2 = keras.Model(inputs, output, name = 'model_2')

# model_1: InceptionResNetV2 backbone (weights=None, no top) followed by global
# max pooling and a 6-unit softmax layer. Its weights are loaded from the
# apple-train-inception input dataset, then the whole model is frozen.
model_1 = tf.keras.Sequential([
        InceptionResNetV2(
            input_shape=(image_size, image_size, 3),
            weights=None,
            include_top=False
        ),
        keras.layers.GlobalMaxPooling2D(),
        keras.layers.Dense(6, activation='softmax')
    ], name = 'model_1')
model_1.load_weights('../input/apple-train-inception/inception_model_weights.h5')
model_1.trainable = False

In [None]:
# Combined model: feed the same input to both networks and concatenate their
# outputs, model_1 first and model_2 second.
inputs = keras.Input(shape=(image_size, image_size, 3))
branch_outputs = [model_1(inputs), model_2(inputs)]
output = tf.keras.layers.Concatenate()(branch_outputs)

model = keras.Model(inputs=inputs, outputs=output, name='model')
model.summary()

In [None]:
# Run the combined model once per crop folder, in folder order.
X1, X2, X3 = (
    model.predict(batches)
    for batches in (
        training_data_batches1,
        training_data_batches2,
        training_data_batches3,
    )
)
# Average the three runs, then scale by 1.4. The notebook intro describes this
# coefficient as an empirical tweak that improved the leaderboard score.
X = (X1 + X2 + X3) / 3 * 1.4

In [None]:
# Load the pickled model from the apple-train-tree input dataset and run it on
# the scaled, averaged outputs in X.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
pkl_filename = "../input/apple-train-tree/tree_model.pkl"
with open(pkl_filename, 'rb') as model_file:
    pickle_model = pickle.load(model_file)

response = pickle_model.predict(X)

In [None]:
preds = response.tolist()

# For every prediction row, keep the positions (0-5) whose value equals 1.
# If no position is set, fall back to the single position returned by argmax.
indices = []
for pred in preds:
    active = [i for i in range(6) if pred[i] == 1]
    indices.append(active if active else [np.argmax(pred)])

print(indices)

In [None]:
# Label name for each output position.
# NOTE(review): presumably this order matches the one used when the models
# were trained — confirm.
labels = ['complex', 'frog_eye_leaf_spot', 'powdery_mildew', 'rust', 'scab', 'healthy']

# One space-separated label string per image.
testlabels = [
    ' '.join(str(labels[i]) for i in image)
    for image in indices
]

print(testlabels)

In [None]:
# Fill the sample submission's `labels` column with the predicted label
# strings and write the result to submission.csv.
sub = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')
sub = sub.assign(labels=testlabels)
sub.to_csv('submission.csv', index=False)
sub

In [None]:
# Delete the temporary crop folders and the ./cassava-layer directory.
# NOTE(review): presumably the working directory is /kaggle/working, so
# './cassava-layer' is the copy made for the TF Hub cache — confirm.
for scratch_dir in ('./Pictures1', './Pictures2', './Pictures3', './cassava-layer'):
    shutil.rmtree(scratch_dir)