**Inference Notebook :** [ https://www.kaggle.com/virajkadam/plant-pathology-inference ]

# Resources
* **[ https://www.kaggle.com/ankursingh12/resized-plant2021 ]**
* **[ https://www.kaggle.com/arnabs007/apple-leaf-diseases-with-inceptionresnetv2-keras ]**

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import os,gc
import pickle

#kfolds
from sklearn.model_selection import KFold 
from sklearn.preprocessing import MultiLabelBinarizer



import cv2
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
from tensorflow.keras.applications import EfficientNetB7,InceptionResNetV2
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping,ModelCheckpoint

from tensorflow_addons.metrics import F1Score

# Loading data.

In [None]:
# Competition tables: the training labels and the submission template.
train = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
sample_sub = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')

# Training images are read from the pre-resized dataset; test images come
# from the original competition folder.
test_dir = '../input/plant-pathology-2021-fgvc8/test_images'
train_dir = '../input/resized-plant2021/img_sz_256'

# Preview the submission template (last expression, displayed by the notebook).
sample_sub.head(n=5)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

**Basic EDA**

In [None]:
# Report how many files each image folder contains.
train_image_count = len(os.listdir(train_dir))
print(f'Number of Images in Training set : {train_image_count}')

test_image_count = len(os.listdir(test_dir))
print(f'Number of Images in test set : {test_image_count}')

In [None]:
# Count how many training rows carry each raw label string.
fig, ax = plt.subplots(figsize=(16, 8))

# Pass the column by keyword and draw on the axes created above: recent
# seaborn releases no longer accept the data vector as a positional argument.
sns.countplot(x=train['labels'], ax=ax)

# Rotate the tick labels so long label strings stay readable.
plt.setp(ax.get_xticklabels(), rotation=45)

ax.set_title('Label counts')


**Multilabel Classification**

In [None]:
# Each row's label string is space-separated; turn it into a list of labels.
train['labels'] = train['labels'].str.split(pat=' ')

# Multi-hot encode the label lists (one column per distinct label).
mlb = MultiLabelBinarizer()
lab = mlb.fit_transform(train['labels'])

# Peek at the first ten encoded rows.
lab[:10]

In [None]:
# Label names, in the column order of the fitted MultiLabelBinarizer.
classes = mlb.classes_
classes

**Let's see some images**

In [None]:
def show_sample_images(df,train_dir,n):
    '''Plot n randomly sampled images in a 3-column grid, titled with their labels.

    Parameters
    ----------
    df : DataFrame with an 'image' column (file name inside train_dir) and a
        'labels' column (shown as the subplot title).
    train_dir : directory that contains the image files.
    n : number of images to show; must be at least 1. Rows are sampled with
        df.sample(n), so n should not exceed len(df).

    Raises
    ------
    ValueError : if n is smaller than 1.
    FileNotFoundError : if an image file cannot be read.
    '''
    if n < 1:
        raise ValueError('n must be a positive integer')

    n_cols = 3
    # Round up so a final, partially filled row exists when n is not a
    # multiple of 3.
    n_rows = -(-n // n_cols)

    dfs = df.sample(n)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 7 * n_rows), squeeze=False)
    axes = axes.ravel()

    for ax, (_, row) in zip(axes, dfs.iterrows()):
        path = os.path.join(train_dir, str(row['image']))
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {path}')

        # OpenCV loads pixels as BGR while matplotlib expects RGB; convert so
        # the displayed colours are correct.
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax.set_title(f"{row['labels']}")
        ax.axis('off')

    # Hide the empty cells left over in the last row, if any.
    for ax in axes[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()
show_sample_images(train,train_dir,n=18)

**SETTING RANDOM SEED**

In [None]:
#setting random seed :
seed=7

def set_seed(seed):
    '''Seed TensorFlow and NumPy and export PYTHONHASHSEED for reproducibility.

    Parameters
    ----------
    seed : integer seed applied to every generator.
    '''
    tf.random.set_seed(seed)
    np.random.seed(seed)
    # NOTE: Python reads PYTHONHASHSEED when the interpreter starts, so this
    # value takes effect in processes launched from here on, not in the
    # already-running interpreter.
    os.environ['PYTHONHASHSEED']=str(seed)

set_seed(seed)

* **USING ALREADY RESIZED IMAGES. CREDITS TO AUTHOR [ https://www.kaggle.com/ankursingh12/resized-plant2021 ]**

**Loading Images**

In [None]:
def load_images(df,val_df,path):
    '''Create the training and validation image generators for one fold.

    Reads the module-level `image_size`, `batch` and `seed` settings at call
    time.

    Parameters
    ----------
    df : DataFrame of training rows with 'image' (file name) and 'labels' columns.
    val_df : DataFrame of validation rows with the same columns.
    path : directory that contains the image files.

    Returns
    -------
    (train_gen, val_gen) : the two iterators returned by flow_from_dataframe.
    '''
    # Settings shared by both generators.
    flow_kwargs=dict(
        directory=path,
        x_col='image',
        y_col='labels',
        target_size=image_size,
        batch_size=batch,
        color_mode="rgb",
        class_mode='categorical',
        seed=seed)

    # Random augmentation is applied to the training images only.
    train_datagen=ImageDataGenerator(
        width_shift_range=(0.1,0.2),
        height_shift_range=(0.1,0.2),
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest',
        rescale=1./255
        )

    # Validation images are only rescaled, and served in a fixed order, so the
    # validation metrics are measured on unmodified images and are comparable
    # from epoch to epoch.
    val_datagen=ImageDataGenerator(rescale=1./255)

    train_gen=train_datagen.flow_from_dataframe(
        dataframe=df,
        shuffle=True,
        **flow_kwargs)

    val_gen=val_datagen.flow_from_dataframe(
        dataframe=val_df,
        shuffle=False,
        **flow_kwargs)

    return train_gen,val_gen
    
    

In [None]:
# Batching / image-size settings read by load_images and the training loop.
batch = 64
image_size = (256, 256)

# 3-fold shuffled split, seeded so the folds are repeatable.
kf = KFold(n_splits=3, shuffle=True, random_state=seed)

y = train['labels']
num_images = train.shape[0]

# Model

In [None]:
def build_model(base,dense=False,num_classes=6,input_shape=(256,256,3)):
    '''Build a sigmoid-output CNN classifier on top of the provided base.

    Parameters
    ----------
    base : Keras model/layer called on the input tensor; its output is passed
        to GlobalAveragePooling2D.
    dense : optional sequence of unit counts. One relu Dense layer followed by
        BatchNormalization is added per entry. Any falsy value (False, None,
        empty sequence) adds no hidden layers.
    num_classes : number of units in the sigmoid output layer.
    input_shape : shape of a single input image.

    Returns
    -------
    An uncompiled Model mapping the input image to `num_classes` sigmoid outputs.
    '''
    inp=layers.Input(shape=input_shape)

    x=base(inp)
    x=layers.GlobalAveragePooling2D()(x)

    #dense
    for units in dense or ():
        x=layers.Dense(units,activation='relu')(x)
        x=layers.BatchNormalization()(x)

    #output: one independent sigmoid per class
    out=layers.Dense(num_classes,activation='sigmoid')(x)

    return Model(inputs=inp,outputs=out)
            
            

In [None]:
#plotting accuracy and loss  
def plot_history(history):
    '''Plot the per-epoch loss and accuracy / F1 curves of a training run.

    Parameters
    ----------
    history : object whose `.history` attribute maps metric names to
        per-epoch values (for example the value returned by model.fit).

    Curves whose metric is missing from the history (for instance the
    validation curves when no validation data was used) are skipped.
    '''
    his=pd.DataFrame(history.history)
    epochs=range(len(his))

    fig,(loss_ax,score_ax)=plt.subplots(1,2,figsize=(16,8))

    def draw(ax,curves,title):
        # Plot every requested curve that is present in the history.
        drawn=False
        for column,color,label in curves:
            if column in his:
                ax.plot(epochs,his[column],color=color,label=label)
                drawn=True
        if drawn:
            ax.legend()
        ax.set_title(title)

    #loss:
    draw(loss_ax,
         [('loss','g','training'),
          ('val_loss','r','validation')],
         'Loss')

    #accuracy and f1_score share the second panel
    draw(score_ax,
         [('accuracy','g','training_acc'),
          ('val_accuracy','r','validation_acc'),
          ('f1_score','steelblue','training_f1'),
          ('val_f1_score','maroon','validation_f1')],
         'accuracy')

    plt.show()              

# Model1 with InceptionResNetV2 Base

In [None]:
# Transfer-learning base: InceptionResNetV2 with ImageNet weights and without
# its classification head.
base1 = InceptionResNetV2(
    weights='imagenet',
    include_top=False,
)

In [None]:
#metrics for model evaluation:
# threshold=0.5 scores every sigmoid output independently, as needed for
# multi-label targets; with the default threshold=None the metric keeps only
# the arg-max class of each prediction.
f1_score=F1Score(num_classes=6,average='macro',threshold=0.5,name='f1_score')

#checkpoint file name for each fold
def get_model_name(i):
    '''Return the file name under which the model of fold i is saved.'''
    model_name = f'model_{i}.h5'
    return model_name

# Training model in folds

In [None]:
# Upper bound on epochs per fold; early stopping may end a fold sooner.
EPOCHS=30

for i,(train_index,test_index) in enumerate(kf.split(train)):
    # Rows of the training table assigned to this fold's train / validation parts.
    train_set=train.iloc[train_index]
    val_set=train.iloc[test_index]

    train_gen,val_gen=load_images(train_set,val_set,train_dir)

    #model: pretrained base plus a new sigmoid output layer (no hidden dense layers)
    # NOTE(review): base1 is created once and reused in every fold, including
    # after K.clear_session() below -- confirm this is intended.
    model=build_model(base=base1,dense=None)

    #freezing layers: only the final output layer stays trainable
    for layer in model.layers[:-1]:
        layer.trainable=False

    #compile
    model.compile(loss=BinaryCrossentropy(),
             optimizer='adam',
             metrics=['accuracy',f1_score])

    #callbacks: all of them monitor validation F1 (higher is better)
    model_path=get_model_name(i)

    #reduce_lr: halve the learning rate after 2 epochs without improvement
    reduce_lr=ReduceLROnPlateau(patience=2,factor=0.5,min_delta=1e-2,
                                monitor='val_f1_score',verbose=0,mode='max')

    #early stopping: stop after 5 stagnant epochs and restore the best weights
    early_stopping=EarlyStopping(patience=5,min_delta=1e-3,
                              monitor='val_f1_score',restore_best_weights=True,mode='max')

    #save model: keep only the best checkpoint of this fold
    checkpoint1 = ModelCheckpoint(filepath=model_path, monitor='val_f1_score', verbose=1,
                                save_best_only=True,mode='max') 

    callbacks_1=[reduce_lr,checkpoint1,early_stopping]

    history1=model.fit(
        train_gen,
        validation_data=val_gen,
        steps_per_epoch=train_gen.n//batch,
        shuffle=True,
        callbacks=callbacks_1,
        epochs=EPOCHS,
        verbose=1
       )

    plot_history(history1)

    #reset the Keras backend session before the next fold
    K.clear_session()

    #next fold:
    print('Number of folds Trained {}'.format(i+1))