# Basic CNN with TensorFlow and Keras on the Plant Pathology 2021 Dataset

A companion notebook that examines how the model arrives at its decisions is available at https://www.kaggle.com/mreenav/interpreting-cnns-plant-pathology-2021

In [None]:
import numpy as np 
import pandas as pd 
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.preprocessing import image
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten,Dense,Dropout,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, InceptionResNetV2, ResNet50, Xception
import cv2
from PIL import Image

In [None]:
# Root of the competition dataset and its image sub-directories.
path = '../input/plant-pathology-2021-fgvc8/'
train_dir = f'{path}train_images/'
test_dir = f'{path}test_images/'
# Directory of the resized training images.
train_paths = '../input/resized-plant2021/img_sz_256/'

In [None]:
# Load the training labels; reuse `path` so the dataset location is configured in one place.
df = pd.read_csv(path + 'train.csv')

In [None]:
# Preview the first five rows of the label table.
df.head(n=5)

In [None]:
# Number of rows per distinct value of the `labels` column.
df['labels'].value_counts()

In [None]:
# Distinct values present in the `labels` column.
df.labels.unique()

In [None]:
# Make sure every entry of `labels` is a plain string.
df['labels'] = df.labels.astype(str)

In [None]:
# Horizontal bar chart of how many rows each `labels` value has.
count_fig, count_ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, y='labels', ax=count_ax)

In [None]:
def plot_examples(label, n_examples=5):
    """Display a row of sample training images whose `labels` value equals `label`.

    Parameters
    ----------
    label
        Value compared for equality against ``df['labels']``.
    n_examples : int, default 5
        Number of panels in the row (must be at least 1). If fewer matching
        rows exist, the surplus panels are hidden instead of raising an
        ``IndexError``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot load one of the selected image files.
    """
    # Evaluate the label mask once rather than on every loop iteration.
    file_names = df.loc[df['labels'] == label, 'image'].head(n_examples)

    fig, ax = plt.subplots(1, n_examples, figsize=(25, 15))
    # plt.subplots returns a bare Axes when only one panel is requested;
    # normalise to a flat array so the loops below work for any count.
    ax = np.atleast_1d(ax).ravel()

    # Hide panels for which no matching image exists.
    for panel in ax[len(file_names):]:
        panel.set_axis_off()

    for panel, file_name in zip(ax, file_names):
        img_path = train_paths + file_name
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        panel.imshow(img)
        panel.set_title(label)
        panel.set_xticklabels([])
        panel.set_yticklabels([])

In [None]:
# Show one row of example images for every distinct label value.
for label_name in df['labels'].unique():
    plot_examples(label_name)

In [None]:
# Converting to multi-label problem
# https://www.kaggle.com/shanmukh05/plant-pathology-2k21-baseline-tpu-training

count_dict = df.labels.value_counts()

# Class name -> integer id. "healthy" is deliberately absent: it is dropped
# below, so a healthy-only row ends up with an empty id list.
label2id = {
    name: idx
    for idx, name in enumerate(
        ['scab', 'frog_eye_leaf_spot', 'rust', 'complex', 'powdery_mildew']
    )
}
NUM_CLASS = len(label2id)
id2label = {idx: name for name, idx in label2id.items()}


def _encode_labels(label_string):
    """Split a space-separated label string and map every non-"healthy" name to its id."""
    return [label2id[name] for name in label_string.split(" ") if name != "healthy"]


df["labels"] = df["labels"].map(_encode_labels)
df.head()

In [None]:
# Random augmentations applied only by the training generator.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both generators rescale pixel values by 1/255 and use the same 0.2 validation split.
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2,
    **augmentation_options,
)
test_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

In [None]:
# Settings shared by the training and validation iterators; only `subset` differs.
flow_options = dict(
    dataframe=df,
    directory=train_paths,
    x_col='image',
    y_col='labels',
    target_size=(256, 256),
    batch_size=128,
    color_mode='rgb',
    class_mode='categorical',
)

# Augmented batches drawn from the training subset of the split.
train_generator = train_datagen.flow_from_dataframe(subset='training', **flow_options)

# Rescaled-only batches drawn from the validation subset of the split.
test_generator = test_datagen.flow_from_dataframe(subset='validation', **flow_options)

In [None]:
# Five Conv2D(3x3, relu) + MaxPooling2D(2x2) stages, then a dense head with one
# sigmoid output per class.
conv_filters = (32, 64, 128, 128, 128)

cnn_layers = []
for stage, n_filters in enumerate(conv_filters):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (256, 256, 3)} if stage == 0 else {}
    cnn_layers.append(
        tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs)
    )
    cnn_layers.append(tf.keras.layers.MaxPooling2D(2, 2))

cnn_layers.extend([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(NUM_CLASS, activation='sigmoid'),
])

model = tf.keras.Sequential(cnn_layers)
model.summary()

In [None]:
METRIC = "val_f1_score"

def create_callbacks(metric = METRIC):
    """Build the list of training callbacks, all monitoring `metric` in 'max' mode.

    Returns, in order:
      1. ModelCheckpoint writing the best model so far to './best_model.h5'.
      2. ReduceLROnPlateau multiplying the learning rate by 0.2 after 3 epochs
         without improvement.
      3. EarlyStopping after 10 epochs without improvement.
    """
    # Arguments common to every callback.
    monitor_options = dict(monitor=metric, mode='max', verbose=1)

    return [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='./best_model.h5',
            save_best_only=True,
            **monitor_options,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            factor=0.2,
            patience=3,
            **monitor_options,
        ),
        tf.keras.callbacks.EarlyStopping(
            patience=10,
            **monitor_options,
        ),
    ]

In [None]:
from tensorflow.keras.optimizers import RMSprop,Adam
import tensorflow_addons as tfa

epochs = 40
# NOTE: this value is not passed to model.fit; the effective batch size is the
# one configured on the data generators.
batch_size = 256
# `lr` is a deprecated alias; `learning_rate` is the supported argument name.
optimizer = Adam(learning_rate = 0.001)
model.compile(optimizer = optimizer,
             loss = 'binary_crossentropy',
             metrics = ['accuracy',
                        # Multi-label sigmoid outputs: threshold each class at 0.5.
                        # Without `threshold`, F1Score marks only the argmax class as
                        # positive, which cannot represent several labels at once or
                        # an all-zero (healthy) target.
                        tfa.metrics.F1Score(num_classes = NUM_CLASS,
                                            average = "macro",
                                            threshold = 0.5,
                                            name = "f1_score")])

In [None]:
# Train on the augmented generator and validate on the held-out split.
callbacks = create_callbacks()
history = model.fit(
    train_generator,
    validation_data = test_generator,
    epochs = epochs,
    callbacks = callbacks,
    verbose = 1,
)

In [None]:
# Training curves: accuracy on top, loss below.
# plt.subplots(2, 1) already returns a 1-D array of axes, so no ravel() is needed.
figure, axis = plt.subplots(2, 1, figsize=(15,15))

axis[0].plot(history.history['accuracy'], label='Training Data')
axis[0].plot(history.history['val_accuracy'], label='Validation Data')
axis[0].set(xlabel='Epochs', ylabel='Accuracy', title='Accuracy vs Epochs')
axis[0].legend(loc="upper left")

axis[1].plot(history.history['loss'], label='Training Data')
axis[1].plot(history.history['val_loss'], label='Validation Data')
# The model is compiled with binary crossentropy, so the title names that loss.
axis[1].set(xlabel='Epochs', ylabel='Loss', title='Binary Crossentropy Loss vs Epochs')
axis[1].legend(loc="upper left")

plt.show()

# Acknowledgements

The starter code is adapted from this notebook by Ayush:
https://www.kaggle.com/aayushmishra1512/plant-pathology-starter

The resized dataset comes from:
https://www.kaggle.com/ankursingh12/resized-plant2021