# Leaf Disease Detection
In this notebook, you will implement a CNN classifier for leaf disease detection. Your goal is to submit your predictions to the competition! Feel free to reuse code from previous case studies, but make sure you understand what the code is doing before using it.

In [None]:
import os
from os.path import join

from tqdm.notebook import tqdm # progress bar

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.models import Model

import cv2

import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

from sklearn.model_selection import train_test_split

from IPython.display import SVG


## Load Data

In [None]:
# ---- Locations of the competition files and of the saved model ----
IMAGE_PATH = "../input/plant-pathology-2020-fgvc7/images/"
TRAIN_PATH = "../input/plant-pathology-2020-fgvc7/train.csv"
TEST_PATH = "../input/plant-pathology-2020-fgvc7/test.csv"
SUB_PATH = "../input/plant-pathology-2020-fgvc7/sample_submission.csv"
MODEL_PATH = "models/plant_pathology_model.h5"

# ---- Training configuration ----
EPOCHS = 50

# Image size used for training.
# Trade-off: bigger images will probably perform better, but they cost
# more training time and more memory.
IMAGE_X = ... #---YOUR VALUE HERE---
IMAGE_Y = ... #---YOUR VALUE HERE---

# Names of the target columns, one per class
labels = ['healthy', 'multiple_diseases', 'rust', 'scab']

# ---- Read the CSV tables ----
df_train = pd.read_csv(TRAIN_PATH)
df_test = pd.read_csv(TEST_PATH)
sub = pd.read_csv(SUB_PATH)

## Explore the data

In [None]:
# Preview the first rows of the training table
df_train.head()

In [None]:
# Preview the first rows of the test table
df_test.head()

In [None]:
# Number of training rows flagged (== 1) for each class column
class_counts = [(name, (df_train[name] == 1).sum()) for name in labels]

print('Training set size:', len(df_train))
for label, count in class_counts:
    print(f"\t{label}: {count}")
print('Test set size:', len(df_test))

In [None]:
# Here you can include any other exploration you might want to do
# (for example: look at a few sample images, or plot the class balance)

... #---YOUR CODE HERE---

## Data Generators and Augmentation

In [None]:
# Turn an image name from the CSV files into the path of its JPEG file
def format_path(st):
    """
    Return the path of the ``.jpg`` file named ``st`` inside ``IMAGE_PATH``.
    """
    file_name = st + '.jpg'
    return os.path.join(IMAGE_PATH, file_name)

# NOTE: every `...` below is a placeholder to be replaced with your own code.
# The placeholders are valid Python on purpose, so the cell can at least be
# parsed before it is completed.

# Generate train and test paths
train_paths = ... #---YOUR CODE HERE---
test_paths = ... #---YOUR CODE HERE---

# Convert the labels to floats
train_labels = ... #---YOUR CODE HERE---

# Split the data into validation and training sets
# (the right-hand side must produce the four values unpacked on the left)
train_paths, valid_paths, train_labels, valid_labels = ... #---YOUR CODE HERE---

In [None]:
def decode_image(filename, label=None, image_size=(IMAGE_X, IMAGE_Y)):
    """
    Loads, normalizes and resizes the image

    :param filename: image to load (presumably a path built by format_path -- confirm
        when implementing)
    :param label: optional label that belongs to the image; defaults to None
    :param image_size: target size; the default (IMAGE_X, IMAGE_Y) is evaluated once,
        when this cell runs, so re-run the cell after changing IMAGE_X / IMAGE_Y
    """
    #---YOUR CODE HERE---
    ...

def data_augment(image, label=None):
    """
    Define your data augmentations here

    :param image: image to augment
    :param label: optional label that belongs to the image; defaults to None
    """
    #---YOUR CODE HERE---
    ...

In [None]:
# tf.data's AUTOTUNE constant, kept under a short name for the pipelines below
AUTO = tf.data.experimental.AUTOTUNE
# Intended batch size for the datasets below
BATCH_SIZE = 64

# Create datasets
train_dataset = ... #---YOUR CODE HERE---

# NOTE(review): the evaluation cell compares predictions on valid_dataset with
# valid_labels row by row, so this dataset presumably must keep the order of valid_paths
valid_dataset = ... #---YOUR CODE HERE---

# NOTE(review): predictions on test_dataset are written straight into the submission
# table, so this dataset presumably must keep the order of test_paths
test_dataset = ... #---YOUR CODE HERE---

### Model Definition

In [None]:
from tensorflow.keras.applications import MobileNet, DenseNet121
from tensorflow.keras import layers

# NOTE: every `...` below is a placeholder to be replaced with your own code.

# Define model architecture
model = ... #---YOUR CODE HERE---

# compile the model
# (replace the `...` with the arguments of compile; the call is closed so
# that the cell is valid Python before it is completed)
model.compile(...) #---YOUR CODE HERE---

### Callbacks

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Define checkpointing callback
# (presumably a ModelCheckpoint writing to MODEL_PATH -- confirm when implementing)
mcp = ... #---YOUR CODE HERE---

# Define learning rate schedule
def build_lrfn(lr_start=0.00001, lr_max=0.00005, 
               lr_min=0.00001, lr_rampup_epochs=5, 
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """
    Build the learning rate schedule.

    Must return a callable that maps an epoch index to a learning rate: the
    result of build_lrfn() is called as ``lrfn(e)`` for every epoch when the
    schedule is plotted.

    NOTE(review): the parameter meanings below are inferred from their names --
    confirm them when implementing.

    :param lr_start: presumably the learning rate at the first epoch
    :param lr_max: presumably the peak learning rate reached after the ramp-up
    :param lr_min: presumably the floor the learning rate decays towards
    :param lr_rampup_epochs: presumably the number of epochs spent ramping up
    :param lr_sustain_epochs: presumably the number of epochs held at lr_max
    :param lr_exp_decay: presumably the per-epoch exponential decay factor
    """
    #define your learning rate schedule
    ... #---YOUR CODE HERE---

# Create a learning rate schedule as keras callback
# lrfn is the schedule function itself; it is also evaluated per epoch for the plot
lrfn = build_lrfn()
lr_schedule = ... #---YOUR CODE HERE---

In [None]:
# Visualize the learning rate across epochs
# Evaluate the schedule for exactly the epochs configured in EPOCHS, so the
# plot stays in sync when the number of epochs is changed.
epochs_dummy = list(range(EPOCHS))
y = [lrfn(e) for e in epochs_dummy]
fig = go.Figure(go.Scatter(x=epochs_dummy, y=y, mode='lines+markers'))
# The figure returned by update_layout is the cell's last expression
fig.update_layout(
    xaxis = dict(
        title='Epoch'
    ),
    yaxis = dict(
        title='Learning rate',
        showexponent='all',
        exponentformat='e'
    ),
    title='Learning rate schedule'
)

### Training

In [None]:
# NOTE: every `...` below is a placeholder to be replaced with your own code.

# setup the callbacks
callbacks = ... #---YOUR CODE HERE---

# train your model
# (replace the `...` with the arguments of fit; the call is closed so that
# the cell is valid Python before it is completed)
history = model.fit(...) #---YOUR CODE HERE---

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def visualize_training_process(history, metric='categorical_accuracy'):
    """ 
    Visualize loss and accuracy from training history

    The accuracy curves (train / validation) go in the first row and the
    loss curves (train / validation) in the second row.

    :param history: A Keras History object
    :param metric: key of the accuracy-like metric in ``history.history``;
        the validation curve is read from ``'val_' + metric``. Must match the
        metric the model was compiled with.
    :return: the plotly figure with both subplots
    :raises KeyError: if a required curve is missing from ``history.history``
    """
    history_df = pd.DataFrame(history.history)

    # Fail early with a readable message instead of an opaque column lookup error
    val_metric = 'val_' + metric
    required = [metric, val_metric, 'loss', 'val_loss']
    missing = [key for key in required if key not in history_df.columns]
    if missing:
        raise KeyError(
            f"history is missing {missing}; available keys: {list(history_df.columns)}"
        )

    epochs = np.arange(1, len(history_df) + 1)
    fig = make_subplots(rows=2, cols=1)

    # (history column, legend name, subplot row)
    curves = [
        (metric, 'Accuracy Train', 1),
        (val_metric, 'Accuracy Val', 1),
        ('loss', 'Loss Train', 2),
        ('val_loss', 'Loss Val', 2),
    ]
    for key, name, row in curves:
        # add_trace replaces the deprecated append_trace
        fig.add_trace(
            go.Scatter(x=epochs, y=history_df[key], mode='lines+markers', name=name),
            row=row, col=1,
        )

    # Label every axis so each subplot can be read on its own
    fig.update_xaxes(title_text="Epochs")
    fig.update_yaxes(title_text="Accuracy", row=1, col=1)
    fig.update_yaxes(title_text="Loss", row=2, col=1)
    fig.update_layout(template="plotly_white")

    return fig
# Plot the curves of the training run; the returned figure is the cell's last expression
visualize_training_process(history)

In [None]:
# Evaluate performance of model by plotting confusion matrix
from sklearn.metrics import confusion_matrix

# see http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
import itertools

def accuracy(y, y_pred):
    """
    Fraction of entries of ``y_pred`` that equal the matching entry of ``y``.

    Both inputs are converted to numpy arrays first, so plain Python lists
    are compared element by element (comparing two lists with ``==`` would
    give a single bool and a wrong score).

    :param y: true labels (array-like)
    :param y_pred: predicted labels (array-like)
    :return: number of matching entries divided by ``len(y)``;
        ``nan`` when ``y`` is empty, since the score is undefined
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    if len(y) == 0:
        # Avoid the division by zero (and its RuntimeWarning)
        return float('nan')
    return np.sum(y == y_pred) / len(y)

def plot_confusion_matrix(cm, labels=None, title='Confusion Matrix'):
    """
    Show a confusion matrix as an annotated plotly heatmap.

    The horizontal axis is captioned "Predicted value" and the vertical axis
    "Real value"; both use ``labels`` as tick labels.

    :param cm: confusion matrix, iterable of rows; each cell is printed via str()
    :param labels: tick labels for both axes
    :param title: title of the figure
    """
    import plotly.figure_factory as ff

    def axis_caption(text, x, y, **extra):
        # Caption positioned relative to the whole figure ("paper" coordinates)
        return dict(font=dict(color="black", size=14),
                    x=x,
                    y=y,
                    showarrow=False,
                    text=text,
                    xref="paper",
                    yref="paper",
                    **extra)

    # Text written inside each tile: the cell value as a string
    cell_text = [[str(value) for value in row] for row in cm]

    # Build the heatmap
    fig = ff.create_annotated_heatmap(
        cm,
        x=labels,
        y=labels,
        annotation_text=cell_text,
        colorscale='YlGnBu',
        showscale=True,
    )
    fig.update_layout(title_text=title)

    # Axis titles are added as free annotations
    fig.add_annotation(axis_caption("Predicted value", 0.5, -0.15))
    fig.add_annotation(axis_caption("Real value", -0.35, 0.5, textangle=-90))

    # Extra top/left margin leaves room for the title and the vertical caption
    fig.update_layout(margin=dict(t=100, l=200), width=700, height=600)
    fig.show()
    
# Model outputs for the validation set (one score per class and sample)
# NOTE(review): assumes valid_dataset yields samples in the order of valid_labels -- confirm
y_prob = model.predict(valid_dataset)

# Index of the highest-scoring class = label number
y_pred = np.argmax(y_prob, axis=1)
y_true = np.argmax(valid_labels, axis=1)

# Confusion matrix of true vs. predicted label numbers
cm = confusion_matrix(y_true, y_pred)

cm_title = 'Confusion_matrix Validation Set (acc={:.3f})'.format(accuracy(y_true, y_pred))
plot_confusion_matrix(cm, labels, title=cm_title)

## Save predictions

In [None]:
# Predict labels on the test set
# NOTE(review): the rows of `predictions` are assumed to follow the row order of `sub` -- confirm
predictions = model.predict(test_dataset)

# Prepare the submission file
# Overwrites, in place, every column of `sub` from 'healthy' to the last one
sub.loc[:, 'healthy':] = predictions
# Written to the working directory, without the index column
sub.to_csv('submission_densenet.csv', index=False)
sub.head()

The last step is to submit your predictions!