# Cassava Leaf Disease Classification

In [None]:
import os
import json
import pandas as pd

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from keras.optimizers import RMSprop
from keras.layers.normalization import BatchNormalization

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Root directory of the competition data; the trailing slash matters because
# later cells build file paths by plain string concatenation (path + '...').
path = '../input/cassava-leaf-disease-classification/'

In [None]:
# Top-level entries of the competition data directory.
data_dir_entries = os.listdir(path)
data_dir_entries

In [None]:
# Report how many files each image folder contains.
for split_name, folder in (('Train', 'train_images'), ('Test', 'test_images')):
    n_images = len(os.listdir(path + folder))
    print(f'No of {split_name} images: {n_images}')

In [None]:
# Load the training table from train.csv and preview the first rows.
train_csv = f'{path}train.csv'
train = pd.read_csv(train_csv)
train.head()

In [None]:
# Load the mapping from class label to disease name. JSON object keys are
# always strings, so the keys here are '0', '1', ... rather than integers.
# os.path.join is given the directory and file name as separate arguments
# (joining a single pre-concatenated string does nothing).
with open(os.path.join(path, 'label_num_to_disease_map.json')) as f:
    label_name = json.load(f)

print(json.dumps(label_name, indent = 1))

In [None]:
# Cast the labels to strings so they can be looked up in the JSON-derived
# `label_name` mapping, then attach the readable disease name to every row.
label_as_str = train['label'].astype(str)
train['label'] = label_as_str
train['label_name'] = label_as_str.map(label_name)
train.head()

## Exploratory Data Analysis

In [None]:
# Horizontal bar chart of the number of images per disease class.
plt.figure(figsize = (12,6))

# Series.value_counts() sorts by descending frequency, so its index gives the
# bar order. The top-level pd.value_counts() helper is deprecated in pandas.
class_order = train['label_name'].value_counts().index
sns.countplot(y = 'label_name', data = train, order = class_order, palette = 'muted', edgecolor = 'black')

plt.xlabel("")
plt.ylabel("")
plt.yticks(fontsize = 12)
plt.show()

In [None]:
# Number of training images per disease class.
class_counts = train['label_name'].value_counts()
class_counts

The classes are heavily imbalanced — CMD alone accounts for more than 60% of the training images. There are:

- <b>13158</b> leaf images having Cassava Mosaic Disease (CMD)
- <b>2577</b> healthy leaf images 
- <b>2386</b> leaf images having Cassava Green Mottle (CGM)
- <b>2189</b> leaf images having Cassava Brown Streak Disease (CBSD)
- <b>1087</b> leaf images having Cassava Bacterial Blight (CBB)

In [None]:
##Credits to https://www.kaggle.com/ihelon/cassava-leaf-disease-exploratory-data-analysis for this function

def get_image(image_id, labels, n_cols = 3):
    """Plot training images in a grid, titling each one with its label.

    Parameters
    ----------
    image_id : iterable of str
        File names of images inside the ``train_images`` folder under ``path``.
    labels : iterable of str
        Titles for the images, paired positionally with ``image_id``.
    n_cols : int, default 3
        Number of grid columns. Rows are added as needed, so any number of
        images can be shown (12 images give the original 4 x 3 layout).

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read.
    """
    pairs = list(zip(image_id, labels))
    if not pairs:
        # Nothing to draw; avoid creating an empty figure.
        return

    n_rows = -(-len(pairs) // n_cols)  # ceiling division
    # 4.5 inches per row reproduces the original 20 x 18 figure for 4 rows.
    plt.figure(figsize=(20, 4.5 * n_rows))

    for i, (file_name, title) in enumerate(pairs):
        plt.subplot(n_rows, n_cols, i + 1)
        image_path = os.path.join(path, 'train_images', file_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV loads images as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.imshow(image)
        plt.title(f"{title}", fontweight='bold', fontsize=12)
        plt.axis("off")

    plt.show()

In [None]:
# Show 12 randomly chosen training images across all classes. The sample is
# seeded (same seed as the train/validation split) so the figure is
# reproducible on a fresh kernel run.
sample = train.sample(12, random_state = 8)
image_ids = sample['image_id'].values
labels = sample['label_name'].values

get_image(image_ids, labels)

**Cassava Mosaic Disease (CMD)**

In [None]:
##Cassava Mosaic Disease (CMD)
# Labels are compared as strings because the label column was cast with
# astype(str). The sample is seeded so the figure is reproducible.
cmd_sample = train[train['label'] == '3'].sample(12, random_state = 8)
image_ids = cmd_sample['image_id'].values
labels = cmd_sample['label_name'].values

get_image(image_ids, labels)

**Healthy**

In [None]:
##healthy
# Labels are compared as strings because the label column was cast with
# astype(str). The sample is seeded so the figure is reproducible.
healthy_sample = train[train['label'] == '4'].sample(12, random_state = 8)
image_ids = healthy_sample['image_id'].values
labels = healthy_sample['label_name'].values

get_image(image_ids, labels)

**Cassava Green Mottle (CGM)**

In [None]:
##Cassava Green Mottle (CGM)
# Labels are compared as strings because the label column was cast with
# astype(str). The sample is seeded so the figure is reproducible.
cgm_sample = train[train['label'] == '2'].sample(12, random_state = 8)
image_ids = cgm_sample['image_id'].values
labels = cgm_sample['label_name'].values

get_image(image_ids, labels)

**Cassava Brown Streak Disease (CBSD)**

In [None]:
##Cassava Brown Streak Disease (CBSD)
# Labels are compared as strings because the label column was cast with
# astype(str). The sample is seeded so the figure is reproducible.
cbsd_sample = train[train['label'] == '1'].sample(12, random_state = 8)
image_ids = cbsd_sample['image_id'].values
labels = cbsd_sample['label_name'].values

get_image(image_ids, labels)

**Cassava Bacterial Blight (CBB)**

In [None]:
##Cassava Bacterial Blight (CBB)
# Labels are compared as strings because the label column was cast with
# astype(str). The sample is seeded so the figure is reproducible.
cbb_sample = train[train['label'] == '0'].sample(12, random_state = 8)
image_ids = cbb_sample['image_id'].values
labels = cbb_sample['label_name'].values

get_image(image_ids, labels)

## Modelling

In [None]:
# Hold out 20% of the labelled images for validation. The classes are heavily
# imbalanced, so the split is stratified on the label to keep the class
# proportions the same in both subsets.
# NOTE: this cell rebinds `train`; re-running it without re-running the cells
# above would split the already reduced training set again.
train, validation = train_test_split(train,
                                     train_size = 0.8,
                                     shuffle = True,
                                     random_state = 8,
                                     stratify = train['label'])

In [None]:
# Plain CNN: four Conv2D(5x5, ReLU) + 2x2 max-pooling stages, followed by a
# 512-unit dense layer and a 5-way softmax (one unit per disease class).
# The 150x150x3 input shape must match the generators' target_size.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (5, 5), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (5, 5), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')
])

# The optimizer is taken from tf.keras so it belongs to the same Keras
# implementation as the model; mixing in the standalone `keras` package's
# optimizer with a tf.keras model is not reliable across versions.
# The metric is named 'acc' so the history keys are 'acc' / 'val_acc'.
model.compile(optimizer = tf.keras.optimizers.RMSprop(),
              loss='categorical_crossentropy',
              metrics=['acc'])

# Halve the learning rate when validation accuracy has not improved for
# 5 epochs, never going below 1e-4.
callbacks = ReduceLROnPlateau(monitor='val_acc', 
                              factor=0.5, 
                              patience=5, 
                              verbose=1, 
                              min_lr=0.0001)

In [None]:
# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Both generators multiply pixel values by 1/255; validation images are
# otherwise left untouched.
train_datagen = ImageDataGenerator(rescale=1/255, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1/255)

In [None]:
BATCH_SIZE = 256
# Step counts must be whole numbers: true division yields floats, which are
# not handled consistently across Keras versions. Ceiling division keeps the
# final, partially filled batch in every epoch.
STEPS_PER_EPOCH = (train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
VALIDATION_STEPS = (validation.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE
EPOCHS = 20

# Settings shared by both generators, defined once so that training and
# validation batches are always produced the same way.
flow_options = dict(directory = os.path.join(path, 'train_images'),
                    x_col = 'image_id',
                    y_col = 'label',
                    target_size = (150, 150),  # must match the model's input_shape
                    batch_size = BATCH_SIZE,
                    class_mode = 'categorical')

train_generator = train_datagen.flow_from_dataframe(train, **flow_options)

validation_generator = validation_datagen.flow_from_dataframe(validation, **flow_options)

In [None]:
# Train the network. `Model.fit` accepts generators directly; the separate
# `fit_generator` method is deprecated in TensorFlow 2.x.
history = model.fit(
            train_generator,
            steps_per_epoch = STEPS_PER_EPOCH,
            epochs = EPOCHS,
            validation_data = validation_generator,
            validation_steps = VALIDATION_STEPS,
            verbose = 1,
            callbacks = [callbacks])

In [None]:
# Per-epoch metrics recorded during training.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than from EPOCHS, so the
# plot still works if training ended after fewer epochs than configured.
epochs = range(1, len(acc) + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6))
ax1.plot(epochs, acc, label = 'Training Accuracy')
ax1.plot(epochs, val_acc, label = 'Validation Accuracy')
ax1.set_title('Training & Validation Accuracy', fontweight='bold', fontsize=16)
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()

ax2.plot(epochs, loss, label = 'Training loss')
ax2.plot(epochs, val_loss, label = 'Validation loss')
ax2.set_title('Training & Validation Loss', fontweight='bold', fontsize=16)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()

plt.show()