# Cassava Leaf Disease Classification
> ### Identify the type of disease present on a Cassava Leaf image

The following topics will be covered:

1.   Exploratory Data Analysis
1.   Data visualization 
1.   Image Augmentation
1.   Model Building and Prediction
1.   Evaluation 

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Activation
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop
from keras.metrics import AUC 
import matplotlib.image as mpimg
import cv2
import json
import os 

> ### EDA

In [None]:
# Read the training metadata CSV into a DataFrame and preview its first rows.
train_ = pd.read_csv(
    filepath_or_buffer="../input/cassava-leaf-disease-classification/train.csv"
)
train_.head(n=5)

In [None]:
# Load the label-number -> disease-name mapping and attach the readable name
# to every training row.
# The context manager guarantees the JSON file handle is closed after reading.
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as mapping_file:
    disease_names = json.load(mapping_file)
# JSON object keys are always strings, so the label is stringified for lookup.
train_['disease_name'] = train_['label'].apply(lambda x: disease_names[str(x)])
train_.head()

In [None]:
# Distinct cassava leaf disease names present in the training data.
pd.unique(train_['disease_name'])

> ### Data visualization

In [None]:
# Bar chart of the number of samples per label.
# The column is passed by keyword (x=/data=): seaborn deprecated positional
# data vectors in 0.11, and from 0.12 the first positional slot is `data`.
# Original author's observation: label 3 has the largest number of samples.
sns.countplot(x='label', data=train_, palette="Greens")
plt.show()

In [None]:
# Pie chart of each label's share of the training set, annotated in percent.
# `y` only selects a column for DataFrame pies; a Series pie plots its own
# values, so no `y` is passed here.
label_counts = train_['label'].value_counts()
label_counts.plot.pie(autopct='%1.1f%%')
plt.show()

**Cassava Bacterial Blight (CBB)**

In [None]:
# Training rows whose label is 0, i.e. Cassava Bacterial Blight (CBB).
is_label_0 = train_["label"] == 0
df0 = train_[is_label_0]
df0.head(n=4)

In [None]:
# Show one sample image for label 0, titled with its disease name (bold) and
# file name. df0 already contains only label-0 rows, so it is not re-filtered.
sample_row = df0.iloc[3]
image_name = sample_row['image_id']
disease_name = sample_row['disease_name']
img = mpimg.imread("../input/cassava-leaf-disease-classification/train_images/" + image_name)
imgplot = plt.imshow(img)
# Mathtext ignores plain spaces, so they are escaped as "\ " to keep the
# words of the disease name separated in the bold title line.
plt.title(r"$\bf{" + disease_name.replace(" ", r"\ ") + "}$" + '\n' + image_name)
plt.show()

**Cassava Brown Streak Disease (CBSD)**

In [None]:
# Training rows whose label is 1, i.e. Cassava Brown Streak Disease (CBSD).
is_label_1 = train_["label"] == 1
df1 = train_[is_label_1]
df1.head(n=4)

In [None]:
# Show one sample image for label 1, titled with its disease name (bold) and
# file name. df1 already contains only label-1 rows, so it is not re-filtered.
sample_row1 = df1.iloc[3]
image_name1 = sample_row1['image_id']
disease_name1 = sample_row1['disease_name']
img = mpimg.imread("../input/cassava-leaf-disease-classification/train_images/" + image_name1)
imgplot = plt.imshow(img)
# Mathtext ignores plain spaces, so they are escaped as "\ " to keep the
# words of the disease name separated in the bold title line.
plt.title(r"$\bf{" + disease_name1.replace(" ", r"\ ") + "}$" + '\n' + image_name1)
plt.show()

**Cassava Green Mottle (CGM)**

In [None]:
# Training rows whose label is 2, i.e. Cassava Green Mottle (CGM).
is_label_2 = train_["label"] == 2
df2 = train_[is_label_2]
df2.head(n=3)

In [None]:
# Show one sample image for label 2, titled with its disease name (bold) and
# file name. df2 already contains only label-2 rows, so it is not re-filtered.
sample_row2 = df2.iloc[3]
image_name2 = sample_row2['image_id']
disease_name2 = sample_row2['disease_name']
img = mpimg.imread("../input/cassava-leaf-disease-classification/train_images/" + image_name2)
imgplot = plt.imshow(img)
# Mathtext ignores plain spaces, so they are escaped as "\ " to keep the
# words of the disease name separated in the bold title line.
plt.title(r"$\bf{" + disease_name2.replace(" ", r"\ ") + "}$" + '\n' + image_name2)
plt.show()

**Cassava Mosaic Disease (CMD)**

In [None]:
# Training rows whose label is 3, i.e. Cassava Mosaic Disease (CMD).
is_label_3 = train_["label"] == 3
df3 = train_[is_label_3]
df3.head(n=3)

In [None]:
# Show one sample image for label 3, titled with its disease name (bold) and
# file name. df3 already contains only label-3 rows, so it is not re-filtered.
sample_row3 = df3.iloc[2]
image_name3 = sample_row3['image_id']
disease_name3 = sample_row3['disease_name']
img = mpimg.imread("../input/cassava-leaf-disease-classification/train_images/" + image_name3)
imgplot = plt.imshow(img)
# Mathtext ignores plain spaces, so they are escaped as "\ " to keep the
# words of the disease name separated in the bold title line.
plt.title(r"$\bf{" + disease_name3.replace(" ", r"\ ") + "}$" + '\n' + image_name3)
plt.show()

**Healthy**

In [None]:
# Training rows whose label is 4, i.e. healthy leaves.
is_label_4 = train_["label"] == 4
df4 = train_[is_label_4]
df4.head(n=3)

In [None]:
# Show one sample image for label 4, titled with its disease name (bold) and
# file name. df4 already contains only label-4 rows, so it is not re-filtered.
sample_row4 = df4.iloc[2]
image_name4 = sample_row4['image_id']
disease_name4 = sample_row4['disease_name']
img = mpimg.imread("../input/cassava-leaf-disease-classification/train_images/" + image_name4)
imgplot = plt.imshow(img)
# Mathtext ignores plain spaces, so any space in the name is escaped as "\ "
# to keep the words separated in the bold title line.
plt.title(r"$\bf{" + disease_name4.replace(" ", r"\ ") + "}$" + '\n' + image_name4)
plt.show()

In [None]:
# Array dimensions of the most recently loaded image (the label-4 sample).
# Original author's note: Image Height : 600, Image Width : 800, channel : 3
img.shape

In [None]:
# Total element count plus the largest and smallest pixel value of that image.
# Original author's note: Image size 1440000, Maximum RGB value 255, Minimum RGB value 0
img.size, img.max(),img.min()

> ### ImageDataGenerator

In [None]:
# flow_from_dataframe with class_mode="sparse" requires the label column to
# hold strings, so the labels are converted in place.
train_['label'] = train_['label'].astype('str')
train_path = "../input/cassava-leaf-disease-classification"
input_shape = (512, 512, 3)

# Options that the training and validation flows have in common.
shared_flow_options = dict(
    directory=os.path.join(train_path, "train_images"),
    x_col="image_id",
    y_col="label",
    target_size=(512, 512),
    batch_size=8,
    class_mode="sparse",
)

# Training pipeline: scale pixels by 1/255 and apply random augmentation
# (shear, rotation, shifts, zoom, horizontal flip); 30% is held out.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
    shear_range=0.2,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
train_generator = train_datagen.flow_from_dataframe(
    train_,
    subset="training",
    **shared_flow_options,
)

# Validation pipeline: same 1/255 scaling and same split fraction, but no
# augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)
validation_generator = validation_datagen.flow_from_dataframe(
    train_,
    subset="validation",
    **shared_flow_options,
)

> ### Model Building

In [None]:
# Build the CNN layer by layer: five Conv2D + MaxPool2D stages followed by a
# dropout-regularised dense head ending in a 5-way softmax.
conv_stages = [(512, 5), (256, 4), (128, 3), (64, 2), (32, 2)]  # (filters, kernel_size)
dense_stages = [(512, 0.1), (256, 0.25), (128, 0.25)]  # (units, dropout applied after)

cnn_model = keras.models.Sequential()
for stage_index, (n_filters, k_size) in enumerate(conv_stages):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {"input_shape": input_shape} if stage_index == 0 else {}
    cnn_model.add(keras.layers.Conv2D(filters=n_filters,
                                      padding="same",
                                      kernel_size=k_size,
                                      activation="relu",
                                      **first_layer_kwargs))
    cnn_model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

cnn_model.add(keras.layers.Dropout(0.5))
cnn_model.add(keras.layers.Flatten())
for n_units, drop_rate in dense_stages:
    cnn_model.add(keras.layers.Dense(units=n_units, activation="relu"))
    cnn_model.add(keras.layers.Dropout(drop_rate))
cnn_model.add(keras.layers.Dense(units=5, activation="softmax"))

# Compile the CNN: RMSprop optimiser, sparse categorical cross-entropy loss
# (labels are integer class indices), accuracy as the reported metric.
rmsprop = RMSprop(learning_rate=0.001, rho=0.9, momentum=0.9)
cnn_model.compile(optimizer=rmsprop,
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

# Print the layer-by-layer summary of the model.
cnn_model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint

# Checkpoint callback: writes the model to disk only when the validation loss
# reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath="./Cassava_best_model.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
)

# Train the CNN for 5 epochs, validating on the held-out generator.
cnn_model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=5,
    verbose=1,
    callbacks=[checkpoint],
)

In [None]:
# Save the model as it stands after the last training epoch.
# It is written to its own file: the ModelCheckpoint callback already stores
# the lowest-val_loss model in 'Cassava_best_model.h5', and saving to that
# same path would overwrite the best checkpoint with the final-epoch model.
cnn_model.save('Cassava_final_model.h5')

In [None]:
# Read the sample submission file (it carries the test image ids) and preview it.
test = pd.read_csv(
    filepath_or_buffer="../input/cassava-leaf-disease-classification/sample_submission.csv"
)
test.head(n=5)

In [None]:
from PIL import Image # Prediction

# Predict a label for every image id listed in the submission frame.
preds = []
WORK_DIR="../input/cassava-leaf-disease-classification"
for image_id in test.image_id:
    image_path = os.path.join(WORK_DIR, "test_images", image_id)
    # The context manager closes the image file once its pixels are copied out.
    with Image.open(image_path) as raw_image:
        # Force 3 channels and the 512x512 size the network was trained on.
        image = raw_image.convert("RGB").resize((512, 512))
    # Apply the same 1/255 scaling the training generator used; feeding raw
    # 0-255 pixels would not match what the model saw during training.
    batch = np.expand_dims(np.asarray(image, dtype=np.float32) / 255.0, axis = 0)
    # The index of the highest softmax output is the predicted class.
    preds.append(np.argmax(cnn_model.predict(batch)))

test['label'] = preds
test

In [None]:
# Display one image from the test set.
test_image_path = "../input/cassava-leaf-disease-classification/test_images/2216849948.jpg"
img = mpimg.imread(test_image_path)
imgplot = plt.imshow(img)
plt.show()

In [None]:
# Write the predictions to a CSV file, leaving out the DataFrame index column.
test.to_csv(path_or_buf='test.csv', index=False)