
<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/13836/logos/header.png?t=2020-10-01-17-22-54">
<center>
<h1 style="color:red;font-weight:700;font-size:3em">Cassava Leaf Disease Classification</h1>
<h3 style="color:red;font-weight:700;font-size:1.5em">Identify the type of disease present on a Cassava Leaf image</h3>
</center>

<center>
<div class="list-group" id="list-tab" role="tablist">
  <h3 class="list-group-item list-group-item-action active" data-toggle="list" role="tab" aria-controls="home">Table of Contents</h3>
  <a class="list-group-item list-group-item-action" data-toggle="list" href="#explore" role="tab" aria-controls="profile" target="_self">Explore The Data<span class="badge badge-primary badge-pill">1</span></a>
    <a class="list-group-item list-group-item-action" data-toggle="list" href="#data-prep" role="tab" aria-controls="profile" target="_self">Data-Prep<span class="badge badge-primary badge-pill">2</span></a>
  <a class="list-group-item list-group-item-action" data-toggle="list" href="#Model" role="tab" aria-controls="messages" target="_self">Model-Building<span class="badge badge-primary badge-pill">3</span></a>
</div>
</center>

In [None]:


import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
import sys
import cv2
import json
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
from sklearn.metrics import accuracy_score
from PIL import Image

def seedAll(seed):
    """Seed every random number source used in this notebook.

    Sets PYTHONHASHSEED in the environment and seeds Python's `random`
    module, NumPy's global generator and TensorFlow's global generator.

    Args:
        seed: Value handed unchanged to each seeding function (and stored
            as a string in PYTHONHASHSEED).
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)

# Fixed seed so sampling, fold-independent shuffles and weight init are repeatable.
SEED = 42
seedAll(SEED)

# Root folder of the competition data; every dataset path below is built from it.
PATH = "../input/cassava-leaf-disease-classification/"

# <a id="explore">Explore the Data</a>

In [None]:
# Report how many files sit in each image folder under PATH.
for split in ("train", "test"):
    n_images = len(os.listdir(os.path.join(PATH, f"{split}_images/")))
    print(f"Number of Images in the {split} set {n_images}")

### Woahhh! Strange!! Only one image in the test set?
Not really! This is a code competition where submission has to be made through a notebook. The test set is not visible to the participant and is only visible to the notebook when the submission is being evaluated. All operations on the test set must be applied to the images in the test_images folder. During evaluation this folder will be replaced by the actual test set.

In [None]:
# Read one training image to inspect its dimensions.
sample_path = os.path.join(PATH, "train_images", "1000015157.jpg")
img = cv2.imread(sample_path)
# cv2.imread signals a missing/unreadable file by returning None instead of raising,
# so fail here with a clear message rather than with an AttributeError on `.shape`.
if img is None:
    raise FileNotFoundError(f"Could not read image: {sample_path}")
print(f"Shape of images in train_set: {img.shape}")

## Load the data

In [None]:
# Parse the label-number -> disease-name lookup that ships with the dataset.
mapping_path = os.path.join(PATH, 'label_num_to_disease_map.json')
with open(mapping_path) as mapping_file:
    mapping = json.load(mapping_file)

print(mapping)

In [None]:
# Training annotations; later cells read the `image_id` and `label` columns.
train_csv_path = os.path.join(PATH, 'train.csv')
train_df = pd.read_csv(train_csv_path)
train_df.head()

Let's create a column with the name of the disease for each image

In [None]:
# Labels are cast to strings before the lookup so they can be matched against
# the keys of `mapping`, which was parsed from JSON.
label_as_text = train_df["label"].astype(str)
train_df["disease_name"] = label_as_text.map(mapping)
train_df.head()

## A look at the images

In [None]:
def viz_batch(length=15,folder="train_images/"):
    """Show a random sample of images in a 3-column grid, titled by disease name.

    Rows are drawn at random from the notebook-level `train_df`; each image is
    read from `PATH/folder/<image_id>` and converted from OpenCV's BGR order to
    RGB before being displayed.

    Args:
        length: Number of images to display (must be at least 1).
        folder: Sub-directory of `PATH` that holds the image files.

    Raises:
        ValueError: If `length` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")

    n_cols = 3
    # Round up so a `length` that is not a multiple of 3 still gets enough grid
    # cells (int(length/3) rows was too few and made plt.subplot fail).
    n_rows = (length + n_cols - 1) // n_cols

    plt.figure(figsize=(5*n_cols,5*n_rows))
    batch = train_df.sample(length)
    for position, (image_id, label) in enumerate(zip(batch["image_id"], batch["disease_name"]), start=1):
        image = cv2.cvtColor(cv2.imread(os.path.join(PATH,folder,image_id)),cv2.COLOR_BGR2RGB)

        plt.subplot(n_rows,n_cols,position)
        plt.imshow(image)
        plt.title(label)
    plt.tight_layout()

In [None]:
# Display a 3x3 grid of randomly sampled training images.
viz_batch(length=9)

# <a id="data-prep">Data-Prep</a>

#### A look at the class distribution in the train set reveals a huge class imbalance. We will therefore compute a weight for each class, relative to the image count of class 3.

In [None]:
# Bar chart of how many training images carry each label.
plt.figure()
# Pass the Series by keyword: newer seaborn releases no longer interpret a
# positional Series as the x variable.
sns.countplot(x=train_df.label)
plt.title("Training images per class");

In [None]:
#compute weights
# Each class weight is (number of label-3 images) / (number of images of that class).
# Labels are cast to int first so the cell keeps working when it is re-run after
# the `label` column has been converted to strings further down the notebook
# (comparing strings to ints would make every count 0 and divide by zero).
label_values = train_df.label.astype(int)
ref = int((label_values == 3).sum())
weights = {i: ref / int((label_values == i).sum()) for i in range(5)}
print(weights)

# <a id="Model"> Model Building </a>

In [None]:
def build_model():
    """Build and compile a small custom CNN with two strided-convolution side branches.

    The network takes 128x128x3 inputs and has two stages. In each stage a
    strided 4x4 convolution branches off after the first pooling step and is
    concatenated (channel axis) with the main path after the second pooling
    step. Global average pooling, a 50-unit ReLU layer and a 5-way softmax
    form the classifier head.

    Returns:
        A `tf.keras.Model` compiled with categorical cross-entropy loss, the
        Adam optimizer and an accuracy metric.
    """
    def bn_relu_pool(tensor):
        # Batch-norm -> ReLU -> 2x2 max-pool, the tail shared by every conv step.
        tensor = L.BatchNormalization()(tensor)
        tensor = L.Activation("relu")(tensor)
        return L.MaxPooling2D((2,2))(tensor)

    inp = L.Input(shape=(128,128,3))

    # ---- stage 1 (32 filters) ----
    x = L.Conv2D(32,(3,3),name="block1_conv2")(inp)
    x = L.Conv2D(32,(1,1),activation="relu",name="block1_conv1")(x)
    x = bn_relu_pool(x)

    # Side branch taken after the first pooling step of the stage.
    skip1 = L.Conv2D(32,(4,4),strides=2,activation="relu",name='first_skip')(x)

    x = L.Conv2D(32,(3,3),name="block1_conv3")(x)
    x = bn_relu_pool(x)

    x = L.Concatenate(axis=3)([x,skip1])

    # ---- stage 2 (64 filters) ----
    y = L.Conv2D(64,(3,3),name="block2_conv2")(x)
    y = bn_relu_pool(y)

    skip2 = L.Conv2D(64,(4,4),strides=2,activation="relu",name='second_skip')(y)

    y = L.Conv2D(64,(3,3),name="block2_conv3")(y)
    y = bn_relu_pool(y)

    y = L.Concatenate(axis=3)([y,skip2])

    # ---- classifier head ----
    head = L.GlobalAveragePooling2D()(y)
    head = L.Dense(50,activation="relu",kernel_initializer="he_normal")(head)
    head = L.Dense(5,activation="softmax",kernel_initializer="he_normal")(head)

    model = tf.keras.Model(inputs=inp,outputs=head)
    model.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["accuracy"])

    return model

In [None]:
def build_model2():
    """Build and compile a ResNet50-based classifier for 128x128x3 inputs.

    The ResNet50 backbone (no top) is initialised from a local weights file.
    Every BatchNormalization layer and the last 50 backbone layers stay
    trainable; all other backbone layers are frozen. The backbone output is
    flattened and fed to a 50-unit ReLU layer followed by a 5-way softmax.

    Returns:
        A `tf.keras.Model` compiled with categorical cross-entropy loss, the
        Adam optimizer and an accuracy metric.
    """
    inp = L.Input(shape=(128,128,3))
    backbone = tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights='../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
        input_tensor=inp,
    )

    # Start from a fully trainable backbone ...
    for layer in backbone.layers:
        layer.trainable = True

    # ... then freeze everything outside the last 50 layers, except batch-norm layers.
    for layer in backbone.layers[:-50]:
        if isinstance(layer, L.BatchNormalization):
            continue
        layer.trainable = False

    head = L.Flatten()(backbone.output)
    head = L.Dense(50,activation="relu",kernel_initializer="he_normal")(head)
    head = L.Dense(5,activation="softmax",kernel_initializer="he_normal")(head)

    model_fin = tf.keras.Model(inputs=inp,outputs=head)
    model_fin.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["accuracy"])
    return model_fin

In [None]:
# Instantiate the ResNet50-based model (build_model2) for inspection below.
model = build_model2()

In [None]:
# Draw the layer graph of `model`.
tf.keras.utils.plot_model(model)

# <a id="training">Model-Training</a>

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from sklearn.model_selection import StratifiedKFold
# Assign every row to one of 5 label-stratified folds, stored in a `kfold` column.
kf = StratifiedKFold(n_splits=5)

# The loop variable is `fold` rather than `id` so the builtin id() is not
# shadowed for the rest of the notebook.
for fold, (_, holdout_positions) in enumerate(kf.split(train_df["image_id"], y=train_df["label"])):
    # split() yields positional indices; translate them to index labels before
    # using the label-based .loc accessor.
    train_df.loc[train_df.index[holdout_positions], 'kfold'] = fold

In [None]:
# Convert labels to strings before they are used as `y_col` by the data generators.
# NOTE(review): presumably needed because flow_from_dataframe expects string
# class labels in its default categorical mode — confirm against the Keras docs.
train_df["label"] = train_df["label"].astype(str)

In [None]:
# Submission template; its `image_id` column lists the test files to predict.
submission_template_path = os.path.join(PATH, 'sample_submission.csv')
ss = pd.read_csv(submission_template_path)
ss.head()

In [None]:
# Multiply the learning rate by 0.1 when val_loss stops improving
# (patience of 2 epochs, improvements below 0.001 are ignored).
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    min_delta=0.001,
    verbose=1,
)

# Keep only the weights of the epoch with the lowest val_loss on disk.
model_save = tf.keras.callbacks.ModelCheckpoint(
    './best_baseline_model.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
histories = []

# Fold 0 is held out for validation; the remaining folds are used for training.
X_train = train_df[train_df.kfold!=0]
val_df = train_df[train_df.kfold==0]

# Training images get ResNet50 preprocessing plus random augmentation.
train_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.resnet50.preprocess_input,
                                 zoom_range = 0.15,
                                 cval = 0.,
                                 horizontal_flip = True,
                                 vertical_flip = True,
                                 shear_range = 0.15,
                                 height_shift_range = 0.15,
                                 width_shift_range = 0.15)

train_set = train_generator.flow_from_dataframe(X_train,
                         directory = '../input/cassava-leaf-disease-classification/train_images/',
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (128, 128),
                         batch_size = 64)

# Validation images get the preprocessing only. Random augmentation here would
# make val_loss change from epoch to epoch for reasons unrelated to the model,
# and val_loss is what drives both ReduceLROnPlateau and the checkpoint.
val_generator = ImageDataGenerator(preprocessing_function = tf.keras.applications.resnet50.preprocess_input)

val_set = val_generator.flow_from_dataframe(val_df,
                         directory = '../input/cassava-leaf-disease-classification/train_images/',
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (128, 128),
                         batch_size = 64,
                         shuffle = False)

# Drop any previously built graph/state before creating a fresh model.
K.clear_session()
model = build_model2()

# NOTE(review): the class `weights` computed in the Data-Prep section are not
# passed as `class_weight` here — confirm whether that omission is intentional.
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_set,
                    epochs=15,
                    steps_per_epoch=int(len(X_train)/64),
                    validation_data = val_set,
                    callbacks=[model_save,reduce_lr]
                   )

# The checkpoint callback stored the weights with the lowest val_loss; restore
# them so the evaluation and submission cells use the best epoch, not the last.
model.load_weights('./best_baseline_model.h5')

histories.append(history)

In [None]:
# Score the held-out fold. Images are predicted in batches: one model.predict
# call per image is dominated by per-call overhead.
val_pred = []
val_image_ids = val_df.image_id.tolist()
val_image_dir = '../input/cassava-leaf-disease-classification/train_images/'
for start in range(0, len(val_image_ids), 64):
    batch_images = []
    for image_id in val_image_ids[start:start + 64]:
        # The context manager closes the file handle; convert("RGB") guarantees
        # three channels even for a grayscale or CMYK file.
        with Image.open(val_image_dir + image_id) as pil_image:
            batch_images.append(np.asarray(pil_image.convert("RGB").resize((128,128)), dtype=np.float32))
    batch = tf.keras.applications.resnet50.preprocess_input(np.stack(batch_images))
    val_pred.extend(np.argmax(model.predict(batch), axis=1))
print('validation score: {}'.format(accuracy_score(val_df.label.astype(int),val_pred)))

In [None]:
# Predict a label for every image listed in the submission template. Images are
# predicted in batches: one model.predict call per image is dominated by
# per-call overhead, which matters when the hidden test set replaces test_images.
pred = []
# Kept from the original cell; not read anywhere in the visible notebook.
preds = np.zeros((len(ss),1,5))
test_image_ids = ss.image_id.tolist()
test_image_dir = '../input/cassava-leaf-disease-classification/test_images/'
for start in range(0, len(test_image_ids), 64):
    batch_images = []
    for image_id in test_image_ids[start:start + 64]:
        # The context manager closes the file handle; convert("RGB") guarantees
        # three channels even for a grayscale or CMYK file.
        with Image.open(test_image_dir + image_id) as pil_image:
            batch_images.append(np.asarray(pil_image.convert("RGB").resize((128,128)), dtype=np.float32))
    batch = tf.keras.applications.resnet50.preprocess_input(np.stack(batch_images))
    pred.extend(np.argmax(model.predict(batch), axis=1))

ss["label"] = pred

In [None]:
# Write the predictions to submission.csv in the working directory, without the index column.
ss.to_csv('submission.csv',index=False)

In [None]:
# Training curves: loss on the left, accuracy on the right
# (green = validation, red = train).
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))
for run, history in enumerate(histories):
    # Label only the first run so the legend has one entry per colour.
    val_label = "validation" if run == 0 else None
    train_label = "train" if run == 0 else None
    loss_ax.plot(history.history["val_loss"],color="green",label=val_label)
    loss_ax.plot(history.history["loss"],color="red",label=train_label)
    acc_ax.plot(history.history["val_accuracy"],color="green",label=val_label)
    acc_ax.plot(history.history["accuracy"],color="red",label=train_label)

loss_ax.set(title="Loss", xlabel="epoch", ylabel="categorical cross-entropy")
acc_ax.set(title="Accuracy", xlabel="epoch", ylabel="accuracy")
if histories:
    loss_ax.legend()
    acc_ax.legend()
fig.tight_layout();