In [None]:
!pip install -U efficientnet

### Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import roc_curve,auc,classification_report,confusion_matrix
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import tensorflow.keras
import json
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout  
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.optimizers import Adam,SGD,RMSprop,Adamax
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from efficientnet.keras import EfficientNetB3
from random import shuffle
from tqdm import tqdm  
import scipy
import skimage
from skimage.transform import resize
import random
import os

In [None]:
# Read the training labels table and preview its first five rows.
train_csv_path = '/kaggle/input/cassava-leaf-disease-classification/train.csv'
df = pd.read_csv(train_csv_path)
df.head()

So, let's map the label numbers to their actual names, i.e., the disease types.

In [None]:
# Parse the JSON file that maps each numeric label to its disease name.
with open('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as f:
    mapping = json.load(f)
print(mapping)

### Target Distribution


In [None]:
# Count how many training rows carry each class label.
df['label'].value_counts()

There is a huge class imbalance: Cassava Mosaic Disease accounts for the majority of the samples, with more than six times as many as the other diseases.

### Visualize Disease Types

In [None]:
def visualize(img_list):
    """Show a 3x3 grid of images drawn at random (with replacement) from `img_list`.

    Parameters
    ----------
    img_list : sequence
        File names (image ids) located in the competition's `train_images` folder.

    Raises
    ------
    ValueError
        If `img_list` is empty.
    FileNotFoundError
        If a selected image cannot be read from disk.
    """
    rows = 3
    cols = 3
    image_dir = "/kaggle/input/cassava-leaf-disease-classification/train_images/"

    if len(img_list) == 0:
        raise ValueError("img_list is empty; there is nothing to visualize")

    plt.figure(figsize=(18, 10))

    for i in range(rows * cols):
        # The grid shape must be integral; use the declared rows x cols layout.
        plt.subplot(rows, cols, i + 1)
        r = np.random.randint(len(img_list))
        img_path = image_dir + str(img_list[r])
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("Could not read image: " + img_path)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.xticks([])
        plt.yticks([])
        plt.title(str(img_list[r]))
        plt.imshow(img)

    plt.tight_layout()
    plt.show()

### Class 0 : Cassava Bacterial Blight (CBB)

In [None]:
# Keep only the rows labelled 0 (CBB) and sample image ids from that subset,
# so the grid shows this class rather than images from the whole training set.
cbb_df = df[df['label'].isin([0])]
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

### Class 1 : Cassava Brown Streak Disease (CBSD)

In [None]:
# Keep only the rows labelled 1 (CBSD) and sample image ids from that subset,
# so the grid shows this class rather than images from the whole training set.
# (The `cbb_*` variable names are kept as they were in this cell.)
cbb_df = df[df['label'].isin([1])]
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

### Class 2 : Cassava Green Mottle (CGM)

In [None]:
# Keep only the rows labelled 2 (CGM) and sample image ids from that subset,
# so the grid shows this class rather than images from the whole training set.
# (The `cbb_*` variable names are kept as they were in this cell.)
cbb_df = df[df['label'].isin([2])]
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

### Class 3 : Cassava Mosaic Disease (CMD)

In [None]:
# Keep only the rows labelled 3 (CMD) and sample image ids from that subset,
# so the grid shows this class rather than images from the whole training set.
# (The `cbb_*` variable names are kept as they were in this cell.)
cbb_df = df[df['label'].isin([3])]
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

### Class 4 : Healthy Leaves

In [None]:
# Keep only the rows labelled 4 (healthy) and sample image ids from that subset,
# so the grid shows this class rather than images from the whole training set.
# (The `cbb_*` variable names are kept as they were in this cell.)
cbb_df = df[df['label'].isin([4])]
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

### Defining Hyperparameters

In [None]:
# Number of images per batch produced by the data generators.
BATCH_SIZE = 16
# Side length in pixels; used for both height and width of the generator
# target size and of the model's input shape.
TARGET_SIZE = 224
# Root folder of the competition data; sub-folders are joined onto it.
BASE_DIR = "/kaggle/input/cassava-leaf-disease-classification/"
# Upper bound on training epochs passed to model.fit.
EPOCHS = 10

### Image Pre-processing

In [None]:
def preprocess(image):
    """Smooth an image with a 5x5 Gaussian blur and return it as float64.

    The input is first cast to uint8 (presumably a rank-3 image array handed
    over by the data generator - TODO confirm), blurred with OpenCV, and then
    converted to a float64 NumPy array.
    """
    # Cast to an 8-bit array before handing the image to OpenCV.
    as_uint8 = np.array(image, dtype=np.uint8)
    # Gaussian blur; sigma 0 lets OpenCV derive it from the kernel size.
    blurred = cv2.GaussianBlur(as_uint8, (5, 5), 0)
    return np.asarray(blurred, dtype=np.float64)

In [None]:
#Converting labels to string to use sparse class mode
df["label"] = df["label"].astype(str)

### Data Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training augmentation.
# Feature-wise centering / std-normalisation are intentionally NOT enabled:
# they require dataset statistics computed by `datagen.fit(...)`, which this
# notebook never calls, so enabling them only produced warnings without changing
# the images - and the validation generator below does not use them either.
datagen = ImageDataGenerator(rescale=1.0/255,
                             rotation_range=30,
                             width_shift_range=0.3,
                             height_shift_range=0.3,
                             shear_range=15.0,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             brightness_range=[0.2, 0.8],
                             validation_split=0.2,
                             fill_mode='nearest',
                             preprocessing_function=preprocess)

# Batches of augmented images from the "training" part of the 80/20 split.
train_datagen = datagen.flow_from_dataframe(df,
                                            directory = os.path.join(BASE_DIR, "train_images"),
                                            subset = "training",
                                            x_col = "image_id",
                                            y_col = "label",
                                            target_size = (TARGET_SIZE, TARGET_SIZE),
                                            batch_size = BATCH_SIZE,
                                            class_mode = "sparse")

# Validation: same rescaling and blur preprocessing, but no random augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0/255,
                                        validation_split=0.2,
                                        preprocessing_function=preprocess)

# Batches from the "validation" part of the same split.
valid_datagen = validation_datagen.flow_from_dataframe(df,
                                            directory = os.path.join(BASE_DIR, "train_images"),
                                            subset = "validation",
                                            x_col = "image_id",
                                            y_col = "label",
                                            target_size = (TARGET_SIZE, TARGET_SIZE),
                                            batch_size = BATCH_SIZE,
                                            class_mode = "sparse")

## Defining Model

In [None]:
#He Uniform Initializer for Dense Layer
import tensorflow as tf
def my_init(shape, dtype=None):
    """Return He-uniform initial weights for `shape`, drawn with a fixed seed of 1."""
    return tf.keras.initializers.he_uniform(seed=1)(shape, dtype=dtype)


In [None]:
# The classification head below is built from `tensorflow.keras` layers/models,
# so the backbone must come from the tf.keras build of the package;
# `efficientnet.keras` targets standalone Keras and must not be mixed with it.
from efficientnet.tfkeras import EfficientNetB3

# ImageNet-pretrained EfficientNetB3 without its classification top.
base_model = EfficientNetB3(weights = 'imagenet', include_top=False, input_shape = (TARGET_SIZE, TARGET_SIZE, 3), pooling=None)

# Head: global average pooling -> Dense(256, relu) -> BatchNorm -> Dropout -> 5-way softmax.
base_output = base_model.output
pooling_layer = layers.GlobalAveragePooling2D()(base_output)
Dense1 = layers.Dense(256, activation = "relu", kernel_initializer=my_init)(pooling_layer)
BN1 = layers.BatchNormalization()(Dense1)
dropout = layers.Dropout(0.2)(BN1)
predictions = layers.Dense(5, activation="softmax")(dropout)

model = models.Model(base_model.input, predictions)

# Sparse loss because the generators yield integer class indices (class_mode="sparse").
model.compile(optimizer = 'adam',
              loss = "sparse_categorical_crossentropy",
              metrics=["acc"])
model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Destination file for the best checkpoint.
filepath = "model.h5"

# Multiply the learning rate by 0.1 after one epoch without val_loss improvement.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.1)
# Stop training after two epochs without val_loss improvement.
early_stopper = EarlyStopping(monitor='val_loss', patience=2)
# Save the model only when val_loss improves.
checkpointer = ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

callbacks = [lr_scheduler, early_stopper, checkpointer]

In [None]:
# Train for at most EPOCHS epochs on the training generator, evaluating on the
# validation generator; the returned History object is kept for later plotting.
history = model.fit(train_datagen, epochs = EPOCHS, validation_data = valid_datagen, callbacks=callbacks)

In [None]:
plt.style.use("ggplot")
plt.figure()
train_acc = history.history["acc"]
val_acc = history.history["val_acc"]
# Early stopping makes the number of completed epochs variable, so derive the
# x-axis from the recorded history instead of assuming a fixed epoch count.
N = len(train_acc)
plt.plot(np.arange(0, N), train_acc, label="train_acc")
plt.plot(np.arange(0, N), val_acc, label="val_acc")
plt.title("Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend(loc="upper left")
plt.show()


In [None]:
plt.style.use("ggplot")
plt.figure()
train_loss = history.history["loss"]
val_loss = history.history["val_loss"]
# Early stopping makes the number of completed epochs variable, so derive the
# x-axis from the recorded history instead of assuming a fixed epoch count.
N = len(train_loss)
plt.plot(np.arange(0, N), train_loss, label="train_loss")
plt.plot(np.arange(0, N), val_loss, label="val_loss")
plt.title("Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="upper left")
plt.show()


### Model Inspection 

In this step we visualize the activations of the model's layers for a single test image.

In [None]:
# Path to one image from the competition's test set; the activation
# visualizations in the following cells are all computed for this image.
test_image = os.path.join(BASE_DIR, 'test_images', '2216849948.jpg')

In [None]:
# `model` is a tf.keras model, so the wrapper must be a tf.keras Model too
# (standalone `keras` objects must not be mixed with `tensorflow.keras` ones).
from tensorflow.keras.models import Model
import numpy as np

# Auxiliary model that returns the output of every layer of `model`.
layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs=model.input, outputs=layer_outputs)

image = cv2.imread(test_image)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError("Could not read test image: " + test_image)
# OpenCV loads BGR; convert to RGB so the channel order matches the images the
# Keras generators fed to the model during training.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_from_array = Image.fromarray(image)
# Resize to the model's input resolution.
size_image = image_from_array.resize((TARGET_SIZE, TARGET_SIZE))

# Batch of one image, scaled to [0, 1] like the generators' `rescale=1/255`.
# NOTE(review): the training pipeline also applies `preprocess` (Gaussian blur)
# before rescaling; it is not applied here - confirm whether that is intended.
im = [np.array(size_image)]
fv = np.array(im)
fv = fv.astype('float32')/255
activations = activation_model.predict(fv)
 
def display_activation(activations, col_size, row_size, act_index):
    """Plot the leading channels of one layer's activation as a grayscale grid.

    Parameters
    ----------
    activations : sequence of arrays
        Per-layer outputs; `activations[act_index]` is indexed as
        `[0, :, :, channel]`, so it must be a 4-D array.
    col_size, row_size : int
        Number of grid columns and rows; up to `row_size * col_size` channels
        are drawn, filling the grid row by row.
    act_index : int
        Index of the layer output to display.
    """
    activation = activations[act_index]
    n_channels = activation.shape[-1]
    # figsize is (width, height): width scales with columns, height with rows.
    # squeeze=False keeps `ax` two-dimensional even for a single row or column.
    fig, ax = plt.subplots(row_size, col_size,
                           figsize=(col_size*4.5, row_size*2.5),
                           squeeze=False)
    activation_index = 0
    for row in range(0, row_size):
        for col in range(0, col_size):
            if activation_index < n_channels:
                ax[row][col].imshow(activation[0, :, :, activation_index], cmap='gray')
            else:
                # Fewer channels than grid cells: leave the remaining cells blank.
                ax[row][col].axis('off')
            activation_index += 1

In [None]:
# Show the first 24 channels (6 rows x 4 columns) of the output at layer index 1.
display_activation(activations, 4, 6, 1)

In [None]:
# Show the first 24 channels (6 rows x 4 columns) of the output at layer index 3.
display_activation(activations, 4, 6, 3)

In [None]:
# Show the first 24 channels (6 rows x 4 columns) of the output at layer index 5.
display_activation(activations, 4, 6, 5)

In [None]:
# Show the first 24 channels (6 rows x 4 columns) of the output at layer index 6.
display_activation(activations, 4, 6, 6)