In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
import os
import cv2
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Distribution of classes

In [None]:
# Read the training labels table and preview its first five rows.
df = pd.read_csv(
    '/kaggle/input/cassava-leaf-disease-classification/train.csv'
)
df.head()

### Remove the duplicate images reported in the competition discussions

In [None]:
# Drop the rows whose image ids are on the known-duplicates list.
duplicate_image_ids = ['1562043567.jpg', '3551135685.jpg', '2252529694.jpg']
is_duplicate = df['image_id'].isin(duplicate_image_ids)
df = df[~is_duplicate]

In [None]:
# Load the label-number -> disease-name lookup and show it.
with open('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as mapping_file:
    mapping = json.load(mapping_file)
print(mapping)

In [None]:
# Bar chart of the number of rows per label value.
label_counts = df['label'].value_counts()
label_counts.plot(kind='bar')

In [None]:
# value_counts() orders the classes by frequency, whereas mapping.values() is
# in file order, so pairing them directly puts the wrong name on each slice.
# Sort the counts by class id and look each name up explicitly instead.
class_counts = df['label'].value_counts().sort_index()
# JSON object keys are strings, hence str(); this also works if the label
# column has already been cast to str.
class_names = [mapping[str(class_id)] for class_id in class_counts.index]
plt.pie(class_counts, labels=class_names)
plt.show()

## Visualization

In [None]:
def visualize(img_list, rows=2, cols=4):
    """Display a grid of randomly chosen training images.

    Args:
        img_list: Non-empty sequence of image file names, resolved against the
            competition's ``train_images`` directory.
        rows: Number of grid rows (default 2).
        cols: Number of grid columns (default 4).

    Raises:
        ValueError: If ``img_list`` is empty.

    Images are drawn with replacement, so a file can appear more than once.
    A file that OpenCV cannot read is shown as an empty, titled panel.
    """
    if len(img_list) == 0:
        raise ValueError("img_list must contain at least one image name")

    image_dir = "/kaggle/input/cassava-leaf-disease-classification/train_images/"

    plt.figure(figsize=(20, 10))

    for i in range(rows * cols):
        # subplot() requires integer grid dimensions.
        plt.subplot(rows, cols, i + 1)
        plt.xticks([])
        plt.yticks([])

        r = np.random.randint(len(img_list))
        image_name = str(img_list[r])
        plt.title(image_name)

        img = cv2.imread(image_dir + image_name)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()

In [None]:
#Visualizing Class 0 -> Cassava Bacterial Blight (CBB)
# Compare as int so the filter works whether labels are stored as int or str.
cbb_df = df[df['label'].astype(int) == 0]
# Take the ids from the filtered frame so only class-0 images are sampled.
cbb_img_list = list(cbb_df['image_id'])

visualize(cbb_img_list)

In [None]:
#Visualizing Class 1 -> Cassava Brown Streak Disease (CBSD)
# Compare as int so the filter works whether labels are stored as int or str.
cbsd_df = df[df['label'].astype(int) == 1]
# Take the ids from the filtered frame so only class-1 images are sampled.
cbsd_img_list = list(cbsd_df['image_id'])

visualize(cbsd_img_list)

In [None]:
#Visualizing Class 2 -> Cassava Green Mottle (CGM)
# Compare as int so the filter works whether labels are stored as int or str.
cgm_df = df[df['label'].astype(int) == 2]
# Take the ids from the filtered frame so only class-2 images are sampled.
cgm_img_list = list(cgm_df['image_id'])

visualize(cgm_img_list)

In [None]:
#Visualizing Class 3 -> Cassava Mosaic Disease (CMD)
# Compare as int so the filter works whether labels are stored as int or str.
cmd_df = df[df['label'].astype(int) == 3]
# Take the ids from the filtered frame so only class-3 images are sampled.
cmd_img_list = list(cmd_df['image_id'])

visualize(cmd_img_list)

In [None]:
#Visualizing Class 4 -> Healthy Leaves
# Compare as int so the filter works whether labels are stored as int or str.
healthy_df = df[df['label'].astype(int) == 4]
# Take the ids from the filtered frame so only class-4 images are sampled.
healthy_img_list = list(healthy_df['image_id'])

visualize(healthy_img_list)

## Defining Hyperparameters

In [None]:
# Root folder of the competition data.
BASE_DIR = "/kaggle/input/cassava-leaf-disease-classification/"
# Edge length, in pixels, of the square images fed to the network.
TARGET_SIZE = 299
# Images per batch, and the maximum number of training epochs.
BATCH_SIZE, EPOCHS = 16, 30

## Augmentations along with Preprocessing

In [None]:
#Defining the preprocessing function
def preprocess(image):
    """Apply a 5x5 Gaussian blur to an image.

    The input is cast to ``uint8`` before blurring, and the blurred result is
    returned as a ``float64`` array.
    """
    pixels = np.array(image, dtype=np.uint8)
    # Sigma 0 tells OpenCV to derive the blur strength from the kernel size.
    blurred = cv2.GaussianBlur(pixels, (5, 5), 0)
    return np.asarray(blurred, dtype=np.float64)

In [None]:
#Converting labels to string to use sparse class mode
# assign() builds a new frame instead of mutating the existing one in place.
df = df.assign(label=df['label'].astype('str'))

# Arguments shared by the training and validation flows, kept in one place so
# the two generators cannot drift apart.
flow_kwargs = dict(directory = os.path.join(BASE_DIR, "train_images"),
                   x_col = "image_id",
                   y_col = "label",
                   target_size = (TARGET_SIZE, TARGET_SIZE),
                   batch_size = BATCH_SIZE,
                   class_mode = "sparse")

#Training Augmentation
# featurewise_center / featurewise_std_normalization are intentionally not set:
# they only take effect after datagen.fit(sample_images) has computed the
# dataset statistics. No such call is made here, so enabling them would only
# emit warnings and leave the images unchanged.
datagen = ImageDataGenerator(rescale=1.0/255,
                             rotation_range=30,
                             width_shift_range=0.3,
                             height_shift_range=0.3,
                             shear_range=15.0,
                             zoom_range=0.3,
                             horizontal_flip=True,
                             brightness_range=[0.5, 1.0],
                             validation_split=0.2,
                             fill_mode='nearest',
                             preprocessing_function=preprocess)

train_datagen = datagen.flow_from_dataframe(df, subset = "training", **flow_kwargs)

#Validation
# No augmentation here, only the same rescaling and blur as training.
# validation_split must match the training generator's value so that the
# "training" and "validation" subsets do not overlap.
validation_datagen = ImageDataGenerator(rescale=1.0/255,
                                        validation_split=0.2,
                                        preprocessing_function=preprocess)

valid_datagen = validation_datagen.flow_from_dataframe(df, subset = "validation", **flow_kwargs)

## Defining the Model

In [None]:
#He Uniform Initializer for Dense Layer
import tensorflow as tf
def my_init(shape, dtype=None):
    """Return He-uniform initial values (seed fixed to 1) for a tensor of ``shape``."""
    return tf.keras.initializers.he_uniform(seed=1)(shape, dtype=dtype)

In [None]:
# ImageNet-pretrained Xception backbone without its classification top.
base_model = Xception(weights = 'imagenet', include_top=False, input_shape = (TARGET_SIZE, TARGET_SIZE, 3), pooling=None)

# Classification head: global average pooling -> Dense(128, relu) ->
# batch normalization -> dropout -> 5-way softmax.
base_output = base_model.output
pooling_layer = layers.GlobalAveragePooling2D()(base_output)
Dense1 = layers.Dense(128, activation = "relu", kernel_initializer=my_init)(pooling_layer)
BN1 = layers.BatchNormalization()(Dense1)
dropout = layers.Dropout(0.2)(BN1)
# Keep the output tensor under its own name rather than reusing `model`
# for both the tensor and the Model object.
outputs = layers.Dense(5, activation="softmax")(dropout)

model = models.Model(base_model.input, outputs)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = "sparse_categorical_crossentropy",
              metrics=["acc"])

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# File that receives the checkpointed model.
filepath = "model.h5"

# All three callbacks monitor the validation loss.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

callbacks = [reduce_lr, early_stopping, checkpoint]

In [None]:
# Train for at most EPOCHS epochs; `h` keeps the per-epoch metric history.
h = model.fit(
    train_datagen,
    validation_data=valid_datagen,
    epochs=EPOCHS,
    callbacks=callbacks,
)

## Plotting 

In [None]:
plt.style.use("ggplot")
plt.figure()
# Early stopping makes the number of completed epochs variable, so read it
# from the recorded history; a hard-coded count breaks plt.plot whenever the
# x and y lengths differ.
N = len(h.history["acc"])
plt.plot(np.arange(0, N), h.history["acc"], label="train_acc")
plt.plot(np.arange(0, N), h.history["val_acc"], label="val_acc")
plt.title("Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend(loc="upper left")
plt.show()

In [None]:
plt.style.use("ggplot")
plt.figure()
# Early stopping makes the number of completed epochs variable, so read it
# from the recorded history; a hard-coded count breaks plt.plot whenever the
# x and y lengths differ.
N = len(h.history["loss"])
plt.plot(np.arange(0, N), h.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), h.history["val_loss"], label="val_loss")
# This figure shows the loss curves, so label it accordingly.
plt.title("Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="upper left")
plt.show()