# Import statements and Data


In [1]:
import os
import cv2
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

from sklearn.model_selection import train_test_split

from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

In [None]:
# Colab-only step: mount Google Drive so the dataset stored under MyDrive
# can be read through the drive/ paths used in the following cells.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# Load the training CSV from the mounted Drive folder and preview its
# first 10 rows.
cassava = pd.read_csv('drive/MyDrive/cassava/train.csv')
cassava.head(10)

The data was too big to be uploaded directly to Colab, so it is read from Google Drive instead. The CSV loaded here lists the image id of each image (the images themselves are stored in a separate folder) together with the label assigned to it by experts.

# EDA and Visualization

In [5]:
# Read the label-id -> disease-name lookup table shipped with the dataset.
with open('drive/MyDrive/cassava/label_num_to_disease_map.json') as label_file:
    mapping = json.load(label_file)

print(mapping)

{'0': 'Cassava Bacterial Blight (CBB)', '1': 'Cassava Brown Streak Disease (CBSD)', '2': 'Cassava Green Mottle (CGM)', '3': 'Cassava Mosaic Disease (CMD)', '4': 'Healthy'}


In [None]:

# Class distribution of the training images.
total_count = len(cassava)

# value_counts() orders by frequency, not by label id, so sort by label id
# and look each name up explicitly; otherwise the slices are paired with the
# wrong disease names.
label_counts = cassava['label'].value_counts().sort_index()

# The JSON keys are strings ('0'..'4'); str() makes the lookup work whether
# the label column currently holds ints or strings.
labels = [mapping[str(label_id)] for label_id in label_counts.index]

plt.pie(label_counts, labels=labels, autopct='%1.1f%%', startangle=90)
plt.text(-1.5, 0, f'Total: {total_count}', ha='center', va='center', fontsize=12, color='blue')
plt.show()

Here we have plotted a pie chart to show the distribution of the images we have. We can clearly see that a single class accounts for more than half of the images; this class imbalance is one of the drawbacks of the data.

In [27]:
def leafVisualizations(img_list, img_dir="drive/MyDrive/cassava/train_images/"):
    """Show a 2x4 grid of randomly chosen leaf images.

    Images are sampled with replacement from ``img_list``, so the same image
    may appear more than once in the grid.

    Parameters
    ----------
    img_list : sequence of str
        Image file names (e.g. the ``image_id`` column) to sample from.
    img_dir : str, optional
        Directory that contains the image files. Defaults to the training
        image folder on the mounted Drive.

    Raises
    ------
    ValueError
        If ``img_list`` is empty.
    FileNotFoundError
        If a sampled image cannot be read from ``img_dir``.
    """
    ROWS = 2
    COLS = 4

    # Fail early with a readable message instead of the opaque error that
    # np.random.randint(0) would raise further down.
    if len(img_list) == 0:
        raise ValueError("img_list is empty; there are no images to display")

    fig, axes = plt.subplots(ROWS, COLS, figsize=(20, 10))

    for ax in axes.flat:
        # Pick a random image from the list
        leaf = np.random.randint(len(img_list))
        img_name = str(img_list[leaf])
        img_path = os.path.join(img_dir, img_name)

        # cv2.imread returns None (it does not raise) when the file is
        # missing or unreadable, so check before converting colours.
        img = cv2.imread(img_path)
        if img is None:
            plt.close(fig)
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(img_name)
        ax.imshow(img)

    fig.tight_layout()
    plt.show()

Here I have written a method that I will call later to pick random pictures of leaves from a list of image ids (which I will create later) and display them in a plot.

In [None]:
# Displaying leaves with Cassava Bacterial Blight (CBB), label 0.
# astype(int) keeps the filter working whether the label column holds ints
# or strings.
cbb_cassava = cassava[cassava['label'].astype(int) == 0]
# Build the list from the filtered frame, not from the full `cassava` frame,
# so only CBB images are sampled.
cbb_img_list = list(cbb_cassava['image_id'])
leafVisualizations(cbb_img_list)

In [None]:
# Displaying leaves with Cassava Brown Streak Disease (CBSD), label 1.
# astype(int) keeps the filter working whether the label column holds ints
# or strings.
cbsd_cassava = cassava[cassava['label'].astype(int) == 1]
# Build the list from the filtered frame, not from the full `cassava` frame,
# so only CBSD images are sampled.
cbsd_img_list = list(cbsd_cassava['image_id'])
leafVisualizations(cbsd_img_list)

In [None]:
# Displaying leaves with Cassava Green Mottle (CGM), label 2.
# astype(int) keeps the filter working whether the label column holds ints
# or strings.
cgm_cassava = cassava[cassava['label'].astype(int) == 2]
# Build the list from the filtered frame, not from the full `cassava` frame,
# so only CGM images are sampled.
cgm_img_list = list(cgm_cassava['image_id'])
leafVisualizations(cgm_img_list)

In [None]:
# Displaying leaves with Cassava Mosaic Disease (CMD), label 3.
# astype(int) keeps the filter working whether the label column holds ints
# or strings.
cmd_cassava = cassava[cassava['label'].astype(int) == 3]
# Build the list from the filtered frame, not from the full `cassava` frame,
# so only CMD images are sampled.
cmd_img_list = list(cmd_cassava['image_id'])
leafVisualizations(cmd_img_list)

In [None]:
# Displaying healthy leaves, label 4.
# astype(int) keeps the filter working whether the label column holds ints
# or strings.
healthy_cassava = cassava[cassava['label'].astype(int) == 4]
# Build the list from the filtered frame, not from the full `cassava` frame,
# so only healthy images are sampled.
healthy_img_list = list(healthy_cassava['image_id'])
leafVisualizations(healthy_img_list)

In [35]:
# Shared training configuration used by the generator and model cells below.
# Maximum number of training epochs passed to model.fit.
EPOCHS = 10
# Number of images per batch produced by the data generators.
BATCH_SIZE = 8
# Images are resized to TARGET_SIZE x TARGET_SIZE; this is also the model's
# input shape (299 is the default input size of Xception).
TARGET_SIZE = 299
# Root folder of the dataset on the mounted Drive; "train_images" is joined
# onto it when the generators are built.
BASE_DIR = "drive/MyDrive/cassava/"


Setting values for some common parameters that are used in a bunch of places. The batch size and number of epochs are set this low because of the computational issues I faced.

In [36]:

def preprocessImages(image):
    """Smooth an image with a 5x5 Gaussian blur.

    The input is first cast to an unsigned 8-bit array, blurred with OpenCV
    (sigma is derived from the kernel size because 0 is passed), and the
    result is returned as a float64 NumPy array.
    """
    pixels = np.array(image, dtype=np.uint8)
    blurred = cv2.GaussianBlur(pixels, (5, 5), 0)
    return np.asarray(blurred, dtype=np.float64)


This is a new method that I will later pass to the image generators as the preprocessing function. Here I am converting the input image to a NumPy array and applying a Gaussian blur to smooth out the image.

In [44]:
# flow_from_dataframe with class_mode="sparse" expects the class column to
# contain strings, so convert the labels (re-running this is harmless).
cassava.label = cassava.label.astype('str')

# Arguments shared by the training and validation flows so both read the
# same files with the same size, batch size and label encoding.
flowArguments = dict(directory = os.path.join(BASE_DIR, "train_images"),
                     x_col = "image_id",
                     y_col = "label",
                     target_size = (TARGET_SIZE, TARGET_SIZE),
                     batch_size = BATCH_SIZE,
                     class_mode = "sparse")

# For training data: rescaling, Gaussian blur and random augmentation.
# featurewise_center / featurewise_std_normalization are intentionally not
# set: they only take effect after dataGenerator.fit(...) has computed the
# dataset statistics, which this notebook never does, so Keras would just
# emit a warning and skip them. Leaving them out also keeps the training
# inputs on the same scale as the validation inputs below.
dataGenerator = ImageDataGenerator(rescale=1.0/255,
                             rotation_range=30,
                             width_shift_range=0.3,
                             height_shift_range=0.3,
                             shear_range=15.0,
                             zoom_range=0.3,
                             horizontal_flip=True,
                             brightness_range=[0.5, 1.0],
                             validation_split=0.2,
                             fill_mode='nearest',
                             preprocessing_function=preprocessImages)


trainDataGenerator = dataGenerator.flow_from_dataframe(cassava,
                                            subset = "training",
                                            **flowArguments)

# For validation data: no augmentation, only the same rescaling and blur.
# validation_split must match the training generator so the two subsets
# do not overlap.
validationDataGenerator = ImageDataGenerator(rescale=1.0/255,
                                        validation_split=0.2,
                                       preprocessing_function=preprocessImages)


validDataGenerator = validationDataGenerator.flow_from_dataframe(cassava,
                                            subset = "validation",
                                            **flowArguments)


Found 17116 validated image filenames belonging to 5 classes.
Found 4278 validated image filenames belonging to 5 classes.


In [46]:
# Kernel initializer for the Dense layer: He uniform with a fixed seed.

def my_init(shape, dtype=None):
    """Return He-uniform initial weights of the given shape and dtype.

    A fresh initializer with seed 1 is created on every call and invoked
    with the given shape and dtype.
    """
    return tf.keras.initializers.he_uniform(seed = 1)(shape, dtype=dtype)

# Xception backbone pre-trained on ImageNet, without its classification head.
baseModel = Xception(weights = 'imagenet', include_top=False, input_shape = (TARGET_SIZE, TARGET_SIZE, 3), pooling=None)

# Global average pooling collapses the spatial feature maps to one vector.
poolingLayer = layers.GlobalAveragePooling2D()(baseModel.output)

# Fully connected layer with 16 units and ReLU activation.
dense = layers.Dense(16, activation = "relu", kernel_initializer=my_init)(poolingLayer)

# Batch normalization
batchNormalization = layers.BatchNormalization()(dense)

# To prevent overfitting, use dropout with a rate of 0.2
Dropout = layers.Dropout(0.2)(batchNormalization)

# 5-way softmax output, one unit per class.
model = layers.Dense(5, activation="softmax")(Dropout)
model = models.Model(baseModel.input, model)

# Adam optimizer; track loss and accuracy. `learning_rate` is the supported
# argument name: `lr` is a deprecated alias that newer Keras releases reject.
# Sparse categorical cross-entropy matches the integer labels produced by
# class_mode="sparse".
model.compile(optimizer = Adam(learning_rate = 0.001),loss = "sparse_categorical_crossentropy", metrics=["acc"])


# Shrink the learning rate when val_loss stalls, stop after 3 epochs without
# improvement, and keep only the best model (by val_loss) on disk.
callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2),
             EarlyStopping(monitor='val_loss', patience=3),
             ModelCheckpoint(filepath="model.h5", monitor='val_loss', save_best_only=True)]


modelCassavaHistory = model.fit(trainDataGenerator, epochs = EPOCHS, validation_data = validDataGenerator, callbacks=callbacks)



Epoch 1/10

KeyboardInterrupt: ignored

In [None]:
# Plot the training curves recorded by model.fit.
plt.style.use("ggplot")
plt.figure()

history = modelCassavaHistory.history
# Use the number of epochs that actually ran: early stopping (or an
# interrupted run) can end training before EPOCHS, and a hard-coded length
# makes plt.plot fail with an x/y size mismatch.
N = len(history["acc"])
epochNumbers = np.arange(0, N)

# The title and y-axis announce both loss and accuracy, so draw all four
# curves.
plt.plot(epochNumbers, history["loss"], label="trainLoss")
plt.plot(epochNumbers, history["val_loss"], label="valLoss")
plt.plot(epochNumbers, history["acc"], label="trainAcc")
plt.plot(epochNumbers, history["val_acc"], label="valAcc")
plt.title("Accuracy and Loss for model")
plt.xlabel("EPOCH NUMBER")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper left")
plt.show()