In [None]:
!nvidia-smi

# 1: IMPORTING LIBRARIES AND DATA

In [None]:
import os
import pandas as pd
import numpy as np
import json

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import cv2

import tensorflow as tf
import tensorflow.keras.backend as k
from keras_preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras import layers, optimizers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split

In [None]:
# Root directory of the competition data; every other path is built from it.
base_path = "../input/cassava-leaf-disease-classification/"

# The JSON file stores the class ids as string keys; convert them to int so
# the mapping can be applied to the integer `label` column.
label_map_file = os.path.join(base_path, 'label_num_to_disease_map.json')
with open(label_map_file, 'r') as map_handle:
    raw_class_map = json.load(map_handle)

class_map = {int(class_id): disease for class_id, disease in raw_class_map.items()}
print(class_map)

So out of the **5 classes**, **4** are diseases and **1** is **Healthy**.

In [None]:
# Count the image files shipped in the train and test folders.
n_train_images = len(os.listdir(os.path.join(base_path, 'train_images')))
n_test_images = len(os.listdir(os.path.join(base_path, 'test_images')))
print("Number of Images in Train Set {} & in Test Set {}".format(n_train_images, n_test_images))

In [None]:
# Read the training labels and attach the human-readable disease name.
train_csv_path = os.path.join(base_path, 'train.csv')
df = pd.read_csv(train_csv_path).assign(
    class_name=lambda frame: frame['label'].map(class_map)
)
df

# 2: DATA VISUALIZATION

In [None]:
# Number of training images per class.
df['class_name'].value_counts()

In [None]:
# Bar chart of the number of training images per class.
fig = px.histogram(df, x="class_name", color="class_name")

# `titlefont` is a deprecated alias; the axis-title font is set through the
# nested `title.font` attribute instead.
fig.update_layout(
    yaxis=dict(title=dict(text='Count', font=dict(size=20))),
    xaxis=dict(title=dict(text='Class Label Name (Healthy or Disease)', font=dict(size=20))),
    title_text='Class Label Name Count Plot'
)
fig.show()

In [None]:
def plot_batch(data=df):
    """Plot a 3x3 grid of randomly chosen training images with their labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with `image_id`, `label` and `class_name` columns. Defaults to
        the `df` object that exists when this cell is executed.
    """
    plt.figure(figsize=(16,12))
    for i in range(9):
        # Pick a random row by position so the frame does not need a 0..n-1
        # index (filtered frames keep their original index labels).
        row_idx = np.random.randint(0, len(data))
        sample = data.iloc[row_idx]
        image = cv2.imread(os.path.join(base_path, 'train_images/', sample['image_id']))
        # OpenCV decodes to BGR while matplotlib expects RGB; without this
        # conversion the images are shown with red and blue swapped.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.subplot(3,3,i+1)
        plt.imshow(image)
        plt.axis("off")
        plt.title("Class Label:{}\nClass Name:{}".format(sample['label'], sample['class_name']))

    plt.tight_layout()
    plt.show()

In [None]:
# Preview nine random images drawn from the whole training frame (default `data=df`).
plot_batch()

**now let's have a look at batch images of each class one by one**

<h2 align=center> Label:0 - Cassava Bacterial Blight (CBB)</h2>

**Main characteristics to leverage: angular spots, brown spots with yellow borders, yellow leaves, leaves wilting**

<img style="height:300px" src="https://www.googleapis.com/download/storage/v1/b/kaggle-forum-message-attachments/o/inbox%2F1865449%2Fbe9cdd94efb9b1660066ad10b55c8626%2Fbact_bright.jpeg?generation=1605827469211692&alt=media">

All the images and characteristics are taken from this [discussion](https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/198143).

In [None]:
# Keep only the label-0 rows and renumber them 0..n-1 for `plot_batch`.
# Chaining avoids an in-place reset on a filtered slice, and drop=True
# discards the old index instead of keeping it as an extra column.
temp_df = df.loc[df['label']==0].reset_index(drop=True)
plot_batch(temp_df)

- black leaf spots and blights, angular leaf spots, and premature drying and shedding of leaves due to the wilting of young leaves and severe attack.

- At first, angular, water-soaked spots occur on the leaves which are restricted by the veins; the spots are more clearly seen on the lower leaf surface. The spots expand rapidly, join together, especially along the margins of the leaves, and turn brown with yellow borders

- Droplets of a creamy-white ooze occur at the centre of the spots; later, they turn yellow.

<h2 align=center> Label:1 - Cassava Brown Streak Disease (CBSD) </h2>

**Main characteristics to leverage: yellow spots**
<img style="height:300px" src="https://www.googleapis.com/download/storage/v1/b/kaggle-forum-message-attachments/o/inbox%2F1865449%2Ffeba3dafc914d04517659650d137b77a%2Fbrown_st.jpeg?generation=1605830407530983&alt=media">

In [None]:
# Keep only the label-1 rows and renumber them 0..n-1 for `plot_batch`.
# Chaining avoids an in-place reset on a filtered slice, and drop=True
# discards the old index instead of keeping it as an extra column.
temp_df = df.loc[df['label']==1].reset_index(drop=True)
plot_batch(temp_df)

- CBSD leaf symptoms consist of a characteristic yellow or necrotic vein banding which may enlarge and coalesce to form comparatively large yellow patches.

- Tuberous root symptoms consist of dark-brown necrotic areas within the tuber and reduction in root size

<h2 align=center> Label:2 Cassava Green Mottle (CGM) </h2>

**Main characteristics to leverage: yellow patterns, irregular patches of yellow and green, leaf margins distortion, stunted**

<img style="height:300px" src="https://www.googleapis.com/download/storage/v1/b/kaggle-forum-message-attachments/o/inbox%2F1865449%2F4f2975866feb2a1d4ef4111c2d57db29%2Fgreen_mottle.jpeg?generation=1605829101431013&alt=media">

In [None]:
# Keep only the label-2 rows and renumber them 0..n-1 for `plot_batch`.
# Chaining avoids an in-place reset on a filtered slice, and drop=True
# discards the old index instead of keeping it as an extra column.
temp_df = df.loc[df['label']==2].reset_index(drop=True)
plot_batch(temp_df)

- This disease causes white spotting of leaves, which increase from the initial small spots to cover the entire leaf causing loss of chlorophyll. Young leaves are puckered with faint to distinct yellow spots

- Leaves with this disease show mottled symptoms which can be confused with symptoms of cassava mosaic disease (CMD). Severely damaged leaves shrink, dry out and fall off, which can cause a characteristic candle-stick appearance

<h2 align=center> Label:3 - Cassava Mosaic Disease (CMD) </h2>

**Main characteristics to leverage: severe shape distortion, mosaic patterns**

<img style="height:300px" src="https://www.googleapis.com/download/storage/v1/b/kaggle-forum-message-attachments/o/inbox%2F1865449%2F36990f77ded6667e5c30d19b5405d4d3%2Fmosaic_disease.jpeg?generation=1605829705010773&alt=media">

In [None]:
# Keep only the label-3 rows and renumber them 0..n-1 for `plot_batch`.
# Chaining avoids an in-place reset on a filtered slice, and drop=True
# discards the old index instead of keeping it as an extra column.
temp_df = df.loc[df['label']==3].reset_index(drop=True)
plot_batch(temp_df)

- CMD produces a variety of foliar symptoms that include mosaic, mottling, misshapen and twisted leaflets, and an overall reduction in size of leaves and plants

- Leaves affected by this disease have patches of normal green color mixed with different proportions of yellow and white depending on the severity

<h2 align=center> Label:4 - Healthy </h2>

In [None]:
# Keep only the label-4 rows and renumber them 0..n-1 for `plot_batch`.
# Chaining avoids an in-place reset on a filtered slice, and drop=True
# discards the old index instead of keeping it as an extra column.
temp_df = df.loc[df['label']==4].reset_index(drop=True)
plot_batch(temp_df)

A sample image from the test set:

In [None]:
# OpenCV loads images as BGR; convert to RGB so matplotlib shows the true colours.
test_image = cv2.imread("../input/cassava-leaf-disease-classification/test_images/2216849948.jpg")
plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB));
plt.axis('off');

# 3: CREATING TRAIN & VALIDATION DATASET

In [None]:
# Full file path of every training image, built from its image_id.
df['path'] = base_path + 'train_images/' + df['image_id']

# Labels are kept as integers; the string cast that flow_from_dataframe's
# categorical mode would need is left disabled.
#df['label'] = df['label'].apply(lambda x: str(x))

df.head()

In [None]:
seed = 42
# Stratify on the label so the 10% validation split keeps the same class
# proportions as the training split; with unevenly sized classes a plain
# random split can under-represent the smaller ones.
X_train, X_val = train_test_split(
    df, test_size=0.1, random_state=seed, shuffle=True, stratify=df['label']
)
print("Training Set: {} \t Validation Set: {}".format(len(X_train), len(X_val)))

In [None]:
# The full frame and the per-class preview frame are no longer needed; drop
# both names to free up space.
del df, temp_df

In [None]:
# Build (path, label) datasets on the CPU from the two split frames.
with tf.device('/cpu:0'):
    train_paths, train_labels = X_train['path'].values, X_train['label'].values
    valid_paths, valid_labels = X_val['path'].values, X_val['label'].values
    train_data = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
    valid_data = tf.data.Dataset.from_tensor_slices((valid_paths, valid_labels))

In [None]:
# Sanity check: print the first three (path, label) pairs of the training set.
for sample_path, sample_label in train_data.take(3):
    print ('Path: {}, Label: {}'.format(sample_path, sample_label))

In [None]:
def process_data_train(image_path, label):
    """Load one training image and apply random augmentation.

    Reads the JPEG at `image_path`, decodes it to 3 channels, then applies a
    random brightness shift, a random horizontal flip, a random vertical flip
    and a random `row` x `col` crop. `label` is passed through unchanged.

    Returns the `(image, label)` pair.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    brightened = tf.image.random_brightness(decoded, 0.3)
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(brightened, seed=None)
    )
    cropped = tf.image.random_crop(flipped, size=[row,col, 3])
    return cropped, label

def process_data_valid(image_path, label):
    """Load one validation image without any random augmentation.

    Reads the JPEG at `image_path`, decodes it to 3 channels and resizes it to
    `row` x `col`. `label` is passed through unchanged.

    Returns the `(image, label)` pair.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)
    return tf.image.resize(decoded, [row,col]), label

In [None]:
# Target image height/width read by the preprocessing functions and the model.
row = col = 380

# `num_parallel_calls` lets tf.data load/process multiple images in parallel.
autotune = tf.data.experimental.AUTOTUNE
with tf.device('/cpu:0'):
    train_data = train_data.map(process_data_train, num_parallel_calls=autotune)
    valid_data = valid_data.map(process_data_valid, num_parallel_calls=autotune)

In [None]:
def configure_for_performance(ds, batch_size = 32, cache_path = '/kaggle/dump.tfcache'):
    """Shuffle, batch and prefetch a dataset, optionally caching it first.

    Parameters
    ----------
    ds : tf.data.Dataset
        Dataset of individual (unbatched) elements.
    batch_size : int
        Number of elements per batch.
    cache_path : str or None
        File used by `Dataset.cache`. Pass `None` to skip caching. Caching
        stores the elements exactly as produced upstream, so a dataset whose
        `map` applies random augmentation should not be cached: every epoch
        would replay the augmentations drawn during the first pass.

    Returns
    -------
    tf.data.Dataset
        The shuffled, batched and prefetched dataset.
    """
    if cache_path is not None:
        ds = ds.cache(cache_path)

    ds = ds.shuffle(buffer_size=1024)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return ds

bs = 16

with tf.device('/cpu:0'):
    # train_data already carries random brightness/flip/crop, so it is left
    # uncached to draw fresh augmentations on every epoch.
    train_data_batch = configure_for_performance(train_data, bs, cache_path=None)
    valid_data_batch = valid_data.batch(bs)

In [None]:
#plotting 1st batch

def plot_df_batch():
    """Show up to eight images of the next training batch with their labels."""
    plt.figure(figsize=(10, 10))
    image_batch, label_batch = next(iter(train_data_batch)) #loading batch
    # Never index past the end of the batch (it may hold fewer than 8 images).
    n_show = min(8, int(image_batch.shape[0]))
    for i in range(n_show):
        plt.subplot(4, 4, i + 1)
        plt.imshow(image_batch[i].numpy().astype("uint8"))
        label = label_batch[i].numpy()
        plt.title("Class Label :" + str(label))
        plt.axis("off")

plot_df_batch()

In [None]:
# Augmentation expressed as Keras layers: random rotation, then random contrast.
_preprocessing = tf.keras.layers.experimental.preprocessing
data_augmentation = tf.keras.Sequential([
    _preprocessing.RandomRotation(0.2, interpolation='nearest'),
    _preprocessing.RandomContrast(0.2),
])

def plot_arg():
    """Show eight images of the next training batch after `data_augmentation`."""
    plt.figure(figsize=(10, 10))
    image_batch, label_batch = next(iter(train_data_batch)) #loading batch
    # Augment the batch once instead of re-running the layers on the whole
    # batch for every subplot; training=True explicitly enables the random ops.
    augmented_images = data_augmentation(image_batch, training=True)
    for i in range(8):
        plt.subplot(4, 4, i + 1)
        plt.imshow(augmented_images[i].numpy().astype("uint8"))
        label = label_batch[i].numpy()
        plt.title(label)
        plt.axis("off")

plot_arg()

In [None]:
# row, col = 512, 512
# bs = 8 # batch size

# datagen = ImageDataGenerator(
#     rescale=1./255.,
#     validation_split=0.1,
#     zoom_range=0.2,
#     rotation_range=0.15,
#     horizontal_flip=True,
#     vertical_flip=True,
#     fill_mode='nearest',
#     shear_range=0.2,
#     height_shift_range=0.1,
#     width_shift_range=0.1
# )

# train_generator = datagen.flow_from_dataframe(
#     df,
#     x_col='path',
#     y_col='label',
#     class_mode='categorical',
#     batch_size=bs,
#     shuffle=True,
#     target_size=(row,col),
#     subset='training'
# )
# val_generator = datagen.flow_from_dataframe(
#     df,
#     x_col='path',
#     y_col='label',
#     class_mode='categorical',
#     batch_size=bs,
#     shuffle=True,
#     target_size=(row,col),
#     subset='validation'
# )

# 4: CALLING PRE-TRAINED MODEL

In [None]:
def build_model(opt=None, loss='sparse_categorical_crossentropy', metrics=None):
    """Build and compile the EfficientNetB4-based 5-class classifier.

    Parameters
    ----------
    opt : tf.keras.optimizers.Optimizer or None
        Optimizer to compile with. `None` creates a fresh Adam optimizer with
        learning rate 1e-4 on every call, so two models never share one
        optimizer instance.
    loss : str or callable
        Loss passed to `model.compile`.
    metrics : list or None
        Metrics passed to `model.compile`; `None` means
        `['sparse_categorical_accuracy']`.

    Returns
    -------
    tf.keras.Model
        The compiled model (its summary is printed as a side effect).
    """
    if opt is None:
        opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
    if metrics is None:
        metrics = ['sparse_categorical_accuracy']

    base = tf.keras.applications.EfficientNetB4(
        weights='../input/tfkerasefficientnetimagenetnotop/efficientnetb4_notop.h5',
        include_top=False,
        input_shape=(row,col,3),
        drop_connect_rate=0.4
    )
    base.trainable = True

    inputs = tf.keras.layers.Input(shape=(row,col, 3))
    X = data_augmentation(inputs)
    # The backbone must consume the augmented tensor, not the raw `inputs`,
    # otherwise the augmentation layers are disconnected from the graph.
    # No Rescaling(1/255) here: the tf.keras EfficientNet models normalise
    # their input internally and expect pixel values in the [0, 255] range.
    X = base(X)
    X = GlobalAveragePooling2D()(X)
    X = Dropout(0.3)(X)
    X = Dense(256, activation='relu', kernel_initializer='he_normal')(X)
    X = BatchNormalization()(X)
    X = Dropout(0.3)(X)
    output = Dense(5, kernel_initializer='he_normal', activation='softmax')(X)

    model = tf.keras.Model(inputs, output)

    model.compile(
        optimizer=opt, loss=loss, metrics=metrics
    )

    model.summary()
    return model


model = build_model()

# 5: MODEL TRAINING AND DEFINING CALLBACKS

In [None]:
# Save the model to disk whenever the monitored validation loss improves.
checkpointer = ModelCheckpoint(
    'leaf-doctor-wieghts.hdf5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Both plateau-based callbacks watch the validation loss for a minimum.
plateau_watch = dict(monitor='val_loss', mode='min', verbose=1)

# Multiply the learning rate by 0.2 after 2 epochs without improvement.
reduce_lr = ReduceLROnPlateau(factor=0.2, patience=2, **plateau_watch)

# Stop training after 5 epochs without improvement.
earlystopping = EarlyStopping(patience=5, **plateau_watch)

In [None]:
# Train for at most 15 epochs with checkpointing, early stopping and LR decay.
training_callbacks = [checkpointer, earlystopping, reduce_lr]
h = model.fit(
    train_data_batch,
    validation_data=valid_data_batch,
    epochs=15,
    callbacks=training_callbacks,
)

In [None]:
# One panel per metric, each showing the training and validation curves.
plt.figure(figsize=(14,5))
for panel, metric in enumerate(['loss', 'sparse_categorical_accuracy'], start=1):
    plt.subplot(1, 2, panel)
    for series in (metric, 'val_' + metric):
        plt.plot(h.history[series], marker='o', linestyle='--', label=series)
    plt.xlabel('EPOCH')
    plt.ylabel(metric.upper())
    plt.legend()
    plt.title(metric.upper() +' Vs EPOCH')

plt.show()

In [None]:
# Serialise the model to a JSON string and write it to disk.
model_json = model.to_json()
with open('leaf-doctor-model.json', 'w') as architecture_file:
    architecture_file.write(model_json)

# 6: Testing & Submitting Prediction

In [None]:
import glob

In [None]:
# glob returns paths in arbitrary order; sort them so the order of the test
# images (and of the predictions made from them) is reproducible.
test_images = sorted(glob.glob('../input/cassava-leaf-disease-classification/test_images/*.jpg'))
print(test_images)

In [None]:
# One-column frame holding the test image paths.
df = pd.DataFrame({'Path': np.array(test_images)})
df

In [None]:
df = tf.data.Dataset.from_tensor_slices((df.Path.values))

def process_test(image_path):
    """Load one test image deterministically for inference.

    Reads the JPEG at `image_path`, decodes it to 3 channels and resizes it to
    `row` x `col`, the same preprocessing `process_data_valid` applies.

    Returns the image tensor.
    """
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    # Inference must be repeatable and see the whole image, so no random
    # brightness, flips or crop here; resize exactly like the validation set.
    img = tf.image.resize(img, [row,col])
    return img

df = df.map(process_test, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(bs)

In [None]:
# `workers` only applies to generator / keras Sequence inputs, so it is
# dropped for this tf.data pipeline.
pred = model.predict(df, verbose=1)
pred

In [None]:
pred = np.argmax(pred, axis=-1)

# Predictions follow the order of `test_images`, which is not guaranteed to
# match the row order of sample_submission.csv. Join on the file name rather
# than assigning by position.
# NOTE(review): assumes sample_submission.csv has an `image_id` column holding
# the image file names - confirm.
pred_by_image = dict(zip((os.path.basename(image_path) for image_path in test_images), pred))

sub = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
# astype(int) fails loudly if any listed image has no prediction, instead of
# silently writing a missing label.
sub['label'] = sub['image_id'].map(pred_by_image).astype(int)
sub

In [None]:
# Write the submission file without the pandas index column.
sub.to_csv('submission.csv', index=False)

<h1 style="text-align:justify">If my notebook was helpful then please upvote, this will keep me motivated :)</h1>

- Drop a comment if you have any doubts.