## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm import tqdm
import cv2
import random as rn

In [None]:
# Notebook-wide settings.

# Disable pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None
# Turn off the Jedi-based tab completer in IPython.
%config Completer.use_jedi = False

from IPython.core.interactiveshell import InteractiveShell   
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# List the files and folders shipped with the competition dataset.
competition_dir = '/kaggle/input/cassava-leaf-disease-classification'
os.listdir(competition_dir)

# File types found in the dataset:
# - tfrecords: a TensorFlow file format, used here for storing the images
# - json: a text format mainly used for data transfer
# - csv: contains image file names and their corresponding labels

In [None]:
# Read the JSON file that maps each numeric label to its leaf-disease name.

import json

label_map_path = '/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json'
with open(label_map_path) as label_file:
    label_to_disease = json.load(label_file)
print(label_to_disease)

In [None]:
# Load the training table and preview its first rows.
train_csv_path = '/kaggle/input/cassava-leaf-disease-classification/train.csv'
img_lbl = pd.read_csv(train_csv_path)
img_lbl.head()

In [None]:
# Drop the images reported as duplicates in the competition discussion.
duplicate_ids = ['1562043567.jpg', '3551135685.jpg', '2252529694.jpg']
is_duplicate = img_lbl['image_id'].isin(duplicate_ids)
img_lbl = img_lbl[~is_duplicate]

In [None]:
# Number of training images per disease label.
label_counts = img_lbl['label'].value_counts()
label_counts

# Cassava Mosaic Disease (CMD) is the most common leaf disease in the data.
# Cassava Bacterial Blight (CBB) is the least common leaf disease in the data.

In [None]:
# Load a few random training images for a visual sanity check.

X=[]   # leaf images (RGB channel order, resized to 600x600)
Z=[]   # disease label of the image at the same position in X

# Each sampled row is unpacked as (image file name, label).
for img, dseas in tqdm(img_lbl.sample(9).values):
    img_path='/kaggle/input/cassava-leaf-disease-classification/train_images/{}'.format(img)
    image=cv2.imread(img_path,cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread returns None (it does not raise) when a file cannot be read
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    # OpenCV decodes to BGR; convert to RGB so matplotlib shows the true colours
    image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    image=cv2.resize(image,(600,600))
    X.append(image)    # Appending the image into X
    Z.append(dseas)    # Appending the image label into Z

In [None]:
# Show the sampled images in a 3x3 grid, each titled with its disease class.
fig, ax=plt.subplots(3,3)
fig.set_size_inches(20,20)

# ax.flat walks the grid row by row, matching the order of X and Z.
for axis, leaf_img, disease in zip(ax.flat, X, Z):
    axis.imshow(leaf_img)
    axis.set_title('Disease Class : '+str(disease))

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
# Set after drawing, so this font scale takes effect for figures created afterwards.
sns.set(font_scale=1.5)

# Augmenting Images

In [None]:
# Splitting data into train (80%) and validation (20%), stratified on the label
# so both parts keep the same class proportions.

from sklearn.model_selection import train_test_split
# random_state is fixed so the split is the same on every run.
train, validation = train_test_split(img_lbl,
                                     test_size=0.2,
                                     shuffle=True,
                                     stratify=img_lbl['label'],
                                     random_state=42)

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D,GlobalAveragePooling2D,Dense,Flatten,BatchNormalization,Dropout,MaxPooling2D,AvgPool2D
from tensorflow.keras.applications import EfficientNetB3
import tensorflow as tf

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# NOTE(review): tf.keras EfficientNet models are documented as rescaling their
# inputs internally and expecting pixel values in 0-255, so rescale=1./255 may
# scale the images twice -- confirm. If it is changed, change the training,
# validation and test generators together so all three feed the same range.

# Random augmentations applied to the training images
augmentation_options = dict(rotation_range=30,
                            width_shift_range=0.3,
                            height_shift_range=0.3,
                            shear_range=0.2,
                            zoom_range=0.3,
                            horizontal_flip=True,
                            fill_mode='nearest')

# ImageDataGenerator for training
datagen_trng = ImageDataGenerator(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
                                  rescale=1./255,
                                  **augmentation_options)

# The label column is cast to string before being used as y_col below
train['label']=train['label'].astype(str)

# Iterator yielding augmented (image batch, one-hot label batch) pairs for training
train_datagen=datagen_trng.flow_from_dataframe(
    dataframe=train,
    directory='/kaggle/input/cassava-leaf-disease-classification/train_images',
    x_col="image_id",
    y_col="label",
    target_size=(420,420),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=16,
    seed=42)

In [None]:
# ImageDataGenerator for validation: same preprocessing as training, no augmentation.
# NOTE(review): tf.keras EfficientNet models are documented as rescaling inputs
# internally; rescale=1./255 may scale twice -- confirm, and keep it consistent
# with the training and test generators.
validation_preprocessing = dict(preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
                                rescale=1./255)
datagen_valid = ImageDataGenerator(**validation_preprocessing)

# The label column is cast to string before being used as y_col below
validation['label']=validation['label'].astype(str)

# Iterator yielding (image batch, one-hot label batch) pairs for validation
valid_datagen=datagen_valid.flow_from_dataframe(
    dataframe=validation,
    directory='/kaggle/input/cassava-leaf-disease-classification/train_images',
    x_col='image_id',
    y_col="label",
    target_size=(420,420),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=16,
    seed=42)

# Model Creation

In [None]:
# Defining model: ImageNet-pretrained EfficientNetB3 backbone followed by a
# small dense classification head with 5 softmax outputs.

backbone=EfficientNetB3(include_top=False,weights='imagenet',input_shape=(420,420,3))

model=Sequential([
    backbone,
    GlobalAveragePooling2D(),
    Flatten(),
    Dense(128,activation='relu',bias_regularizer=tf.keras.regularizers.l1_l2()),
    Dropout(0.4),
    Dense(256,activation='relu',bias_regularizer=tf.keras.regularizers.l1_l2()),
    Dropout(0.4),
    Dense(5,activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels from class_mode="categorical"
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(),
              metrics=tf.keras.metrics.CategoricalAccuracy())
model.summary()

In [None]:
# Defining callbacks (both watch the validation loss)

from keras.callbacks import EarlyStopping,ReduceLROnPlateau

# Stop after 3 epochs without a val_loss improvement of at least 0.002,
# then restore the best weights seen.
early_stop=EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.002,
    patience=3,
    restore_best_weights=True,
    verbose=1)

# Multiply the learning rate by 0.1 after 2 epochs without improvement, down to 1e-6.
reduce_lr=ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    min_lr=1e-6,
    verbose=1)

In [None]:
# Train the model on the augmented training iterator and validate every epoch.
# The iterators already batch the data, so `batch_size` must not be passed to
# fit(); the number of batches per epoch goes in steps_per_epoch /
# validation_steps. len(iterator) is the batch count including the final
# partial batch.
model.fit(train_datagen,
          steps_per_epoch=len(train_datagen),
          epochs=25,verbose=1,shuffle=True,
          validation_data=valid_datagen,
          validation_steps=len(valid_datagen),
          callbacks=[early_stop,reduce_lr])

In [None]:
## Saving the trained model to disk in HDF5 format
model_path='casava_leaf_disease.h5'
model.save(model_path)

## Checking our model on test data

In [None]:
# Loading the sample submission file, which lists the test image ids
submission_csv_path='/kaggle/input/cassava-leaf-disease-classification/sample_submission.csv'
sub_csv=pd.read_csv(submission_csv_path)

In [None]:
# Loading test images with the same preprocessing as the validation data.
# NOTE(review): tf.keras EfficientNet models are documented as rescaling inputs
# internally; rescale=1./255 may scale twice -- confirm, and keep it consistent
# with the generators used for training and validation.
datagen_test=ImageDataGenerator(rescale=1./255,
                                preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)

# shuffle=False is required: flow_from_dataframe shuffles by default, which
# would put the predictions in a different order than sub_csv.image_id.
# class_mode=None yields image batches only (no labels).
test_datagen=datagen_test.flow_from_dataframe(dataframe=sub_csv,
                                              directory='/kaggle/input/cassava-leaf-disease-classification/test_images',
                                              x_col='image_id',
                                              target_size=(420,420),
                                              color_mode='rgb',
                                              batch_size=16,
                                              class_mode=None,
                                              shuffle=False)

In [None]:
# Doing predictions on test data.
# model.predict returns one row of 5 class probabilities per image.
class_probs=model.predict(test_datagen)
# axis=1 picks the most probable class for each image; without it np.argmax
# returns a single index into the flattened array.
final_rslt=np.argmax(class_probs,axis=1)

In [None]:
# Creating the final output: test image ids paired with their predicted labels
submission_columns={'image_id':sub_csv['image_id'],
                    'label':final_rslt}
final_results=pd.DataFrame(submission_columns)
final_results

In [None]:
# Write the submission file without the DataFrame index column
submission_path='submission.csv'
final_results.to_csv(submission_path,index=False)