In [None]:
# Importing the required libraries
import pandas  as pd
import numpy as np
import matplotlib.pyplot  as plt
import cv2

import tensorflow as tf 
from tensorflow.keras import applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers

In [None]:
# Root directory of the competition dataset. Every input path below is derived
# from it, so relocating the data only requires changing this one constant.
DATA_DIR = "../input/cassava-leaf-disease-classification"

# CSV manifest with the `image_id` and `label` columns used for training.
train_csv_path = f"{DATA_DIR}/train.csv"
# JSON file mapping each numeric label to its disease name.
label_json_path = f"{DATA_DIR}/label_num_to_disease_map.json"
# Directory holding the training images referenced by the manifest.
images_dir_path = f"{DATA_DIR}/train_images"

In [None]:
# Load the training manifest and cast `label` to pandas' string dtype in one
# chained step; the column is later passed as `y_col` to flow_from_dataframe
# with class_mode="categorical".
train_csv = pd.read_csv(train_csv_path).astype({'label': 'string'})

# Read the label-number -> disease-name JSON (one row per key) and flatten it
# into a plain Python list, so that label_class[i] is the name for label i.
label_class = (
    pd.read_json(label_json_path, orient='index')
    .to_numpy()
    .ravel()
    .tolist()
)

In [None]:
# Side length (pixels) of the square images fed to the network; used both as
# the generators' target_size and as the model's input shape.
IMG_SIZE: int = 380
# Number of images per batch yielded by the data generators.
BATCH_SIZE: int = 8
# Number of passes over the training data requested from model.fit.
EPOCHS: int = 16

In [None]:
# Data augmentation and pre-processing with Keras' ImageDataGenerator.

# Values shared by both generators. The split fraction must be the same on
# both sides so the "training" and "validation" subsets do not overlap.
PIXEL_RESCALE = 1/255
VALIDATION_SPLIT = 0.2
# NOTE(review): Keras documents tf.keras.applications EfficientNet models as
# expecting raw [0, 255] pixel inputs (rescaling is built into the model).
# Confirm that dividing by 255 here is intended.

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=270,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.1,0.9],
    shear_range=25,
    zoom_range=0.3,
    channel_shift_range=0.1,
    horizontal_flip=True,
)

train_gen = ImageDataGenerator(
    rescale=PIXEL_RESCALE,
    validation_split=VALIDATION_SPLIT,
    **augmentation_options,
)

# Validation images are only rescaled, never augmented.
valid_gen = ImageDataGenerator(
    rescale=PIXEL_RESCALE,
    validation_split=VALIDATION_SPLIT,
)

In [None]:
# Options common to the training and validation iterators: both read the same
# dataframe and image directory, and differ only in the subset they draw from
# and in the generator (augmenting vs. plain) that produces them.
shared_flow_options = dict(
    dataframe=train_csv,
    directory=images_dir_path,
    x_col="image_id",
    y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Augmented batches drawn from the training part of the split.
train_generator = train_gen.flow_from_dataframe(
    subset="training",
    **shared_flow_options,
)

# Un-augmented batches drawn from the held-out validation part of the split.
valid_generator = valid_gen.flow_from_dataframe(
    subset="validation",
    **shared_flow_options,
)

In [None]:
def build_model(input_size = [IMG_SIZE, IMG_SIZE, 3], num_classes = 5):
    """Build and compile an EfficientNetB4-based image classifier.

    The network is an EfficientNetB4 backbone (no top, global average pooling)
    followed by dropout, a 64-unit ReLU layer and a softmax output layer. It
    is compiled with categorical cross-entropy and the Adam optimizer.

    Args:
        input_size: Shape of one input image as (height, width, channels).
            Previously this argument was ignored and the global IMG_SIZE was
            always used; it is now passed through to the backbone.
        num_classes: Number of output classes (units in the softmax layer).
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A compiled ``tf.keras.Sequential`` model.

    NOTE(review): the Keras documentation states that tf.keras.applications
    EfficientNet models include input rescaling and expect pixel values in
    [0, 255]. The rest of this notebook feeds images divided by 255; confirm
    that this is intended.
    """
    backbone = applications.EfficientNetB4(
        include_top=False,
        input_shape=tuple(input_size),
        pooling='avg',
    )

    model = tf.keras.Sequential([
        backbone,
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu',
                     kernel_initializer=tf.keras.initializers.HeUniform()),
        layers.Dense(num_classes, activation='softmax',
                     kernel_initializer=tf.keras.initializers.HeUniform()),
    ])

    # Both metric entries are kept: the callbacks defined in this notebook
    # monitor `val_categorical_accuracy`, which comes from the second entry.
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['accuracy', tf.keras.metrics.categorical_accuracy],
    )

    return model

In [None]:
# Both callbacks watch the same validation metric.
monitored_metric = "val_categorical_accuracy"

# Checkpoint callback: writes the model to disk, keeping only the best one
# according to the monitored metric.
callback0 = tf.keras.callbacks.ModelCheckpoint(
    filepath="CasavaLeafDiseaseModel.h5",
    monitor=monitored_metric,
    save_best_only=True,
)

# Learning-rate schedule: multiply the learning rate by 0.75 once the
# monitored metric has stopped improving for 2 epochs.
lr_reducer_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.75,
    patience=2,
    verbose=1,
)

callbacks = [
    callback0,
    lr_reducer_callback,
]

In [None]:
# Instantiate the classifier for IMG_SIZE x IMG_SIZE RGB inputs and print a
# layer-by-layer overview.
model = build_model(input_size=[IMG_SIZE, IMG_SIZE, 3])
model.summary()

In [None]:
# Class weights: each class is weighted by the fraction of training samples
# that do NOT belong to it, so rarer classes receive larger weights.
total = len(train_csv)
label_number = dict(train_csv.label.value_counts())

# Labels are stored as strings in the dataframe, while the weights passed to
# model.fit are keyed by integer class index, hence the str(i) lookup.
weight_of_class = {
    i: (total - label_number[str(i)])/total
    for i in range(len(label_number))
}

for i, weight in weight_of_class.items():
    print(f"Weight of {i} : {label_class[i]} : {weight}")

In [None]:
# Train the model; `his` holds the History object returned by fit.
his = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    class_weight=weight_of_class,
)

# **Predicting Test Data**

In [None]:
# Predict a label for every image listed in the sample submission and write
# the result to submission.csv.
test_images_dir = '../input/cassava-leaf-disease-classification/test_images/'
ss = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')

# Models whose class probabilities are averaged. This cell previously used
# `model0` and `model1`, which are never defined in this notebook and raised a
# NameError on a fresh kernel. The model trained above is used instead; append
# further trained models to this list to ensemble them.
ensemble_models = [model]

preds = []
for image in ss.image_id:
    # Resize through load_img's target_size, as the training/validation
    # generators do, so inference does not use a different resize path
    # (the previous smart_resize call cropped to the target aspect ratio first).
    img = tf.keras.preprocessing.image.load_img(
        test_images_dir + image,
        target_size=(IMG_SIZE, IMG_SIZE),
    )
    img = tf.keras.preprocessing.image.img_to_array(img)

    # Add the batch axis and apply the same 1/255 scaling as the generators.
    batch = np.expand_dims(img, axis=0) / 255

    # Average the predicted class probabilities across the ensemble.
    prediction = np.mean(
        [member.predict(batch, verbose=0) for member in ensemble_models],
        axis=0,
    )

    preds.append(int(np.argmax(prediction)))

my_submission = pd.DataFrame({'image_id': ss.image_id, 'label': preds})
my_submission.to_csv('submission.csv', index=False)

In [None]:
# Submission file output: header line followed by the first predicted rows.
print("Submission File: \n---------------\n", my_submission.head(), sep="\n")