References Used:

1 - [Transfer Learning using Keras and VGG](https://riptutorial.com/keras/example/32608/transfer-learning-using-keras-and-vgg)

2 - [VGG16 and VGG19](http://keras.io/api/applications/vgg/)

# Import required Libraries

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import tensorflow as tf
from PIL import Image
import os
import matplotlib.pyplot as plt
from collections import Counter

# First --> Define the data working paths

In [None]:
# Root folder of the competition data; the training images live in a
# sub-folder directly beneath it.
main_directory = '/kaggle/input/cassava-leaf-disease-classification/'
training_images_path = f'{main_directory}train_images'

# Print the entries of the data folder as a quick sanity check.
print('List of Files:\n', os.listdir(main_directory))

In [None]:
# train.csv pairs each training image file with its numeric label;
# the JSON file is read as a Series that maps each numeric label to a
# disease name.
image_label_data = pd.read_csv(f'{main_directory}train.csv')
labels = pd.read_json(
    f'{main_directory}label_num_to_disease_map.json',
    typ='series',
)

print('Cassava Leaf Disease Classification Labels are:\n', dict(labels))

# Last expression of the cell: the notebook renders 10 random rows.
image_label_data.sample(10)

Classes Balance:

The counts below show that the classes are imbalanced across the available images.

In [None]:
# Absolute number of images per label ...
print(Counter(image_label_data.label))

# ... and the same distribution expressed as fractions of the dataset
# (rendered by the notebook as the cell's last expression).
image_label_data.label.value_counts(normalize=True)

# Second --> Images to Labels Mapping

In [None]:
# Add a 'disease_name' column by looking each numeric label up in `labels`.
image_label_data['disease_name'] = image_label_data['label'].map(labels)
print(image_label_data)

# Third --> Data Splitting for Training and Validation Steps

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of the labelled images held out for validation.
TEST_PERCENTAGE = 0.05

# Stratifying on the disease name keeps the class proportions of the full
# dataset in both splits instead of drawing images purely at random.
# With 95% going to training, for example:
#   class 3 has 13158 images -> 13158 * 95% = 12500 for training
#   class 4 has  2577 images ->  2577 * 95% =  2448 for training
train_set_splitted, validation_set_splitted = train_test_split(
    image_label_data,
    test_size=TEST_PERCENTAGE,
    random_state=42,
    stratify=image_label_data['disease_name'],
)

# Optional check of the per-class counts in each split:
# print('Training Dataset:\n', dict(Counter(train_set_splitted['label'])))
# print('\nValidation Dataset:\n', dict(Counter(validation_set_splitted['label'])))

# Fourth --> Image Augmentation with KERAS ImageDataGenerator

In [None]:
# ImageDataGenerator applies the augmentation on the fly while batches are
# being produced, so no augmented copies are written to disk.

from keras.preprocessing.image import ImageDataGenerator

IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
NO_OF_CLASSES = 5
BATCH_SIZE = 20

# One generator per split. Both run the VGG19 input preprocessing; only the
# training generator additionally applies random horizontal/vertical flips.
TrainingImageGenerator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg19.preprocess_input,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

ValidatonImageGenerator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg19.preprocess_input,
)


def _flow_from_split(generator, split_frame):
    """Return a batch iterator over `split_frame` produced by `generator`.

    Both splits read their files from `training_images_path` and share every
    other setting, so the common arguments are written down only once here.
    """
    return generator.flow_from_dataframe(
        split_frame,
        directory=training_images_path,
        x_col='image_id',
        y_col='disease_name',
        target_size=IMAGE_SIZE,
        class_mode='categorical',
        interpolation='nearest',
        batch_size=BATCH_SIZE,
        shuffle=True,
        seed=9806,
    )


training_dataset = _flow_from_split(TrainingImageGenerator, train_set_splitted)
validation_dataset = _flow_from_split(ValidatonImageGenerator, validation_set_splitted)

# Fifth --> Create the Neural Network Model

# Transfer Learning using Keras and VGG-19

Loading weights from available pre-trained model

In [None]:
import keras
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, Adam, SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import VGG19

# True  -> continue from a previously saved model file.
# False -> build a fresh network on top of the ImageNet-pretrained VGG19 base.
LOAD_MODEL = True

if not LOAD_MODEL:
    CassaveDisease_model = Sequential(name='Cassava_Neural_Network')

    # Convolutional base: VGG19 without its classifier head, ImageNet weights.
    CassaveDisease_model.add(VGG19(input_shape = (IMAGE_WIDTH, IMAGE_HEIGHT, 3),
                                   include_top = False,
                                   weights = 'imagenet'))

    # Classification head. GlobalAveragePooling2D already yields one vector per
    # image, so Flatten does not change the shape; it is kept so that the layer
    # stack stays the same as before.
    CassaveDisease_model.add(GlobalAveragePooling2D())
    CassaveDisease_model.add(Flatten())
    # (A bias regularizer on this layer and a Dropout(0.5) after it were tried
    # earlier and are left disabled.)
    CassaveDisease_model.add(Dense(256, activation = 'relu'))
    CassaveDisease_model.add(BatchNormalization())
    CassaveDisease_model.add(Dense(NO_OF_CLASSES, activation = 'softmax'))

    CassaveDisease_model.summary()
    keras.utils.plot_model(CassaveDisease_model)

    ## Optimizer Definition
    Adam_Optimizer = Adam(learning_rate = 0.001)

    CassaveDisease_model.compile(
                                loss = "categorical_crossentropy",
                                optimizer = Adam_Optimizer,
                                metrics = ["accuracy"])

else:
    CassaveDisease_model = keras.models.load_model('../input/84-percentage-model/Cassava_best_Model_Reached_best_model_7_Jan_04_acc_is_86.h5')
    print('Model Loaded Successfully!')

    # Fine-tune the loaded model with a much smaller learning rate.
    # Assigning to `CassaveDisease_model.optimizers` (as was done before) only
    # creates an unused attribute and leaves the optimizer restored from the
    # file untouched; the model has to be re-compiled for the new optimizer to
    # take effect. Loss and metrics match the ones used when building from
    # scratch.
    CassaveDisease_model.compile(
                                loss = "categorical_crossentropy",
                                optimizer = Adam(learning_rate = 0.00002),
                                metrics = ["accuracy"])

-----------------------------------------------
1 - Define the Optimizer that we are going to use

2 - Compile the Neural Network Created

In [None]:
# Upper bound on the number of passes over the training data.
EPOCHES = 5

# How many consecutive epochs without a better 'val_accuracy' are tolerated
# before training is stopped early.
EPOCHES_TO_WAIT_WITH_NO_IMPORVEMENT = 3

EARLY_STOP = EarlyStopping(
    monitor='val_accuracy',
    patience=EPOCHES_TO_WAIT_WITH_NO_IMPORVEMENT,
    restore_best_weights=True,
)

# Write a checkpoint only when the validation loss reaches a new minimum.
BEST_MODEL_REACHED = ModelCheckpoint(
    filepath="./Cassava_best_Model_Reached_best_model_8_Jan_03.h5",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Multiply the learning rate by `factor` when the validation loss has not
# improved for `patience` epochs, never going below `min_lr`.
REDUCE_LR = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.2,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

Trained_Model = CassaveDisease_model.fit(
    training_dataset,
    epochs=EPOCHES,
    validation_data=validation_dataset,
    callbacks=[EARLY_STOP, BEST_MODEL_REACHED, REDUCE_LR],
    verbose=1,
)

# Also keep the model as it stands when fit() returns, under its own file name.
CassaveDisease_model.save("./Cassava_best_Model_Reached_8_jan_03.h5")
print("Model Saved Successfully!")

In [None]:
# List the names of the metric series stored in the history returned by fit().
print(Trained_Model.history.keys())

In [None]:
def Train_Val_Plot(acc, val_acc, loss, val_loss):
    """Plot training and validation curves for accuracy and loss side by side.

    Each argument is a sequence with one value per epoch. The left panel shows
    `acc` and `val_acc`, the right panel shows `loss` and `val_loss`; the
    x axis is the epoch number starting at 1. The figure is shown with
    `plt.show()` and nothing is returned.
    """
    figure, axes = plt.subplots(1, 2, figsize=(15, 10))
    figure.suptitle(" MODEL'S METRICS VISUALIZATION ", fontsize=20)

    # (panel title, y-axis label, training series, validation series)
    panels = (
        ('History of Accuracy', 'Accuracy', acc, val_acc),
        ('History of Loss', 'Loss', loss, val_loss),
    )

    for axis, (title, y_label, train_series, val_series) in zip(axes, panels):
        # Training curve first, then validation, to match the legend order.
        for series in (train_series, val_series):
            axis.plot(range(1, len(series) + 1), series)
        axis.set_title(title, fontsize=15)
        axis.set_xlabel('Epochs', fontsize=15)
        axis.set_ylabel(y_label, fontsize=15)
        axis.legend(['training', 'validation'])

    plt.show()
    

#Train_Val_Plot(Trained_Model.history['accuracy'],Trained_Model.history['val_accuracy'],
#               Trained_Model.history['loss'],Trained_Model.history['val_loss'])

In [None]:
TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'
test_images = os.listdir(TEST_DIR)
predictions = []
size = (IMAGE_WIDTH, IMAGE_HEIGHT)

# Predict one class index per test image.
# The network was trained on batches produced by ImageDataGenerator, i.e. on
# RGB images resized with nearest-neighbour interpolation and passed through
# the VGG19 `preprocess_input`. The same steps have to be applied here;
# feeding raw pixel values gives the model inputs on a different scale from
# the one it was trained on.
for image_name in test_images:
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(TEST_DIR + image_name) as raw_image:
        img = raw_image.convert('RGB').resize(size, Image.NEAREST)

    # Float copy of the pixels with a leading batch axis of length 1.
    batch = np.expand_dims(np.asarray(img, dtype=np.float32), axis=0)
    batch = tf.keras.applications.vgg19.preprocess_input(batch)

    class_scores = CassaveDisease_model.predict(batch, verbose=0)
    predictions.extend(class_scores.argmax(axis = 1))

In [None]:
# Show the predicted class index of every test image, in `test_images` order.
print(predictions)

In [None]:
# Build the final submission: one row per test image holding its file name
# and the predicted class index, shown in the notebook and written to CSV.
submission_columns = {'image_id': test_images, 'label': predictions}
sub = pd.DataFrame(submission_columns)

display(sub)
sub.to_csv('submission.csv', index=False)