In [None]:

import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import cv2 as cv
import random

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import tensorflow.keras 
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras import layers 
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping


In [None]:
# Load the training labels and the sample submission file from the
# competition input directory, then preview the training frame.
train_df = pd.read_csv(
    '../input/cassava-leaf-disease-classification/train.csv'
)
test_df = pd.read_csv(
    '../input/cassava-leaf-disease-classification/sample_submission.csv'
)
train_df.head()

In [None]:
import json

# Read the label-number -> disease-name mapping. The context manager closes
# the file handle as soon as the JSON has been parsed.
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as diseases_file:
    diseases = json.load(diseases_file)
diseases

## Quick Data Exploration

In [None]:
# Count how many training rows carry each label and draw the counts as bars.
num_instances = train_df.groupby('label').size()
class_ids = np.unique(train_df.label)

plt.figure(figsize=(10, 5))
plt.bar(class_ids, num_instances)
plt.title('Number of labels within the training set', fontweight='bold')
plt.xlabel('labels')
plt.ylabel('instances')

## Prepare The Data:

Convert and resize the jpg images into npy files with size 224x224. Important to note, 224x224 is not assumed to be the optimal size; however, this size is a decent place to start because we are implementing imagenet pretrained models like Vgg16 and Resnet50 as cheap baseline models.

In [None]:
import os


def convert_images_to_npy(image_ids, src_dir, dst_dir, size=(224, 224)):
    """Resize every listed jpg and store it as a float32 .npy file.

    Arguments:
        image_ids -- sized iterable of file names (e.g. the `image_id` column)
        src_dir   -- directory that holds the jpg files
        dst_dir   -- directory that receives the .npy files (created if missing)
        size      -- target size handed to `cv.resize`

    Raises:
        FileNotFoundError -- when OpenCV cannot read one of the images
    """
    os.makedirs(dst_dir, exist_ok=True)
    # Iterate the ids directly (not through enumerate) so tqdm knows the total.
    for filename in tqdm(image_ids, total=len(image_ids)):
        image = cv.imread(os.path.join(src_dir, filename))
        if image is None:
            # cv.imread signals a missing/corrupt file by returning None.
            raise FileNotFoundError(f'Could not read image: {os.path.join(src_dir, filename)}')
        image = cv.resize(image.astype('float32'), size, interpolation=cv.INTER_CUBIC)
        # np.save appends the ".npy" extension itself.
        np.save(os.path.join(dst_dir, filename.replace('.jpg', '')), image)


train_path = '../input/cassava-leaf-disease-classification/train_images/'
convert_images_to_npy(train_df.image_id, train_path, '/kaggle/working/train_images')

test_path = '../input/cassava-leaf-disease-classification/test_images/'
convert_images_to_npy(test_df.image_id, test_path, '/kaggle/working/test_images')


    

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

def onehot_encoded(data):
    """Return `data` one-hot encoded by Keras' `to_categorical`."""
    encoded = to_categorical(data)
    return encoded


train_df['image_path'] = '../input/cassava-leaf-disease-classification/train_images/' + train_df.image_id

# OpenCV decodes images as BGR while matplotlib expects RGB, so convert
# before displaying; otherwise the red and blue channels are swapped.
sample_bgr = cv.imread(train_df.image_path[0])
plt.imshow(cv.cvtColor(sample_bgr, cv.COLOR_BGR2RGB))
plt.show()

# One column per class, named label_<k>, aligned on the frame's own index.
encoded_df = pd.DataFrame(onehot_encoded(train_df.label), index=train_df.index)
encoded_df.columns = [f'label_{k}' for k in range(encoded_df.shape[1])]
# Drop one-hot columns left over from a previous execution so that re-running
# this cell does not append duplicates.
train_df = pd.concat(
    [train_df.drop(columns=list(encoded_df.columns), errors='ignore'), encoded_df],
    axis=1,
)
train_df.head()

Hat tip to [Shervine Amidi](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly) for the general data generator implementation in Keras. 

In [None]:
import keras

class DataGenerator(keras.utils.Sequence):
    """Batch generator that loads pre-resized .npy images for Keras.

    Arguments:
        list_IDs   -- pandas Series of image file names (accessed with `.iloc`)
        labels     -- integer class labels, in the same order as `list_IDs`
        batch_size -- number of samples per batch
        dim        -- spatial size of each stored image
        n_channels -- number of image channels
        n_classes  -- number of classes used for one-hot encoding
        shuffle    -- reshuffle the sample order at the end of every epoch
        data_dir   -- directory that holds the .npy files
    """
    def __init__(self, list_IDs, labels, batch_size=32, dim=(224, 224), n_channels=3,
                 n_classes=5, shuffle=True, data_dir='/kaggle/working/train_images/'):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_dir = data_dir
        # Build the id -> label lookup once instead of masking the whole
        # Series for every single sample.
        self._label_by_id = dict(zip(list_IDs, labels))
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Floor division: a trailing partial batch is dropped.
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Slice the (possibly shuffled) order prepared by on_epoch_end().
        # It must not be rebuilt here, otherwise shuffling is silently undone.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs.iloc[k] for k in indexes]
        # Generate data
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def standard_norm(self, image):
        'Standardise each channel of `image` in place (zero mean, unit std)'
        for channel in range(3):
            plane = image[:, :, channel]
            std = np.std(plane)
            # A constant channel has std == 0; divide by 1 to avoid NaN/inf.
            image[:, :, channel] = (plane - np.mean(plane)) / (std if std > 0 else 1.0)
        return image

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,:,:,:] = np.load(self.data_dir + ID.replace('.jpg', '.npy')).astype('float32')
            X[i,] = self.standard_norm(X[i,])

            y[i] = self._label_by_id[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [None]:

# The function for creating the data generators needed for training ...

def create_gen(train_df, train_index, test_index, params):
    """Build the training and validation generators for one split.

    `train_index` / `test_index` are positional row indices into `train_df`;
    `params` is forwarded as keyword arguments to both `DataGenerator`s.
    Returns `(training_generator, validation_generator)`.
    """
    ids, targets = train_df.image_id, train_df.label

    training_generator = DataGenerator(
        ids.iloc[train_index], targets.iloc[train_index], **params
    )
    validation_generator = DataGenerator(
        ids.iloc[test_index], targets.iloc[test_index], **params
    )
    return training_generator, validation_generator


## Model Pipeline:

Hat tip to [Chengwei](https://www.dlology.com/blog/multi-class-classification-with-focal-loss-for-imbalanced-datasets/) for sharing the quick and easy focal cross entropy function for multi-class classification. Focal cross entropy is introduced as an effective, heuristic loss to overcome class imbalance; however, in future models, I plan to drop focal cross entropy and balance the data set with other methods. 

In [None]:
import tensorflow as tf

def focal_loss(gamma=2., alpha=4.):
    """Return a focal-loss function with `gamma` and `alpha` baked in.

    The returned callable has the `(y_true, y_pred)` signature expected by
    Keras losses.
    """
    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Keyword Arguments:
            gamma {float} -- (default: {2.0})
            alpha {float} -- (default: {4.0})

        Returns:
            [tensor] -- loss.
        """
        eps = 1.e-9
        targets = tf.convert_to_tensor(y_true, tf.float32)
        # Shift the probabilities by eps so the logarithm never sees zero.
        probs = tf.convert_to_tensor(y_pred, tf.float32) + eps

        cross_entropy = targets * -tf.math.log(probs)
        modulator = targets * tf.pow(1. - probs, gamma)
        per_class = alpha * (modulator * cross_entropy)
        # Collapse the class axis with max, then average over the batch.
        per_sample = tf.reduce_max(per_class, axis=1)
        return tf.reduce_mean(per_sample)
    return focal_loss_fixed

In [None]:
"""
Neural network garage ... models are created so that the back bone is 
a pretrained imagenet model and a final global average pool is used before
the final dense layer. This way, we can easy implement the class activation map
analysis shown at the end of this notebook
"""

from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
import tensorflow as tf


def Resnet50_model():
    """Return an ImageNet-pretrained ResNet50 without its top, for 224x224x3 input."""
    backbone = ResNet50(
        include_top=False,
        weights="imagenet",
        input_shape=(224, 224, 3),
    )
    return backbone

def Vgg16_model():
    """Return an ImageNet-pretrained VGG16 without its top, for 224x224x3 input."""
    backbone = VGG16(
        include_top=False,
        weights="imagenet",
        input_shape=(224, 224, 3),
    )
    return backbone

def Xception_model():
    """Return an ImageNet-pretrained Xception without its top, for 224x224x3 input."""
    backbone = Xception(
        include_top=False,
        weights="imagenet",
        input_shape=(224, 224, 3),
    )
    return backbone

def classification_model(bottom_model, lr, num_class = 5, target_size = (224,224)):
    """Stack a global-average-pool + softmax head on `bottom_model` and compile it.

    Arguments:
        bottom_model -- Keras model added as the first layer (the backbone)
        lr           -- learning rate for the Adam optimizer
        num_class    -- number of units in the final softmax layer
        target_size  -- currently unused; kept so existing callers keep working

    Returns:
        The compiled Sequential model, trained with `focal_loss()`.
    """
    model = Sequential()
    model.add(bottom_model)
    model.add(GlobalAveragePooling2D(name = "Global_avg"))
    model.add(Dense(num_class, activation = 'softmax', name = 'classes'))
    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    model.compile(optimizer = Adam(learning_rate=lr), metrics = ['accuracy' ,'Precision', 'Recall', 'AUC'],
                  loss = focal_loss())
    return model


In [None]:
# all possible models ... 

'example code: showing how to load an individual model'

# Build one classifier per backbone, all with learning rate 0.0001.
# `bottom_model` is rebound before each call, so after this cell it refers
# to the VGG16 backbone.
bottom_model = Xception_model()
model_Xception = classification_model(bottom_model, 0.0001)
bottom_model = Resnet50_model()
model_Resnet50 = classification_model(bottom_model, 0.0001)
bottom_model = Vgg16_model()
model_Vgg16 = classification_model(bottom_model, 0.0001)

# Only the VGG16-based model's architecture is printed.
model_Vgg16.summary()

## Train model

In [None]:
def train_model(model,tr_gen, ts_gen, epochs):
    """Train `model` from a data generator and validate on a second one.

    Arguments:
        model  -- compiled Keras model
        tr_gen -- generator yielding training batches
        ts_gen -- generator yielding validation batches
        epochs -- number of epochs to train

    Returns:
        (train_history, model) -- the object returned by `fit` and the
        same model instance that was passed in.
    """
    # `fit` accepts generators directly; `fit_generator` is deprecated.
    train_history = model.fit(
        tr_gen,
        epochs = epochs,
        validation_data = ts_gen,
        use_multiprocessing=True,
        workers=8,
        verbose = 1)
    return train_history, model

In [None]:
%%time
from sklearn.model_selection import KFold

"""
Here, we are using cross-validation so that we can create an ensemble model to the measure the uncertainity
in the test prediction. With GPU, roughly takes ~1hr to train for VGG16... 
"""

# Parameters
params = {'dim': (224,224),
          'batch_size': 128,
          'n_classes': 5,
          'n_channels': 3,
          'shuffle': True}

train_dic = dict(hist=list(), train_df=list(), valid_df=list())

kf = KFold(n_splits = 5)
KFold(n_splits = 2, random_state = 42, shuffle = True)

model_ensemble = []
kk=0

for train_index, test_index in kf.split(train_df):
    
    # creating the training and testing data generators
    column_names = ['image_path', 'label_0', 'label_1', 'label_2', 'label_3', 'label_4']
    height_img, width_img = 224, 224
    
    training_generator, validation_generator = create_gen(train_df, train_index, test_index, params)
    
    bot_model = Vgg16_model()
    model_final = classification_model(bot_model, 0.0001)
    print(f'========== Training Fold {kk+1} ==========' )
    train_history, model = train_model(model_final, training_generator, validation_generator, 5)
    
    model_ensemble.append(model)
    
    kk+=1
    train_dic['hist'].append(train_history)
    train_dic['train_df'].append(train_df.iloc[train_index])
    train_dic['valid_df'].append(train_df.iloc[test_index])

#### Predict on the single test image: 

In [None]:
'Compute test predictions with the trained ensemble model ...'

def standard_norm(image):
    """Standardise the first three channels of `image` in place.

    Each channel is shifted to zero mean and scaled to unit standard
    deviation. A constant channel (std == 0) is only centred, so the result
    never contains NaN/inf. Returns the same array object that was passed in.
    """
    for channel in range(3):
        plane = image[:, :, channel]
        std = np.std(plane)
        # Guard against division by zero for a uniform channel.
        image[:, :, channel] = (plane - np.mean(plane)) / (std if std > 0 else 1.0)
    return image

# Load the first test image (stored as .npy by the conversion cell) and
# apply the same per-channel standardisation used during training.
test_img = np.load('/kaggle/working/test_images/'+test_df.image_id[0].replace('.jpg','.npy'))
test_img = standard_norm(cv.resize(test_img, (224,224), interpolation = cv.INTER_CUBIC))

# Defined locally so this cell does not rely on a variable that only exists
# as a side effect of the training loop.
class_names = ['label_0', 'label_1', 'label_2', 'label_3', 'label_4']

# One row of class probabilities per ensemble member.
y_pred = np.zeros((len(model_ensemble), len(class_names)))

for index, model in enumerate(model_ensemble):
    y_pred[index, :] = model.predict(test_img.reshape(1,224,224,3))[0]
    plt.scatter(class_names, y_pred[index,:], color = 'b', alpha = 0.3)

plt.scatter(class_names, np.mean(y_pred, axis = 0), color = 'r', label = 'ensemble mean')
plt.title('Red --> mean prediction | Blue --> k-fold prediction')
# Without this call the 'ensemble mean' label is never displayed.
plt.legend()

**Above plot**: The blue scatter points correspond to each k-fold neural network prediction, so we should see 5 blue points for each column because we conducted k=5 cross-validation. The red points are the ensemble model's mean prediction. We can see that the k-fold networks disagree with each other, and it also seems that the ensemble model cannot differentiate between label 2 and 4. 

**Summary of the ensemble model's performance on the test sample:** We can see that our ensemble model is very poor because the prediction on the test sample has high entropy, and the two highest-probability predictions (label 1 and 4) are difficult to differentiate. However, it is safe to say that the ensemble model indicates high confidence in predicting label 0 and 3 because each individual k-fold network predicted 0 probability. Unsurprisingly, although the ensemble model seems to be very confident in predicting label 3, this class label is highly abundant; in other words, it seems that this baseline ensemble model has overfitted to this class imbalance. However, somewhat surprisingly, the ensemble model predicts label 0 quite confidently, indicating that this class is quite easy to differentiate. Overall, this performance is okay considering that this Vgg16 ensemble model is only a cheap baseline. 

## Class Activation Maps:

Because our model implements a Global Average Pooling layer before the final dense layer, we can implement class activation maps to see the overall model performance. 


The below code can be found in my other notebook, implementing class activation maps on the MNIST dataset: [MNIST Activation Maps](https://www.kaggle.com/niksapraljak/mnist-activation-maps)

In [None]:
"""
    Implementing class activation maps for architectures with Global Average Pooling 2D before the final dense layer 
"""
class Leaf_CAM:
    """Class activation maps for models that end in backbone -> GAP -> Dense.

    The activation map is resized to the spatial size of the image passed
    to the constructor.
    """

    def __init__(self, img):
        # img.shape is (rows, cols, ...) i.e. (height, width, ...).
        self.resize_height, self.resize_width = img.shape[:2]

    # per-channel standardisation (zero mean / unit std), matching the
    # normalisation the data generator applies during training
    def standard_norm(self, img):
        mean = img.mean(axis=(0, 1), keepdims=True)
        std = img.std(axis=(0, 1), keepdims=True)
        # A constant channel has std == 0; divide by 1 to avoid NaN/inf.
        std[std == 0] = 1.0
        return (img - mean) / std

    # sub-model from the first layer's input to the output of the layer that
    # sits third from the end (the layer feeding the global average pool)
    def feature_model(self, model):
        return Model(inputs = model.layers[0].input, outputs = model.layers[-3].output)

    # kernel of the final layer; its columns are indexed by class below
    def weight_tensor(self, model):
        final_outputs = model.layers[-1]
        return final_outputs.get_weights()[0]

    # output prediction class of the image of interest
    def predict_class(self, model, X):
        prob_vec = model.predict(X)
        return np.argmax(prob_vec[0])

    # generate class activation maps (CAMs)
    def generate_CAM(self, model, img):
        # Add the batch axis without hard-coding the image size.
        batch = self.standard_norm(img)[np.newaxis, ...]
        Fmap_model = self.feature_model(model)
        Wtensor = self.weight_tensor(model)
        feature_map = Fmap_model.predict(batch)
        label = self.predict_class(model, batch)
        # Weight every feature channel by the predicted class' dense weight.
        CAM = feature_map.dot(Wtensor[:,label])[0,:,:]
        # cv.resize takes dsize as (width, height).
        return cv.resize(CAM,
                         (self.resize_width, self.resize_height),
                         interpolation = cv.INTER_CUBIC), label

    # generate probability vector
    def generate_probvec(self, model, img):
        X = self.standard_norm(img)
        prob_vec = model.predict(X[np.newaxis, ...])
        return prob_vec

In [None]:
'Example code: quickly compute a class activation map for a random image sample ...'

def read_img(file_path):
    """Read an image, resize it to 224x224 and return (normalised, raw).

    Returns:
        norm_image -- copy of the resized image passed through `standard_norm`
        image      -- resized float32 image, channel order as decoded by OpenCV

    Raises:
        FileNotFoundError -- when OpenCV cannot read `file_path`
    """
    image = cv.imread(file_path)
    if image is None:
        # cv.imread returns None instead of raising for unreadable files.
        raise FileNotFoundError(f'Could not read image: {file_path}')
    image = cv.resize(image.astype('float32'), (224,224), interpolation = cv.INTER_CUBIC)
    # normalise a copy so the raw image stays available for display
    norm_image = standard_norm(image.copy())
    return norm_image, image


# generate a random activation map

# Draw from the actual number of rows instead of a hard-coded upper bound.
rand_val = random.randrange(len(train_df))
# example image
file_path = train_df.image_path[rand_val]
norm_img, img = read_img(file_path)
# Cubic resizing can overshoot [0, 255], so clip before the uint8 cast;
# OpenCV images are BGR, so convert to RGB for matplotlib.
display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
CAM_generator = Leaf_CAM(img)
plt.subplot(1,2,1)
plt.imshow(display_img)
plt.subplot(1,2,2)
plt.imshow(display_img)
activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
plt.imshow(activation_map,'jet', alpha = 0.5)
plt.title("Predicted Class: " + str(label))
plt.show()


# Activation Map Analysis:

## Plot Cassava Bacterial Blight (CBB) Activation Maps: 

In [None]:
CBB_df = train_df[train_df.label == 0]

# head(5) copes with classes that have fewer than five rows.
for file_path in CBB_df.image_path.head(5):

    img = read_img(file_path)[1]
    # Clip before the uint8 cast (cubic resize can overshoot [0, 255]) and
    # convert OpenCV's BGR order to RGB for matplotlib.
    display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
    CAM_generator = Leaf_CAM(img)
    plt.figure(figsize = (10,5))
    plt.subplot(1,2,1)
    plt.imshow(display_img)
    plt.subplot(1,2,2)
    plt.imshow(display_img)
    activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
    plt.imshow(activation_map,'jet', alpha = 0.5)
    plt.title("Predicted Class: " + str(label))
    plt.show()


## Plot Cassava Brown Streak Disease (CBSD) Activation Maps:


In [None]:
CBSD_df = train_df[train_df.label == 1]

# head(5) copes with classes that have fewer than five rows.
for file_path in CBSD_df.image_path.head(5):

    img = read_img(file_path)[1]
    # Clip before the uint8 cast (cubic resize can overshoot [0, 255]) and
    # convert OpenCV's BGR order to RGB for matplotlib.
    display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
    CAM_generator = Leaf_CAM(img)
    plt.figure(figsize = (10,5))
    plt.subplot(1,2,1)
    plt.imshow(display_img)
    plt.subplot(1,2,2)
    plt.imshow(display_img)
    activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
    plt.imshow(activation_map,'jet', alpha = 0.5)
    plt.title("Predicted Class: " + str(label))
    plt.show()


## Plot Cassava Green Mottle (CGM) Activation Maps: 

In [None]:
CGM_df = train_df[train_df.label == 2]

# head(5) copes with classes that have fewer than five rows.
for file_path in CGM_df.image_path.head(5):

    img = read_img(file_path)[1]
    # Clip before the uint8 cast (cubic resize can overshoot [0, 255]) and
    # convert OpenCV's BGR order to RGB for matplotlib.
    display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
    CAM_generator = Leaf_CAM(img)
    plt.figure(figsize = (10,5))
    plt.subplot(1,2,1)
    plt.imshow(display_img)
    plt.subplot(1,2,2)
    plt.imshow(display_img)
    activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
    plt.imshow(activation_map,'jet', alpha = 0.5)
    plt.title("Predicted Class: " + str(label))
    plt.show()


##  Plot Cassava Mosaic Disease (CMD) Activation Maps:


In [None]:
CMD_df = train_df[train_df.label == 3]

# head(5) copes with classes that have fewer than five rows.
for file_path in CMD_df.image_path.head(5):

    img = read_img(file_path)[1]
    # Clip before the uint8 cast (cubic resize can overshoot [0, 255]) and
    # convert OpenCV's BGR order to RGB for matplotlib.
    display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
    CAM_generator = Leaf_CAM(img)
    plt.figure(figsize = (10,5))
    plt.subplot(1,2,1)
    plt.imshow(display_img)
    plt.subplot(1,2,2)
    plt.imshow(display_img)
    activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
    plt.imshow(activation_map,'jet', alpha = 0.5)
    plt.title("Predicted Class: " + str(label))
    plt.show()


##  Plot Healthy Activation Maps:


In [None]:
He_df = train_df[train_df.label == 4]

# head(5) copes with classes that have fewer than five rows.
for file_path in He_df.image_path.head(5):

    img = read_img(file_path)[1]
    # Clip before the uint8 cast (cubic resize can overshoot [0, 255]) and
    # convert OpenCV's BGR order to RGB for matplotlib.
    display_img = cv.cvtColor(np.clip(img, 0, 255).astype('uint8'), cv.COLOR_BGR2RGB)
    CAM_generator = Leaf_CAM(img)
    plt.figure(figsize = (10,5))
    plt.subplot(1,2,1)
    plt.imshow(display_img)
    plt.subplot(1,2,2)
    plt.imshow(display_img)
    activation_map, label = CAM_generator.generate_CAM(model_ensemble[0], img)
    plt.imshow(activation_map,'jet', alpha = 0.5)
    plt.title("Predicted Class: " + str(label))
    plt.show()


## Save ensemble model


In [None]:

# Persist every fold's model: architecture as JSON, weights as HDF5.
for kfold, model in enumerate(model_ensemble):
    architecture_path = f"model-{kfold}-vgg16.json"
    weights_path = f"model-{kfold}-vgg16.h5"

    # architecture -> JSON text
    model_json = model.to_json()
    with open(architecture_path, "w") as json_file:
        json_file.write(model_json)

    # weights -> HDF5
    model.save_weights(weights_path)
    print("Saved model to disk")
 