# Project Overview

This is my first code competition with Kaggle. I learned a significant amount, reinforcing my coding methods, noisy-label management, and data augmentation (specifically test-time augmentation, TTA); with more time, I would have added CutMix methods for even better data augmentation. The problem statement of determining the diseases within Cassava plants is intriguing, and at this point I think I can probably identify Cassava Mosaic Disease within a Cassava leaf. Let us have a look at the data. 

In [None]:
# Import the needed packages for this notebook
# These are the general data management tools:
import numpy as np
import pandas as pd
import os
import json

# Tools for visualization of the data:
from PIL import Image
import matplotlib.pyplot as plt
from keras.preprocessing.image import load_img,img_to_array

# For data cleaning
import sys
sys.path.append("../input/cleanlab/")
import cleanlab

# For data preparation:
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# For data models:
import tensorflow as tf
import keras
from tensorflow.keras import backend, models
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

# For model performance review:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [None]:
# Locate the competition data and load the label metadata.
directory = '../input/cassava-leaf-disease-classification/'
traindir = '/kaggle/input/cassava-leaf-disease-classification/train_images'
testdir = '/kaggle/input/cassava-leaf-disease-classification/test_images'

# JSON object keys are strings; convert them to ints so the mapping can be
# applied to the `label` column below.
with open(directory + "label_num_to_disease_map.json") as file:
    map_classes = {
        int(number): disease for number, disease in json.load(file).items()
    }

# One row per training image, plus the readable disease name of its label.
image_map = pd.read_csv(directory + 'train.csv')
image_map['names'] = image_map['label'].map(map_classes)

# Snapshot of the labels as loaded, for use in the final CV tests.
image_copy = image_map.copy()

In [None]:
# Image geometry: square RGB inputs of side `img_size`.
img_size = 224
size = (img_size,) * 2
shape = size + (3,)

# Model parameters: number of output classes and training batch size.
classes, batch_size = 5, 32

In [None]:
#Functions for the Notebook:

def image_print(images):
    """Show up to the first four training images side by side.

    Each image is loaded from `traindir` and titled with the disease name
    recorded for it in `image_map` (empty title if the id is not found).

    Args:
        images: iterable of image file names (values of `image_id`).
    """
    plt.rcParams['figure.figsize'] = (25, 15)
    for position, image in enumerate(images):
        if position == 4:
            break
        plt.subplot(1, 4, position + 1)
        # Read the name from the frame itself; slicing the printed form of
        # a Series breaks as soon as the index width changes.
        matches = image_map.loc[image_map['image_id'] == image, 'names']
        title = matches.iloc[0] if len(matches) else ''
        plt.title(title)
        plt.imshow(load_img(traindir + '/' + image))
        plt.axis('off')
            
def visualize_noisy(ids, labels, guesses, target_class):
    """Plot up to four images whose guessed class equals `target_class`.

    Each image is titled (in red) with its given label name and file name;
    the figure title (in blue) shows the guessed class.

    Args:
        ids: array of image file names.
        labels: array of given integer labels, aligned with `ids`.
        guesses: array of guessed integer labels, aligned with `ids`.
        target_class: integer class to select on `guesses`.
    """
    selected = guesses == target_class
    c_ids, c_labels = ids[selected], labels[selected]

    fig, axes = plt.subplots(1, 4, figsize=(15, 5))
    # Every selected image shares the same guess, so a single figure-level
    # title is enough.
    fig.suptitle(f'Guess: {map_classes[target_class]}',
                 color='b', fontsize=20, fontweight='bold')
    for ax in axes:
        ax.axis('off')  # also blanks panels left unused by short selections

    # zip stops after four panels or when the selection runs out.
    for ax, image_id, given_label in zip(axes, c_ids, c_labels):
        ax.imshow(load_img(traindir + '/' + image_id))
        ax.set_title(f'{map_classes[given_label]}\n{image_id}', color='r')

    # Keep the top of the figure free for the figure title.
    fig.tight_layout(rect=(0, 0, 1, 0.9))


# Exploratory Data Analysis on Cassava

The Cassava dataset consists of mostly the Cassava Mosaic Disease (CMD) and a few Cassava Bacterial Blight (CBB). This is a very unbalanced dataset. 

In [None]:
# Number of images per disease name, as a one-column ('count') frame.
disease_count = image_map.groupby('names')[['label']].count()
disease_count = disease_count.rename(columns={'label': 'count'})

# Horizontal bar chart of the class frequencies.
fig, axes = plt.subplots(figsize=(7, 5), dpi=100)
axes.barh(disease_count.index, width=disease_count['count'])
axes.set_title('Frequency of each leaf disease in images')
axes.set_xlabel('Frequency');

In [None]:
# Report the size of class 0 and show a few of its images.
label = 0
total = disease_count['count'].sum()
count = disease_count.iloc[label]['count']
share = (count*100/total).round(2)

print(f"The {disease_count.index[label]} classification has {count}. {share}% of the images. Here are a few images of the CBB diseased Cassava:")

images = image_map.loc[image_map['label'] == label, 'image_id']

image_print(images)

In [None]:
# Report the size of class 1 and show a few of its images.
label = 1
class_name = map_classes[label]
# Look the count up by disease name so the row cannot drift away from the
# label if the alphabetical order of names ever differs from label order.
count = disease_count.loc[class_name, 'count']
total = disease_count['count'].sum()

print(f"The {class_name} classification has {count} images, {count * 100 / total:.2f}% of the images. Here are a few images labelled {class_name}:")

images = image_map.loc[image_map['label'] == label, 'image_id']

image_print(images)

In [None]:
# Report the size of class 2 and show a few of its images.
label = 2
class_name = map_classes[label]
# Look the count up by disease name so the row cannot drift away from the
# label if the alphabetical order of names ever differs from label order.
count = disease_count.loc[class_name, 'count']
total = disease_count['count'].sum()

print(f"The {class_name} classification has {count} images, {count * 100 / total:.2f}% of the images. Here are a few images labelled {class_name}:")

images = image_map.loc[image_map['label'] == label, 'image_id']

image_print(images)

In [None]:
# Report the size of class 3 and show a few of its images.
label = 3
class_name = map_classes[label]
# Look the count up by disease name so the row cannot drift away from the
# label if the alphabetical order of names ever differs from label order.
count = disease_count.loc[class_name, 'count']
total = disease_count['count'].sum()

print(f"The {class_name} classification has {count} images, {count * 100 / total:.2f}% of the images. Here are a few images labelled {class_name}:")

images = image_map.loc[image_map['label'] == label, 'image_id']

image_print(images)

In [None]:
# Report the size of class 4 and show a few of its images.
label = 4
class_name = map_classes[label]
# Look the count up by disease name so the row cannot drift away from the
# label if the alphabetical order of names ever differs from label order.
count = disease_count.loc[class_name, 'count']
total = disease_count['count'].sum()

print(f"The {class_name} classification has {count} images, {count * 100 / total:.2f}% of the images. Here are a few images labelled {class_name}:")

images = image_map.loc[image_map['label'] == label, 'image_id']

image_print(images)

There do appear to be some images that do not go with the label. For example only one of the healthy images shown looks healthy. 

# Data Cleaning

How do we deal with noisy labels?  One notebook that I read started this process by doing an image comparison and finding that there are duplicates in the dataset. [Duplicate images](https://www.kaggle.com/nakajima/duplicate-train-images) were identified through image hashing and comparison of hash codes. That notebook identifies 2 sets of duplicate images:

In [None]:
# File names of the duplicate training images to display.
images = [
    '1562043567.jpg',
    '3551135685.jpg',
    '2252529694.jpg',
    '911861181.jpg',
]
image_print(images)

That is pretty impressive. The 2 duplicate Healthy images are obviously slightly different if you look at the edges. Possibly the image is just slightly cropped, but other nuances lead me to believe that these are likely back-to-back photos taken in both cases. The pair labeled Healthy and CBB furthers my question about the integrity of the data labels. 

Further reading on noisy labels led me to [CleanLab and specifically this notebook](https://www.kaggle.com/telljoy/noisy-label-eda-with-cleanlab). I created my own code from this notebook to identify noisy labels based on my model's feature sets, with these results. As usual, machine learning is iterative, so the model I use is one I trained before cleaning out the noisy labels. I uploaded it here so you can see the results.  

In [None]:
class FBETA(tf.keras.metrics.Metric):
    """Streaming F-beta score accumulated over batches.

    Predictions are clipped to [0, 1] and rounded to {0, 1}. True
    positives, false positives and false negatives are summed over every
    element of every batch, and the score is computed from those running
    totals.

    Args:
        beta: weight of recall relative to precision (1 gives F1).
        name: metric name reported by Keras.
        **kwargs: forwarded to `tf.keras.metrics.Metric`.
    """

    def __init__(self, beta=1, name="Fbeta", **kwargs):
        super().__init__(name=name, **kwargs)
        self.beta = beta

        # Running totals across all batches seen since the last reset.
        self.true_poss = self.add_weight(name="ctp", initializer='zeros')
        self.false_neg = self.add_weight(name="cfn", initializer='zeros')
        self.false_poss = self.add_weight(name="cfp", initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running totals.

        `sample_weight` is accepted for API compatibility and is ignored.
        """
        y_pred_bin = backend.round(backend.clip(y_pred, 0, 1))

        tp = backend.sum(backend.round(y_true * y_pred_bin))
        fp = backend.sum(backend.round(backend.clip(y_pred_bin - y_true, 0, 1)))
        # Count misses against the binarised prediction, like tp and fp, so
        # the three totals are mutually consistent.
        fn = backend.sum(backend.round(backend.clip(y_true - y_pred_bin, 0, 1)))

        self.true_poss.assign_add(tp)
        self.false_poss.assign_add(fp)
        self.false_neg.assign_add(fn)

    def result(self):
        """Return F-beta = (1 + b^2) * P * R / (b^2 * P + R)."""
        beta_sq = self.beta ** 2
        # Epsilon guards the divisions only; it is not accumulated into the
        # counts.
        eps = backend.epsilon()

        precision = self.true_poss / (self.true_poss + self.false_poss + eps)
        recall = self.true_poss / (self.true_poss + self.false_neg + eps)

        return ((1 + beta_sq) * precision * recall) / (beta_sq * precision + recall + eps)

    def get_config(self):
        """Include `beta` so a saved metric is restored with the same weight."""
        config = super().get_config()
        config['beta'] = self.beta
        return config

In [None]:
# Generator that only applies the EfficientNet input preprocessing.
datagen_general = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Unshuffled pass over every row of image_map, in batches of 64.
data_set = datagen_general.flow_from_dataframe(
    dataframe=image_map,
    directory=traindir,
    x_col='image_id',
    y_col='names',
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',
    batch_size=64,
    shuffle=False,
    seed=42,
)

In [None]:
# Load the previously trained model; FBETA is needed to deserialize it.
model1 = keras.models.load_model(
    '../input/models/Cassava_rev14.h5',
    custom_objects={'FBETA': FBETA()},
)

# Class probabilities for every image yielded by data_set.
preds = model1.predict(data_set, verbose=1)

# Pair each probability vector with an image id.
# NOTE(review): assumes data_set yields the rows of image_map in order.
pred_dict = dict(image_id=image_map['image_id'], labels=preds.tolist())
predictions = pd.DataFrame(pred_dict)

In [None]:
# Sort predictions and labels by image id so the two line up row for row.
pred_df = predictions.sort_values('image_id')
label_df = image_map.drop(columns='names').sort_values('image_id')

ids = label_df['image_id'].values
labels = label_df['label'].values
# Stack the per-image probability lists into one 2-D array.
preds = np.array(pred_df['labels'].tolist())

print(f'total {len(ids)} images')
print(f'prediction shape: {preds.shape}, label shape: {labels.shape}')

(Note: This code is taken from [cleanlab](https://github.com/cgnorthcutt/cleanlab) tutorial.)

In [None]:
# Confident learning: estimate which given labels are likely wrong, using
# the given labels (`labels`) and the predicted probabilities (`preds`).
# Produces `label_errors_bool`, `label_errors_idx` and
# `noise_masks_per_class`.

# STEP 1 - Compute confident joint

# Short aliases: s = given labels, psx = predicted probabilities
s = labels
psx = preds

# Number of classes, taken from the distinct given labels
K = len(np.unique(s))

# Estimate the probability thresholds for confident counting.
# Threshold k is the mean predicted probability of class k over the
# examples whose given label is k. They could instead be tuned on a
# validation set.
thresholds = [np.mean(psx[:,k][s == k]) for k in range(K)] # P(s^=k|s=k)
thresholds = np.asarray(thresholds)

# Compute confident joint: rows are given labels, columns are the class
# each example is confidently counted as.
confident_joint = np.zeros((K, K), dtype = int)
for i, row in enumerate(psx):
    s_label = s[i]
    # Classes whose threshold this example reaches (with a small tolerance)
    confident_bins = row >= thresholds - 1e-6
    num_confident_bins = sum(confident_bins)
    # Exactly one confident class: count it. Several: count the class with
    # the highest probability. None: the example is not counted.
    if num_confident_bins == 1:
        confident_joint[s_label][np.argmax(confident_bins)] += 1
    elif num_confident_bins > 1:
        confident_joint[s_label][np.argmax(row)] += 1

# Calibrate the confident joint against the given labels using cleanlab
confident_joint = cleanlab.latent_estimation.calibrate_confident_joint(
    confident_joint, s)

cleanlab.util.print_joint_matrix(confident_joint)

# STEP 2 - Find label errors

# We arbitrarily choose at least 5 examples left in every class.
# Regardless of whether some of them might be label errors.
MIN_NUM_PER_CLASS = 5
# Leave at least MIN_NUM_PER_CLASS examples per class.
# NOTE prune_count_matrix is transposed (relative to confident_joint)
prune_count_matrix = cleanlab.pruning.keep_at_least_n_per_class(
    prune_count_matrix=confident_joint.T,
    n=MIN_NUM_PER_CLASS,
)

# Number of examples per given label
s_counts = np.bincount(s)
noise_masks_per_class = []
# For each row in the transposed confident joint, build a boolean mask of
# the examples to flag for class k
for k in range(K):
    noise_mask = np.zeros(len(psx), dtype=bool)
    psx_k = psx[:, k]
    if s_counts[k] > MIN_NUM_PER_CLASS:  # Don't prune if not MIN_NUM_PER_CLASS
        for j in range(K):  # noisy label index (k is the true label index)
            if k != j:  # Only prune for noise rates, not diagonal entries
                num2prune = prune_count_matrix[k][j]
                if num2prune > 0:
                    # num2prune'th largest p(classk) - p(class j)
                    # for x with noisy label j
                    margin = psx_k - psx[:, j]
                    s_filter = s == j
                    threshold = -np.partition(
                        -margin[s_filter], num2prune - 1
                    )[num2prune - 1]
                    # Flag examples labelled j whose margin reaches that value
                    noise_mask = noise_mask | (s_filter & (margin >= threshold))
        noise_masks_per_class.append(noise_mask)
    else:
        noise_masks_per_class.append(np.zeros(len(s), dtype=bool))

# Boolean label error mask: flagged by the mask of any class
label_errors_bool = np.stack(noise_masks_per_class).any(axis=0)

# Remove label errors if given label == model prediction
for i, pred_label in enumerate(psx.argmax(axis=1)):
    # np.all lets this work for multi_label and single label
    if label_errors_bool[i] and np.all(pred_label == s[i]):
        label_errors_bool[i] = False

# Convert boolean mask to an ordered list of indices for label errors
label_errors_idx = np.arange(len(s))[label_errors_bool]
# self confidence is the holdout probability that an example
# belongs to its given class label
self_confidence = np.array(
    [np.mean(psx[i][s[i]]) for i in label_errors_idx]
)
# Order the errors by (given-label probability - highest probability),
# ascending, so the largest gaps come first
margin = self_confidence - psx[label_errors_bool].max(axis=1)
label_errors_idx = label_errors_idx[np.argsort(margin)]


#Reference: @misc{northcutt2019confidentlearning, title={Confident Learning: 
#Estimating Uncertainty in Dataset Labels}, author={Curtis G. Northcutt and Lu 
#Jiang and Isaac L. Chuang}, year={2019}, eprint={1911.00068}, 
#archivePrefix={arXiv}, primaryClass={stat.ML} }


In [None]:
total_idx = np.arange(len(ids))
# Indices not flagged as label errors, in ascending order. setdiff1d is
# vectorised and always returns an integer index array, even when empty.
clean_idx = np.setdiff1d(total_idx, label_errors_idx)

# For each image, the first class whose noise mask flagged it (0 if none);
# images that were not flagged simply keep their given label.
guesses = np.stack(noise_masks_per_class).argmax(axis=0)
guesses[clean_idx] = labels[clean_idx]

clean_ids = ids[clean_idx]
clean_labels = labels[clean_idx]
clean_guesses = guesses[clean_idx]

noisy_ids = ids[label_errors_idx]
noisy_labels = labels[label_errors_idx]
noisy_guesses = guesses[label_errors_idx]

print(f'[clean ratio] \t {len(clean_idx) / len(total_idx) * 100:.2f}%')
print(f'[noise ratio] \t {len(noisy_ids) / len(total_idx) * 100:.2f}%')

In [None]:
# One figure of clean examples per class.
print(f"The {len(clean_idx)} clean classifications consisted:")
for target in range(5):
    visualize_noisy(clean_ids, clean_labels, clean_guesses, target_class=target)

In [None]:
# One figure of noisy examples per guessed class.
print(f"The {len(noisy_ids)} noisy -- lower prediction probability classifications consisted:")
for target in range(5):
    visualize_noisy(noisy_ids, noisy_labels, noisy_guesses, target_class=target)

Using the noisy label data, I was able to do 2 options: 1) I can remove the noisy labels from the dataset or 2) replace the noisy labels with the new inferences. I evaluated both options and found my best performing model after training to be the one where noisy labels are replaced. This makes a lot of sense given that throwing away data means throwing away the potential for this model to learn. Looking at the noisy labels the new predictions appear correct to my non-expert botanist's eyes.

In [None]:
# Replace the disease name of every noisy image with the inferred class.
image_copy = image_map.copy()

# image_id -> inferred disease name for the images flagged as label errors.
inferred_names = {
    image_id: map_classes[guess]
    for image_id, guess in zip(noisy_ids, noisy_guesses)
}

# Update all flagged rows in one vectorised assignment.
# NOTE(review): only `names` is rewritten; the numeric `label` column keeps
# its original value, as before. The generators below read `names`.
is_noisy = image_map['image_id'].isin(list(inferred_names))
image_map.loc[is_noisy, 'names'] = image_map.loc[is_noisy, 'image_id'].map(inferred_names)

After replacement of the noisy labels I want to re-evaluate the balance of the input labels and find that it is still an unbalanced problem.

In [None]:
# Number of images per disease name, as a one-column ('count') frame.
disease_count = image_map.groupby('names')[['label']].count()
disease_count = disease_count.rename(columns={'label': 'count'})

# Horizontal bar chart of the class frequencies.
fig, axes = plt.subplots(figsize=(7, 5), dpi=100)
axes.barh(disease_count.index, width=disease_count['count'])
axes.set_title('Frequency of each leaf disease in images')
axes.set_xlabel('Frequency');

In [None]:
# Print the size and percentage share of each of the five classes, taking
# rows of disease_count by position.
total = disease_count['count'].sum()
for label in range(5):
    count = disease_count.iloc[label]['count']
    print(f"The {disease_count.index[label]} classification has {count}. {(count*100/total).round(2)}% of the images.")

Most of the change is in the healthy group which has gained ~2% while all but CBB dropped slightly.

# Data Preparation

For the models, I will use train test split to create a training and validation set. Early in the competition I had a Test split as well that I used for finding the optimal model parameters but at the end I used all of the data for training/validation and used the LB score to guide my final model tweaks. 

In [None]:
# Hold out 5% of the rows for validation, stratified on the disease name.
train, val = train_test_split(
    image_map,
    test_size=0.05,
    stratify=image_map['names'],
    random_state=42,
)

I used ImageDataGenerator for both the augmentation and image processing. Remembering to set the val set to shuffle false so I can use it for predictions after the model is fit. A significant amount of this code was obtained from this [notebook](https://www.kaggle.com/marto24/keras-model-89-tta) which continues to evolve and change names since my first reading at version 6, so I hope this link still works.  

In [None]:
# Plain generator: EfficientNet preprocessing only, no augmentation.
datagen_general = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Augmenting generator: the same preprocessing plus random geometric and
# colour transforms; pixels exposed by a transform are filled with 175.
datagen_train = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
    rotation_range=90,
    width_shift_range=[0, 0.2],
    height_shift_range=[0, 0.2],
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=25,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='constant',
    cval=175,
)

# Training batches: augmented and shuffled.
train_set = datagen_train.flow_from_dataframe(
    dataframe=train,
    directory=traindir,
    x_col='image_id',
    y_col='names',
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

# Validation batches: unaugmented and in frame order.
val_set = datagen_general.flow_from_dataframe(
    dataframe=val,
    directory=traindir,
    x_col='image_id',
    y_col='names',
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',
    batch_size=batch_size,
    shuffle=False,
    seed=42,
)

I am going to use the class_weights option during modeling in this first revision, but have not had much success with it due to the dominance of the CMD class in the hidden test set and training data. Here are the weights: (Note: I used this for version 2 but removed it for version 3 due to poor training shown in the loss-per-epoch chart. This was consistent in the competition as well.)

In [None]:
total = disease_count['count'].sum()

# Inverse-frequency weight for each class: total / class count.
# `.iloc` selects by position explicitly; the index of disease_count holds
# disease names, so a bare integer key would rely on pandas' deprecated
# positional fallback.
wt_0, wt_1, wt_2, wt_3, wt_4 = (
    (1 / disease_count['count'].iloc[position]) * total
    for position in range(5)
)

class_weight = {0: wt_0, 1: wt_1, 2: wt_2, 3: wt_3, 4: wt_4}
class_weight

# Modeling using EfficientNetB4

I show the model architecture below. 


In [None]:


########################################  MODELING PART #############################################

#%% MODEL CREATION
# Start from a clean Keras session.
backend.clear_session()

# ImageNet-pretrained EfficientNetB4 backbone (no top) followed by a small
# classification head ending in a softmax over `classes` outputs.
model = Sequential([
    EfficientNetB4(
        input_shape=shape,
        include_top=False,
        weights='imagenet',
        drop_connect_rate=0.5,
    ),
    GlobalAveragePooling2D(),
    Flatten(),
    Dense(
        256,
        activation='relu',
        bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001),
    ),
    BatchNormalization(),
    Dense(classes, activation='softmax'),
])

leaf_model = model
leaf_model.summary()

from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, GlobalAveragePooling2D

In [None]:
# Draw a diagram of the assembled model's layers.
keras.utils.plot_model(leaf_model)

In [None]:
# Whole batches per epoch; a trailing partial batch is not counted.
step_size_train = train_set.n // train_set.batch_size
step_size_test = val_set.n // val_set.batch_size

#Compile
# NOTE: this list is not passed to compile(); the loss is given by name.
loss = [
    tf.keras.losses.CategoricalCrossentropy(name='categorical_crossentropy'),
]
metrics = [
    tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy"),
    FBETA(beta=2),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Keep the weights with the lowest validation loss on disk.
checkpoint_cb = ModelCheckpoint(
    'CassavaModelloss_Rev0.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)
# Stop after 7 epochs without better validation accuracy and restore the
# best weights.
es = EarlyStopping(
    monitor='val_categorical_accuracy',
    patience=7,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.6 after 2 epochs without lower val_loss.
plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=2, verbose=1)

leaf_model.compile(
    optimizer=Adam(learning_rate=.001),
    loss='categorical_crossentropy',
    metrics=metrics,
)

#fit model
history = leaf_model.fit(
    train_set,
    validation_data=val_set,
    epochs=50,
    steps_per_epoch=step_size_train,
    validation_steps=step_size_test,
    #class_weight = class_weight,
    callbacks=[es, plateau, checkpoint_cb],
    verbose=1,
)
leaf_model.save('CassavaModel_Rev0.h5')

# Model Performance

After a maximum of 50 epochs, with early stopping if the model starts to plateau, I review how the model learned. Ideally, I would like to see a steady increase in accuracy for both the training and validation steps. Based on the LB being stuck at 91% or less, an accuracy of about 90% is expected. The loss should show a steady drop, with both training and validation tracking together to a plateau at the minimum loss. 

In [None]:
# Plot accuracy, then loss, against the epoch number. Each entry is
# (train key, validation key, title, y label, optional y limits).
for train_key, val_key, plot_title, y_label, y_limits in (
    ('categorical_accuracy', 'val_categorical_accuracy', 'Model accuracy', 'Accuracy', None),
    ('loss', 'val_loss', 'Model loss', 'Loss', [-1, 3]),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    if y_limits is not None:
        plt.ylim(y_limits)
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

# Evaluate on the validation set: scores[0] is the loss and scores[1] the
# first compiled metric.
scores = leaf_model.evaluate(val_set, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

I like to review my models against a confusion matrix so that I can see where the model is not performing at its best. In these models CBB and Healthy are the consistent problem classifications. This is the entire original dataset.

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Ground truth names and ids come from image_copy.
true_label = image_copy['names']
true_image = image_copy['image_id']

val_images = os.listdir(traindir + '/')

# Predicted class = index of the highest probability for each image.
pred = leaf_model.predict(data_set)
pred_classes = pred.argmax(axis=1)

# Encode the true disease names as integers.
code = LabelEncoder()
label = code.fit_transform(true_label)
#Code adapted from: https://scikit-learn.org/0.18/auto_examples/model_selection/plot_confusion_matrix.html
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.cool):
    """Print a status line and draw the confusion matrix as a heat-map.

    Args:
        cm: square 2-D array of counts; rows are drawn on the 'True label'
            axis and columns on the 'Predicted label' axis.
        classes: sequence of tick labels, one per row/column.
        normalize: if True, divide each row by its total (rounded to two
            decimals) before drawing.
        title: title of the plot.
        cmap: matplotlib colormap used for the cells.
    """
    if normalize:
        # Normalise before drawing so the colours, the cell text and the
        # text-colour threshold all refer to the same values. Rows with no
        # samples stay at 0 instead of dividing by zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        cm = np.round(cm, 2)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    #print(cm)

    # White text on cells above half of the maximum, black text otherwise.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#confusion matrix
conf_mat = confusion_matrix(label, pred_classes)

plt.figure()
# Use the encoder's class names as tick labels: they are in the same order
# as the integer codes in `label`. Passing the map_classes dict would show
# its integer keys instead.
plot_confusion_matrix(conf_mat, classes=code.classes_, normalize=True)

In [None]:
# Per-class precision, recall and F1 of the predictions against the
# encoded true labels.
print("The accuracy of the model is fully reviewed using the sklearn metrics classification report.")
print(classification_report(label,pred_classes))

# Model Inference on Testset

This is one area that I learned a significant amount during this code competition. I had never thought about creating an ensemble of multiple models or using data augmentation to fix the noise on the test dataset. So here I show the code of an ensemble (with models that I have previously uploaded) and multi-iteration augmentation of the images for testing. Since I am  not submitting this notebook, I will use my validation set for presentation purposes. 

In [None]:
# Saved models that make up the ensemble, in model1..model5 order.
model_paths = (
    '../input/models/Cassava_rev15.h5',
    '../input/models/Cassava_rev18.h5',
    '../input/models/Cassava_rev19loss.h5',
    '../input/models/Cassava_rev22.h5',
    '../input/models/Cassava_rev14.h5',
)

# Each load gets its own FBETA instance, needed to deserialize the metric.
model1, model2, model3, model4, model5 = (
    keras.models.load_model(path, custom_objects={'FBETA': FBETA()})
    for path in model_paths
)

In [None]:
# Unshuffled, augmented stream over image_copy, one image per batch, used
# for test-time augmentation.
val_set_aug = datagen_train.flow_from_dataframe(
    dataframe=image_copy,
    directory=traindir,
    x_col='image_id',
    y_col='names',
    class_mode='categorical',
    target_size=size,
    interpolation='nearest',
    batch_size=1,
    shuffle=False,
)

In [None]:
# Ensemble members and number of test-time-augmentation passes.
ensemble = (model1, model2, model3, model4, model5)
tta_rounds = 5

# One un-augmented prediction per model ...
preds = [member.predict(data_set, verbose=1) for member in ensemble]

# ... plus `tta_rounds` augmented predictions per model.
for tta_round in range(1, tta_rounds + 1):
    for member in ensemble:
        preds.append(member.predict(val_set_aug, verbose=1))
    print(f"This is run{tta_round} through augmentation.")

# Average the class probabilities over every prediction, then take the
# most probable class per image.
p_ave = np.mean(preds, axis=0)
pred_classes = np.argmax(p_ave, axis=1)


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Ground truth names and ids come from image_copy.
true_label = image_copy['names']
true_image = image_copy['image_id']
val_images = os.listdir(traindir + '/')

# Encode the true disease names as integers.
code = LabelEncoder()
label = code.fit_transform(true_label)

#Code adapted from: https://scikit-learn.org/0.18/auto_examples/model_selection/plot_confusion_matrix.html
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.cool):
    """Print and draw the confusion matrix as a heat-map.

    Args:
        cm: square 2-D array of counts; rows are drawn on the 'True label'
            axis and columns on the 'Predicted label' axis.
        classes: sequence of tick labels, one per row/column.
        normalize: if True, divide each row by its total (rounded to two
            decimals) before printing and drawing.
        title: title of the plot.
        cmap: matplotlib colormap used for the cells.
    """
    if normalize:
        # Normalise before drawing so the colours, the cell text and the
        # text-colour threshold all refer to the same values. Rows with no
        # samples stay at 0 instead of dividing by zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        cm = np.round(cm, 2)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    print(cm)

    # White text on cells above half of the maximum, black text otherwise.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#confusion matrix
conf_mat = confusion_matrix(label, pred_classes)

plt.figure()
# Use the encoder's class names as tick labels: they are in the same order
# as the integer codes in `label`. Passing the map_classes dict would show
# its integer keys instead.
plot_confusion_matrix(conf_mat, classes=code.classes_, normalize=True)

In [None]:
# Per-class precision, recall and F1 of the ensemble predictions against
# the encoded true labels.
print("The accuracy of the model is fully reviewed using the sklearn metrics classification report.")
print(classification_report(label,pred_classes))

# Conclusion

Overall, my models were able to achieve an 88.6% on the 30% hidden test set. Eagerly awaiting the countdown to see the Private test set board open up.