# EfficientNet+Augmentation+Loss Function for Noisy Labels for Cassava Disease Classification using TF/Keras

This notebook is copied from Francois Lemarchand's EfficientNet notebook.

This notebook presents a full pipeline to load the data, apply advanced data augmentation, train an EfficientNet and use the model to predict over the test images. To make it possible to run within the allocated time for notebooks, this notebook will only present a single fold with a split of 80% for training and 20% for validation.

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split, KFold
import random
import cv2
from imgaug import augmenters as iaa
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, BatchNormalization,Lambda, GlobalAveragePooling2D, Concatenate, Lambda
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.experimental import CosineDecay
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import EfficientNetB5, EfficientNetB3, EfficientNetB4
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.layers.experimental.preprocessing import RandomCrop,CenterCrop, RandomRotation
from tensorflow.keras.preprocessing.image import load_img
from keras.preprocessing import image


In [None]:
# Directory containing the training JPEGs; the image ids read from train.csv
# are appended to this prefix to build full file paths.
training_folder = '../input/cassava-leaf-disease-classification/train_images/'

In [None]:
# Open a single training image with PIL and display it as a quick sanity check.
img = Image.open("../input/cassava-leaf-disease-classification/train_images/1277648239.jpg")
plt.imshow(img)
plt.show()

# Prepare the training and validation data generators

In [None]:
# Read the label table and shuffle its rows with a fixed seed so the later
# train/validation split is reproducible.
samples_df = shuffle(
    pd.read_csv("../input/cassava-leaf-disease-classification/train.csv"),
    random_state=42,
)
# Full path of every image: folder prefix + image id.
samples_df["filepath"] = training_folder + samples_df["image_id"]
# Preview the first ten rows.
samples_df.head(10)

In [None]:
# Print column dtypes and non-null counts of the sample table.
samples_df.info()

As it is a multi-class classification problem (5 classes), we will one-hot encode the target variable.

In [None]:
# One-hot encode the integer class labels (one row per sample, one column per class).
y = to_categorical(samples_df['label'].values)

In [None]:
# Number of images per batch in every tf.data pipeline below.
batch_size = 8
# Side length (pixels) of the square crops fed to the network.
image_size = 512
# Input shape handed to the EfficientNet backbone: height, width, RGB channels.
input_shape = (image_size, image_size, 3)
# Dropout applied between the pooled features and the classification layer.
dropout_rate = 0.4
# Sorted list of the distinct label values; its length sizes the output layer.
classes_to_predict = sorted(samples_df.label.unique())

In [None]:
# Hold out 20% of the samples (rows and their one-hot labels) for validation;
# the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(samples_df, y, random_state=42, test_size=0.2)

In [None]:
# Datasets of (file path, one-hot label) pairs; the images themselves are
# loaded later by mapping a decoding function over these datasets.
training_data = tf.data.Dataset.from_tensor_slices((X_train.filepath.values, y_train))
validation_data = tf.data.Dataset.from_tensor_slices((X_test.filepath.values, y_test))

In [None]:
def load_image_and_label_from_path(image_path, label):
    """Read the JPEG at `image_path` and decode it into a 3-channel image tensor.

    The label is returned untouched so the function can be mapped over a
    dataset of (path, label) pairs.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return decoded, label


AUTOTUNE = tf.data.experimental.AUTOTUNE

# Replace every file path by its decoded image, letting tf.data pick the
# degree of parallelism.
training_data = training_data.map(
    load_image_and_label_from_path, num_parallel_calls=AUTOTUNE
)
validation_data = validation_data.map(
    load_image_and_label_from_path, num_parallel_calls=AUTOTUNE
)

In [None]:
## Converting into tensorflow data batches
# Training samples go through a 1000-element shuffle buffer so every epoch
# sees the images in a different order.
training_data_batches = training_data.shuffle(buffer_size=1000).batch(batch_size).prefetch(buffer_size=AUTOTUNE)
# Validation loss/accuracy do not depend on sample order, so the validation
# set is batched directly: this avoids holding a second 1000-image buffer of
# decoded images in memory for no benefit.
validation_data_batches = validation_data.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

I also prepare a special dataset that will be fed to the Normalization layer. The EfficientnetB4 provided by tf.keras includes an out-of-the-box Normalization layer fit onto the imagenet dataset. Therefore, we can pull that layer and use the adapt function to refit it to the Cassava Disease dataset.

In [None]:
def adapt_mode(image_path):
    """Load the JPEG at `image_path` and rescale its pixel values by 1/255."""
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return layers.experimental.preprocessing.Rescaling(1.0 / 255)(decoded)


# Label-free dataset of rescaled training images, used only to adapt the
# Normalization layer.
adapt_data = tf.data.Dataset.from_tensor_slices(X_train.filepath.values).map(
    adapt_mode, num_parallel_calls=AUTOTUNE
)
adapt_data_batches = (
    adapt_data
    .shuffle(buffer_size=1000)
    .batch(batch_size)
    .prefetch(buffer_size=AUTOTUNE)
)

# Applying Data Augmentation Layers

The data augmentation preprocessing layers below will be used when training the model but disabled in inference mode.








In [None]:
def brighten_img(x, max_delta=0.1):
    """Apply `tf.image.random_brightness` to `x` with the given `max_delta`."""
    return tf.image.random_brightness(x, max_delta)


def brighten(max_delta=0.1):
    """Wrap `brighten_img` in a Keras Lambda layer so it can sit in a Sequential."""
    return layers.Lambda(lambda batch: brighten_img(batch, max_delta))

In [None]:
def saturate_img(x, lower=0.6, upper=1.3):
    """Apply `tf.image.random_saturation` to `x` with a factor drawn from [lower, upper]."""
    return tf.image.random_saturation(x, lower, upper)


def saturate(lower=0.6, upper=1.3):
    """Wrap `saturate_img` in a Keras Lambda layer so it can sit in a Sequential."""
    return layers.Lambda(lambda batch: saturate_img(batch, lower, upper))

In [None]:
# Training-time augmentation pipeline. The crop comes first so that every
# following layer works on image_size x image_size inputs.
data_augmentation_layers = tf.keras.Sequential([
    RandomCrop(height=image_size, width=image_size),
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.25),
    layers.experimental.preprocessing.RandomZoom((-0.2, 0)),
    layers.experimental.preprocessing.RandomContrast((0.2, 0.2)),
    brighten(),
    saturate(),
])

### Visualizing what our augmentation will look like on images

Simply reusing some of the code from this tutorial to show what our augmentations look like. I add the image previously opened to a batch and pass it through the data augmentation layers.

In [None]:
# Open and display the image that will be used to preview the augmentations.
# NOTE(review): this rebinds the name `image`, which the import section also
# binds to `keras.preprocessing.image`.
image = Image.open("../input/cassava-leaf-disease-classification/train_images/1481899695.jpg")
plt.imshow(image)
plt.show()

In [None]:
# Convert the PIL image to an array and add a leading batch axis, since the
# augmentation layers are applied to batches.
image = tf.expand_dims(np.array(image), 0)

In [None]:
# Show 16 independently augmented versions of the sample image in a 4x4 grid.
plt.figure(figsize=(12, 12))
for i in range(16):
    # training=True makes it explicit that the random layers must stay active
    # when the pipeline is called outside of model.fit().
    augmented_image = data_augmentation_layers(image, training=True)
    ax = plt.subplot(4, 4, i + 1)
    # imshow interprets float RGB data as lying in [0, 1] and clips anything
    # above; the augmented pixels are on a 0-255 scale, so convert them to
    # uint8 before plotting (a no-op if the layers already returned uint8).
    plt.imshow(np.clip(np.asarray(augmented_image[0]), 0, 255).astype("uint8"))
    plt.axis("off")

# Build the model

I am using an EfficientNetB4 on top of which I add some output layers to predict our 5 disease classes. I decided to load the imagenet pretrained weights locally to keep the internet off (part of the requirements to submit a kernel to this competition).

In [None]:
def create_model():
    """Build the classifier: augmentation -> EfficientNetB4 -> pooling -> dropout -> softmax.

    The EfficientNetB4 backbone is created without its classification head
    and initialised from the local weights file. The augmentation pipeline
    is part of the model, so it runs during training and is disabled in
    inference mode.

    Returns:
        An uncompiled `tf.keras.Model` with one softmax output per entry of
        `classes_to_predict`.
    """
    backbone = EfficientNetB4(
        weights="../input/tfkeras-efficientnet-weights/efficientnetb4_notop.h5",
        include_top=False,
        input_shape=input_shape,
    )

    # Height and width are left unspecified on purpose: the tf.data pipeline
    # decodes the JPEGs without resizing them, and it is the RandomCrop at the
    # head of the augmentation pipeline that brings every image down to
    # image_size x image_size before it reaches the backbone. Declaring a
    # fixed 512x512 input here would contradict the data actually fed in.
    input_layer = Input(shape=(None, None, 3))
    augmented = data_augmentation_layers(input_layer)
    features = backbone(augmented)
    pooled = layers.GlobalAveragePooling2D()(features)
    regularised = layers.Dropout(dropout_rate)(pooled)
    outputs = Dense(len(classes_to_predict), activation="softmax")(regularised)

    return Model(inputs=input_layer, outputs=outputs)


model = create_model()  # define your model normally
model.summary()

The 3rd layer of the Efficientnet is the Normalization layer, which can be tuned to our new dataset instead of imagenet. Be patient on this one, it does take a bit of time as we're going through the entire training set.

In [None]:
%%time
# Re-fit the Normalization layer inside the EfficientNet backbone on the
# rescaled training images; this iterates over the whole adapt dataset.
model.get_layer('efficientnetb4').get_layer('normalization').adapt(adapt_data_batches)

# Implementation of Bi Tempered Logistic Loss Function
As there is noisy labelling in the data, I am going to use the bi-tempered logistic loss.



In [None]:
# Tempered logarithm

def log_t(u, t):
    """Compute the tempered logarithm ``log_t`` of `u`.

    Args:
        u: value (number or tensor) to take the tempered logarithm of.
        t: temperature. For ``t == 1.0`` this is the natural logarithm.

    Returns:
        ``log(u + 1e-7)`` when ``t == 1.0``, otherwise
        ``(u**(1 - t) - 1) / (1 - t)``.
    """
    if t == 1.0:
        # The small offset keeps the logarithm finite when u is exactly 0.
        epsilon = 1e-7
        return tf.math.log(u + epsilon)
    return (u**(1.0 - t) - 1.0) / (1.0 - t)
# Bi Tempered Logistic Loss
def bi_tempered_logistic_loss(y_pred, y_true, t1, label_smoothing=0.0):
    """Bi-tempered logistic loss evaluated on class probabilities.

    Args:
        y_pred: probability tensor whose last dimension is `num_classes`.
        y_true: tensor with the same shape as `y_pred`.
        t1: temperature passed to `log_t` (< 1.0 for boundedness).
        label_smoothing: a float in [0, 1]; when positive, the targets are
            smoothed before the loss is computed.

    Returns:
        The loss tensor, summed over the last (class) axis.
    """
    probabilities = tf.cast(y_pred, tf.float32)
    targets = tf.cast(y_true, tf.float32)

    if label_smoothing > 0.0:
        num_classes = tf.cast(tf.shape(targets)[-1], tf.float32)
        keep_weight = 1 - num_classes / (num_classes - 1) * label_smoothing
        targets = keep_weight * targets + label_smoothing / (num_classes - 1)

    # Tempered-log difference, weighted by the targets.
    log_term = (log_t(targets + 1e-7, t1) - log_t(probabilities, t1)) * targets
    # Power-term correction.
    power_term = (1 / (2 - t1)) * (
        tf.math.pow(targets, 2 - t1) - tf.math.pow(probabilities, 2 - t1)
    )

    return tf.math.reduce_sum(log_term - power_term, -1)

class BiTemperedLogisticLoss(tf.keras.losses.Loss):
    """Keras loss object that delegates to `bi_tempered_logistic_loss`."""

    def __init__(self, t1=0.8, label_smoothing=0.2):
        """Store the temperature and label-smoothing amount used in `call`."""
        super().__init__()
        self.label_smoothing = label_smoothing
        self.t1 = t1

    def call(self, y_true, y_pred):
        """Return the loss for one batch (note the swapped argument order of the helper)."""
        return bi_tempered_logistic_loss(
            y_pred,
            y_true,
            self.t1,
            self.label_smoothing,
        )

In [None]:
epochs = 8
# Length of the cosine schedule: (rounded) batches per epoch times epochs.
decay_steps = epochs * int(round(len(X_train) / batch_size))
cosine_decay = CosineDecay(
    initial_learning_rate=1e-5,
    decay_steps=decay_steps,
    alpha=0.3,
)
# Keep only the checkpoint with the lowest validation loss.
callbacks = [
    ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True),
]
loss = BiTemperedLogisticLoss()
model.compile(
    optimizer=tf.keras.optimizers.Adam(cosine_decay),
    loss=loss,
    metrics=["accuracy"],
)

In [None]:
# Train on the training batches and evaluate on the validation batches after
# each epoch; the returned History object is used for plotting below.
history = model.fit(
    training_data_batches,
    validation_data=validation_data_batches,
    epochs=epochs,
    callbacks=callbacks,
)

First, we will check that the model performs at a similar level on both the training and validation sets. The training curve will also tell us whether we stopped training too early or overfitted in comparison to the validation data.

In [None]:
# Plot the training and validation accuracy curves on the same axes.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name])
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='best')
plt.show()

We load the best weights that were kept from the training phase. Just to check how our model is performing, we will attempt predictions over the validation set. This can help to highlight any classes that will be consistently miscategorised.

In [None]:
# Restore the weights saved by the ModelCheckpoint callback (lowest val_loss).
model.load_weights("best_model.h5")

# Prediction on test images

In [None]:
def scan_over_image(img_path, crop_size=512):
    '''
    Extract the four corner crops of an image.

    The crops are crop_size x crop_size squares anchored at the top-left,
    top-right, bottom-left and bottom-right corners (in that order), so
    together they cover the whole image, with overlap wherever the image
    is smaller than twice the crop size.

    Args:
        img_path: path of the image file to open.
        crop_size: side length of the square crops, in pixels.

    Returns:
        Array of shape (4, crop_size, crop_size, 3) holding the RGB crops.

    Raises:
        ValueError: if the image is smaller than crop_size in either dimension.
    '''
    # The context manager closes the file handle once the pixels are in memory.
    with Image.open(img_path) as img:
        # PIL reports size as (width, height), whereas the pixel array below
        # is indexed (row, column), i.e. (height, width).
        img_width, img_height = img.size
        # Force three channels so grayscale or RGBA files can still be sliced
        # with a channel axis and match what the model expects.
        pixels = np.array(img.convert("RGB"))

    if img_height < crop_size or img_width < crop_size:
        raise ValueError(
            "image {} is {}x{} pixels, smaller than the requested crop size {}".format(
                img_path, img_width, img_height, crop_size))

    row_origins = [0, img_height - crop_size]
    col_origins = [0, img_width - crop_size]
    img_list = [
        pixels[row:row + crop_size, col:col + crop_size, :]
        for row in row_origins
        for col in col_origins
    ]

    return np.array(img_list)

In [None]:
def display_samples(img_path):
    '''
    Plot every crop returned by scan_over_image for the given image,
    each titled with its index in the returned list.
    '''
    crops = scan_over_image(img_path)
    crop_count = len(crops)
    figure = plt.figure(figsize=(8, crop_count))
    for position, crop in enumerate(crops, start=1):
        axis = figure.add_subplot(2, 4, position)
        axis.imshow(crop)
        axis.set_title(str(position - 1))
    plt.tight_layout()
    plt.show()


# Preview the crops of one training image.
display_samples("../input/cassava-leaf-disease-classification/train_images/3412658650.jpg")

Apply Test time augmentation on the local images extracted from the folder

In [None]:
# Lighter augmentation pipeline applied to each local crop at prediction time
# (no rotation, brightness or saturation changes).
test_time_augmentation_layers = tf.keras.Sequential([
    RandomCrop(height=image_size, width=image_size),
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    layers.experimental.preprocessing.RandomZoom((-0.2, 0)),
    layers.experimental.preprocessing.RandomContrast((0.2, 0.2)),
])


In [None]:
def predict_and_vote(image_filename, folder, TTA_runs=4):
    '''
    Run the model over the local areas of the given image, with
    test-time augmentation, and return the most predicted disease.

    Every crop returned by scan_over_image is augmented TTA_runs times;
    the class probabilities of all augmented copies of all crops are
    summed and the index of the largest total is returned.

    Args:
        image_filename: file name of the image inside `folder`.
        folder: folder path, concatenated directly with `image_filename`.
        TTA_runs: number of augmented copies per crop (must be >= 1).

    Returns:
        Index of the class with the highest summed probability.
    '''
    localised_predictions = []
    for local_image in scan_over_image(folder + image_filename):
        local_batch = tf.expand_dims(local_image, 0)
        # Stack all augmented copies into a single batch so that every one of
        # them is actually predicted (previously only the first copy was used
        # and TTA_runs had no effect). training=True keeps the random layers
        # active outside of fit().
        augmented_batch = np.concatenate(
            [
                np.asarray(test_time_augmentation_layers(local_batch, training=True))
                for _ in range(TTA_runs)
            ],
            axis=0,
        )
        predictions = model.predict(augmented_batch)
        # Sum the class probabilities over the augmented copies of this crop.
        localised_predictions.append(np.sum(predictions, axis=0))

    # Sum over all crops and pick the class with the highest total.
    global_predictions = np.sum(np.array(localised_predictions), axis=0)
    final_prediction = np.argmax(global_predictions)

    return final_prediction

In [None]:
def run_predictions_over_image_list(image_list, folder):
    """Return the predict_and_vote result for every file name in `image_list`.

    A tqdm progress bar is advanced once per image, before it is predicted.
    """
    results = []
    progress = tqdm(total=len(image_list))
    try:
        for filename in image_list:
            progress.update(1)
            results.append(predict_and_vote(filename, folder))
    finally:
        progress.close()
    return results

First, I test my entire prediction pipeline on the validation set as we have little visibility over the test set.

In [None]:
# Predict every validation image and store the predicted class next to its
# ground-truth label.
X_test["results"] = run_predictions_over_image_list(X_test["image_id"], training_folder)

In [None]:
# Print the competition's label-number-to-disease mapping (notebook shell command).
!cat ../input/cassava-leaf-disease-classification/label_num_to_disease_map.json

In [None]:
# Preview the first 30 validation rows with their predictions.
X_test[:30]

In [None]:
# Confusion counts keyed as [ground truth][prediction], with class ids as strings.
class_ids = [str(class_id) for class_id in range(5)]
prediction_distribution_per_class = {
    truth_id: {predicted_id: 0 for predicted_id in class_ids}
    for truth_id in class_ids
}

# Number of validation images whose prediction equals the label.
true_positives = 0
number_of_images = len(X_test)
for truth, predicted in zip(X_test["label"], X_test["results"]):
    if int(truth) == predicted:
        true_positives += 1
    prediction_distribution_per_class[str(truth)][str(predicted)] += 1
print("accuracy: {}%".format(true_positives/number_of_images*100))

In [None]:
# Display the raw confusion counts ([ground truth][prediction]).
prediction_distribution_per_class

In [None]:
# Long-format table with one row per (ground truth, prediction) pair. The
# value is the share of that ground-truth class's images that received that
# prediction, so each heatmap row sums to 1.
heatmap_rows = []
for key, predicted_counts in prediction_distribution_per_class.items():
    # Every validation image increments exactly one cell of its ground-truth
    # row, so the row total is the number of images of that class. (The dict
    # keys are strings while the label column is numeric, so filtering X_test
    # with the string key would match nothing.)
    class_total = sum(predicted_counts.values())
    for pred_key, count in predicted_counts.items():
        # A class absent from the validation set has no defined share.
        value = count / class_total if class_total else float("nan")
        heatmap_rows.append({"groundtruth": key, "prediction": pred_key, "value": value})

# Built in one go from a list of rows, with an ordered column list.
heatmap_df = pd.DataFrame(heatmap_rows, columns=["groundtruth", "prediction", "value"])

heatmap = heatmap_df.pivot(index='groundtruth', columns='prediction', values='value')
sns.heatmap(heatmap, cmap="Blues")

In [None]:
# Folder containing the images to predict for the submission.
test_folder = '../input/cassava-leaf-disease-classification/test_images/'
# One row per test image. The columns are created in a fixed order
# (image_id, then label) so the CSV layout is deterministic; the file names
# are sorted because os.listdir returns them in arbitrary order.
submission_df = pd.DataFrame({"image_id": sorted(os.listdir(test_folder))})
# Placeholder label, overwritten by the model predictions.
submission_df["label"] = 0

In [None]:
# Replace the placeholder labels with the model's prediction for each test image.
submission_df["label"] = run_predictions_over_image_list(submission_df["image_id"], test_folder)

In [None]:
# Display the final submission table.
submission_df

In [None]:
# Write the submission file without the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)