**In this project, I will build a model that classifies cassava leaf images into 5 classes (four diseases and healthy).<br>
The data comes from the `cassava-leaf-disease-classification` competition on Kaggle.<br>
I have pre-processed a sample of the data and saved it to Google Cloud Storage.**

In [None]:
import math, re, os
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from kaggle_datasets import KaggleDatasets
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix


#@title Enable Eager Execution and Print Versions
# Compare the numeric major version instead of the raw version strings:
# string comparison is lexicographic and would mis-order e.g. "10.0.0" vs "2.0.0".
if int(tf.__version__.split(".")[0]) < 2:
    tf.compat.v1.enable_eager_execution()
    print("Eager execution enabled.")
else:
    print("Eager execution enabled by default.")

print("TensorFlow " + tf.__version__)

# Sentinel that asks tf.data to tune a buffer size / parallelism level itself.
AUTO = tf.data.experimental.AUTOTUNE

## Check for TPU

In [None]:
# Pick a distribution strategy: TPU when one can be initialised, otherwise the
# default strategy. `tpu` is read later as an "is a TPU in use?" flag, so it
# must be None whenever the TPU strategy was not actually created.
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as e:
    # Best-effort detection: any failure (no TPU, or a failed connect/initialise
    # step) falls back to the default strategy. Reset the handle so a partially
    # initialised resolver is not mistaken for a working TPU further down.
    tpu = None
    print(f"No usable TPU ({e}); using the default strategy.")
    # Alternatives:
    #   tf.distribute.MirroredStrategy()  -- GPU or multi-GPU machines
    #   tf.distribute.experimental.MultiWorkerMirroredStrategy()  -- clusters of multi-GPU machines
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU

print("Number of accelerators: ", strategy.num_replicas_in_sync)

## Configuration

In [None]:
# Install gs-wrap (IPython shell escape) and create the client that is used
# below to copy the image folders from Google Cloud Storage.
!pip install --quiet gs-wrap
import time
import gswrap
# NOTE(review): 'vibrant-reach-282320' is presumably the GCP project id -- confirm,
# and consider reading it from configuration instead of hard-coding it.
client = gswrap.Client('vibrant-reach-282320')
print('gswrap ready for use!')

In [None]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from numpy import asarray
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tqdm import tqdm
import imageio
import keras
from keras.utils import np_utils
import json
import seaborn as sns
from google.cloud import storage
import shutil
from collections import Counter
import glob
from PIL import Image
import time
import tensorflow_hub as hub

print('All imported!')

## Downloading the Data

In [None]:
# Copy the pre-processed image folders from the bucket into the working
# directory and report how long the transfer took.
# NOTE(review): nothing in this cell creates TensorFlow variables, so the
# strategy scope looks unnecessary here -- confirm before removing.
with strategy.scope():
    st=time.time()
    print('Copying files...')
    # Recursive, multithreaded copy of gs://kaggle1980/Kaggle/images into ./
    client.cp(src="gs://kaggle1980/Kaggle/images",
              dst="./",
              recursive=True, multithreaded=True)
    ed=time.time()
    # IPython shell capture: count the .jpg files one directory level below images/
    tot_files = !ls images/*/*.jpg | wc -l
    print(f'{tot_files} files copied in {ed-st} seconds!')

## EDA

Let's understand the data...


In [None]:
# Load the competition's sample_submission.csv and preview its first rows.
sample_submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
sample_submission.head()

**Label Distribution**

In [None]:
# The competition ships `label_num_to_disease_map.json`, which maps each
# numeric label to the name of the cassava leaf condition it stands for.
labels_json = '../input/cassava-leaf-disease-classification/label_num_to_disease_map.json'

# Read the JSON mapping as a Series and expose it as a one-column frame named 'label'.
labels_dict_df = pd.read_json(labels_json, typ='series').to_frame(name='label')
labels_dict_df

**Class Distribution**

Let's see the class distribution of the original data.

In [None]:
# Load the original training table, print its shape and preview the first rows.
# Later cells read its `image_id` and `label` columns.
train_csv = '../input/cassava-leaf-disease-classification/train.csv'
train_df = pd.read_csv(train_csv)
print(f'shape is {train_df.shape}')
train_df.head()

In [None]:
# Number of training images per label in the original data.
count = Counter(train_df.label)

def plot_labels(count):
    """Bar-plot the number of images per label and annotate each bar with its share.

    Args:
        count: mapping ``label -> number of images`` (e.g. a ``Counter``).

    Returns:
        None. The figure is shown with ``plt.show()``; nothing is drawn when
        ``count`` is empty.
    """
    if not count:
        # An empty frame cannot be plotted; there is nothing to show.
        return

    # One-row frame with one column per label, sorted so the bars appear in
    # label order rather than in the order the labels were first seen.
    ordered_labels = sorted(count)
    count_df = pd.DataFrame({key: [count[key]] for key in ordered_labels})
    total = sum(count.values())

    # plot the dataframe
    sns.set_style('ticks')
    ax = count_df.plot(kind='bar', edgecolor='black', linewidth=1.2, align='edge', figsize=(8,5))
    plt.title('% Label Distribution of Cassava Leaves')
    plt.xlabel('Labels')
    plt.ylabel('Count')

    # Write each label's fraction of the total on top of its bar. Positions
    # come from the drawn bars, so any number of labels and any count
    # magnitude is handled (no hard-coded x positions or y offsets).
    for key, bar in zip(ordered_labels, ax.patches):
        share = round(count[key] / total, 2)
        ax.annotate(str(share),
                    (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                    ha='center', va='bottom', fontweight='bold')

    plt.show()

# Print the raw per-label counts, then draw them as a bar chart.
print('Label count distribution is\n', count)
plot_labels(count)

## Visualizing The Data

In [None]:
# Directory holding the original competition training images.
train_path = '../input/cassava-leaf-disease-classification/train_images/'

In [None]:
def plot(df, source_path, nrows=1, ncols=5):
    """Show a random run of consecutive images from ``df`` in a grid.

    Args:
        df: table with an ``image_id`` column holding image file names.
        source_path: directory that contains those files.
        nrows: number of grid rows.
        ncols: number of grid columns.

    Returns:
        None. Up to ``nrows * ncols`` images are displayed; nothing is drawn
        when ``df`` is empty.
    """
    n_images = min(nrows * ncols, len(df))
    if n_images <= 0:
        # Nothing to draw (and randint below would reject an empty range).
        return

    fig = plt.gcf()
    fig.set_size_inches(ncols * 4.5, nrows * 8)

    # Random start of a window of `n_images` consecutive rows; the +1 makes the
    # last valid window reachable and keeps the range non-empty for small frames.
    start = np.random.randint(0, len(df) - n_images + 1)
    file_names = df.image_id.iloc[start:start + n_images]

    for i, fname in enumerate(file_names):
        # Set up subplot; subplot indices start at 1
        sp = plt.subplot(nrows, ncols, i + 1)
        sp.axis('Off')  # Don't show axes (or gridlines)

        img = imageio.imread(os.path.join(source_path, fname))
        plt.imshow(img)

    plt.show()

In [None]:
# Split the training table into one frame per label.
cbb_df = train_df[train_df.label==0]  # Cassava Bacterial Blight leaves
cbsd_df = train_df[train_df.label==1]  # Cassava Brown Streak Disease leaves
cgm_df = train_df[train_df.label==2]  # Cassava Green Mottle leaves
cmd_df = train_df[train_df.label==3]  # Cassava Mosaic Disease leaves
healthy_df = train_df[train_df.label==4]  # Healthy leaves

# Let's see one of em
healthy_df.head(3)

In [None]:
# Healthy leaves (label 4): show a random run of sample images.

plot(healthy_df, train_path)

In [None]:
# Cassava Bacterial Blight leaves (label 0): show a random run of sample images.

plot(cbb_df, train_path)

In [None]:
# Cassava Brown Streak Disease leaves (label 1): show a random run of sample images.

plot(cbsd_df, train_path)

In [None]:
# Cassava Green Mottle leaves (label 2): show a random run of sample images.

plot(cgm_df, train_path)

In [None]:
# Cassava Mosaic Disease leaves (label 3): show a random run of sample images.

plot(cmd_df, train_path)

## Working with Balanced Data

We selected just 6500 images from the dominant CMD class and augmented the remaining minority classes to produce a roughly balanced dataset...

Let's read it

In [None]:
# Build an (image_id, label) table for the balanced data set by listing the
# files in each class folder under ./images.
dirs = ['zero','one', 'two', 'three', 'four']
label = [0, 1, 2, 3, 4]

df_img = []
df_lab = []

# The loop variable is `class_id`, not `label`, so the `label` list above is
# not overwritten by the loop. `total` lets tqdm show real progress, since a
# zip object has no length.
for name, class_id in tqdm(zip(dirs, label), total=len(dirs)):
    # os.listdir returns entries in arbitrary order; sort for a reproducible table.
    file_names = sorted(os.listdir('./images/'+name))
    df_img.extend(file_names)
    df_lab.extend([class_id]*len(file_names))


# Building from a dict keeps `label` as an integer column; the transposed
# list-of-lists form turned both columns into generic objects.
new_train_df = pd.DataFrame({'image_id': df_img, 'label': df_lab})

print(f'new df shape is {new_train_df.shape}')
new_train_df.head()

**Let's see the new distribution of Data**

In [None]:
# Per-label image counts of the balanced data set, printed and then plotted.
count = Counter(new_train_df.label)

print('New Label Count Distribution is\n', count)
plot_labels(count)

## Generate a Dataset

Here, we create a dataset from the image folders. We then split it into training and validation subsets and use them to train and evaluate the model.

**Let's define some parameters...**

In [None]:
SIZE = 299  # input resolution used for the Xception base model
IMAGE_SIZE = (SIZE, SIZE)
BATCH_SIZE = 16
NUM_CLASS = 5
VAL_SPLIT = 0.15

if tpu:
    # Global batch size: 16 per replica (a TPU with 8 cores gives 128).
    BATCH_SIZE = 16*strategy.num_replicas_in_sync
# Otherwise keep 16: on Colab/GPU a higher batch size does not help and
# sometimes does not fit on the GPU (OOM).

# Size each subset the way the directory loader is expected to split it: the
# validation subset gets int(VAL_SPLIT * n) images and training gets the rest.
# Rounding the two fractions independently can be off by one batch.
# NOTE(review): confirm this matches the split rule of the installed Keras version.
_num_images = len(new_train_df)
_num_val_images = int(VAL_SPLIT * _num_images)
_num_train_images = _num_images - _num_val_images

STEP_SIZE_TRAIN = int(np.ceil(_num_train_images / BATCH_SIZE))
STEP_SIZE_VALID = int(np.ceil(_num_val_images / BATCH_SIZE))

print('All set!')

In [None]:
# Class folders listed in label order. Without an explicit `class_names`, the
# loader sorts the folder names alphanumerically (four, one, three, two, zero),
# so class index k would NOT correspond to cassava label k.
CLASS_DIRS = ['zero', 'one', 'two', 'three', 'four']

# Arguments shared by both subsets. The same seed and validation_split must be
# used for both calls so that the two subsets do not overlap.
_dataset_kwargs = dict(directory='./images',
                       label_mode='categorical',  # one-hot labels
                       class_names=CLASS_DIRS,
                       batch_size=BATCH_SIZE,
                       image_size=IMAGE_SIZE,
                       seed=0,
                       validation_split=VAL_SPLIT,
                       interpolation="nearest")

train_ds = image_dataset_from_directory(subset='training', **_dataset_kwargs)

val_ds = image_dataset_from_directory(subset='validation', **_dataset_kwargs)

In [None]:
# Show which dataset class the loader returned.
type(val_ds)

Visualize a few of the images and their labels.

In [None]:
# Show up to nine images from the first training batch, titled with their class index.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(min(9, images.shape[0])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # Labels are one-hot vectors (label_mode='categorical'), so the class
        # is the position of the 1. The last element alone only tells whether
        # the image belongs to the final class.
        plt.title(str(labels[i].numpy().argmax()))
        plt.axis("off")


<h3>Using image data augmentation</h3>

When you don't have a large image dataset, it's a good practice to artificially introduce sample diversity by applying random yet realistic transformations to the training images, such as random horizontal flipping or small random rotations. This helps expose the model to different aspects of the training data while slowing down overfitting.

In [None]:
from keras import layers
from keras.layers.experimental.preprocessing import RandomCrop 
from keras.layers.experimental.preprocessing import RandomFlip
from keras.layers.experimental.preprocessing import RandomRotation
from keras.layers.experimental.preprocessing import RandomZoom
from keras.layers.experimental.preprocessing import RandomHeight
from keras.layers.experimental.preprocessing import RandomWidth
from keras.layers.experimental.preprocessing import Rescaling
print('Done!')

In [None]:
# Random transforms applied, in this order, to every training image.
_augmentation_layers = [
    RandomFlip("horizontal"),
    RandomFlip('vertical'),
    RandomRotation(0.1),
    RandomZoom(0.2, 0.2, seed=0),
    RandomHeight(factor=0.2, interpolation='nearest'),
    RandomWidth(factor=0.2, interpolation='nearest'),
]
data_augmentation = keras.Sequential(_augmentation_layers)
print('Data-augmentation Set!')

**Let's see an image being randomly augmented**

In [None]:
# Show nine independently augmented versions of the first image of one batch.
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for i in range(9):
        # The random layers only transform their input in training mode, so
        # request it explicitly; otherwise the preview can show nine copies
        # of the unmodified image.
        augmented_images = data_augmentation(images, training=True)
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_images[0].numpy().astype("uint8"))
        plt.axis("off")

## Configure the dataset for performance

Let's use buffered prefetching so that data can be read from disk without I/O blocking the training loop:

In [None]:
# The datasets are already batched, so the prefetch buffer is counted in
# batches. A buffer of BATCH_SIZE would keep that many whole batches in memory
# (128 batches on an 8-core TPU). Let tf.data pick the buffer size instead.
train_ds = train_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
print('Done')

## Build a model

First, we freeze every layer of the pre-trained **base model** and train only the new classification head that we add on top. We do this for about 30 epochs.<br>Next, we unfreeze about 20 layers and continue training, making sure the batch-norm layers stay frozen; otherwise we would disrupt what the model learned during its original training.

In [None]:
# Next, some imports

from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

print('Done!')

<h3> The Base Model</h3>

In [None]:
# Pre-trained feature extractor: Xception without its classification head.
base_model = keras.applications.Xception(
    weights="imagenet",  # Load weights pre-trained on ImageNet.
    input_shape=(SIZE, SIZE, 3),
    include_top=False
)

# Freeze the base_model
base_model.trainable = False

# Create new model on top
inputs = keras.Input(shape=(SIZE, SIZE, 3))
x = data_augmentation(inputs)  # Apply random data augmentation

# The pre-trained Xception weights expect inputs in the range (-1., +1.).
# Map the raw (0, 255) pixel values there in ONE step: x / 127.5 - 1.
# Rescaling to (0, 1) first and then normalizing with mean 127.5 (as this cell
# used to do) squeezes every pixel into roughly [-1.0, -0.99], so the base
# model sees an almost constant image.
x = Rescaling(scale=1./127.5, offset=-1)(x)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
# NOTE(review): these layers are created outside `strategy.scope()`; that is
# fine for the default strategy but presumably needs revisiting for TPU runs.
x = base_model(x, training=False)

# Classification head: pooled features -> three dense layers with dropout ->
# softmax over the NUM_CLASS classes.
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.4)(x)  # Regularize with dropout
x = keras.layers.Dense(256, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(512, activation='relu')(x)
x = keras.layers.Dropout(0.3)(x)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.4)(x)
outputs = keras.layers.Dense(NUM_CLASS, activation='softmax')(x)

print('Done!')

Fit the model with the necessary callBacks and params...

In [None]:
def fit_(model, REDUCE_LR=True, FINE_TUNE=False):
    """Compile ``model``, train it on ``train_ds`` / ``val_ds`` and save it.

    All hyper-parameters are read from module-level names at call time:
    LEARNING_RATE, METRIC, EPOCHS, EARLY_PATIENCE, REDUCE_FACTOR,
    REDUCE_PATIENCE, MIN_LEARNING_RATE, CHECK_Pt_NAME and SAVE_NAME.

    Args:
        model: the Keras model to train. It is (re)compiled here, so changes
            to ``trainable`` flags made before the call take effect.
        REDUCE_LR: when True, lower the learning rate whenever ``val_loss``
            plateaus.
        FINE_TUNE: only used when ``REDUCE_LR`` is False; when True, raise the
            learning rate exponentially after a warm-up of
            ``EARLY_PATIENCE // 2`` epochs.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False,
                                                   label_smoothing=0.0001,
                                                   name='categorical_crossentropy')

    # `metrics` is documented as a list; wrap a single metric name.
    metrics = [METRIC] if isinstance(METRIC, str) else METRIC
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                  loss=loss,
                  metrics=metrics)

    # Stop training when val_loss has not improved for EARLY_PATIENCE epochs
    # and roll back to the best weights seen.
    es = EarlyStopping(monitor='val_loss',
                       mode='min',
                       patience=EARLY_PATIENCE,
                       restore_best_weights=True,
                       verbose=1)

    # Save the model with the minimum validation loss
    checkpoint_cb = ModelCheckpoint(CHECK_Pt_NAME,
                                    save_best_only=True,
                                    monitor='val_loss',
                                    mode='min')

    CALLBACKS = [es, checkpoint_cb]

    if REDUCE_LR:
        # Multiply the learning rate by REDUCE_FACTOR when val_loss plateaus.
        CALLBACKS.append(ReduceLROnPlateau(monitor='val_loss',
                                           factor=REDUCE_FACTOR,
                                           patience=REDUCE_PATIENCE,
                                           min_lr=MIN_LEARNING_RATE,
                                           mode='min',
                                           verbose=1))
    elif FINE_TUNE:
        def increase_lr_exp(epoch, lr, wait=EARLY_PATIENCE//2):
            """Keep ``lr`` for the first ``wait + 1`` epochs, then grow it by
            a factor of e**0.1 (about 10.5%) per epoch."""
            if epoch < wait+1:
                return lr
            return float(lr * np.exp(0.1))

        CALLBACKS.append(tf.keras.callbacks.LearningRateScheduler(increase_lr_exp))

    # train_ds / val_ds are already-batched tf.data datasets, so `batch_size`
    # must not be passed, and the step counts are left to Keras: each epoch
    # runs until the dataset is exhausted. A hand-computed steps_per_epoch that
    # disagrees with the real number of batches makes training run out of data.
    history = model.fit(train_ds,
                        validation_data=val_ds,
                        epochs=EPOCHS,
                        callbacks=CALLBACKS)

    model.save(SAVE_NAME)

    return history
print('Fit defined!')

## Training Part 1: `FEATURE-EXTRACTION`

In [None]:
# Hyper-parameters read by fit_() for the feature-extraction phase.
EPOCHS = 40  # upper bound; early stopping may end training sooner
METRIC = 'categorical_accuracy'
# NOTE(review): 0.05 looks high for Adam -- confirm this is intentional.
LEARNING_RATE = 0.05
MIN_LEARNING_RATE = 1e-5  # floor for ReduceLROnPlateau
EARLY_PATIENCE = 6  # epochs without val_loss improvement before stopping
REDUCE_PATIENCE = 3  # epochs without val_loss improvement before reducing the LR
REDUCE_FACTOR = 0.8  # multiplier applied to the LR on a plateau
CHECK_Pt_NAME = "Cassava_best_model.h5"  # best-val_loss checkpoint
SAVE_NAME = 'Cassava_model.h5'  # model saved at the end of training

In [None]:
# Phase 1: train only the new head on top of the frozen base model.
with strategy.scope():
    model = keras.Model(inputs, outputs)
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print() (that would add a stray "None" line).
    model.summary()

    # Start the clock BEFORE training; starting it after fit_() returned
    # always reported a training time of about zero.
    print('Starting Training...')
    start_time= time.time()
    results = fit_(model)

    last5_mean_val_accuracy = results.history["val_categorical_accuracy"][-5:]
    print("LAST 5 MEAN VAL-ACCURACY:", np.mean(last5_mean_val_accuracy))
    print("TRAINING TIME: ", time.time() - start_time, " secs!")

In [None]:
#%% CHECKING THE METRIC

# Report the best (maximum) per-epoch accuracy reached on each split.
for tag, key in (('Train_Cat-Acc: ', 'categorical_accuracy'),
                 ('Val_Cat-Acc: ', 'val_categorical_accuracy')):
    print(tag, max(results.history[key]))

In [None]:
#%% PLOTTING RESULTS (Train vs Validation FOLDER 1)

def Train_Val_Plot(acc,val_acc,loss,val_loss):
    """Plot training vs. validation curves side by side.

    The left panel shows accuracy and the right panel shows loss; the x axis
    is the epoch number, starting at 1.

    Args:
        acc, val_acc: per-epoch training / validation accuracy.
        loss, val_loss: per-epoch training / validation loss.
    """
    fig, axes = plt.subplots(1,2, figsize= (15,10))
    fig.suptitle(" MODEL'S METRICS VISUALIZATION ", fontsize=20)

    # (axis, training curve, validation curve, panel title, y-axis label)
    panels = (
        (axes[0], acc, val_acc, 'History of Accuracy', 'Accuracy'),
        (axes[1], loss, val_loss, 'History of Loss', 'Loss'),
    )
    for axis, train_curve, val_curve, title, y_label in panels:
        # Training first, validation second, matching the legend order.
        for curve in (train_curve, val_curve):
            axis.plot(range(1, len(curve) + 1), curve)
        axis.set_title(title, fontsize=15)
        axis.set_xlabel('Epochs', fontsize=15)
        axis.set_ylabel(y_label, fontsize=15)
        axis.legend(['training', 'validation'])

    plt.show()


Train_Val_Plot(
    results.history['categorical_accuracy'],
    results.history['val_categorical_accuracy'],
    results.history['loss'],
    results.history['val_loss'],
)

### Loading The Best Model

In [None]:
# Reload the checkpoint with the lowest validation loss and print its summary.
best_model = keras.models.load_model(CHECK_Pt_NAME)
try:
    best_model.summary()
except Exception as e:
    # Best-effort display only: a failing summary is reported, not raised.
    print(e)

## Training Part 2: `Model FINE-TUNING`

We can fine-tune the model by unfreezing a few of its later layers and then retraining these on our data.
We must make sure not to touch the batch-norm layers, if any, so as not to destroy the model's previous learning.

Let's first confirm how many layers the model has...

In [None]:
# Number of top-level layers of the model.
print(len(model.layers))

Next we set some new params for fine tuning

In [None]:
# Hyper-parameters read by fit_() for the fine-tuning phase. Names that are not
# reassigned here keep the values set for the feature-extraction phase.
EPOCHS = 70
METRIC = 'categorical_accuracy'
LEARNING_RATE = 1e-4  # fine-tuning should start with very small LR
EARLY_PATIENCE = 10
CHECK_Pt_NAME = "Cassava_best_finetuned_model.h5"
# NOTE(review): same file name as in the first phase, so the model saved after
# phase 1 is overwritten -- confirm that is intended.
SAVE_NAME = 'Cassava_model.h5'

So we define a method that unfreezes a given number of the last hidden layers of the model... <br>Without unfreezing the batch-norm layers, else we mess up the entire model's learning.<br>Then we pass a really small learning rate and retrain the model with our data. <br>This function also recompiles the model after unfreezing the weights.

In [None]:
def _freeze_nested_batchnorm(layer):
    """Mark every BatchNormalization layer nested inside ``layer`` as non-trainable."""
    for sub_layer in getattr(layer, "layers", ()):
        if isinstance(sub_layer, layers.BatchNormalization):
            sub_layer.trainable = False
        else:
            _freeze_nested_batchnorm(sub_layer)


def unfreeze_model(model, num_layers):
    """Make the last ``num_layers`` layers of ``model`` trainable, except BatchNorm.

    Only the top-level ``model.layers`` are counted, so a nested model (such
    as a pre-trained base model used as a single layer) counts as one layer.
    When such a layer is unfrozen, the BatchNormalization layers inside it are
    frozen again, because setting ``trainable`` on a model applies to all of
    its sub-layers.

    Args:
        model: Keras model, modified in place.
        num_layers: how many trailing layers to unfreeze. Values <= 0 leave
            the model untouched; values larger than the number of layers
            unfreeze all of them.

    Returns:
        The same ``model`` object. It must be re-compiled for the change to
        take effect.
    """
    if num_layers <= 0:
        # `model.layers[-0:]` is the WHOLE list, so without this guard a
        # request for zero layers would unfreeze everything.
        return model

    # Iterating over the slice (instead of indexing with a running counter)
    # also copes with num_layers > len(model.layers).
    for layer in model.layers[-num_layers:]:
        if isinstance(layer, layers.BatchNormalization):
            continue
        layer.trainable = True
        _freeze_nested_batchnorm(layer)
    return model

In [None]:
# Let's unfreeze the last 10 top-level layers of the model and keep training from the weights learned so far.
# NOTE(review): the pre-trained base model is a single top-level layer, so
# whether it falls inside the last 10 depends on the layer count printed above.

num_layers = 10

model = unfreeze_model(model, num_layers)
print('model unfrozen!')

In [None]:
# Print the model summary again after unfreezing.
model.summary()

### Retrain with more layers of the model unfrozen

In [None]:
# Phase 2: fine-tune the unfrozen model. fit_() compiles the model again before
# training; with REDUCE_LR=False and FINE_TUNE=True it uses the
# increasing-learning-rate schedule instead of ReduceLROnPlateau.
with strategy.scope():
    print('Starting Training...')
    start_time = time.time()
    results = fit_(model, REDUCE_LR=False, FINE_TUNE=True)

    # Mean validation accuracy over the last (up to) five epochs.
    last5_mean_val_accuracy = results.history["val_categorical_accuracy"][-5:]
    print("LAST 5 MEAN VAL-ACCURACY:", np.mean(last5_mean_val_accuracy))
    print("TRAINING TIME: ", time.time() - start_time, " secs")

### Evaluation

In [None]:
#%% CHECKING THE METRIC

# Report the best (maximum) per-epoch accuracy reached on each split.
for tag, key in (('Train_Cat-Acc: ', 'categorical_accuracy'),
                 ('Val_Cat-Acc: ', 'val_categorical_accuracy')):
    print(tag, max(results.history[key]))

In [None]:
#%% PLOTTING RESULTS (Train vs Validation FOLDER 1)

def Train_Val_Plot(acc,val_acc,loss,val_loss):
    """Plot training vs. validation curves side by side.

    The left panel shows accuracy and the right panel shows loss; the x axis
    is the epoch number, starting at 1.

    Args:
        acc, val_acc: per-epoch training / validation accuracy.
        loss, val_loss: per-epoch training / validation loss.
    """
    fig, axes = plt.subplots(1,2, figsize= (15,10))
    fig.suptitle(" MODEL'S METRICS VISUALIZATION ", fontsize=20)

    # (axis, training curve, validation curve, panel title, y-axis label)
    panels = (
        (axes[0], acc, val_acc, 'History of Accuracy', 'Accuracy'),
        (axes[1], loss, val_loss, 'History of Loss', 'Loss'),
    )
    for axis, train_curve, val_curve, title, y_label in panels:
        # Training first, validation second, matching the legend order.
        for curve in (train_curve, val_curve):
            axis.plot(range(1, len(curve) + 1), curve)
        axis.set_title(title, fontsize=15)
        axis.set_xlabel('Epochs', fontsize=15)
        axis.set_ylabel(y_label, fontsize=15)
        axis.legend(['training', 'validation'])

    plt.show()


Train_Val_Plot(
    results.history['categorical_accuracy'],
    results.history['val_categorical_accuracy'],
    results.history['loss'],
    results.history['val_loss'],
)

#### Pass the Best Fine-Tuned-Model to the Submission Notebook and Submit...