# Inference Notebook is found here
[Inference Notebook](https://www.kaggle.com/shubham219/experiment-with-models-in-keras-inference)

This is my first computer vision competition. This notebook is just for some random experiments, to see how different choices impact the model and its accuracy. 

If you have any suggestions, please leave a comment, and please upvote. 

# Updates 

## Update 1 - No Image Augmentation  - VGG16 Base(freeze)
The model was trained on a VGG16 base with only one dense layer. The VGG16 parameters are frozen and only the dense layer's parameters are trained. The model was slightly overfitting, and the score on the public leaderboard is 0.64.

## Update 2 - Image Augmentation - VGG16 Base(freeze)
The model was trained on a VGG16 base with only one dense layer. The VGG16 parameters are frozen and only the dense layer's parameters are trained. With image augmentation, the validation loss and validation accuracy are lower than for the previous model (without augmentation), but the model performed well and accuracy improved to 0.65 on the public leaderboard.

## Update 3 - Image Augmentation With Class Weights
The model was trained on a VGG16 base with only one dense layer and with class weights applied. The VGG16 parameters are frozen and only the dense layer's parameters are trained. The model did not perform well and accuracy dropped to 0.50.

## Update 4 - Few Layers Unfrozen
The model was trained on a VGG16 base with only one dense layer, but this time the last convolution block was unfrozen and trained with a low learning rate, after first training the model with the base frozen. **The model performed really well: accuracy rose from 0.65 to 0.763.**

## Update 5 - Trying Model Ensembling
Accuracy jumped to 0.803

# Importing All The Required Libraries

In [None]:
# Install EfficientNet from the source copy attached under ../input,
# then run an upgrade install of the `efficientnet` package.
!pip install ../input/efficientnet-keras-source-code/repository/qubvel-efficientnet-c993591
!pip install -U efficientnet
# Install image-classifiers from the attached wheel, then by package name.
!pip install /kaggle/input/keras-pretrained-imagenet-weights/image_classifiers-1.0.0-py3-none-any.whl
!pip install image-classifiers
# Pin keras-applications to version 1.0.8.
!pip install keras-applications==1.0.8

In [None]:
import os
import re
import pandas as pd
import numpy as np
import tensorflow as tf
import shutil
from functools import partial
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split 
from sklearn.utils import class_weight
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import KFold
from classification_models.tfkeras import Classifiers

# Print the TensorFlow and Python versions used by this run.
print("Tensorflow version -",tf.__version__)
print("Python version")
!python --version

# TPU Config

In [None]:
# Pick the distribution strategy: a TPU strategy when a TPU can be resolved
# and initialised, otherwise TensorFlow's default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Device:", tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # `except Exception` instead of a bare `except` so that KeyboardInterrupt
    # and SystemExit still propagate; the reason for the fallback is printed
    # rather than silently discarded.
    print("TPU not available, falling back to the default strategy:", tpu_error)
    strategy = tf.distribute.get_strategy()
print("Number of replicas:", strategy.num_replicas_in_sync)

# Variables

In [None]:
# Upper bound on epochs per `model.fit` call (early stopping may end sooner).
EPOCHS = 100
# Target [height, width] every image is resized to.
IMAGE_SIZE = [512, 512]
AUTOTUNE = tf.data.experimental.AUTOTUNE
# NOTE(review): BUFFER_SIZE and SEED are not referenced anywhere in the code visible here.
BUFFER_SIZE = 32
# Number of KFold splits.
FOLD = 5
SEED = (2, 3)
# Global batch size: 16 samples per replica of the active strategy.
BATCH_SIZE = AUG_BATCH = 16*strategy.num_replicas_in_sync
# GCS locations of the attached Kaggle datasets.
# NOTE(review): only GCS_PATH_COMB is used by the code visible here.
GCS_PATH_ORG = KaggleDatasets().get_gcs_path('cassava-leaf-disease-classification')
GCS_PATH_COMB= KaggleDatasets().get_gcs_path('cassava-old-new-data-600-800')

# Per-fold means of the training history, filled by the cross-validation loops.
loss_list = []
acc_list = []
val_acc_list = []
val_loss_list = []

# Reading Metadata

In [None]:
# Load the training metadata table.
data = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

# Read the label-number -> disease-name mapping as raw text and show it.
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as file:
    text = file.read()
print(text)
# Last expression of the cell: the notebook renders the first rows of the table.
data.head()

# Distribution Of Classes
The dataset is very imbalanced.

In [None]:
# Bar chart of the share (in percent) of each value in the `label` column.
figure = plt.figure(figsize=(8,4))
(data['label'].value_counts()/len(data)*100).plot(kind='bar')
plt.title("Distribution of Classes")
plt.ylabel('% count of classes')
plt.show()

# Image Augmentation & Other Functions
The transformation code below is taken from [this notebook](https://www.kaggle.com/cdeotte/cutmix-and-mixup-on-gpu-tpu)

In [None]:
# Learning rate
# NOTE(review): none of the four constants below is referenced by the code
# visible in this file; the functions here hard-code the class count as 5.
# Learning rate
LR = 0.01
# Test time augmentation rounds
TTA = 10
# Verbosity
VERBOSE = 2
# Number of classes
N_CLASSES = 5

def onehot(image,label):
    """Return `image` untouched together with `label` one-hot encoded over 5 classes."""
    num_classes = 5
    encoded_label = tf.one_hot(label, num_classes)
    return image, encoded_label

def cutmix(image, label, PROBABILITY = 1.0):
    """Apply CutMix to a batch of images.

    For every sample a square patch from a randomly chosen sample of the same
    batch is pasted at a random location, and the two labels are blended in
    proportion to the pasted area.

    Args:
        image: batch of images of size [n, dim, dim, 3] (not a single image),
            where n must equal AUG_BATCH and dim must equal IMAGE_SIZE[0].
        label: either a 1-D batch of integer class ids (one-hot encoded here
            over 5 classes) or a 2-D batch of already encoded label vectors.
        PROBABILITY: chance, per sample, that CutMix is applied at all.

    Returns:
        (images, labels) with shapes (AUG_BATCH, dim, dim, 3) and (AUG_BATCH, 5).
    """
    DIM = IMAGE_SIZE[0]
    CLASSES = 5

    imgs = []; labs = []
    for j in range(AUG_BATCH):
        # DO CUTMIX WITH PROBABILITY DEFINED ABOVE (P is 1 when applied, else 0)
        P = tf.cast( tf.random.uniform([],0,1)<=PROBABILITY, tf.int32)
        # CHOOSE RANDOM IMAGE TO CUTMIX WITH
        k = tf.cast( tf.random.uniform([],0,AUG_BATCH),tf.int32)
        # CHOOSE RANDOM LOCATION
        x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        b = tf.random.uniform([],0,1) # this is beta dist with alpha=1.0
        # Multiplying by P collapses the patch to width 0 when CutMix is skipped.
        WIDTH = tf.cast( DIM * tf.math.sqrt(1-b),tf.int32) * P
        # Clip the patch to the image borders.
        ya = tf.math.maximum(0,y-WIDTH//2)
        yb = tf.math.minimum(DIM,y+WIDTH//2)
        xa = tf.math.maximum(0,x-WIDTH//2)
        xb = tf.math.minimum(DIM,x+WIDTH//2)
        # MAKE CUTMIX IMAGE: rows ya:yb are rebuilt as left | pasted patch | right
        one = image[j,ya:yb,0:xa,:]
        two = image[k,ya:yb,xa:xb,:]
        three = image[j,ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        img = tf.concat([image[j,0:ya,:,:],middle,image[j,yb:DIM,:,:]],axis=0)
        imgs.append(img)
        # MAKE CUTMIX LABEL
        # Weight the partner label by the area that was really pasted. The
        # patch is clipped at the image borders, so (yb-ya)*(xb-xa) can be
        # smaller than WIDTH*WIDTH; using the unclipped width would give the
        # partner class more weight than its visible pixels justify.
        a = tf.cast((yb - ya) * (xb - xa), tf.float32) / float(DIM * DIM)
        if len(label.shape)==1:
            lab1 = tf.one_hot(label[j],CLASSES)
            lab2 = tf.one_hot(label[k],CLASSES)
        else:
            lab1 = label[j,]
            lab2 = label[k,]
        labs.append((1-a)*lab1 + a*lab2)

    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR (maybe use Python typing instead?)
    image2 = tf.reshape(tf.stack(imgs),(AUG_BATCH,DIM,DIM,3))
    label2 = tf.reshape(tf.stack(labs),(AUG_BATCH,CLASSES))
    return image2,label2


def mixup(image, label, PROBABILITY = 1.0):
    """Apply MixUp to a batch of images.

    Each sample is blended pixel-wise with a randomly chosen sample of the
    same batch, and the two labels are blended with the same weight.

    Args:
        image: batch of images of size [n, dim, dim, 3] (not a single image).
        label: 1-D batch of integer class ids (one-hot encoded here over
            5 classes) or a 2-D batch of already encoded label vectors.
        PROBABILITY: chance, per sample, that MixUp is applied.

    Returns:
        (images, labels) reshaped to (AUG_BATCH, dim, dim, 3) and (AUG_BATCH, 5).
    """
    DIM = IMAGE_SIZE[0]
    CLASSES = 5
    # Integer labels still need encoding; 2-D labels are used as they are.
    needs_encoding = len(label.shape) == 1

    mixed_images = []
    mixed_labels = []
    for j in range(AUG_BATCH):
        # 1.0 when MixUp is applied to this sample, 0.0 otherwise
        apply_flag = tf.cast(tf.random.uniform([], 0, 1) <= PROBABILITY, tf.float32)
        # index of the partner sample
        partner = tf.cast(tf.random.uniform([], 0, AUG_BATCH), tf.int32)
        # blend weight (beta distribution with alpha=1.0); zero when skipped
        weight = tf.random.uniform([], 0, 1) * apply_flag

        own_image = image[j,]
        partner_image = image[partner,]
        mixed_images.append((1 - weight) * own_image + weight * partner_image)

        if needs_encoding:
            own_label = tf.one_hot(label[j], CLASSES)
            partner_label = tf.one_hot(label[partner], CLASSES)
        else:
            own_label = label[j,]
            partner_label = label[partner,]
        mixed_labels.append((1 - weight) * own_label + weight * partner_label)

    # Explicit reshape so the TPU compiler knows the static output shapes.
    out_images = tf.reshape(tf.stack(mixed_images), (AUG_BATCH, DIM, DIM, 3))
    out_labels = tf.reshape(tf.stack(mixed_labels), (AUG_BATCH, CLASSES))
    return out_images, out_labels



def data_augment(image, target):
    """Randomly augment a single image; the target is passed through unchanged.

    Applies random flips, an optional transpose, an optional rotation by a
    multiple of 90 degrees, optional saturation/contrast/brightness jitter
    and an optional crop, then resizes the result back to IMAGE_SIZE.

    Args:
        image: a single image tensor (reshaped to [*IMAGE_SIZE, 3] on return).
        target: the label belonging to the image.

    Returns:
        (augmented image of shape [*IMAGE_SIZE, 3], target).
    """
    
    # One independent uniform draw in [0, 1) per augmentation decision
    p_spatial = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
            
    # Flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    
    # Transpose when the draw exceeds 0.75
    if p_spatial > 0.75:
        image = tf.image.transpose(image)
        
    # Rotates: one quarter of the draw range per case, lowest quarter = no rotation
    if p_rotate > 0.75:
        image = tf.image.rot90(image, k = 3) # rotate 270 degrees
    elif p_rotate > 0.5:
        image = tf.image.rot90(image, k = 2) # rotate 180 degrees
    elif p_rotate > 0.25:
        image = tf.image.rot90(image, k = 1) # rotate 90 degrees
        
    # Pixel-level transforms, each applied when its own draw is >= 0.4
    if p_pixel_1 >= 0.4:
        image = tf.image.random_saturation(image, lower = 0.7, upper = 1.3)
    if p_pixel_2 >= 0.4:
        image = tf.image.random_contrast(image, lower = 0.8, upper = 1.2)
    if p_pixel_3 >= 0.4:
        # NOTE(review): decode_image in this file casts pixels to float32
        # without rescaling them, so max_delta = 0.1 looks like a very small
        # shift on that scale -- confirm this is intended.
        image = tf.image.random_brightness(image, max_delta = 0.1)
        
    # Crops: central crop for draws above 0.7, random square crop for draws
    # in (0.4, 0.7], no crop otherwise
    if p_crop > 0.7:
        if p_crop > 0.9:
            image = tf.image.central_crop(image, central_fraction = 0.7)
        elif p_crop > 0.8:
            image = tf.image.central_crop(image, central_fraction = 0.8)
        else:
            image = tf.image.central_crop(image, central_fraction = 0.9)
    elif p_crop > 0.4:
        # Side length drawn between 80% of the image size and the image size
        crop_size = tf.random.uniform([], int(IMAGE_SIZE[0] * 0.8), IMAGE_SIZE[0], dtype = tf.int32)
        image = tf.image.random_crop(image, size = [crop_size, crop_size, 3])
    
    # Bring every image back to the common size after a possible crop
    image = tf.image.resize(image, size = IMAGE_SIZE)

    # Make the static shape explicit
    image = tf.reshape(image, [*IMAGE_SIZE, 3])

    return image, target



def transform(image,label):
    """Apply both CutMix and MixUp to a batch and pick one result per sample.

    Both augmentations are computed for the whole batch; for each sample the
    CutMix version is kept with probability SWITCH and the MixUp version
    otherwise.

    Returns:
        (images, labels) reshaped to (AUG_BATCH, dim, dim, 3) and (AUG_BATCH, 5).
    """
    DIM = IMAGE_SIZE[0]
    CLASSES = 5
    SWITCH = 0.5
    CUTMIX_PROB = 0.666
    MIXUP_PROB = 0.666

    cut_images, cut_labels = cutmix(image, label, CUTMIX_PROB)
    mix_images, mix_labels = mixup(image, label, MIXUP_PROB)

    picked_images = []
    picked_labels = []
    for j in range(AUG_BATCH):
        # 1.0 selects the CutMix sample, 0.0 selects the MixUp sample
        take_cutmix = tf.cast(tf.random.uniform([], 0, 1) <= SWITCH, tf.float32)
        picked_images.append(take_cutmix * cut_images[j,] + (1 - take_cutmix) * mix_images[j,])
        picked_labels.append(take_cutmix * cut_labels[j,] + (1 - take_cutmix) * mix_labels[j,])

    # Explicit reshape so the TPU compiler knows the static output shapes.
    out_images = tf.reshape(tf.stack(picked_images), (AUG_BATCH, DIM, DIM, 3))
    out_labels = tf.reshape(tf.stack(picked_labels), (AUG_BATCH, CLASSES))
    return out_images, out_labels



def get_val_tta(filenames, ordered = True):
    """Build an endlessly repeating, augmented, batched dataset for test-time augmentation.

    Args:
        filenames: TFRecord file names to read labeled examples from.
        ordered: when True the records keep their deterministic order; when
            False the dataset comes from `load_dataset`, which disables
            deterministic ordering.

    Returns:
        A batched (BATCH_SIZE) dataset of augmented (image, label) pairs. It
        repeats forever, so the caller has to bound the number of steps.
    """
    if ordered:
        # `load_dataset` takes no `ordered` argument (passing one raises a
        # TypeError) and always turns deterministic ordering off, so the
        # ordered pipeline is built here directly.
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.map(
            partial(read_tfrecord, labeled=True), num_parallel_calls = AUTOTUNE
        )
    else:
        dataset = load_dataset(filenames)
    dataset = dataset.map(data_augment, num_parallel_calls = AUTOTUNE)
    dataset = dataset.repeat()
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset



# Parameters of the learning-rate schedule computed by `lrfn`.
# Rate at epoch 0.
LR_START = 0.00001
# Peak rate, scaled with the number of replicas of the active strategy.
LR_MAX = 0.00005 * strategy.num_replicas_in_sync
# Floor the decayed rate approaches.
LR_MIN = 0.00001
# Number of epochs of linear ramp-up from LR_START to LR_MAX.
LR_RAMPUP_EPOCHS = 5
# Number of epochs held at LR_MAX after the ramp-up (0 = none).
LR_SUSTAIN_EPOCHS = 0
# Per-epoch multiplicative decay factor applied after the sustain phase.
LR_EXP_DECAY = .8

def lrfn(epoch):
    """Return the learning rate for `epoch`: linear ramp-up, optional plateau, exponential decay."""
    # Ramp-up phase: straight line from LR_START towards LR_MAX.
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    # Sustain phase: hold the peak rate.
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX
    # Decay phase: shrink the distance to LR_MIN geometrically.
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) + LR_MIN
    
# Keras callback that sets the learning rate from `lrfn` each epoch and logs it.
# NOTE(review): not passed to any `model.fit` call visible in this file.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

# Reading TF Records

In [None]:
# Decode the data
def decode_image(image):
    """Decode a JPEG byte string into a float32 image of shape [*IMAGE_SIZE, 3]."""
    print("Reading Image")
    pixels = tf.cast(tf.image.decode_jpeg(image, channels=3), tf.float32)
    resized = tf.image.resize(pixels, IMAGE_SIZE)
    # Make the static shape explicit.
    return tf.reshape(resized, [*IMAGE_SIZE, 3])

def read_tfrecord(example, labeled):
    """Parse one serialized TFRecord example.

    Args:
        example: a serialized example.
        labeled: True to read the integer `target` feature, False to read
            the `image_name` string feature instead.

    Returns:
        (image, label as int32) when `labeled`, otherwise (image, image_name).
    """
    feature_spec = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        feature_spec["target"] = tf.io.FixedLenFeature([], tf.int64)
    else:
        feature_spec["image_name"] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed['image'])

    if not labeled:
        return image, parsed['image_name']
    return image, tf.cast(parsed['target'], tf.int32)

def load_dataset(filenames, labeled=True):
    """Read TFRecord files into a dataset of decoded examples.

    Args:
        filenames: TFRecord file names to read.
        labeled: forwarded to `read_tfrecord`; selects between the label and
            the image name as the second element of each pair.

    Returns:
        A dataset of (image, label) pairs if labeled=True, or
        (image, image_name) pairs if labeled=False. Record order is not
        deterministic.
    """
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False  # disable order, increase speed
    # num_parallel_reads is what actually makes TFRecordDataset read several
    # files at once and interleave their records; without it the files are
    # read one after another.
    dataset = tf.data.TFRecordDataset(
        filenames, num_parallel_reads=AUTOTUNE
    )
    dataset = dataset.with_options(
        ignore_order
    )  # uses data as soon as it streams in, rather than in its original order
    dataset = dataset.map(
        partial(read_tfrecord, labeled=labeled), num_parallel_calls=AUTOTUNE
    )
    return dataset

def get_training_dataset(filenames, labeled=True ,augment=False, one_hot=True):
    """Build the training input pipeline.

    With augment=True every example goes through `data_augment`, the dataset
    repeats forever and is batched by AUG_BATCH. With augment=False the
    dataset is neither repeated nor batched. With one_hot=True the labels are
    one-hot encoded by `onehot`. The result is always prefetched.
    """
    pipeline = load_dataset(filenames, labeled=labeled)

    if augment:
        # The batch-level CutMix/MixUp `transform` map is not applied here.
        pipeline = (
            pipeline
            .map(data_augment, num_parallel_calls=AUTOTUNE)
            .repeat()
            .batch(AUG_BATCH)
        )

    if one_hot:
        pipeline = pipeline.map(onehot, num_parallel_calls=AUTOTUNE)

    # No caching is done; only prefetching.
    return pipeline.prefetch(buffer_size=AUTOTUNE)

def get_validation_dataset(filenames, labeled=True,augment=False, one_hot=True):
    """Build the validation input pipeline.

    Args:
        filenames: TFRecord file names to read.
        labeled: forwarded to `load_dataset`.
        augment: when True every example goes through `data_augment`.
        one_hot: when True labels are one-hot encoded by `onehot`.

    Returns:
        A dataset batched by BATCH_SIZE (one pass, not repeated) and prefetched.
    """
    dataset = load_dataset(filenames, labeled=labeled)

    if augment:
        dataset = dataset.map(data_augment, num_parallel_calls=AUTOTUNE)

    # Batch exactly once. Batching inside the `augment` branch as well would
    # batch the data twice and produce tensors with an extra leading dimension.
    dataset = dataset.batch(BATCH_SIZE)

    if one_hot:
        dataset = dataset.map(onehot, num_parallel_calls=AUTOTUNE)

    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset


# Splitting the Data Into Train And Test

In [None]:
# Old and New iMages
# Split the TFRecord files (old and new images) 70/30 into train and validation files.
x_train, x_test = train_test_split(tf.io.gfile.glob(GCS_PATH_COMB + '/*.tfrec'),
                                   test_size=0.3,
                                   random_state=123)


# Creating the datasets
train_ds = get_training_dataset(x_train, augment=True)
valid_ds = get_validation_dataset(x_test, augment=False, one_hot=True)


# Counting the number of examples: sum the number that follows "train<digits>-"
# in each file name. Raw strings keep `\d` a regex escape rather than an
# invalid Python string escape.
# NOTE(review): assumes that number is the per-file example count -- confirm.
train_files_cnt = sum(int(i) for i in re.findall(r'train\d+-(\d+)', str(x_train)))
valid_files_cnt = sum(int(i) for i in re.findall(r'train\d+-(\d+)', str(x_test)))

print(f'Number of Training And Validation Files are - {train_files_cnt} and {valid_files_cnt}')

# Visualizing The Training Dataset
Some of the images contain lots of leaves and are taken from far away.


In [None]:
# Show the first nine images of one training batch in a 3x3 grid.
figure = plt.figure(figsize=(10,10))

for image, label in train_ds.take(1):
    print(label.shape)
    for i in range(9):
        # 330+1+i selects cell i+1 of a 3x3 subplot grid
        plt.subplot(330+1+i)
        # pixels are cast to uint8 for display
        plt.imshow(np.array(image[i]).astype('uint8'))
#         plt.title(int(label[i]))
        plt.axis('off')
        
plt.show()

# Visualizing The Validation Dataset

In [None]:
# Show the first nine images of one validation batch in a 3x3 grid.
figure = plt.figure(figsize=(10,10))

for image, label in valid_ds.take(1):
    print(image.shape)
    print(label.shape)
    for i in range(9):
        # 330+1+i selects cell i+1 of a 3x3 subplot grid
        plt.subplot(330+1+i)
        # pixels are cast to uint8 for display
        plt.imshow(np.array(image[i]).astype('uint8'))
#         plt.title(int(label[i]))
        plt.axis('off')
        
plt.show()

# Model Preparation

In [None]:
def make_model(input_shape):
    """Build the three-backbone ensemble classifier.

    The input is rescaled by 1/255 and fed to the module-level backbones
    `conv_base1`, `conv_base2` and `conv_base3`. Each backbone output is
    globally average-pooled and projected to 256 sigmoid-activated features;
    the three feature vectors are concatenated and passed through a dense
    head ending in a 5-way softmax.

    Args:
        input_shape: shape of a single input image.

    Returns:
        An uncompiled `tf.keras.Model`.
    """
    layers = tf.keras.layers

    def _feature_branch(backbone, tensor):
        # backbone -> global average pooling -> Dense(256) -> sigmoid
        features = backbone(tensor)
        pooled = layers.GlobalAveragePooling2D()(features)
        projected = layers.Dense(256)(pooled)
        return layers.Activation('sigmoid')(projected)

    inputs = tf.keras.Input(shape=input_shape)

    # Rescaling
    scaled_inputs = layers.experimental.preprocessing.Rescaling(1./255)(inputs)

    # One feature branch per backbone, built in backbone order
    branch_outputs = []
    for backbone in (conv_base1, conv_base2, conv_base3):
        branch_outputs.append(_feature_branch(backbone, scaled_inputs))

    # Concatenation followed by the dense head
    x = layers.concatenate(branch_outputs, axis=-1)
    x = layers.BatchNormalization()(x)
    for units in (4096, 512, 256):
        x = layers.Dense(units)(x)
        x = layers.LeakyReLU(0.2)(x)
        x = layers.Dropout(0.5)(x)
    x = layers.BatchNormalization()(x)

    output = layers.Dense(5, activation='softmax')(x)

    return tf.keras.Model(inputs, output)



# Using Pretrained Convolution Base
Freezing the layers

In [None]:
from efficientnet.tfkeras import EfficientNetB7
# Directory holding the pretrained weight files attached to the notebook.
class_wg_root = '/kaggle/input/keras-pretrained-imagenet-weights/'


# Create the three frozen backbones, the ensemble model and its optimizer
# inside the distribution strategy scope.
with strategy.scope():
    
    # Model base 1
    conv_base1 = EfficientNetB7(include_top=False,
                                weights='imagenet'
                               )

    conv_base1.trainable=False
     
    # Model base 2: built without weights, then loaded from the attached .h5 file
    # NOTE(review): `preprocess_input` is not used in the code visible here.
    SeResNeXT50, preprocess_input = Classifiers.get('seresnext50')
    SRNXT = SeResNeXT50(input_shape=(*IMAGE_SIZE, 3), include_top=False, weights=None)
    SRNXT.load_weights(class_wg_root + 'seresnext50_imagenet_1000_no_top.h5')
    conv_base2 = SRNXT

    conv_base2.trainable=False

    # Model base 3
    conv_base3 = tf.keras.applications.InceptionResNetV2(include_top=False, 
                                                         weights='imagenet'
                                                        )
    conv_base3.trainable=False

    # make_model reads conv_base1/2/3 from module scope
    model = make_model([*IMAGE_SIZE,3])
    
    # Labels are one-hot encoded, hence the categorical loss and metric
    model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                  metrics = tf.keras.metrics.CategoricalAccuracy(),
                  loss = tf.keras.losses.CategoricalCrossentropy()
                 )

# Network Architecture

In [None]:
# Render the layer graph of the model in the notebook.
display(tf.keras.utils.plot_model(model))

In [None]:
# Print the layer table and parameter counts while the backbones are frozen.
model.summary()

# 5 Fold Validation

In [None]:
# K-fold training of the dense head (backbones frozen), split by TFRecord file.
cv = KFold(n_splits=FOLD, shuffle=True, random_state=123)

X = pd.Series(tf.io.gfile.glob(GCS_PATH_COMB + '/*.tfrec'))

# NOTE(review): the same `model` instance keeps training from fold to fold
# (its weights are never reset), so files used for validation in a later fold
# may already have been trained on in an earlier one.
for n_fold, (train, test) in enumerate(cv.split(X)):
    
    print("Fold No ", n_fold+1)
    print("Train Indexes -", train)
    print("Test Indexes -", test)

    # Select the files of this fold as plain lists for both datasets
    # (the index is applied first, then the result is converted to a list).
    fold_train_files = X[train].tolist()
    fold_valid_files = X[test].tolist()

    # Number of training examples in THIS fold, taken from the number that
    # follows "train<digits>-" in each file name, so that steps_per_epoch
    # matches the fold rather than the earlier train/validation split.
    fold_train_cnt = sum(int(i) for i in re.findall(r'train\d+-(\d+)', str(fold_train_files)))

    # Creating the datasets
    train_ds = get_training_dataset(fold_train_files, augment=True)
    valid_ds = get_validation_dataset(fold_valid_files, augment=False, one_hot=True)
    
    # The datasets are already batched, so `batch_size` is not passed to fit.
    # The training dataset repeats forever, hence the explicit steps_per_epoch.
    history = model.fit(train_ds,
                        validation_data=valid_ds,
                        epochs=EPOCHS,
                        steps_per_epoch = fold_train_cnt//BATCH_SIZE,
                        callbacks=[tf.keras.callbacks.ModelCheckpoint('model_v0.77.h5',
                                                                      save_best_only=True),
                                   
                                   tf.keras.callbacks.EarlyStopping(patience=5,
                                                                    restore_best_weights=True)
                        ]
         )
    
    # Keep the per-fold mean of every tracked quantity for the plots below.
    acc_list.append(np.mean(history.history['categorical_accuracy']))
    loss_list.append(np.mean(history.history['loss']))
    val_acc_list.append(np.mean(history.history['val_categorical_accuracy']))
    val_loss_list.append(np.mean(history.history['val_loss']))

In [None]:
# Per-fold mean accuracy and mean loss, training against validation.
fold_axis = list(range(FOLD))

plt.figure(figsize=(8,5))
for curve, curve_label in ((acc_list, 'Training Accuracy'),
                           (val_acc_list, 'Validation Accuracy')):
    plt.plot(fold_axis, curve, label=curve_label)
plt.xlabel('N-Folds')
plt.ylabel('Mean Accuracy')
plt.legend()
plt.show()

plt.figure(figsize=(8,5))
for curve, curve_label in ((loss_list, 'Training Loss'),
                           (val_loss_list, 'Validation Loss')):
    plt.plot(fold_axis, curve, label=curve_label)
plt.xlabel('N-Folds')
plt.ylabel('Mean Loss')
plt.legend()
plt.show()


# Loss Vs Epochs

# Unfreezing few conv layers now

In [None]:
# Report how many layers each backbone has.
print('Number of layer in conv base 1 are ', len(conv_base1.layers))
print('Number of layer in conv base 2 are ', len(conv_base2.layers))
print('Number of layer in conv base 3 are ', len(conv_base3.layers))

# Fraction of each backbone's layers (counted from the first layer) to keep frozen.
not_to_train = 0.85
# Despite the "_to_train" names, these are the layer indices from which
# training starts: unfreeze_model freezes every layer before the index, so
# roughly the last 15% of the layers stay trainable.
conv_base1_to_train = int(not_to_train*len(conv_base1.layers))
conv_base2_to_train = int(not_to_train*len(conv_base2.layers))
conv_base3_to_train = int(not_to_train*len(conv_base3.layers))

print("\nFrom layer numbers to train")
print(conv_base1_to_train)
print(conv_base2_to_train)
print(conv_base3_to_train)

# Mark the backbones trainable as a whole; unfreeze_model then re-freezes
# individual layers.
conv_base1.trainable=True
conv_base2.trainable=True
conv_base3.trainable=True

# # adding regularization
# regularizer = tf.keras.regularizers.l1(0.00001)

# for layer in model.layers:
#     for attr in ['kernel_regularizer']:
#         if hasattr(layer, attr):
#           setattr(layer, attr, regularizer)

def unfreeze_model(model, layers_to_train):
    """Freeze part of `model` in place.

    Every BatchNormalization layer is frozen, and so is every other layer
    whose index is below `layers_to_train`. Layers at or after that index
    that are not BatchNormalization keep their current `trainable` value.
    """
    batch_norm = tf.keras.layers.BatchNormalization
    for position, layer in enumerate(model.layers):
        if isinstance(layer, batch_norm) or position < layers_to_train:
            layer.trainable = False

# Re-freeze the BatchNormalization layers and the leading layers of each backbone.
unfreeze_model(conv_base1, conv_base1_to_train)
unfreeze_model(conv_base2, conv_base2_to_train)
unfreeze_model(conv_base3, conv_base3_to_train)

In [None]:
# Print the layer table again after the trainable flags were changed.
model.summary()

In [None]:
# initial_learning_rate = 0.01
# epochs = 100
# decay = initial_learning_rate / epochs
# def lr_time_based_decay(epoch, lr):
#     return lr * 1 / (1 + decay * epoch)

In [None]:
# K-fold fine-tuning with the partially unfrozen backbones.
# Start the per-fold statistics afresh for this training stage.
loss_list = []
acc_list = []
val_acc_list = []
val_loss_list = []


cv = KFold(n_splits=FOLD, shuffle=True, random_state=123)

# X = pd.Series(tf.io.gfile.glob(GCS_PATH_COMB + '/*.tfrec'))

# Compile again with a lower learning rate for fine-tuning.
with strategy.scope():

    model.compile(optimizer = tf.keras.optimizers.Adam(1e-4),
                  metrics = tf.keras.metrics.CategoricalAccuracy(),
                  loss = tf.keras.losses.CategoricalCrossentropy()
                 )

# NOTE(review): the same `model` instance keeps training from fold to fold
# (its weights are never reset), so files used for validation in a later fold
# may already have been trained on in an earlier one.
for n_fold, (train, test) in enumerate(cv.split(X)):
    
    print("Fold No ", n_fold+1)
    print("Train Indexes -", train)
    print("Test Indexes -", test)

    # Select the files of this fold as plain lists for both datasets
    # (the index is applied first, then the result is converted to a list).
    fold_train_files = X[train].tolist()
    fold_valid_files = X[test].tolist()

    # Number of training examples in THIS fold, taken from the number that
    # follows "train<digits>-" in each file name, so that steps_per_epoch
    # matches the fold rather than the earlier train/validation split.
    fold_train_cnt = sum(int(i) for i in re.findall(r'train\d+-(\d+)', str(fold_train_files)))

    # Creating the datasets
    train_ds = get_training_dataset(fold_train_files, augment=True)
    valid_ds = get_validation_dataset(fold_valid_files, augment=False, one_hot=True)
    
    # The datasets are already batched, so `batch_size` is not passed to fit.
    # The training dataset repeats forever, hence the explicit steps_per_epoch.
    history = model.fit(train_ds,
                        validation_data=valid_ds,
                        epochs=EPOCHS,
                        steps_per_epoch = fold_train_cnt//BATCH_SIZE,
                        callbacks=[tf.keras.callbacks.ModelCheckpoint('fineTuned_v0.77.h5', save_best_only=True),
                                   tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True),
                                   tf.keras.callbacks.ReduceLROnPlateau(factor=0.2,
                                                                        patience=7,
                                                                        verbose=1,
                                                                        min_lr=0.00001)
                        ]
         )
    
    # Keep the per-fold mean of every tracked quantity for the plots below.
    acc_list.append(np.mean(history.history['categorical_accuracy']))
    loss_list.append(np.mean(history.history['loss']))
    val_acc_list.append(np.mean(history.history['val_categorical_accuracy']))
    val_loss_list.append(np.mean(history.history['val_loss']))

In [None]:
# Per-fold mean accuracy and mean loss of the fine-tuning stage.
fold_axis = list(range(FOLD))

plt.figure(figsize=(8,5))
for curve, curve_label in ((acc_list, 'Training Accuracy'),
                           (val_acc_list, 'Validation Accuracy')):
    plt.plot(fold_axis, curve, label=curve_label)
plt.xlabel('N-Folds')
plt.ylabel('Mean Accuracy')
plt.legend()
plt.show()

plt.figure(figsize=(8,5))
for curve, curve_label in ((loss_list, 'Training Loss'),
                           (val_loss_list, 'Validation Loss')):
    plt.plot(fold_axis, curve, label=curve_label)
plt.xlabel('N-Folds')
plt.ylabel('Mean Loss')
plt.legend()
plt.show()