In [None]:
###### First things first - Go to Accelerator and turn on GPU

###### Importing necessary files/ modules 
import random, re
import numpy as np
import tensorflow as tf
from kaggle_datasets import KaggleDatasets
print('Tensorflow version ' + tf.__version__)
import os

In [None]:
###### Seed every random number generator the notebook relies on
SEED = 42
# Python's `random`, NumPy and TensorFlow each keep their own RNG state,
# so each one is seeded in turn with the same value.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

###### Config
# Side length (in pixels) of the square images fed to the network.
DIM = 128
# Height/width pair used to build the model's input shape.
IMAGE_SIZE = [DIM, DIM]
# Number of passes requested from model.fit.
EPOCHS = 5
# Examples per batch; also used to derive the number of steps per epoch.
BATCH_SIZE = 64

In [None]:
####### Training runs with internet enabled, so get_gcs_path can be used here. The inference notebook has no internet access, so the data path must be obtained differently there.
# Resolve the bucket path of the competition dataset.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('cassava-leaf-disease-classification')

# Glob patterns for the train / test TFRecord shards under that path.
_train_glob = GCS_DS_PATH + '/train_tfrecords/*.tfrec'
_test_glob = GCS_DS_PATH + '/test_tfrecords/*.tfrec'

TRAINING_FILENAMES = tf.io.gfile.glob(_train_glob)
TEST_FILENAMES = tf.io.gfile.glob(_test_glob)
print(TRAINING_FILENAMES)



In [None]:
####### Necessary functions for image augmentation and reading tf records
def decode_image(image_data):
    """Decode JPEG bytes into a float32 image of shape [DIM, DIM, 3].

    Args:
        image_data: string tensor holding one encoded JPEG image.

    Returns:
        float32 tensor of shape [DIM, DIM, 3]; pixel values are divided by
        255 before the image is resized.
    """
    target_hw = [DIM, DIM]
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    # Scale the decoded pixel values into floats in the [0, 1] range.
    scaled = tf.cast(pixels, tf.float32) / 255.0
    resized = tf.image.resize(scaled, target_hw)
    # Reshape to the explicit [DIM, DIM, 3] shape.
    return tf.reshape(resized, target_hw + [3])

def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into an (image, label) pair.

    Args:
        example: a serialized tf.train.Example carrying an "image" bytes
            feature and a "target" int64 feature.

    Returns:
        Tuple (image, label): the decoded image from decode_image and the
        target cast to int32.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), tf.cast(parsed['target'], tf.int32)


def load_dataset(filenames, labeled = True, ordered = False):
    """Build a tf.data pipeline of (image, label) pairs from TFRecord files.

    Args:
        filenames: list of TFRecord file paths to read.
        labeled: accepted for interface compatibility but currently ignored;
            every record is parsed with read_labeled_tfrecord.
        ordered: when False (default) the pipeline is allowed to yield
            records in non-deterministic order, and files are read in
            parallel. When True, files are read one after another so records
            keep their file order.

    Returns:
        A tf.data.Dataset yielding (image, label) tuples.
    """
    autotune = tf.data.experimental.AUTOTUNE

    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    # Relaxing determinism only pays off when reads/maps actually run in
    # parallel. Parallel reads interleave records across files, so they are
    # enabled only when the caller did not ask for ordered output.
    parallel_reads = None if ordered else autotune

    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=parallel_reads)
    dataset = dataset.with_options(options)
    # A parallel map keeps element order while determinism is on, so it is
    # safe in both modes.
    dataset = dataset.map(read_labeled_tfrecord, num_parallel_calls=autotune)
    return dataset


def data_augment(image, label):
    """Randomly augment one training image; the label passes through unchanged.

    The image is randomly flipped horizontally and vertically. A single
    uniform draw in [0, 1) then selects one of two branches: for draws
    >= 0.66 a random crop of half the side length is taken; otherwise small
    random saturation, contrast and brightness changes are applied. The
    result is always resized back to [DIM, DIM, 3].

    Args:
        image: image tensor with 3 channels.
        label: label associated with the image.

    Returns:
        Tuple (augmented_image, label).
    """
    half_side = DIM // 2

    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)

    # One draw per example decides which augmentation branch runs.
    branch_draw = tf.random.uniform([], 0., 1., dtype=tf.float32)
    if branch_draw >= 0.66:
        image = tf.image.random_crop(image, size=[half_side, half_side, 3])
    else:
        image = tf.image.random_saturation(image, 0.9, 1.1)
        image = tf.image.random_contrast(image, 0.9, 1.1)
        image = tf.image.random_brightness(image, 0.1)

    # The crop branch shrinks the image, so bring both branches back to a
    # common [DIM, DIM, 3] shape.
    restored = tf.image.resize(image, [DIM, DIM])
    return tf.reshape(restored, [DIM, DIM, 3]), label


def get_training_dataset(dataset, do_aug=True):
    """Turn a dataset of (image, label) pairs into an endless training stream.

    The dataset is repeated indefinitely, optionally augmented, one-hot
    encoded, shuffled with a 2048-element buffer, batched into BATCH_SIZE
    and prefetched.

    Args:
        dataset: tf.data.Dataset yielding (image, integer label) pairs.
        do_aug: when True (default) apply data_augment to every example;
            when False the images are passed through unaugmented.

    Returns:
        A batched, infinitely repeating tf.data.Dataset of
        (images, one-hot labels).
    """
    dataset = dataset.repeat()
    # Honour the flag: previously augmentation ran even with do_aug=False.
    if do_aug:
        dataset = dataset.map(data_augment)
    dataset = dataset.map(onehot)

    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    # Prepare upcoming batches while the current one is being consumed.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

def count_data_items(filenames):
    """Sum the per-file record counts encoded in TFRecord file names.

    Each file name is expected to carry its record count as the digits
    between a '-' and a '.', e.g. 'ld_train00-1338.tfrec' -> 1338.

    Only the final path component is inspected, so a '-<digits>.' sequence
    in a bucket or directory name cannot be mistaken for the count.

    Args:
        filenames: iterable of file paths (using '/' as separator).

    Returns:
        The total number of records as a NumPy integer (0 for no files).

    Raises:
        ValueError: if a file name does not contain a '-<digits>.' count.
    """
    # Compile once; require at least one digit so '-.' is never treated as
    # an (empty) count.
    count_pattern = re.compile(r"-([0-9]+)\.")

    counts = []
    for filename in filenames:
        basename = filename.rsplit('/', 1)[-1]
        match = count_pattern.search(basename)
        if match is None:
            raise ValueError(
                "Cannot read record count from file name: %r" % (filename,)
            )
        counts.append(int(match.group(1)))

    # An explicit integer dtype keeps the result integral for an empty list.
    return np.sum(counts, dtype=np.int64)

# Total number of training records, read from the TFRecord file names.
_training_record_total = count_data_items(TRAINING_FILENAMES)
NUM_TRAINING_IMAGES = int(_training_record_total)
# Whole batches per epoch; a trailing partial batch is not counted.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE


In [None]:
####  The loss is categorical cross-entropy over 5 output classes, so labels are one-hot encoded: e.g. class 2 is represented as 0 0 1 0 0
def onehot(image,label):
    """Replace an integer class label with its one-hot vector of length 5.

    Args:
        image: image tensor, returned unchanged.
        label: integer class index.

    Returns:
        Tuple (image, one_hot_label).
    """
    num_classes = 5
    encoded_label = tf.one_hot(label, num_classes)
    return image, encoded_label

In [None]:
######### Train the ResNet50 model and save the trained model so it can be used in the inference notebook.
######### Either download the saved model file, or save a notebook version using Quick Save and make sure "Always save output" is checked under Advanced Options.

# Cross-entropy on one-hot targets, with label smoothing of 0.05.
loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)

# Module-level containers for training histories and trained models.
histories, models = [], []
def train():
    """Build, compile and fit a ResNet50-based 5-class image classifier.

    The network is an ImageNet-initialised ResNet50 backbone (without its
    top), followed by global average pooling and a 5-way softmax layer. All
    backbone weights are trainable. Training uses the module-level `loss`,
    the Adam optimizer, and runs for EPOCHS epochs of STEPS_PER_EPOCH steps
    over the augmented training stream.

    Side effects:
        Appends the Keras History returned by `fit` to the module-level
        `histories` list and the trained model to `models`, so the training
        curves are not lost.

    Returns:
        The trained tf.keras model.
    """
    train_dataset = load_dataset(TRAINING_FILENAMES, labeled = True)
    rnet =  tf.keras.applications.ResNet50(
                input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3),
                weights='imagenet',
                include_top=False
            )
    # Fine-tune the whole backbone rather than freezing it.
    rnet.trainable = True
    model = tf.keras.Sequential([
        rnet,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(5, activation='softmax',dtype='float32')
    ])
    model.compile(
        optimizer='adam',
        loss = loss ,
        metrics=['categorical_accuracy'] )
    # The training dataset repeats forever, so steps_per_epoch defines where
    # one epoch ends.
    history = model.fit(
                get_training_dataset(train_dataset),
                steps_per_epoch = STEPS_PER_EPOCH,
                epochs = EPOCHS,
                verbose=1 )
    # Previously the fit result was discarded and these lists stayed empty.
    histories.append(history)
    models.append(model)
    return model
    
# Run the full training pipeline, then write the trained model to disk so
# it can be loaded elsewhere.
model = train()
model.save(filepath='model_Resnet50.h5')
