<a href="https://colab.research.google.com/github/mandal-rahul/CS231n/blob/master/casavva.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Improvements
 - `Custom training loop`: Using a custom training loop greatly improves the training time and resource usage.
 - `Maximize MXU and minimize Idle time`: I have made a few adjustments to the Tensorflow pipeline to improve performance.

Experiments
 - Small improvements using external data (2019 competition).
 - Small improvements from using `CCE label smoothing`.
 - Small improvements from using `CutOut`.
 - Small improvements from `oversampling` classes `0`, `1`, `2` and `4`.
 - Small improvements from keeping `batch normalization` layers frozen.
 - No relevant improvements from using `class weights`.
 - No relevant improvements from using `MixUp`.
 - No relevant improvements from using different backbones.
 - Worse performance when using a different image resolution, even the default `EfficientNet` input size.
 - Changing `Sparse CCE` to `CCE` has no impact, as expected.
 - Was not able to make progressive unfreezing work.
 - Changing the `learning rate` batch-wise seems more efficient than epoch-wise, especially for the warm-up phase.

## Dependencies

In [None]:
# Environment switch read throughout the notebook: the non-Colab branches
# install a pinned TensorFlow and import `kaggle_datasets`.
colab = True
if not colab:
  # Pinned TensorFlow build plus the Cloud TPU client (non-Colab only).
  !pip install -U --quiet tensorflow==2.3.2
  !pip install  cloud-tpu-client
# Packages needed in both environments: backbone zoo (`classification_models`),
# the AdaBelief optimizer and the EfficientNet implementation.
!pip install image-classifiers
!pip install  --upgrade  --quiet adabelief-tf
!pip install --quiet --upgrade efficientnet

In [None]:
import math, os, re, warnings, random, time
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from classification_models.keras import Classifiers
if not colab:
  from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
from tensorflow.keras import optimizers, Sequential, losses, metrics, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import efficientnet.tfkeras as efn
import tensorflow_addons as tfa
from adabelief_tf import AdaBeliefOptimizer
from pathlib import Path
from sklearn.metrics import roc_auc_score

def seed_everything(seed=0):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy and TensorFlow (in that order) and then
    exports `PYTHONHASHSEED` and `TF_DETERMINISTIC_OPS` to the environment.

    Args:
        seed: integer seed shared by all generators.
    """
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })

# Seed all RNGs once at import time so runs are repeatable, then silence
# every Python warning for the rest of the notebook.
seed = 0
seed_everything(seed)
warnings.filterwarnings('ignore')

### Hardware configuration

Note that we have `32` cores: a `TPU v2 Pod` has more cores than a single `TPU v3`, which has `8` cores.

In [None]:
# TPU or GPU detection
# Detect hardware and build the matching tf.distribute strategy.
#from cloud_tpu_client import Client
print(tf.__version__)


try:
    #Client().configure_tpu_version(tf.__version__, restart_type='ifNeeded')
    # The resolver raises ValueError when no TPU can be found; that case is
    # handled below by falling back to the default strategy.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(f'Running on TPU {tpu.master()}')
except ValueError:
    tpu = None

if tpu:
    # Connect to the TPU cluster and initialise it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # No TPU: use whatever strategy TensorFlow currently has in scope.
    strategy = tf.distribute.get_strategy()

# AUTO is passed as `num_parallel_calls` / prefetch size in the tf.data pipeline.
AUTO = tf.data.experimental.AUTOTUNE
# Number of replicas; used below to scale the batch size and learning rates.
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

# hyperparameters

In [None]:


# Schedule selector read by lrfn(): 'cosine' dispatches to lrfnCosine,
# any other value dispatches to lrfnGrad.
lr_schedule = 'cosine' 
          #'grad'
# Batch size passed to dataset.batch(): 32 samples per replica.
BATCH_SIZE = 16 * REPLICAS*2
# Loss settings forwarded to BiTemperedLogisticLoss in model_fn
# (t1, t2, lbl_smth and n_iter respectively).
T_1 = 0.4
T_2 = 0.8
SMOOTH_FRACTION = 0.2
N_ITER = 5
# Forwarded as `epsilon` to AdaBeliefOptimizer in model_fn.
epsilon = 1e-16
# Learning-rate schedule parameters (start)
if lr_schedule == 'cosine':
  # Linear warm-up from LR_START to LR_MAX, then a cosine decay clipped at LR_MIN.
  LR_START = 1e-8
  LR_MIN = 1e-8
  LR_MAX = 0.00004 * strategy.num_replicas_in_sync
  LR_RAMPUP_EPOCHS = 6
  LR_SUSTAIN_EPOCHS = 0
  N_CYCLES = .55
else:    
  # Linear warm-up followed by an exponential decay (see lrfnGrad).
  # NOTE(review): LEARNING_RATE and LR_EXP_DECAY only exist in this branch,
  # and N_CYCLES only in the cosine branch -- confirm nothing reads them
  # under the other schedule.
  LEARNING_RATE = 1e-5 * REPLICAS
  LR_START = 0.00001
  LR_MAX = 0.00004 * strategy.num_replicas_in_sync
  LR_MIN = 0.00001
  LR_RAMPUP_EPOCHS = 7
  LR_SUSTAIN_EPOCHS = 0
  LR_EXP_DECAY = .78
# Learning-rate schedule parameters (end)
EPOCHS = 20
# HEIGHT/WIDTH drive the augmentations; HEIGHT_RS/WIDTH_RS are the final
# size produced by prepare_image.
HEIGHT = 512
WIDTH = 512
HEIGHT_RS = 512
WIDTH_RS = 512
CHANNELS = 3
N_CLASSES = 5
# NOTE(review): FOLDS_USED (5) is larger than N_FOLDS (3) -- confirm how the
# training loop combines the two.
N_FOLDS = 3
FOLDS_USED = 5
patience = 10




class cfg:
    """Experiment configuration, used as a plain namespace of class attributes."""

    colab_instance = 1
    # Output directory: Google Drive when running on Colab, a local folder otherwise.
    base_save_path = '/content/drive/MyDrive/models/' if colab else './models/'
    # Candidate backbones; this list should be in ascending order of input size.
    models = [
        efn.EfficientNetB2,
        efn.EfficientNetB4,
        efn.EfficientNetB6,
    ]
    # NOTE(review): presumably one input size per entry of `models` -- confirm.
    model_input_shape = [512, 512, 512]
    #img_resize_shape =512
    # Backbone constructor actually instantiated by model_fn.
    model = efn.EfficientNetB4
    model_name = model.__name__
    num_exp = 5
    display_dataset = True
    # Plain boolean. The original `True,` (stray trailing comma) made this the
    # one-element tuple (True,), which is truthy even if edited to `False,`.
    run_exp = True
    exp_csv_path = base_save_path + f'Accuracy_{colab_instance}.csv'
    # Optimizer names understood by model_fn; any name other than 'adam' or
    # 'ranger' falls through to AdaBelief there.
    optimizers = ['adam', 'ranger', 'adabelief']
    optimiser_name = 'ranger'
    external_data = False
    


# Learning rate scheduler

In [None]:
def lrfn(epoch):
  """Return the learning rate for `epoch` under the configured schedule.

  Uses `lrfnCosine` when `lr_schedule` is 'cosine' and `lrfnGrad` otherwise.
  """
  schedule = lrfnCosine if lr_schedule == 'cosine' else lrfnGrad
  return schedule(epoch)

def lrfnCosine(epoch):
    """Warm-up followed by a cosine decay.

    The rate ramps linearly from LR_START to LR_MAX over LR_RAMPUP_EPOCHS,
    is held at LR_MAX for LR_SUSTAIN_EPOCHS, and then follows a cosine
    covering N_CYCLES cycles over the remaining epochs, floored at LR_MIN
    when LR_MIN is set.

    Returns:
        A Python number during warm-up/sustain and a TensorFlow scalar during
        the cosine phase (it is computed with `tf.math.cos`).
    """
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    # Fraction of the decay phase completed so far.
    progress = (epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) / (EPOCHS - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)
    cosine_lr = LR_MAX * (0.5 * (1.0 + tf.math.cos(math.pi * N_CYCLES * 2.0 * progress)))
    if LR_MIN is None:
        return cosine_lr
    return tf.math.maximum(LR_MIN, cosine_lr)

def lrfnGrad(epoch):
    """Warm-up followed by an exponential decay towards LR_MIN.

    The rate ramps linearly from LR_START to LR_MAX over LR_RAMPUP_EPOCHS,
    is held at LR_MAX for LR_SUSTAIN_EPOCHS, and then decays by a factor of
    LR_EXP_DECAY per epoch from LR_MAX down towards LR_MIN.

    Args:
        epoch: epoch index (may be fractional).

    Returns:
        The learning rate for `epoch`.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        lr = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    elif epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        lr = LR_MAX
    else:
        # `+ LR_MIN` makes the decay start exactly at LR_MAX and converge to
        # LR_MIN; without it the schedule jumps down by LR_MIN when the decay
        # begins and then keeps shrinking towards zero.
        lr = (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) + LR_MIN
    return lr

# Evaluate the schedule once per epoch and plot it. `y` is kept at module
# level because plot_metrics() overlays it on the accuracy chart.
rng = list(range(EPOCHS))
y = list(map(lrfn, rng))
plt.plot(rng, y)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

# Load data

In [None]:
def count_data_items(filenames):
    """Sum the item counts encoded in TFRecord file names.

    Each name is expected to contain `-<count>.` (for example
    `train00-1338.tfrec`); the digits between the dash and the dot are read
    as the number of records in that file.

    Args:
        filenames: iterable of file names/paths.

    Returns:
        The NumPy sum of all per-file counts.
    """
    count_pattern = re.compile(r'-([0-9]*)\.')
    per_file_counts = []
    for name in filenames:
        per_file_counts.append(int(count_pattern.search(name).group(1)))
    return np.sum(per_file_counts)


# Kaggle input directory of the competition; only used by the commented-out
# train.csv loading below.
database_base_path = '/kaggle/input/cassava-leaf-disease-classification/'
#train = pd.read_csv(f'{database_base_path}train.csv')
#print(f'Train samples: {len(train)}')
if colab:
  # On Colab the bucket paths are hard-coded.
  # NOTE(review): presumably these mirror the four Kaggle datasets resolved
  # in the else-branch -- confirm they are still valid.
  GCS_PATH = 'gs://kds-579a462f598557fb5d480ef78228eeb1d1ea5ce43dc47e674fa5a644'
  GCS_PATH_EXT = 'gs://kds-388cbf339ce4bb04f6032d8af6dfeab8858192d193051d8be6df0631' 
  GCS_PATH_CLASSES = 'gs://kds-ebf0f9ac962adcdab49275d8fb50516ecedc4b726a0944f99b4ff60f'
  GCS_PATH_EXT_CLASSES = 'gs://kds-62aaac1dd1d2587cfc0981ec461352d1527965ab1e895d7d253a8816'  
else:
  # Otherwise resolve the bucket of each Kaggle dataset at run time.
  # GCS_PATH = KaggleDatasets().get_gcs_path('cassava-leaf-disease-classification') # Original dataset
  GCS_PATH = KaggleDatasets().get_gcs_path(f'cassava-leaf-disease-50-tfrecords-center-512x512') # Center cropped and resized (50 TFRecord)
  GCS_PATH_EXT = KaggleDatasets().get_gcs_path(f'cassava-leaf-disease-50-tfrecords-external-512x512') # Center cropped and resized (50 TFRecord) (External)
  GCS_PATH_CLASSES = KaggleDatasets().get_gcs_path(f'cassava-leaf-disease-50-tfrecords-classes-512x512') # Center cropped and resized (50 TFRecord) by classes
  GCS_PATH_EXT_CLASSES = KaggleDatasets().get_gcs_path(f'cassava-leaf-disease-ext-50-tfrec-classes-512x512') # Center cropped and resized (50 TFRecord) (External) by classes



# FILENAMES_COMP = tf.io.gfile.glob(GCS_PATH + '/train_tfrecords/*.tfrec') # Original TFRecords
# All TFRecord files of the competition data and of the external (2019) data.
FILENAMES_COMP = tf.io.gfile.glob(GCS_PATH + '/*.tfrec')
FILENAMES_2019 = tf.io.gfile.glob(GCS_PATH_EXT + '/*.tfrec')

# Per-class TFRecord files of the competition data, selected by file-name prefix.
FILENAMES_COMP_CBB = tf.io.gfile.glob(GCS_PATH_CLASSES + '/CBB*.tfrec')
FILENAMES_COMP_CBSD = tf.io.gfile.glob(GCS_PATH_CLASSES + '/CBSD*.tfrec')
FILENAMES_COMP_CGM = tf.io.gfile.glob(GCS_PATH_CLASSES + '/CGM*.tfrec')
FILENAMES_COMP_CMD = tf.io.gfile.glob(GCS_PATH_CLASSES + '/CMD*.tfrec')
FILENAMES_COMP_Healthy = tf.io.gfile.glob(GCS_PATH_CLASSES + '/Healthy*.tfrec')

# Per-class TFRecord files of the external (2019) data.
FILENAMES_2019_CBB = tf.io.gfile.glob(GCS_PATH_EXT_CLASSES + '/CBB*.tfrec')
FILENAMES_2019_CBSD = tf.io.gfile.glob(GCS_PATH_EXT_CLASSES + '/CBSD*.tfrec')
FILENAMES_2019_CGM = tf.io.gfile.glob(GCS_PATH_EXT_CLASSES + '/CGM*.tfrec')
FILENAMES_2019_CMD = tf.io.gfile.glob(GCS_PATH_EXT_CLASSES + '/CMD*.tfrec')
FILENAMES_2019_Healthy = tf.io.gfile.glob(GCS_PATH_EXT_CLASSES + '/Healthy*.tfrec')


# Oversampling: on top of the full file lists, the CBB, CBSD, CGM and Healthy
# per-class files are appended twice more (`2 * list` repeats the list). The
# CMD per-class files are not appended.
# NOTE(review): the 2019 files are included unconditionally even though
# cfg.external_data is False -- confirm this is intended.
TRAINING_FILENAMES = (FILENAMES_COMP + 
                      FILENAMES_2019 + 
                      (2 * FILENAMES_COMP_CBB) + 
                      (2 * FILENAMES_2019_CBB) + 
                      (2 * FILENAMES_COMP_CBSD) + 
                      (2 * FILENAMES_2019_CBSD) + 
                      (2 * FILENAMES_COMP_CGM) + 
                      (2 * FILENAMES_2019_CGM) + 
                      (2 * FILENAMES_COMP_Healthy) + 
                      (2 * FILENAMES_2019_Healthy))

# The count is derived from the numbers embedded in the file names, so files
# listed several times are counted several times.
NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)

print(f'GCS: train images: {NUM_TRAINING_IMAGES}')
#display(train.head())

# Human-readable class names, indexed by class id in the plotting helpers.
# NOTE(review): the order presumably matches the integer `target` stored in
# the TFRecords -- confirm against the dataset.
CLASSES = ['Cassava Bacterial Blight', 
           'Cassava Brown Streak Disease', 
           'Cassava Green Mottle', 
           'Cassava Mosaic Disease', 
           'Healthy']

# Augmentation

In [None]:
def data_augment(image, label):
    """Apply a random chain of augmentations to one image.

    One uniform random number in [0, 1) is drawn per augmentation family and
    compared with fixed thresholds to decide whether, and with which
    parameters, that family is applied. The image is always resized back to
    [HEIGHT, WIDTH] before the optional CutOut step.

    Args:
        image: a single image (not a batch); the shear/rotation helpers
            reshape it to [HEIGHT, HEIGHT, 3].
        label: passed through unchanged.

    Returns:
        The `(augmented_image, label)` pair.
    """
    p_rotation = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_spatial = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_shear = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_cutout = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    # Shear: applied when p_shear > .2, split evenly between a positive and a
    # negative maximum angle (the helper scales the angle by another random factor).
    if p_shear > .2:
        if p_shear > .6:
            image = transform_shear(image, HEIGHT, shear=20.)
        else:
            image = transform_shear(image, HEIGHT, shear=-20.)
            
    # Rotation: same thresholds as shear, with a maximum angle of +/-45 degrees.
    if p_rotation > .2:
        if p_rotation > .6:
            image = transform_rotation(image, HEIGHT, rotation=45.)
        else:
            image = transform_rotation(image, HEIGHT, rotation=-45.)
            
    # Flips (each decided internally by TensorFlow) and an occasional transpose.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if p_spatial > .75:
        image = tf.image.transpose(image)
        
    # Rotates: one of 270, 180, 90 degrees or no rotation, chosen by quartile of p_rotate.
    if p_rotate > .75:
        image = tf.image.rot90(image, k=3) # rotate 270º
    elif p_rotate > .5:
        image = tf.image.rot90(image, k=2) # rotate 180º
    elif p_rotate > .25:
        image = tf.image.rot90(image, k=1) # rotate 90º
        
    # Pixel-level transforms: each is applied independently when its draw is >= .4.
    if p_pixel_1 >= .4:
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if p_pixel_2 >= .4:
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if p_pixel_3 >= .4:
        image = tf.image.random_brightness(image, max_delta=.1)
        
    # Crops: p_crop > .6 -> central crop keeping 50/60/70/80% of each side;
    # .3 < p_crop <= .6 -> random square crop; otherwise no crop.
    if p_crop > .6:
        if p_crop > .9:
            image = tf.image.central_crop(image, central_fraction=.5)
        elif p_crop > .8:
            image = tf.image.central_crop(image, central_fraction=.6)
        elif p_crop > .7:
            image = tf.image.central_crop(image, central_fraction=.7)
        else:
            image = tf.image.central_crop(image, central_fraction=.8)
    elif p_crop > .3:
        # Side length drawn from [0.6 * HEIGHT, HEIGHT); the upper bound is exclusive.
        crop_size = tf.random.uniform([], int(HEIGHT*.6), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])
            
    # Bring every image back to a common size regardless of the crop taken.
    image = tf.image.resize(image, size=[HEIGHT, WIDTH])

    # CutOut runs last so the masks are sized relative to the final resolution.
    if p_cutout > .5:
        image = data_augment_cutout(image)
        
    return image, label

## Auxiliary functions

In [None]:
# data augmentation @cdeotte kernel: https://www.kaggle.com/cdeotte/rotation-augmentation-gpu-tpu-0-96
def transform_rotation(image, height, rotation):
    """Rotate one square image by a random angle.

    Args:
        image: one image of size [dim, dim, 3], not a batch of [b, dim, dim, 3].
        height: side length `dim` of the image.
        rotation: angle in degrees; the angle actually applied is `rotation`
            multiplied by a fresh uniform random factor.

    Returns:
        The rotated image, reshaped to [dim, dim, 3].
    """
    DIM = height
    XDIM = DIM%2 #fix for size 331
    
    # Scale the requested angle by a random factor.
    rotation = rotation * tf.random.uniform([1],dtype='float32')
    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    
    # ROTATION MATRIX (3x3, homogeneous coordinates)
    c1 = tf.math.cos(rotation)
    s1 = tf.math.sin(rotation)
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    rotation_matrix = tf.reshape(tf.concat([c1,s1,zero, -s1,c1,zero, zero,zero,one],axis=0),[3,3])

    # LIST DESTINATION PIXEL INDICES (coordinates centred on the image middle)
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(rotation_matrix,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    # Clip so that every source coordinate stays inside the image.
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES (shift centred coordinates back to array indices)
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3])

def transform_shear(image, height, shear):
    """Shear one square image by a random angle.

    Args:
        image: one image of size [dim, dim, 3], not a batch of [b, dim, dim, 3].
        height: side length `dim` of the image.
        shear: angle in degrees; the angle actually applied is `shear`
            multiplied by a fresh uniform random factor.

    Returns:
        The sheared image, reshaped to [dim, dim, 3].
    """
    DIM = height
    XDIM = DIM%2 #fix for size 331
    
    # Scale the requested angle by a random factor, then convert to radians.
    shear = shear * tf.random.uniform([1],dtype='float32')
    shear = math.pi * shear / 180.
        
    # SHEAR MATRIX (3x3, homogeneous coordinates)
    one = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)
    shear_matrix = tf.reshape(tf.concat([one,s2,zero, zero,c2,zero, zero,zero,one],axis=0),[3,3])    

    # LIST DESTINATION PIXEL INDICES (coordinates centred on the image middle)
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # SHEAR DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(shear_matrix,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    # Clip so that every source coordinate stays inside the image.
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES (shift centred coordinates back to array indices)
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3])

# CutOut
def data_augment_cutout(image, min_mask_size=(int(HEIGHT * .1), int(HEIGHT * .1)), 
                        max_mask_size=(int(HEIGHT * .125), int(HEIGHT * .125))):
    """Blank out a random number of rectangles in `image` (CutOut).

    A uniform draw selects how many cut-outs to apply; the masking itself is
    delegated to `random_cutout`.

    Args:
        image: image of size [HEIGHT, WIDTH, channels].
        min_mask_size: (height, width) lower bound of a mask. The default is
            computed from HEIGHT once, when the function is defined.
        max_mask_size: (height, width) upper bound of a mask (same remark).

    Returns:
        The masked image.
    """
    p_cutout = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    
    # The integer upper bounds below are exclusive, hence "10-14", "5-9", "2-4".
    if p_cutout > .85: # 10-14 cut outs
        n_cutout = tf.random.uniform([], 10, 15, dtype=tf.int32)
        image = random_cutout(image, HEIGHT, WIDTH, 
                              min_mask_size=min_mask_size, max_mask_size=max_mask_size, k=n_cutout)
    elif p_cutout > .6: # 5-9 cut outs
        n_cutout = tf.random.uniform([], 5, 10, dtype=tf.int32)
        image = random_cutout(image, HEIGHT, WIDTH, 
                              min_mask_size=min_mask_size, max_mask_size=max_mask_size, k=n_cutout)
    elif p_cutout > .25: # 2-4 cut outs
        n_cutout = tf.random.uniform([], 2, 5, dtype=tf.int32)
        image = random_cutout(image, HEIGHT, WIDTH, 
                              min_mask_size=min_mask_size, max_mask_size=max_mask_size, k=n_cutout)
    else: # 1 cut out
        image = random_cutout(image, HEIGHT, WIDTH, 
                              min_mask_size=min_mask_size, max_mask_size=max_mask_size, k=1)

    return image

def random_cutout(image, height, width, channels=3, min_mask_size=(10, 10), max_mask_size=(80, 80), k=1):
    """Zero out `k` randomly sized and placed rectangles of `image`.

    Args:
        image: image of size [height, width, channels].
        height: image height in pixels.
        width: image width in pixels.
        channels: number of channels of the mask.
        min_mask_size: (height, width) inclusive lower bound of a rectangle.
        max_mask_size: (height, width) exclusive upper bound of a rectangle.
        k: number of rectangles to cut out.

    Returns:
        The masked image as float32 (the image is cast before multiplying).
    """
    # Masks must be strictly smaller than the image.
    assert height > min_mask_size[0]
    assert width > min_mask_size[1]
    assert height > max_mask_size[0]
    assert width > max_mask_size[1]

    for i in range(k):
      # Random rectangle size.
      mask_height = tf.random.uniform(shape=[], minval=min_mask_size[0], maxval=max_mask_size[0], dtype=tf.int32)
      mask_width = tf.random.uniform(shape=[], minval=min_mask_size[1], maxval=max_mask_size[1], dtype=tf.int32)

      # Random vertical position, expressed as padding above and below the rectangle.
      pad_h = height - mask_height
      pad_top = tf.random.uniform(shape=[], minval=0, maxval=pad_h, dtype=tf.int32)
      pad_bottom = pad_h - pad_top

      # Random horizontal position, expressed as padding left and right of the rectangle.
      pad_w = width - mask_width
      pad_left = tf.random.uniform(shape=[], minval=0, maxval=pad_w, dtype=tf.int32)
      pad_right = pad_w - pad_left

      # Rectangle of zeros, padded with ones to the full image size.
      cutout_area = tf.zeros(shape=[mask_height, mask_width, channels], dtype=tf.uint8)

      cutout_mask = tf.pad([cutout_area], [[0,0],[pad_top, pad_bottom], [pad_left, pad_right], [0,0]], constant_values=1)
      cutout_mask = tf.squeeze(cutout_mask, axis=0)
      # Multiplying by the 0/1 mask blanks the rectangle and keeps the rest.
      image = tf.multiply(tf.cast(image, tf.float32), tf.cast(cutout_mask, tf.float32))

    return image

In [None]:
# Datasets utility functions
def decode_image(image_data):
    """
        Decode JPEG bytes into a 3-channel image tensor.
    """
    return tf.image.decode_jpeg(image_data, channels=3)

def scale_image(image, label):
    """
        Cast the image to float32 and divide by 255; the label is passed through.
    """
    as_float = tf.cast(image, tf.float32)
    return as_float / 255.0, label

def prepare_image(image, label):
    """
        Resize the image to [HEIGHT_RS, WIDTH_RS] and pin its static shape
        to [HEIGHT_RS, WIDTH_RS, 3]; the label is passed through.
    """
    target_size = [HEIGHT_RS, WIDTH_RS]
    resized = tf.image.resize(image, target_size)
    return tf.reshape(resized, [HEIGHT_RS, WIDTH_RS, 3]), label

def read_tfrecord(example, labeled=True):
    """
        Parse one serialized TFRecord example.

        Labeled records yield (image, one-hot target over N_CLASSES);
        unlabeled records yield (image, image_name).
    """
    feature_spec = {'image': tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        feature_spec['target'] = tf.io.FixedLenFeature([], tf.int64)
    else:
        feature_spec['image_name'] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed['image'])

    if not labeled:
        return image, parsed['image_name']

    target = tf.cast(parsed['target'], tf.int32)
    # One-hot encoding is needed to use a categorical (non-sparse) loss.
    return image, tf.one_hot(tf.cast(target, tf.int32), N_CLASSES)

def get_dataset(FILENAMES, labeled=True, ordered=False, repeated=False, 
                cached=False, augment=False):
    """
        Return a Tensorflow dataset ready for training or inference.

        Args:
            FILENAMES: list of TFRecord file paths.
            labeled: forwarded to read_tfrecord (targets vs. image names).
            ordered: keep the record order; when False, files are interleaved
                non-deterministically and samples are shuffled.
            repeated: repeat the dataset indefinitely.
            cached: cache the batched dataset.
            augment: apply data_augment to every sample.
    """
    ignore_order = tf.data.Options()
    if not ordered:
        # Allow out-of-order reads and interleave records from several files.
        ignore_order.experimental_deterministic = False
        dataset = tf.data.Dataset.list_files(FILENAMES)
        dataset = dataset.interleave(tf.data.TFRecordDataset, num_parallel_calls=AUTO)
    else:
        dataset = tf.data.TFRecordDataset(FILENAMES, num_parallel_reads=AUTO)
        
    dataset = dataset.with_options(ignore_order)
    
    dataset = dataset.map(lambda x: read_tfrecord(x, labeled=labeled), num_parallel_calls=AUTO)
    
    # Augmentation runs on the decoded image, before scaling to [0, 1].
    if augment:
        dataset = dataset.map(data_augment, num_parallel_calls=AUTO)
        
    dataset = dataset.map(scale_image, num_parallel_calls=AUTO)
    dataset = dataset.map(prepare_image, num_parallel_calls=AUTO)
    
    if not ordered:
        # NOTE(review): the shuffle buffer holds 2048 fully decoded float
        # images at this point -- confirm the memory cost is acceptable.
        dataset = dataset.shuffle(2048)
    if repeated:
        dataset = dataset.repeat()
        
    dataset = dataset.batch(BATCH_SIZE)
    
    if cached:
        # NOTE(review): caching here stores batches after augmentation and
        # shuffling -- confirm `cached` is only used without them.
        dataset = dataset.cache()
    dataset = dataset.prefetch(AUTO)
    return dataset

def unfreeze_model(model):
    """Make every layer trainable except BatchNormalization layers, which are frozen."""
    for layer in model.layers:
        layer.trainable = not isinstance(layer, L.BatchNormalization)
                
def unfreeze_block(model, block_name=None, n_top=3):
    """Unfreeze the layers of one named block, keeping BatchNorm layers frozen.

    Only `model.layers[:-n_top]` is visited. BatchNormalization layers there
    are frozen; any other layer whose name contains `block_name` is made
    trainable. Layers matching neither rule are left as they are.
    """
    for layer in model.layers[:-n_top]:
        if isinstance(layer, L.BatchNormalization):
            layer.trainable = False
            continue
        if block_name and (block_name in layer.name):
            layer.trainable = True

In [None]:
# Visualization utility functions
# Keep printed NumPy arrays short: summarise arrays with more than 15
# elements and wrap lines at 80 characters.
np.set_printoptions(threshold=15, linewidth=80)

def batch_to_numpy_images_and_labels(data):
    """Convert an `(images, labels)` batch of tensors to NumPy.

    Returns:
        `(numpy_images, numpy_labels)`. When the labels have `object` dtype
        (binary strings, i.e. image ids rather than targets, as for test
        data) the labels are replaced by a list of `None`, one per image.
    """
    image_batch, label_batch = data
    np_images = image_batch.numpy()
    np_labels = label_batch.numpy()
    if np_labels.dtype != object:
        return np_images, np_labels
    # No real labels, only image ids: return None for each image.
    return np_images, [None] * len(np_images)

def title_from_label_and_target(label, correct_label):
    """Build a plot title from a predicted class index and the true one.

    Returns `(title, correct)`. Without a true label the title is just the
    predicted class name and `correct` is True.

    NOTE: a function with the same name is defined again further down in
    this file; once that definition has run, it replaces this one.
    """
    predicted_name = CLASSES[label]
    if correct_label is None:
        return predicted_name, True
    correct = (label == correct_label)
    if correct:
        return "{} [{}{}{}]".format(predicted_name, 'OK', '', ''), correct
    return "{} [{}{}{}]".format(predicted_name, 'NO', u"\u2192",
                                CLASSES[correct_label]), correct

def display_one_flower(image, title, subplot, red=False, titlesize=16):
    """Draw one image into the grid cell described by `subplot`.

    Args:
        image: image to show.
        title: title text; no title is drawn when it is empty.
        subplot: `(rows, cols, index)` triple for `plt.subplot`.
        red: draw the title in red and slightly smaller.
        titlesize: base font size of the title.

    Returns:
        The `(rows, cols, index + 1)` triple of the next cell.
    """
    plt.subplot(*subplot)
    plt.axis('off')
    plt.imshow(image)
    if len(title) > 0:
        font_size = int(titlesize/1.2) if red else int(titlesize)
        font_color = 'red' if red else 'black'
        plt.title(title, fontsize=font_size, color=font_color,
                  fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    return (subplot[0], subplot[1], subplot[2]+1)

def display_batch_of_images(databatch, predictions=None):
    """Plot a batch of images in a square-ish grid, titled with their classes.

    This will work with:
    display_batch_of_images((images, labels))
    display_batch_of_images((images, labels), predictions)

    Args:
        databatch: `(images, labels)` pair of tensors, as unpacked by
            `batch_to_numpy_images_and_labels`. Labels are either per-class
            score/one-hot rows or image-id strings (unlabeled data).
        predictions: optional per-image predictions, forwarded to
            `title_from_label_and_target` together with the true label.
    """
    # data
    images, labels = batch_to_numpy_images_and_labels(databatch)
    if len(images) == 0:
        # Nothing to draw; also avoids a division by zero in the grid maths.
        return

    # Unlabeled batches come back as a list of None: only labeled batches
    # can be reduced to class indices with argmax.
    has_labels = labels is not None and not all(label is None for label in labels)
    if has_labels:
        labels = np.argmax(labels, axis=-1)
    else:
        labels = [None] * len(images)

    # auto-squaring: this will drop data that does not fit into square or square-ish rectangle
    rows = int(math.sqrt(len(images)))
    cols = len(images)//rows

    # size and spacing
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot = (rows, cols, 1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE, FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols, FIGSIZE))

    # magic formula tested to work from 1x1 to 10x10 images
    dynamic_titlesize = FIGSIZE*SPACING/max(rows, cols)*40+3

    # display
    for i, (image, label) in enumerate(zip(images[:rows*cols], labels[:rows*cols])):
        title = '' if label is None else CLASSES[label]
        correct = True
        if predictions is not None:
            title, correct = title_from_label_and_target(predictions[i], label)
        subplot = display_one_flower(image, title, subplot, not correct, titlesize=dynamic_titlesize)

    # layout: no gaps between tiles when there are no titles to show
    plt.tight_layout()
    if not has_labels and predictions is None:
        plt.subplots_adjust(wspace=0, hspace=0)
    else:
        plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()
    
# Visualize model predictions
def dataset_to_numpy_util(dataset, N):
    """Return the first `N` samples of `dataset` as NumPy `(images, labels)`.

    The dataset is re-batched to size `N` and only its first batch is read.
    """
    rebatched = dataset.unbatch().batch(N)
    for image_batch, label_batch in rebatched:
        numpy_images = image_batch.numpy()
        numpy_labels = label_batch.numpy()
        break
    return numpy_images, numpy_labels

def title_from_label_and_target(label, correct_label):
    """Build a plot title from per-class scores and the true class index.

    `label` is reduced to a class index with argmax over its last axis and
    compared with `correct_label`.

    NOTE: this redefines a function of the same name declared earlier in
    this file, so callers after this point get this version.

    Returns:
        `(title, correct)` where the title shows the predicted index, whether
        it matches, and the expected index on a mismatch.
    """
    predicted = np.argmax(label, axis=-1)
    correct = (predicted == correct_label)
    if not correct:
        hint, expected = ', shoud be ', correct_label
    else:
        hint, expected = '', ''
    return "{} [{}{}{}]".format(predicted, str(correct), hint, expected), correct

def display_one_flower_eval(image, title, subplot, red=False):
    """Draw one titled image at the integer subplot position `subplot`.

    The title is red when `red` is set, black otherwise.

    Returns:
        `subplot + 1`, the position of the next cell.
    """
    title_color = 'red' if red else 'black'
    plt.subplot(subplot)
    plt.axis('off')
    plt.imshow(image)
    plt.title(title, fontsize=14, color=title_color)
    return subplot + 1

def display_9_images_with_predictions(images, predictions, labels):
    """Show up to nine images in a 3x3 grid, titled with prediction vs. label.

    Titles of wrong predictions are drawn in red.
    """
    plt.figure(figsize=(13,13))
    position = 331  # 3 rows, 3 columns, first cell
    for idx, picture in enumerate(images):
        caption, is_correct = title_from_label_and_target(predictions[idx], labels[idx])
        position = display_one_flower_eval(picture, caption, position, not is_correct)
        if idx >= 8:
            # The grid only has nine cells.
            break

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()


# Model evaluation
def plot_metrics(history):
    """Plot training/validation loss and accuracy curves.

    Args:
        history: mapping with the keys 'loss', 'val_loss', 'accuracy' and
            'val_accuracy', each holding one value per epoch.

    The accuracy chart also overlays the module-level learning-rate curve
    `y`, multiplied by 1000. Dashed vertical lines mark the best epoch of
    each series. The raw history is printed as well.
    """
    fig, axes = plt.subplots(2, 1, sharex='col', figsize=(30, 15))
    loss_ax, acc_ax = axes.flatten()
    print(history)

    train_loss, val_loss = history['loss'], history['val_loss']
    loss_ax.plot(train_loss, label='Train loss')
    loss_ax.plot(val_loss, label='Validation loss')
    loss_ax.legend(loc='best', fontsize=16)
    loss_ax.set_title('Loss')
    loss_ax.axvline(np.argmin(train_loss), linestyle='dashed')
    loss_ax.axvline(np.argmin(val_loss), linestyle='dashed', color='orange')

    train_acc, val_acc = history['accuracy'], history['val_accuracy']
    acc_ax.plot(train_acc, label='Train accuracy')
    acc_ax.plot(val_acc, label='Validation accuracy')
    acc_ax.plot([i*1000 for i in y], label='learning rate')
    acc_ax.legend(loc='best', fontsize=16)
    acc_ax.set_title('Accuracy')
    acc_ax.axvline(np.argmax(train_acc), linestyle='dashed')
    acc_ax.axvline(np.argmax(val_acc), linestyle='dashed', color='orange')

    plt.xlabel('Epochs', fontsize=22)
    sns.despine()
    plt.show()

# Training data samples (with augmentation)

In [None]:
# train_dataset = get_dataset(FILENAMES_COMP, ordered=True, augment=True)
# train_iter = iter(train_dataset.unbatch().batch(20))

# display_batch_of_images(next(train_iter))
# display_batch_of_images(next(train_iter))

## Datasets distribution

### Competition data

In [None]:
# ds_comp = get_dataset(FILENAMES_COMP)
# labels_comp = [target.numpy() for img, target in iter(ds_comp.unbatch())]
# labels_comp = np.argmax(labels_comp, axis=-1)

# fig, ax = plt.subplots(1, 1, figsize=(18, 8))
# ax = sns.countplot(y=labels_comp, palette='viridis')
# ax.tick_params(labelsize=16)

# plt.show()

### 2019 competition data

In [None]:
# ds_2019 = get_dataset(FILENAMES_2019)
# labels_2019 = [target.numpy() for img, target in iter(ds_2019.unbatch())]
# labels_2019 = np.argmax(labels_2019, axis=-1)

# fig, ax = plt.subplots(1, 1, figsize=(18, 8))
# ax = sns.countplot(y=labels_2019, palette='viridis')
# ax.tick_params(labelsize=16)

# plt.show()

### Dataset oversampled

In [None]:
# FILENAMES_COMP_OVER = (FILENAMES_COMP + 
#                        FILENAMES_2019 + 
#                        (2 * FILENAMES_COMP_CBB) + 
#                        (2 * FILENAMES_2019_CBB) + 
#                        (2 * FILENAMES_COMP_CBSD) + 
#                        (2 * FILENAMES_2019_CBSD) + 
#                        (2 * FILENAMES_COMP_CGM) + 
#                        (2 * FILENAMES_2019_CGM) + 
#                        (2 * FILENAMES_COMP_Healthy) + 
#                        (2 * FILENAMES_2019_Healthy))

# ds_comp = get_dataset(FILENAMES_COMP_OVER)
# labels_comp = [target.numpy() for img, target in iter(ds_comp.unbatch())]
# labels_comp = np.argmax(labels_comp, axis=-1)

# fig, ax = plt.subplots(1, 1, figsize=(18, 8))
# ax = sns.countplot(y=labels_comp, palette='viridis')
# ax.tick_params(labelsize=16)

# plt.show()

### Learning rate schedule

We are going to use a `cosine learning rate schedule with a warm-up phase`. This is a good fit because we are fine-tuning a pre-trained model: the warm-up phase helps avoid degrading the pre-trained weights (catastrophic forgetting), and during the rest of the schedule the learning rate slowly decreases to very low values, which helps the model settle on more stable weights.

# Model

In [None]:
def model_fn(input_shape, N_CLASSES):
    """Build and compile the classifier.

    The backbone is `cfg.model` with 'noisy-student' weights and average
    pooling, followed by a softmax Dense layer with `N_CLASSES` units. All
    layers except BatchNormalization are made trainable via `unfreeze_model`.
    The optimizer is picked by `cfg.optimiser_name` ('adam', 'ranger', or
    AdaBelief for any other value) and the loss is `BiTemperedLogisticLoss`.

    Args:
        input_shape: shape of one input image.
        N_CLASSES: number of output classes.

    Returns:
        The compiled Keras model.
    """
    inputs = L.Input(shape=input_shape, name='input_image')
    backbone = cfg.model(input_tensor=inputs,
                         include_top=False,
                         weights='noisy-student',
                         pooling='avg')
    backbone.trainable = False
    print(f'model is {cfg.model.__name__} and name is {cfg.model_name}')

    # Classification head directly on the pooled features (a dropout variant
    # was left disabled in the original).
    # NOTE(review): the head outputs softmax probabilities -- confirm that
    # BiTemperedLogisticLoss expects probabilities rather than raw activations.
    classifier = L.Dense(N_CLASSES, activation='softmax', name='output')
    model = Model(inputs=inputs, outputs=classifier(backbone.output))

    unfreeze_model(model)

    chosen = cfg.optimiser_name
    if chosen == 'adam':
        optimizer = tf.keras.optimizers.Adam()
    elif chosen == 'ranger':
        # Ranger = RectifiedAdam wrapped in Lookahead.
        optimizer = tfa.optimizers.Lookahead(tfa.optimizers.RectifiedAdam(),
                                             sync_period=6, slow_step_size=0.5)
    else:
        optimizer = AdaBeliefOptimizer(learning_rate=1e-3, epsilon=epsilon, rectify=True)

    loss_fn = BiTemperedLogisticLoss(t1=T_1, t2=T_2, lbl_smth=SMOOTH_FRACTION, n_iter=N_ITER)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    return model
    

    

#loss and checkpoint implementation

In [None]:
#@title Loss implementation
"""Robust Bi-Tempered Logistic Loss Based on Bregman Divergences.
 Source: https://bit.ly/3jSol8T
 """

import functools
import tensorflow as tf

def for_loop(num_iters, body, initial_args):
    """Runs a simple for-loop with given body and initial_args.

    The outputs of each call to `body` are unpacked as the arguments of the
    next call.

    Args:
      num_iters: Maximum number of iterations.
      body: Body of the for-loop.
      initial_args: Args to the body for the first iteration.
    Returns:
      Output of the final iteration, or `initial_args` unchanged when
      `num_iters` is zero or negative (previously this raised
      UnboundLocalError).
    """
    outputs = initial_args
    for _ in range(num_iters):
        outputs = body(*outputs)
    return outputs


def log_t(u, t):
    """Compute log_t for `u`.

    Evaluates `(u**(1 - t) - 1) / (1 - t)`, or the natural logarithm when
    `t` equals 1.
    """

    def _tempered_log():
        return (u ** (1.0 - t) - 1.0) / (1.0 - t)

    def _natural_log():
        return tf.math.log(u)

    is_standard = tf.math.equal(t, 1.0)
    return tf.cond(is_standard, _natural_log, _tempered_log)


def exp_t(u, t):
    """Compute exp_t for `u`.

    Evaluates `relu(1 + (1 - t) * u) ** (1 / (1 - t))`, or the ordinary
    exponential when `t` equals 1.
    """

    def _tempered_exp():
        return tf.nn.relu(1.0 + (1.0 - t) * u) ** (1.0 / (1.0 - t))

    def _natural_exp():
        return tf.math.exp(u)

    is_standard = tf.math.equal(t, 1.0)
    return tf.cond(is_standard, _natural_exp, _tempered_exp)


def compute_normalization_fixed_point(activations, t, num_iters=5):
    """Returns the normalization value for each example (t > 1.0).

    The activations are shifted by their per-example maximum and then
    refined by `num_iters` fixed-point iterations.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (> 1.0 for tail heaviness).
      num_iters: Number of iterations to run the method.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """
    max_activation = tf.math.reduce_max(activations, -1, keepdims=True)
    shifted = activations - max_activation
    shifted_shape = tf.shape(shifted)

    def _step(i, current):
        # Rescale the shifted activations by the current partition estimate.
        partition = tf.math.reduce_sum(exp_t(current, t), -1, keepdims=True)
        rescaled = tf.reshape(
            shifted * tf.math.pow(partition, 1.0 - t), shifted_shape)
        return [i + 1, rescaled]

    _, converged = for_loop(num_iters, _step, [0, shifted])

    final_partition = tf.math.reduce_sum(
        exp_t(converged, t), -1, keepdims=True)
    return -log_t(1.0 / final_partition, t) + max_activation


def compute_normalization_binary_search(activations, t, num_iters=10):
    """Normalization value per example via bisection (t < 1.0).

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (< 1.0 for finite support).
      num_iters: Number of bisection steps to run.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """
    row_max = tf.math.reduce_max(activations, -1, keepdims=True)
    shifted = activations - row_max

    # Same shape as `activations` but with the class axis collapsed to 1.
    partition_shape = tf.concat([tf.shape(activations)[:-1], [1]], 0)

    # Number of entries per example lying above the threshold -1 / (1 - t).
    above_threshold = tf.greater(shifted, -1.0 / (1.0 - t))
    effective_dim = tf.cast(
        tf.math.reduce_sum(
            tf.cast(above_threshold, tf.int32), -1, keepdims=True),
        tf.float32)

    # Initial search interval for the normalization value.
    lower = tf.zeros(partition_shape)
    upper = -log_t(1.0 / effective_dim, t) * tf.ones(partition_shape)

    def _bisect(step, low, high):
        midpoint = (high + low) / 2.0
        total_prob = tf.math.reduce_sum(
            exp_t(shifted - midpoint, t), -1, keepdims=True)
        # 1.0 where the probabilities sum to less than one, else 0.0.
        below_one = tf.cast(tf.less(total_prob, 1.0), tf.float32)
        # below_one: keep `low`, move `high` to the midpoint; otherwise the
        # reverse.
        next_low = tf.reshape(
            low * below_one + (1.0 - below_one) * midpoint, partition_shape)
        next_high = tf.reshape(
            high * (1.0 - below_one) + below_one * midpoint, partition_shape)
        return [step + 1, next_low, next_high]

    _, lower, upper = for_loop(num_iters, _bisect, [0, lower, upper])
    return (upper + lower) / 2.0 + row_max


def compute_normalization(activations, t, num_iters=5):
    """Dispatch to the normalization routine matching the temperature.

    Uses the binary-search routine when `t < 1.0` and the fixed-point
    routine otherwise.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (< 1.0 for finite support, > 1.0 for tail heaviness).
      num_iters: Number of iterations to run the method.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """

    def _finite_support():
        return compute_normalization_binary_search(activations, t, num_iters)

    def _heavy_tail():
        return compute_normalization_fixed_point(activations, t, num_iters)

    return tf.cond(tf.less(t, 1.0), _finite_support, _heavy_tail)


def tempered_softmax(activations, t, num_iters=5):
    """Tempered softmax function.

    For `t == 1.0` this reduces to the ordinary softmax; otherwise the
    normalization constant is obtained from `compute_normalization`.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature tensor > 0.0.
      num_iters: Number of iterations to run the normalization method.
    Returns:
      A probabilities tensor.
    """
    t = tf.convert_to_tensor(t)
    normalization_constants = tf.cond(
        tf.math.equal(t, 1.0),
        # Stable log-sum-exp: a plain log(sum(exp(x))) overflows to inf for
        # large activations, which would turn the probabilities into zeros.
        lambda: tf.math.reduce_logsumexp(activations, axis=-1, keepdims=True),
        lambda: compute_normalization(activations, t, num_iters))
    return exp_t(activations - normalization_constants, t)


def bi_tempered_logistic_loss(activations,
                              labels,
                              t1,
                              t2,
                              label_smoothing=0.0,
                              num_iters=5):
    """Bi-Tempered Logistic Loss with custom gradient.

    The per-class loss is built from the tempered softmax of `activations`
    (temperature `t2`) and the tempered logarithm (temperature `t1`), then
    summed over the last axis. Back-propagation uses the explicit gradient
    defined in `grad` below instead of differentiating through the iterative
    normalization.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      labels: A tensor with shape and dtype as activations.
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing parameter between [0, 1). Compared with
        a Python `>`, so it is expected to be a plain number.
      num_iters: Number of iterations to run the normalization method.
    Returns:
      A loss tensor with the last (class) dimension reduced away.
    """
    with tf.name_scope('bitempered_logistic'):
        t1 = tf.convert_to_tensor(t1)
        t2 = tf.convert_to_tensor(t2)

        if label_smoothing > 0.0:
            # Shrink the labels and add label_smoothing / (num_classes - 1)
            # to every class.
            num_classes = tf.cast(tf.shape(labels)[-1], tf.float32)
            labels = (
                1 - num_classes / (num_classes - 1) * label_smoothing
            ) * labels + label_smoothing / (num_classes - 1)

        @tf.custom_gradient
        def _custom_gradient_bi_tempered_logistic_loss(activations):
            """Bi-Tempered Logistic Loss with custom gradient.
            Args:
              activations: A multi-dimensional tensor with last dim `num_classes`.
            Returns:
              A loss tensor, grad.
            """
            with tf.name_scope('gradient_bitempered_logistic'):
                probabilities = tempered_softmax(activations, t2, num_iters)
                # The 1e-10 offset keeps log_t finite for labels equal to 0.
                loss_values = tf.math.multiply(
                    labels,
                    log_t(labels + 1e-10, t1) -
                    log_t(probabilities, t1)) - 1.0 / (2.0 - t1) * (
                        tf.math.pow(labels, 2.0 - t1) -
                        tf.math.pow(probabilities, 2.0 - t1))

                def grad(d_loss):
                    """Explicit gradient calculation.
                    Args:
                      d_loss: Infinitesimal change in the loss value.
                    Returns:
                      Loss gradient.
                    """
                    delta_probs = probabilities - labels
                    forget_factor = tf.math.pow(probabilities, t2 - t1)
                    delta_probs_times_forget_factor = tf.math.multiply(
                        delta_probs, forget_factor)
                    delta_forget_sum = tf.math.reduce_sum(
                        delta_probs_times_forget_factor, -1, keepdims=True)
                    # Probabilities raised to t2 and renormalized over the
                    # class axis.
                    escorts = tf.math.pow(probabilities, t2)
                    escorts = escorts / tf.math.reduce_sum(
                        escorts, -1, keepdims=True)
                    derivative = delta_probs_times_forget_factor - tf.math.multiply(
                        escorts, delta_forget_sum)
                    return tf.math.multiply(d_loss, derivative)

                return loss_values, grad

        loss_values = _custom_gradient_bi_tempered_logistic_loss(activations)

        # Collapse the class axis to obtain one loss value per example.
        loss_values = tf.math.reduce_sum(loss_values, -1)

        return loss_values



In [None]:
with strategy.scope():
  class BiTemperedLogisticLoss(tf.keras.losses.Loss):
    """Keras loss object wrapping `bi_tempered_logistic_loss`.

    Stores the two temperatures, the label-smoothing amount and the
    iteration count, and forwards them on every `call`.
    """

    def __init__(self, t1, t2, lbl_smth, n_iter):
      super().__init__()
      self.t1, self.t2 = t1, t2
      self.lbl_smth, self.n_iter = lbl_smth, n_iter

    def call(self, y_true, y_pred):
      # The underlying function takes (activations, labels) -- the reverse of
      # the Keras (y_true, y_pred) order -- so pass everything by keyword.
      return bi_tempered_logistic_loss(
          activations=y_pred,
          labels=y_true,
          t1=self.t1,
          t2=self.t2,
          label_smoothing=self.lbl_smth,
          num_iters=self.n_iter)

In [None]:
def get_checkpoint(model_save_path, is_save_best=True):
    """Build a `ModelCheckpoint` callback that tracks `val_loss`.

    Args:
      model_save_path: Destination path handed to `ModelCheckpoint`.
      is_save_best: Forwarded as `save_best_only`.
    Returns:
      The configured `ModelCheckpoint` instance (full model, not weights only).
    """
    checkpoint_options = {
        'monitor': 'val_loss',
        'verbose': 1,
        'save_best_only': is_save_best,
        'mode': 'min',
        'save_weights_only': False,
    }
    return ModelCheckpoint(model_save_path, **checkpoint_options)
    
def get_early_stopping():
    """Build an `EarlyStopping` callback on `val_loss`.

    The patience is read from the module-level `patience` variable; the best
    weights are restored when training stops.
    """
    stopper = EarlyStopping(
        monitor='val_loss',
        mode='min',
        min_delta=0.0001,
        patience=patience,
        restore_best_weights=True,
        verbose=1,
    )
    return stopper
    

def get_learning_rate_decay():
    """Build a `ReduceLROnPlateau` callback on `val_loss`.

    The learning rate is multiplied by 0.2 after 3 epochs without an
    improvement of at least 0.0001.
    """
    plateau_options = dict(
        monitor='val_loss',
        mode='min',
        factor=0.2,
        patience=3,
        min_delta=0.0001,
        verbose=1,
    )
    return ReduceLROnPlateau(**plateau_options)


def get_model_callback(fold_num):
    """Assemble the training callbacks for one fold.

    Side effect: stores the fold's best-model path in
    `cfg.model_save_path_best` and prints it.

    Args:
      fold_num: Fold number embedded in the checkpoint file name.
    Returns:
      A list `[checkpoint, early_stopping, lr_scheduler]`.
    """
    cfg.model_save_path_best = (
        cfg.model_save_dir + f'{cfg.model_name}_best_fold_{fold_num}_.h5')
    print("Best model save path: ", cfg.model_save_path_best)

    # The learning rate follows the `lrfn` schedule; the plateau-based decay
    # from `get_learning_rate_decay` is not used here.
    callbacks = [
        get_checkpoint(cfg.model_save_path_best, is_save_best=True),
        get_early_stopping(),
        tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True),
    ]
    return callbacks

# Training

In [None]:
def get_train_vald_ds(idxT, idxV):
    """Collect the TFRecord file lists for one cross-validation fold.

    Training files are the competition shards for `idxT` followed by the
    per-class shard lists, each of which is repeated twice. When
    `cfg.external_data` is set, the matching external shards are
    interleaved in the same pattern. Validation files are the competition
    shards for `idxV` only.

    Args:
      idxT: Iterable of integer shard ids used for training.
      idxV: Iterable of integer shard ids used for validation.
    Returns:
      A tuple `(TRAIN_FILENAMES, VALID_FILENAMES)` of file-name lists.
    """

    def _shards(root, template, shard_ids):
        # `template` holds a `%.2i` slot for the zero-padded shard id.
        return tf.io.gfile.glob([root + template % x for x in shard_ids])

    class_templates = (
        '/CBB%.2i*.tfrec',
        '/CBSD%.2i*.tfrec',
        '/CGM%.2i*.tfrec',
        '/Healthy%.2i*.tfrec',
    )

    train_files = _shards(GCS_PATH, '/Id_train%.2i*.tfrec', idxT)
    comp_per_class = [
        _shards(GCS_PATH_CLASSES, template, idxT)
        for template in class_templates
    ]

    if cfg.external_data:
        train_files = train_files + _shards(
            GCS_PATH_EXT, '/Id_train%.2i*.tfrec', idxT)
        ext_per_class = [
            _shards(GCS_PATH_EXT_CLASSES, template, idxT)
            for template in class_templates
        ]
        # Per class: competition files twice, then external files twice.
        for comp_files, ext_files in zip(comp_per_class, ext_per_class):
            train_files = train_files + (2 * comp_files) + (2 * ext_files)
    else:
        for comp_files in comp_per_class:
            train_files = train_files + (2 * comp_files)

    valid_files = _shards(GCS_PATH, '/Id_train%.2i*.tfrec', idxV)
    return train_files, valid_files
        
    
    

In [None]:
def train_kfold(exp_num):
    """Run K-fold training and write out-of-fold (OOF) results.

    For each of the first `FOLDS_USED` folds of a shuffled `KFold` over 50
    shard indices, this trains a fresh model, reloads the best checkpoint,
    predicts on the validation shards, and appends one row to `cfg.df_acc`
    (saved to `cfg.exp_csv_path`). After all folds it writes two OOF CSV
    files into `cfg.model_save_dir` and prints a classification report and
    the overall accuracy.

    Args:
      exp_num: Experiment index supplied by the caller; not used in the body.
    Returns:
      None. Results are reported through prints and CSV files.
    """
    skf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=seed)
    oof_pred = []
    oof_labels = []
    history_list = []

    for fold, (idxT, idxV) in enumerate(skf.split(np.arange(50))):
        if fold >= FOLDS_USED:
            break
        # Human-facing fold number (1-based), used in every message and row.
        fold_num = fold + 1

        if tpu:
            tf.tpu.experimental.initialize_tpu_system(tpu)
        K.clear_session()
        print(f'\nFOLD: {fold_num}')

        TRAIN_FILENAMES, VALID_FILENAMES = get_train_vald_ds(idxT, idxV)

        ct_train = count_data_items(TRAIN_FILENAMES)
        ct_valid = count_data_items(VALID_FILENAMES)
        print(f'count  {ct_train} and {ct_valid}')

        # Keras documents steps_per_epoch / validation_steps as integers;
        # round up so a trailing partial batch still counts as one step.
        step_size = math.ceil(ct_train / BATCH_SIZE)
        valid_step_size = math.ceil(ct_valid / BATCH_SIZE)

        # Build TF datasets
        train_ds = get_dataset(TRAIN_FILENAMES, labeled=True, ordered=False,
                               repeated=True, augment=True)
        valid_ds = get_dataset(VALID_FILENAMES, labeled=True, cached=True,
                               ordered=True, repeated=True, augment=False)

        callback_list = get_model_callback(fold_num)

        with strategy.scope():
            model = model_fn((None, None, CHANNELS), N_CLASSES)
            history = model.fit(x=train_ds,
                                steps_per_epoch=step_size,
                                epochs=EPOCHS,
                                validation_data=valid_ds,
                                validation_steps=valid_step_size,
                                callbacks=callback_list)

        history_list.append(history.history)
        oof_val_acc = np.max(history.history['val_accuracy'])
        print(f'\nFOLD: {fold_num}')
        plot_metrics(history.history)

        ### RESULTS
        print(f"#### FOLD {fold_num} OOF Accuracy = {oof_val_acc:.3f}")

        # Load best model weights
        print(f'loading model from {cfg.model_save_path_best}')
        model.load_weights(cfg.model_save_path_best)

        # OOF predictions: the dataset is read in order so that the targets
        # collected here line up with the predictions below.
        ds_valid = get_dataset(VALID_FILENAMES, ordered=True)
        targets_fold = [target.numpy() for _, target in ds_valid.unbatch()]
        oof_labels.append(targets_fold)
        x_oof = ds_valid.map(lambda image, target: image)
        pred_fold = model.predict(x_oof)
        oof_pred.append(pred_fold)
        auc_score = roc_auc_score(targets_fold, pred_fold, multi_class='ovr')
        print(f' roc score for fold {fold_num} is {auc_score:.3f}')
        cfg.df_acc.loc[len(cfg.df_acc)] = {
            "Model": cfg.model_name,
            'Reshape': HEIGHT_RS,
            'accuracy': f'{oof_val_acc:.4f}',
            'auc': auc_score,
            'fold': fold_num,
            'optimizer': cfg.optimiser_name,
            'time': pd.Timestamp.now().strftime("%m-%d-%Y %H:%M:%S"),
        }
        cfg.df_acc.to_csv(cfg.exp_csv_path, index=False)

    y_true_hot = np.concatenate(oof_labels)
    y_true = np.argmax(y_true_hot, axis=-1)
    y_pred_hot = np.concatenate(oof_pred)
    y_pred = np.argmax(y_pred_hot, axis=-1)
    y_pred_hot_with_target = np.hstack((y_pred_hot, y_true.reshape(-1, 1)))

    # One column per predicted class ('1', '2', ...), then the true class.
    prob_columns = [str(c) for c in range(1, y_pred_hot.shape[1] + 1)]
    df_oof_hot = pd.DataFrame(y_pred_hot_with_target,
                              columns=prob_columns + ['Target'])
    df_oof = pd.DataFrame([y_pred, y_true]).T
    df_oof.columns = ['Pred', 'Target']

    save_dir = Path(cfg.model_save_dir)
    df_oof.to_csv(save_dir.joinpath(f'{cfg.model_name}_OOF.csv'), index=False)
    df_oof_hot.to_csv(save_dir.joinpath(f'{cfg.model_name}_OOF_Hot.csv'),
                      index=False)

    print(classification_report(y_true, y_pred, target_names=CLASSES))

    print(cfg.df_acc.tail(10))
    print(f'Overall accuracy of the model {accuracy_score(y_true, y_pred)}')

# LR and runner

In [None]:



# Resume the experiment log when its CSV already exists; otherwise start an
# empty log with the expected columns.
if not Path(cfg.exp_csv_path).exists():
    cfg.df_acc = pd.DataFrame(
        columns=['Model','Reshape','accuracy','auc','optimizer','fold','time'])
else:
    cfg.df_acc = pd.read_csv(cfg.exp_csv_path)

# With `cfg.run_exp` set, the loop below runs exactly once.
if cfg.run_exp:
    cfg.num_exp = 1

for i in range(cfg.num_exp):
    if not cfg.run_exp:
        # Otherwise take the i-th model / optimizer from the configured lists.
        cfg.model = cfg.models[i]
        cfg.optimiser_name = cfg.optimizers[i]

    # Outputs for this run go to <base_save_path><colab_instance>__<model_name>/
    cfg.folder_name = str(cfg.colab_instance) + "__" + cfg.model_name + "/"
    cfg.model_save_dir = cfg.base_save_path + cfg.folder_name
    train_kfold(i)
 
#   if i<=ds_extreme_index:
#     dataset_path = dataset_gs[512]
#     print('taking 512 size ds')
#   else:
#     dataset_path = dataset_gs[512]
#     print('taking 512 size ds')
#   DATASET_FILENAMES = tf.io.gfile.glob(dataset_path + '/*.tfrec')  
#   if display_dataset:
#     NUM_FILES = len(DATASET_FILENAMES)
#     print("Number of files: ",NUM_FILES)
#     display(DATASET_FILENAMES)
#     display_dataset=False 

  
    





