# Artificial Neural Networks and Deep Learning

---

## Homework 1: Minimal Working Example

To make your first submission, follow these steps:
1. Create a folder named `[2024-2025] AN2DL/Homework 1` in your Google Drive.
2. Upload the `training_set.npz` file to this folder.
3. Upload the Jupyter notebook `Homework 1 - Minimal Working Example.ipynb`.
4. Load and process the data.
5. Implement and train your model.
6. Submit the generated `.zip` file to Codabench.


## ⚙️ Import Libraries

In [None]:
# If you want to use Ranger optimizer install these (on Kaggle this tf seems to break CUDA...)
#!pip3 install tensorflow==2.14.0
#!pip3 install tensorflow-addons

In [None]:
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
import json
import numpy as np
import random
import seaborn as sns
import tensorflow as tf

In [None]:
# Seed every random number generator the notebook relies on, so that the data
# split, the image sampling and the augmentations are repeatable across runs.
seed = 42
# Python's own generator drives `random.sample` (image preview) and
# `random.uniform` (crop factor in `augment_image`); it was left unseeded.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Toggle class-weighted training (to compensate for class imbalance).
# When True, the split cell keeps an integer copy of the training labels
# (`y_train_cat`), the class-weight cell builds `class_weight_dict` from it and
# `fit_model` forwards that dictionary to `model.fit`.
USE_CLASS_WEIGHTS = False

## ⏳ Load and inspect the data

In [None]:
# Side length, in pixels, of the square input declared for the base models.
# The dataset images are 96x96, so any other value means the image is either
# CENTER cropped (smaller value) or enlarged (larger value); a notice is printed
# in both cases.
IMG_SIZE = 96

if IMG_SIZE != 96:
    print('Image will center cropped!' if IMG_SIZE < 96 else 'Image will be enlarged!')

In [None]:
# NumPy archive read with `np.load`; the `images` and `labels` arrays are taken from it.
DATASET = "training_set.npz"
# JSON file whose `blacklist` entry lists the sample indices deleted before the split.
OUTLIERS = "training-data-filter/blacklist.json"

In [None]:
# Fractions of the filtered dataset assigned to training, validation and test.
# TODO: maybe adjust
train_ratio = 0.80
validation_ratio = 0.10
test_ratio = 0.10

# Compare with a tolerance: an exact `== 1` check rejects perfectly valid
# ratios whose floating point sum is off by one ulp (e.g. 0.7 / 0.2 / 0.1).
if not np.isclose(train_ratio + validation_ratio + test_ratio, 1.0):
    raise ValueError('train_ratio + validation_ratio + test_ratio must sum to 1')


In [None]:
# Class index -> cell type name. The position in the list is the class index
# used to look up a name from a one-hot encoded label.
labels = dict(enumerate([
    "Basophil",
    "Eosinophil",
    "Erythroblast",
    "Immature granulocytes",
    "Lymphocyte",
    "Monocyte",
    "Neutrophil",
    "Platelet",
]))

In [None]:
# Load the raw arrays from the dataset archive
data = np.load(DATASET)
X = data['images']
y = data['labels']

X = X.astype('float32')

print('Before data points filter shape:', X.shape, y.shape)

# Remove the blacklisted samples (indices listed in the outliers JSON file)
with open(OUTLIERS, 'r') as file:
    blacklist = json.load(file)
blacklist = sorted(blacklist['blacklist'])
X = np.delete(X, blacklist, axis=0)
y = np.delete(y, blacklist, axis=0)

print('After data points filter shape:', X.shape, y.shape)

# Percentages taken from:  https://arxiv.org/pdf/2110.09508
train_size = int(X.shape[0] * train_ratio)
val_size = int(X.shape[0] * validation_ratio)
test_size = X.shape[0] - train_size - val_size

# A test split is only carved out when it can hold at least one sample per class
has_test_split = test_size >= len(labels)

if not USE_CLASS_WEIGHTS:
    # Convert to one hot encoding before splitting. `num_classes` is given
    # explicitly so the encoding width never depends on which labels are present.
    y = tfk.utils.to_categorical(y, num_classes=len(labels))

if has_test_split:
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed, test_size=test_size, stratify=y)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=seed, test_size=val_size, stratify=y_train)
else:
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=seed, test_size=val_size, stratify=y)

if USE_CLASS_WEIGHTS:
    # Keep the integer labels: the class-weight computation needs them later
    y_train_cat = y_train
    # Convert to one hot encoding (after the split in this mode)
    y_train = tfk.utils.to_categorical(y_train, num_classes=len(labels))
    y_val = tfk.utils.to_categorical(y_val, num_classes=len(labels))
    if has_test_split:
        y_test = tfk.utils.to_categorical(y_test, num_classes=len(labels))

# Report the final shapes once, whatever the mode
if has_test_split:
    print(X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape)
else:
    print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

In [None]:
# Inspect data
# Display a random sample of images taken from the validation split
num_img = 10
random_indices = random.sample(range(len(X_val)), num_img)

fig, axes = plt.subplots(1, num_img, figsize=(20, 20))

def get_label(y):
    """Return the class name for the one-hot encoded label vector `y`."""
    # `argmax` gives a plain scalar index. The previous `int(np.where(y == 1)[0])`
    # converted a one-element array to int, which NumPy has deprecated, and it
    # failed outright on labels that contain no exact 1.
    return labels[int(np.argmax(y))]

# One subplot per sampled image
for ax, idx in zip(axes, random_indices):
    # Pixel values are stored in [0, 255]; scale them to [0, 1] for display
    ax.imshow(np.squeeze(X_val[idx] / 255), vmin=0., vmax=1.)
    ax.set_title(get_label(y_val[idx]))
    ax.axis('off')

# Adjust layout and display the images
plt.tight_layout()
plt.show()

## 🌊 Generate class weights

In [None]:
if USE_CLASS_WEIGHTS:
    from pprint import pprint

    # Flatten the integer training labels that were kept aside before one-hot encoding
    y_train_cat_flat = np.ravel(y_train_cat)

    # Take the classes from the training labels themselves. The unfiltered
    # `data['labels']` may contain a class that is no longer in the training
    # split, which `compute_class_weight` rejects.
    train_classes = np.unique(y_train_cat_flat)

    # Make weights proportional to class imbalance
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=train_classes,
        y=y_train_cat_flat,
    )

    # Key each weight by its actual class id rather than by its position
    class_weight_dict = {int(c): float(w) for c, w in zip(train_classes, class_weights)}
    print('Class weights:')
    pprint(class_weight_dict)

## 🛠️ Train and Save the Model

In [None]:
# Define training setup
# Maximum number of epochs for each `model.fit` call (the fit callbacks may stop earlier)
epochs = 400
# Batch size used by `fit_model`, by the data loaders and to compute the LR decay steps
batch_size = 32

In [None]:
# Define optimizer setup
# Learning rate for the classifier phase and for the fine-tuning phase respectively
lr = 1e-3
fine_tuning_lr = 1e-4
# Optimizer names read by `get_optimizer`. One of:
# SGD
# Adam
# AdamW
# Lion
# Ranger
opt_name = "AdamW"
fine_tuning_opt_name = "AdamW"

# Rate of the exponential learning-rate decay; None disables the decay schedule
opt_exp_decay_rate: float | None = None
# Decay at how many epochs
opt_decay_epoch_delta = 7

In [None]:

# Define dense params
# Dropout rate applied before each dense layer (`build_model` requires the same length as `dense_layers`)
dropouts_layers = [0.4]
# Units of each dense layer; the last entry is the softmax output and must equal the number of classes.
# Note: the base model output size depends on the model being used, so pay attention to the size of the first dense layer
dense_layers = [8]

# Example for more layers:
#dropouts_layers = [0.5, 0.3, 0.4]
#dense_layers = [256, 64, 8]

In [None]:
# Define fitting callbacks. Comment out from dict the unwanted ones
# `get_callbacks` hands the values of this dict to every `model.fit` call.
model_fit_callbacks = {
	#'ReduceLROnPlateau': tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1),
	# Stop when `val_loss` has not improved for 10 epochs and restore the best weights
	'EarlyStopping': tfk.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1) 
}

In [None]:
# When True, the `model` variable is deleted at the end of the training cell to free memory
FREE_MODEL = False

In [None]:
# Base model name being used (a key of `base_model_dict`, also used to build the saved file names). One of:
# vgg19
# vgg16
# efficientnetv2-b0
# efficientnetv2-b3
# efficientnetv2-s
# efficientnetv2-m
# efficientnetv2-l
# They all use global average pooling
base_model_name = 'efficientnetv2-l'

In [None]:
# Whether to load an already trained classifier built on the same base model.
# When True, no new base model is created and the classifier training phase is skipped.
LOAD_TRAINED_CLASSIFIER = False
# Path of the saved model to load; must not be empty when the flag above is True
trained_classifier_model_file = ""

In [None]:
# Base model layers to unfreeze for fine tuning. Put 'all' in the set to enable every layer.
# ACTIVATION_POLICY selects how the entries are matched against the layer names:
# 'prefix' compares the leading block name of each layer, 'same' the exact layer name.
ACTIVATION_POLICY = 'prefix' #or 'same'
based_model_layer_blocks_to_activate = {
    # Excluding the first two blocks
    'block3',
    'block4',
    'block5',
    'block6',
    'block7',
}

In [None]:
# Arguments shared by every backbone: no classification head, global average
# pooling, ImageNet weights and IMG_SIZE x IMG_SIZE RGB inputs.
_BASE_MODEL_KWARGS = dict(
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    input_tensor=None,
    pooling='avg',
    weights="imagenet",
)

# Maps a base model name to a zero-argument builder. Builders are stored instead
# of ready-made models so that only the selected backbone is instantiated (and
# its weights downloaded), instead of all seven networks at once.
# NOTE(review): the EfficientNetV2 entries embed their own preprocessing, the
# VGG entries do not - confirm the raw inputs are what VGG should receive.
base_model_dict = {
    'vgg19': lambda: tfk.applications.VGG19(**_BASE_MODEL_KWARGS),
    'vgg16': lambda: tfk.applications.VGG16(**_BASE_MODEL_KWARGS),
    'efficientnetv2-l': lambda: tfk.applications.EfficientNetV2L(include_preprocessing=True, **_BASE_MODEL_KWARGS),
    'efficientnetv2-s': lambda: tfk.applications.EfficientNetV2S(include_preprocessing=True, **_BASE_MODEL_KWARGS),
    'efficientnetv2-m': lambda: tfk.applications.EfficientNetV2M(include_preprocessing=True, **_BASE_MODEL_KWARGS),
    'efficientnetv2-b0': lambda: tfk.applications.EfficientNetV2B0(include_preprocessing=True, **_BASE_MODEL_KWARGS),
    'efficientnetv2-b3': lambda: tfk.applications.EfficientNetV2B3(include_preprocessing=True, **_BASE_MODEL_KWARGS),
}

def get_base_model():
    """Return a frozen, ImageNet-pretrained base model for transfer learning.

    The model is selected by the notebook-level `base_model_name` and built on
    demand, so every call creates a new instance.

    Returns:
        The base model with `trainable` set to False, or None when
        `LOAD_TRAINED_CLASSIFIER` is set (the base model then comes from the
        loaded classifier).

    Raises:
        AssertionError: if `base_model_name` is not a key of `base_model_dict`.
    """
    if LOAD_TRAINED_CLASSIFIER:
        return None
    assert base_model_name in base_model_dict, (
        f'Unknown base model {base_model_name!r}; expected one of {sorted(base_model_dict)}'
    )
    # Initialise the ImageNet model with pretrained weights
    m = base_model_dict[base_model_name]()

    # Freeze
    m.trainable = False
    return m

In [None]:
# Manual augmentation. It accepts a single image or an array of images, but the
# random "augment or not" decision below is taken once per call, i.e. it is written
# for single inputs.
# Apply this function to the input images before feeding X_train to the network.
def augment_image(input):
    """Return `input` augmented with 50% probability, otherwise unchanged.

    The augmentation scales the pixels by 1/255, applies random flips, a random
    central crop resized to IMG_SIZE, brightness / contrast / saturation / hue
    jitter and Gaussian noise, clips to [0, 1] and scales back by 255.
    """
    import random

    def _augmented(img):
        # Normalize
        img = img / 255

        # Random flips
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)

        # Random zoom: keep a central fraction, then resize to the target size
        kept_fraction = random.uniform(0.70, 0.9999)
        img = tf.image.central_crop(img, kept_fraction)
        img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))

        # Brightness and contrast adjustment
        img = tf.image.random_brightness(img, max_delta=0.2)
        img = tf.image.random_contrast(img, lower=0.8, upper=1.2)

        # Hue and saturation adjustment (only for RGB images)
        img = tf.image.random_saturation(img, lower=0.8, upper=1.2)
        img = tf.image.random_hue(img, max_delta=0.1)

        # Gaussian noise
        img = tf.add(img, tf.random.normal(shape=tf.shape(img), mean=0.0, stddev=0.05))

        # Clip to [0, 1], then rescale back
        return tf.clip_by_value(img, 0.0, 1.0) * 255

    # Randomly decide whether to augment (50% chance)
    coin = tf.random.uniform([], 0, 1)
    return tf.cond(
        coin > 0.5,
        lambda: _augmented(input),  # augmented copy
        lambda: input,  # image unchanged
    )

In [None]:
# Preprocess batch images
def preprocess_images(images, labels):
    """Run `augment_image` on every image and return them as one array.

    Returns:
        A tuple `(augmented_images, labels)`; `labels` is passed through untouched.
    """
    return np.array([augment_image(img) for img in images]), labels

In [None]:
# Test augmentation on the first few training images
rows = 2
cols = 2
num_img = rows * cols
partialX = X_train[:num_img]
partialY = y_train[:num_img]

augX, augY = preprocess_images(partialX, partialY)

# Inspect data
fig, axes = plt.subplots(rows, cols, figsize=(20, 8))
print(axes.shape)

def get_label(y):
    """Return the class name for the one-hot encoded label vector `y`."""
    # `argmax` gives a plain scalar index. The previous `int(np.where(y == 1)[0])`
    # converted a one-element array to int, which NumPy has deprecated.
    return labels[int(np.argmax(y))]

# Fill the grid row by row with the processed images
for ax, image, label in zip(axes.flat, augX, augY):
    # Pixel values are in [0, 255]; scale them to [0, 1] for display
    ax.imshow(np.squeeze(image / 255), vmin=0., vmax=1.)
    ax.set_title(get_label(label))
    ax.axis('off')

# Adjust layout and display the images
plt.tight_layout()
plt.show()

In [None]:
# General augmentations with Keras layers
def build_augmentation(name = 'preprocessing'):
    """Build the Keras augmentation pipeline: random rotation, then random translation.

    Args:
        name: name given to the returned Sequential model.
    """
    # The rotation factor is a fraction of a full turn: 0.167 is roughly +/-60 degrees
    rotation = tfkl.RandomRotation(0.167)
    # Shift by up to +/-10% along each axis
    translation = tfkl.RandomTranslation(0.1, 0.1)
    return tf.keras.Sequential([rotation, translation], name=name)

In [None]:
def augmentation_fn_for_augmentation_generator_cutmix(images, labels, fractionCM = 0.90, fractionAUG = 0.01):
    """Build a training batch mixing CutMix, Keras-augmented and untouched samples.

    The batch is shuffled and partitioned into three disjoint groups:
    `fractionCM` of it goes through CutMix, `fractionAUG` through
    `build_augmentation()`, the remainder is left as is. The returned batch is
    the concatenation of the CutMix outputs, the untouched samples, the
    augmented samples and the originals of the CutMix group, shuffled again;
    it is therefore larger than the input batch by the CutMix group size.

    Args:
        images: batch of images.
        labels: batch of one-hot labels (float, so CutMix can blend them).
        fractionCM: fraction of the batch sent through CutMix.
        fractionAUG: fraction of the batch sent through the Keras augmentation.

    Returns:
        A tuple `(images_combined, labels_combined)`.
    """
    import keras_cv
    # NOTE(review): the layer is re-created with the same `seed` on every call -
    # confirm this does not repeat the same CutMix pattern for every batch.
    cutmix_layer = keras_cv.layers.CutMix(alpha=1.0, seed=seed)

    batch_len = len(labels)
    shuffled_indices = tf.random.shuffle(tf.range(batch_len))

    # Split the batch into three contiguous, non-overlapping index ranges.
    # (The slices used to start one element too late, which silently dropped
    # one sample at each boundary.)
    cutmix_size = int(batch_len * fractionCM)
    aug_size = int(batch_len * fractionAUG)

    cutmix_indices = shuffled_indices[:cutmix_size]
    aug_indices = shuffled_indices[cutmix_size:cutmix_size + aug_size]
    clean_indices = shuffled_indices[cutmix_size + aug_size:]

    # Gather images and labels based on the shuffled indices
    images_cutmix = tf.gather(images, cutmix_indices)
    labels_cutmix = tf.gather(labels, cutmix_indices)

    images_aug = tf.gather(images, aug_indices)
    labels_aug = tf.gather(labels, aug_indices)

    images_clean = tf.gather(images, clean_indices)
    labels_clean = tf.gather(labels, clean_indices)

    # Apply augmentations. The Keras pipeline is called outside `fit`, so the
    # training mode is requested explicitly to keep the random layers active.
    cutmix = cutmix_layer({"images": images_cutmix, "labels": labels_cutmix})
    augmented = build_augmentation()(images_aug, training=True)

    # Concatenate the CutMix and non-CutMix parts back together
    images_combined = tf.concat([cutmix["images"], images_clean, augmented, images_cutmix], axis=0)
    labels_combined = tf.concat([cutmix["labels"], labels_clean, labels_aug, labels_cutmix], axis=0)

    # Shuffle the combined batch. The new permutation must be used here: the
    # stale `cutmix_indices` only selected part of the combined batch.
    final_order = tf.random.shuffle(tf.range(len(labels_combined)))
    images_combined = tf.gather(images_combined, final_order)
    labels_combined = tf.gather(labels_combined, final_order)

    return images_combined, labels_combined

In [None]:
# Perform augmentation on X_train and send a batch of augmented images to the network.
# A Keras Sequence is used because it can be passed directly to the `fit` method.
class AugmentationGeneratorCutMix(tf.keras.utils.Sequence):
    """Keras Sequence that yields batches transformed by `aug_fn`.

    Args:
        images: full set of training images.
        labels: labels aligned with `images`.
        batch_size: number of samples sliced for each batch (before augmentation).
        aug_fn: callable `(images, labels, fractionCM, fractionAUG)` returning
            the augmented `(images, labels)` batch.
        fractionCM: fraction of each batch forwarded to `aug_fn` for CutMix.
        fractionAUG: fraction of each batch forwarded to `aug_fn` for the
            generic augmentation.
        **kwargs: forwarded to the Keras base class.
    """

    def __init__(self, images, labels, batch_size, aug_fn, fractionCM=0.30, fractionAUG=0.30, **kwargs):
        # Initialise the Keras base class so that it can set up its own state
        super().__init__(**kwargs)
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.fractionCM = fractionCM
        self.fractionAUG = fractionAUG
        self.aug_fn = aug_fn

    def __len__(self):
        # Number of batches / epoch (the last batch may be smaller)
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, idx):
        # Get a batch of images
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_images = self.images[start:stop]
        batch_labels = self.labels[start:stop]

        # Apply aug
        augmented_images, augmented_labels = self.aug_fn(
            batch_images, batch_labels, self.fractionCM, self.fractionAUG
        )
        # Show example
        #num_images = 10
        #plt.figure(figsize=(15, 5))
        #for i in range(min(num_images, len(augmented_images))):
        #  plt.subplot(1, num_images, i + 1)
        #  plt.imshow((augmented_images[i]).numpy().astype(np.uint8))
        #  plt.axis('off')
        #  plt.show()

        return augmented_images, augmented_labels

def get_AugmentationGeneratorCutMix():
    """Return the CutMix training generator built on `X_train` / `y_train`.

    90% of every batch goes through CutMix and 1% through the generic augmentation.
    """
    # Convert the labels into a local float tensor instead of rebinding the
    # global `y_train`: the other helpers still expect it to be a NumPy array
    # (e.g. the AugMix loader asserts it).
    labels_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)
    train_generator = AugmentationGeneratorCutMix(
        X_train,
        labels_tensor,
        batch_size,
        augmentation_fn_for_augmentation_generator_cutmix,
        fractionCM=0.90,
        fractionAUG=0.01,
    )
    return train_generator

In [None]:
# This augmentation pipeline doubles the inputs (X): one copy goes through AugMix, the
# other is kept as is, and the returned dataset mixes both, shuffled.
def get_AugmentationGeneratorAugMix(X, y, batch):
    """Return a shuffled, batched `tf.data.Dataset` of AugMix-augmented plus original samples.

    Args:
        X: NumPy array of images.
        y: NumPy array of labels aligned with `X`.
        batch: batch size of the returned dataset.
    """
    import keras_cv

    assert(isinstance(X, np.ndarray))
    assert(isinstance(y, np.ndarray))

    # keras_cv augmentation pipeline made of a single AugMix layer (value range 0..255)
    pipeline = keras_cv.layers.Augmenter([keras_cv.layers.AugMix([0, 255])])

    def _augment_features(features, labels):
        # Only the features are transformed; the labels are returned unchanged
        return pipeline(features), labels

    originals = tf.data.Dataset.from_tensor_slices((X, y))
    # Augmented copy first, followed by the untouched samples
    combined = originals.map(_augment_features, num_parallel_calls=tf.data.AUTOTUNE).concatenate(originals)

    # Shuffle over the whole (doubled) dataset, then batch and prefetch
    combined = combined.shuffle(buffer_size=X.shape[0] * 2)
    combined = combined.batch(batch)
    return combined.prefetch(tf.data.AUTOTUNE)

In [None]:
# Test the augmix augmentation on the first N training samples
N = 5
test_augmix = get_AugmentationGeneratorAugMix(X_train[:N], y_train[:N], N*2)

# Take one batch from the dataset (the batch size is 2N: N augmented + N original)
for X_batch, y_batch in test_augmix.take(1):
    # Inspect data: one row with a subplot per image of the batch
    fig, axes = plt.subplots(1, N*2, figsize=(20, 8))
    for i, ax in enumerate(axes):
        # Pixel values are in [0, 255]; scale them to [0, 1] for display
        ax.imshow(np.squeeze(X_batch[i].numpy() / 255), vmin=0., vmax=1.)
        ax.axis('off')

    # Adjust layout and display the images
    plt.tight_layout()
    plt.show()



In [None]:
# By default we build a new model with the given base model
# If the flag LOAD_TRAINED_CLASSIFIER is True, we load the model and take the base_model from the loaded model, hence any base_model being passed is ignored
def build_model(base_model = None, restore_base = True, out_shape = y_train.shape[-1], trained_classifier_model_name = 'vgg19'):
    """Build (or load) the classifier: augmentation -> base model -> dropout/dense head.

    Args:
        base_model: feature extractor placed after the augmentation layers;
            ignored when `LOAD_TRAINED_CLASSIFIER` is set.
        restore_base: when True, every layer of the base model is frozen.
        out_shape: kept for backward compatibility; it is not used, the output
            size is taken from `dense_layers[-1]`.
        trained_classifier_model_name: name of the base model layer inside the
            loaded classifier (only used when loading).
            NOTE(review): the default is 'vgg19' regardless of `base_model_name`
            - confirm callers pass the right name when loading another backbone.

    Returns:
        The Keras model (not compiled by this function when newly built).
    """
    assert len(dropouts_layers) == len(dense_layers)
    assert dense_layers[-1] == len(labels)

    if LOAD_TRAINED_CLASSIFIER:
        assert trained_classifier_model_file != ''
        m = tf.keras.models.load_model(trained_classifier_model_file)

        # Freeze the base model contained in the loaded classifier
        if restore_base:
            for layer in m.get_layer(trained_classifier_model_name).layers:
                layer.trainable = False
        # Always return the loaded classifier. It used to be discarded when
        # `restore_base` was False and a fresh, untrained model was built instead.
        return m

    if restore_base and base_model is not None:
        for layer in base_model.layers:
            layer.trainable = False

    inputs = tfk.Input(shape=X_train[0].shape, name='input_layer')
    # Augmentation layers come first
    x = build_augmentation()(inputs)
    if base_model is not None:
        x = base_model(x)

    # Head: a dropout before every dense layer; all dense layers but the last use ReLU
    last = len(dense_layers) - 1
    for i, (drop, dense) in enumerate(zip(dropouts_layers, dense_layers)):
        x = tfkl.Dropout(drop, name=f'dropout{i}')(x)
        if i < last:
            x = tfkl.Dense(dense, activation='relu', name=f'dense{i}')(x)
    # The last dense layer is the softmax output
    outputs = tfkl.Dense(dense_layers[-1], activation='softmax', name=f'dense{last}')(x)

    # Define the complete model linking input and output
    m = tfk.Model(inputs=inputs, outputs=outputs, name='model')
    return m

In [None]:
def get_callbacks():
    """Return the enabled fit callbacks (the values of `model_fit_callbacks`) as a list."""
    return list(model_fit_callbacks.values())

In [None]:
def fit_model(model, data_loader = None):
    """Fit `model` and return the Keras history dictionary.

    Args:
        model: compiled Keras model.
        data_loader: optional object yielding training batches (e.g. a
            `tf.data.Dataset` or a Keras Sequence). When None, the model is
            fitted on the in-memory `X_train` / `y_train` arrays.

    Returns:
        The `history` dict of the fit (metric name -> list of per-epoch values).
    """
    # Arguments shared by every fit variant
    fit_kwargs = dict(
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=get_callbacks(),
    )
    if USE_CLASS_WEIGHTS:
        print('Fitting with class weights!')
        fit_kwargs['class_weight'] = class_weight_dict

    if data_loader is None:
        return model.fit(x=X_train, y=y_train, batch_size=batch_size, **fit_kwargs).history

    # The loader already yields batches, so `batch_size` must not be passed
    # (the Keras documentation says not to specify it for such inputs).
    return model.fit(data_loader, **fit_kwargs).history

In [None]:
def enable_feature_extractor_layers(extractor, layers, policy=None):
    """Unfreeze the selected layers of `extractor` and freeze all the others.

    Args:
        extractor: base model whose `layers` are (un)frozen in place.
        layers: collection of layer names or name prefixes to make trainable;
            if it contains 'all', every layer is made trainable.
        policy: 'same' to match whole layer names, 'prefix' to match layers
            whose name starts with one of the entries. Defaults to the
            notebook-level `ACTIVATION_POLICY`.

    Raises:
        ValueError: if the policy is neither 'same' nor 'prefix'.
    """
    if 'all' in layers:
        def is_selected(name):
            return True
    else:
        if policy is None:
            policy = ACTIVATION_POLICY
        if policy == 'same':
            def is_selected(name):
                return name in layers
        elif policy == 'prefix':
            # `startswith` accepts a tuple, so prefixes of any length work
            prefixes = tuple(layers)

            def is_selected(name):
                return name.startswith(prefixes)
        else:
            raise ValueError(f"Unknown activation policy {policy!r}; expected 'same' or 'prefix'")

    # The container must be trainable for the per-layer flags to matter
    extractor.trainable = True
    for layer in extractor.layers:
        layer.trainable = is_selected(layer.name)

    # Print layer indices, names, and trainability status
    print('\n\nBase model training configuration:')
    for i, layer in enumerate(extractor.layers):
        print(i, layer.name, layer.trainable)

In [None]:
# Taken from https://github.com/SeanSdahl/RangerOptimizerTensorflow/blob/master/module.py
def build_ranger(lr=1e-3, weight_decay=0.0):
    """Build the Ranger optimizer: RectifiedAdam wrapped in Lookahead.

    Args:
        lr: learning rate of the inner RectifiedAdam optimizer.
        weight_decay: weight decay of the inner RectifiedAdam optimizer.

    Raises:
        ImportError: if the `tensorflow_addons` package is not installed.
    """
    try:
        import tensorflow_addons as tfa
    except ImportError as err:
        # Only a missing package is translated; any other failure propagates as is
        raise ImportError("You have to install tensorflow_addons package for Ranger. Please note that this package is available up to tensorflow==2.14") from err

    inner = tfa.optimizers.RectifiedAdam(
        learning_rate=lr,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        weight_decay=weight_decay,
        amsgrad=False,
        sma_threshold=5.0,
        total_steps=0,
        warmup_proportion=0.1,
        min_lr=0.,
        name="Ranger",
    )
    return tfa.optimizers.Lookahead(inner, sync_period=6, slow_step_size=0.5, name="Ranger")

In [None]:
def get_optimizer(is_fine_tuning = False, use_decay_fine_tuning = False, **kwargs):
    """Build the optimizer for the classifier phase or for the fine-tuning phase.

    The optimizer name and base learning rate come from the notebook settings:
    `opt_name` / `lr`, or `fine_tuning_opt_name` / `fine_tuning_lr` when
    `is_fine_tuning` is True. When `opt_exp_decay_rate` is set, a staircase
    exponential decay is applied every `opt_decay_epoch_delta` epochs (steps
    per epoch computed from `X_train` and `batch_size`).

    Args:
        is_fine_tuning: select the fine-tuning settings.
        use_decay_fine_tuning: keep the exponential decay while fine tuning
            (it is disabled for fine tuning by default).
        **kwargs: `momentum` (SGD only, default 0.9) and `weight_decay`
            (Adam, AdamW, Lion, Ranger).

    Raises:
        ValueError: if the configured optimizer name is not supported.
        RuntimeError: if a decay is requested together with Ranger.
    """
    decay = opt_exp_decay_rate
    if is_fine_tuning and not use_decay_fine_tuning:
        decay = None

    opt = fine_tuning_opt_name if is_fine_tuning else opt_name
    base_lr = fine_tuning_lr if is_fine_tuning else lr
    phase = "Finetuning: " if is_fine_tuning else "NotFinetuning: "

    if opt == "Ranger":
        optimizer = build_ranger(lr=base_lr, weight_decay=kwargs.get('weight_decay', 0.0))
        if decay is not None:
            raise RuntimeError("Not supported")
        print(f'\n\n{phase}using {opt} optimizer\n\n')
        return optimizer

    # An unknown name used to fall through and return None, failing later in `compile`
    if opt not in ("SGD", "Adam", "AdamW", "Lion"):
        raise ValueError(f'Unknown optimizer {opt!r}; expected one of SGD, Adam, AdamW, Lion, Ranger')

    # Either a constant rate or a decay schedule, handed to the optimizer at construction
    if decay is None:
        learning_rate = base_lr
    else:
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=base_lr,
            decay_steps=opt_decay_epoch_delta * (X_train.shape[0] // batch_size),
            decay_rate=decay,
            staircase=True,
        )

    if opt == "SGD":
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=kwargs.get('momentum', 0.9))
        if decay is not None:
            print(f'\n\n{phase}using {opt} optimizer with exp decay {decay} (momentum = {optimizer.momentum})\n\n')
        else:
            print(f'\n\n{phase}using {opt} optimizer (momentum = {optimizer.momentum})\n\n')
        return optimizer

    # Adam, AdamW and Lion share the same construction; the class is looked up
    # by name so that only the requested optimizer needs to exist in this TF version.
    optimizer_kwargs = {'learning_rate': learning_rate}
    if 'weight_decay' in kwargs:
        optimizer_kwargs['weight_decay'] = kwargs['weight_decay']
    optimizer = getattr(tf.keras.optimizers, opt)(**optimizer_kwargs)

    if decay is not None:
        print(f'\n\n{phase}using {opt} optimizer with exp decay of {decay} weight decay = {optimizer.weight_decay}\n\n')
    else:
        print(f'\n\n{phase}using {opt} optimizer (weight decay = {optimizer.weight_decay})\n\n')
    return optimizer

In [None]:
def display_model(model):
    """Print the model summary and plot its architecture.

    Returns:
        Whatever `plot_model` returns, so that a notebook cell ending with
        `display_model(model)` can show the architecture plot inline (the
        result used to be dropped inside the function).
    """
    # Display a summary of the model architecture
    model.summary(expand_nested=True)

    # Plot the model architecture with layer shapes and trainable parameters
    return tfk.utils.plot_model(model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Augment images before training. Use this for custom augmentation which cannot
# fit inside a data loader nor a Keras Sequence.
# Use custom augmentation
#X_train, y_train = preprocess_images(X_train, y_train)

# ---------------------------------------------------------------------------
# Classifier training
# ---------------------------------------------------------------------------
model = build_model(base_model=get_base_model())
base_model = model.get_layer(base_model_name)

# Part of both saved file names: number of unfrozen blocks, or "all"
finetuned_blocks_tag = "all" if "all" in based_model_layer_blocks_to_activate else len(based_model_layer_blocks_to_activate)

if not LOAD_TRAINED_CLASSIFIER:
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=get_optimizer(is_fine_tuning=False),
        metrics=['accuracy'],
    )
    #display_model(model)

    # Fit the initial model
    print('\n\nFitting classifier\n\n')
    # A data loader is passed so that the augmentation (here AugMix) is applied by the loader
    classifier_loader = get_AugmentationGeneratorAugMix(X_train, y_train, batch_size)
    class_fit_history = fit_model(model, data_loader=classifier_loader)

    intermediate_val_acc = round(max(class_fit_history['val_accuracy']) * 100, 2)
    # Save intermediate model
    timestamp = datetime.now().strftime("%y%m%d_%H%M")
    model_filename = f'{base_model_name}-intermediateDONOTUSE-finetuned{finetuned_blocks_tag}blocks-{intermediate_val_acc}-{timestamp}.keras'
    model.save(model_filename)

# ---------------------------------------------------------------------------
# Fine tuning
# ---------------------------------------------------------------------------
# Enable fine tuning
enable_feature_extractor_layers(base_model, based_model_layer_blocks_to_activate)

# Recompile with the fine-tuning optimizer settings
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=get_optimizer(is_fine_tuning=True),
    metrics=['accuracy'],
)
#display_model(model)

# Fit the initial finetuned model
print('\n\nFine tuning\n\n')
# A data loader is passed so that the augmentation (here AugMix) is applied by the loader
fine_tuning_loader = get_AugmentationGeneratorAugMix(X_train, y_train, batch_size)
fine_tuning_fit_history = fit_model(model, data_loader=fine_tuning_loader)

# Calculate and print the best validation accuracy achieved
final_val_accuracy = round(max(fine_tuning_fit_history['val_accuracy']) * 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model to a file, including final accuracy in the filename
timestamp = datetime.now().strftime("%y%m%d_%H%M")
model_filename = f'{base_model_name}-finetuned{finetuned_blocks_tag}blocks-{final_val_accuracy}-{timestamp}.keras'
model.save(model_filename)

# Free memory by deleting the model instance
if FREE_MODEL:
    del model

In [None]:
def plot_trainig(fit):
    """Plot training vs validation loss and accuracy from a fit history dict.

    Args:
        fit: mapping with the 'loss', 'val_loss', 'accuracy' and 'val_accuracy'
            series, as returned by `fit_model`.
    """
    # One subplot per metric: loss on top, accuracy below
    fig, axes = plt.subplots(2, 1, figsize=(15, 6))
    panels = (
        (axes[0], 'loss', 'training loss', 'validation loss', 'Categorical Crossentropy'),
        (axes[1], 'accuracy', 'training accuracy', 'validation accuracy', 'Accuracy'),
    )

    for ax, metric, train_label, val_label, title in panels:
        # Training curve is dashed and faint, validation curve is solid
        ax.plot(fit[metric], alpha=0.3, color='#00008b', label=train_label, linestyle='--')
        ax.plot(fit[f'val_{metric}'], label=val_label, alpha=0.8, color='#ffa500')
        ax.set_title(title)
        ax.legend(loc='upper left')
        ax.grid(alpha=0.3)

    # Adjust layout to prevent label overlap and display the plots
    plt.tight_layout()
    plt.show()

#plot_trainig(class_fit_history)
plot_trainig(fine_tuning_fit_history)

## 👔 Load a trained model (if needed!)

In [None]:
#model = tf.keras.models.load_model('vgg19-finetuned16layers-99.13-241114_1414.keras')

## ✍🏿 Make evaluation

In [None]:
##loss, acc = model.evaluate(X_test, y_test, verbose=2)
##print('Model, accuracy: {:5.2f}%'.format(100 * acc))

# Predict class probabilities for the entire test set
predictions = model.predict(X_test, verbose=0)

# Display the shape of the predictions
print("Predictions Shape:", predictions.shape)

# Convert predictions to class labels
pred_classes = np.argmax(predictions, axis=-1)

# Extract ground truth classes from the one-hot encoded labels
true_classes = np.argmax(y_test, axis=-1)

# Calculate and display test set accuracy as percentage
accuracy = accuracy_score(true_classes, pred_classes)
print(f'Accuracy score over the test set: {round(100 * accuracy, 2)}%')

# Calculate and display test set precision as percentage
precision = precision_score(true_classes, pred_classes, average='weighted')
print(f'Precision score over the test set: {round(100 * precision, 2)}%')

# Calculate and display test set recall as percentage
recall = recall_score(true_classes, pred_classes, average='weighted')
print(f'Recall score over the test set: {round(100 * recall, 2)}%')

# Calculate and display test set F1 score as percentage
f1 = f1_score(true_classes, pred_classes, average='weighted')
print(f'F1 score over the test set: {round(100 * f1, 2)}%')

# Compute the confusion matrix over every known class, so that its shape always
# matches the tick labels even if a class is missing from the test set
class_ids = list(labels.keys())
class_names = list(labels.values())
cm = confusion_matrix(true_classes, pred_classes, labels=class_ids)

# Percentage of each true class (row) assigned to each predicted class.
# Rows without any sample are left at 0% instead of dividing by zero.
row_totals = cm.sum(axis=1)[:, np.newaxis]
safe_totals = np.where(row_totals == 0, 1, row_totals)
cm_percentage = cm.astype('float') / safe_totals * 100

# Combine numbers and percentages into a single string for annotation
annot = np.array([f"{num}\n({percent:.2f}%)" for num, percent in zip(cm.flatten(), cm_percentage.flatten())]).reshape(cm.shape)

# Plot the confusion matrix with percentages. The matrix is transposed to put
# the true labels on the x axis, so the annotations must be transposed as well
# (otherwise each cell shows the numbers of its mirrored cell).
plt.figure(figsize=(10, 8))
sns.heatmap(cm_percentage.T, annot=annot.T, fmt='', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.title('Confusion Matrix (Percentages)')
plt.show()


## 📊 Prepare Your Submission

To prepare your submission, create a `.zip` file that includes all the necessary code to run your model. It **must** include a `model.py` file with the following class:

```python
# file: model.py
class Model:
    def __init__(self):
        """Initialize the internal state of the model."""

    def predict(self, X):
        """Return a numpy array with the labels corresponding to the input X."""
```

The next cell shows an example implementation of the `model.py` file, which loads a trained model from a `.keras` file (in this example `vgg19-finetuned14layers-97.84-241115_0029.keras`; replace it with the file saved by your own training run) and conducts predictions on the provided input data. The `.zip` file is created and downloaded in the last notebook cell.

❗ Feel free to modify the method implementations to better fit your specific requirements, but please ensure that the class name and method interfaces remain unchanged.

In [None]:
%%writefile model.py
import numpy as np

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl


class Model:
    def __init__(self):
        """
        Set up the internal state of the model. The __init__ method cannot
        accept any arguments.

        This example loads a pre-trained model, weights included, from a
        `.keras` file.
        """
        self.neural_network = tfk.models.load_model('vgg19-finetuned14layers-97.84-241115_0029.keras')

    def predict(self, X):
        """
        Predict the labels corresponding to the input X. X is a numpy array of
        shape (n_samples, 96, 96, 3) and the output should be a numpy array of
        shape (n_samples,). Therefore, outputs must not be one-hot encoded.

        This example runs the pre-trained model loaded in the __init__ method
        and converts two-dimensional outputs into class indices.
        """
        preds = self.neural_network.predict(X)
        # Two-dimensional output: one score per class, keep the best class index
        return np.argmax(preds, axis=1) if preds.ndim == 2 else preds

In [None]:
# Name of the submission archive; it embeds the current timestamp (yymmdd_HHMMSS)
from datetime import datetime
filename = f'submission_{datetime.now().strftime("%y%m%d_%H%M%S")}.zip'

# Add files to the zip command if needed
# (notebook shell escape: `{filename}` is replaced by the Python variable defined above)
!zip {filename} model.py vgg19-finetuned14layers-97.84-241115_0029.keras