# Artificial Neural Networks and Deep Learning

---

## Homework 1: Minimal Working Example

To make your first submission, follow these steps:
1. Create a folder named `[2024-2025] AN2DL/Homework 1` in your Google Drive.
2. Upload the `training_set.npz` file to this folder.
3. Upload the Jupyter notebook `Homework 1 - Minimal Working Example.ipynb`.
4. Load and process the data.
5. Implement and train your model.
6. Submit the generated `.zip` file to Codabench.


## ⚙️ Import Libraries

In [None]:
import numpy as np

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from sklearn.model_selection import train_test_split
import random
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sns

# Fix every random number generator used in this notebook (Python's `random`,
# NumPy and TensorFlow) so that sampling, splitting and training are repeatable.
seed = 42
random.seed(seed)  # `random.sample` is used later to pick the images to display
np.random.seed(seed)
tf.random.set_seed(seed)

## ⏳ Load and inspect the data

In [None]:
import json

# Load the dataset archive; images are cast to float32 and the labels are
# converted to one-hot encoding
data = np.load('training_set.npz')
X = data['images'].astype('float32')
y = tfk.utils.to_categorical(data['labels'])

print('Before data points filter shape:', X.shape, y.shape)

# Drop the samples whose indices are listed in the blacklist file
with open('training-data-filter/blacklist.json', 'r') as blacklist_file:
    blacklist = sorted(json.load(blacklist_file)['blacklist'])
X, y = (np.delete(array, blacklist, axis=0) for array in (X, y))

print('After data points filter shape:', X.shape, y.shape)

# Percentages taken from:  https://arxiv.org/pdf/2110.09508
# TODO: maybe adjust
# 64% train / 24% validation; the test set receives whatever remains
n_samples = X.shape[0]
train_size = int(n_samples * 0.64)
val_size = int(n_samples * 0.24)
test_size = n_samples - train_size - val_size

# First carve out the test set, then split the remainder into train and validation.
# Both splits are stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=seed,
    test_size=test_size,
    stratify=y,
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    random_state=seed,
    test_size=val_size,
    stratify=y_train,
)

print(X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape)

In [None]:
# Labels: class index -> class name (the list position is the class index)
labels = dict(enumerate([
    "Basophil",
    "Eosinophil",
    "Erythroblast",
    "Immature granulocytes",
    "Lymphocyte",
    "Monocyte",
    "Neutrophil",
    "Platelet",
]))

In [None]:
# Inspect data
# Pick `num_img` distinct random indices into the validation set (X_val)
num_img = 10
random_indices = random.sample(range(len(X_val)), num_img)

# One row of `num_img` axes, one per sampled image
fig, axes = plt.subplots(1, num_img, figsize=(20, 20))

def get_label(y, label_names=None):
    """Return the class name for a one-hot encoded label vector.

    Args:
        y: 1-D one-hot vector (exactly one entry equal to 1).
        label_names: Optional mapping from class index to class name.
            Defaults to the notebook-level `labels` dictionary.

    Returns:
        The name mapped to the index of the entry equal to 1.

    Raises:
        ValueError: If `y` does not contain exactly one entry equal to 1.
    """
    if label_names is None:
        label_names = labels
    hot_positions = np.where(np.asarray(y) == 1)[0]
    if hot_positions.size != 1:
        raise ValueError(
            f"expected a one-hot vector with exactly one 1, found {hot_positions.size}"
        )
    # Index the single element explicitly: calling int() on a 1-D array is
    # deprecated in recent NumPy releases.
    return label_names[int(hot_positions[0])]

# Draw each sampled image on its own axis, titled with its class name
for ax, idx in zip(axes, random_indices):
    # Pixel values are divided by 255 so that they fit the [0, 1] display range
    ax.imshow(np.squeeze(X_val[idx] / 255), vmin=0., vmax=1.)
    ax.set_title(get_label(y_val[idx]))
    ax.axis('off')

# Tighten the layout and render the figure
plt.tight_layout()
plt.show()

## 🛠️ Train and Save the Model

In [None]:
# Side length (in pixels) of the square input: used for the VGG19 `input_shape`
# and as the target size of the CenterCrop layer in the preprocessing pipeline
IMG_SIZE = 224

In [None]:
# Instantiate VGG19 with ImageNet-pretrained weights, for transfer learning.
# The original classification head is dropped (include_top=False) and the
# 'avg' pooling option is applied to the output of the last convolutional block.
vgg = tfk.applications.VGG19(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    pooling='avg',
)

# Display a summary of the model architecture
#vgg.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
#tfk.utils.plot_model(vgg, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)


In [None]:
# Maximum number of training epochs passed to fit()
epochs = 800
# Mini-batch size passed to fit(); also used to size the learning-rate decay interval
batch_size = 32

In [None]:
# Augmentation and optimizer parameters are taken from: https://arxiv.org/pdf/2110.09508

# Freeze the pretrained VGG19 backbone so only the new classification head is trained
vgg.trainable = False

# Define input layer with shape matching the input images
inputs = tfk.Input(shape=X_train[0].shape, name='input_layer')

# Apply data augmentation for training robustness
# NOTE(review): if the input images are smaller than IMG_SIZE (the model.py
# docstring mentions 96x96 inputs), CenterCrop resizes them instead of
# cropping - confirm this is intended.
augmentation = tfk.Sequential([
    tfkl.RandomFlip("horizontal_and_vertical"),
    tfkl.RandomRotation(0.167),  # fraction of a full turn: 0.167 * 360° ≈ ±60°
    tfkl.CenterCrop(IMG_SIZE, IMG_SIZE)
], name='preprocessing')

x = augmentation(inputs)

# Pass augmented inputs through the frozen VGG19 feature extractor
# NOTE(review): no `tfk.applications.vgg19.preprocess_input` step is applied
# before the backbone - confirm that feeding raw pixel values is intended.
x = vgg(x)

# Add a dropout layer for regularization
x = tfkl.Dropout(0.3, name='dropout')(x)

# Add final Dense layer for classification with softmax activation
outputs = tfkl.Dense(y_train.shape[-1], activation='softmax', name='dense')(x)

# Define the complete model linking input and output
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

lr = 1e-3
# fit() also runs the final, partially filled batch as a step, so the number
# of optimizer steps per epoch is the ceiling of n_samples / batch_size
steps_per_epoch = (X_train.shape[0] + batch_size - 1) // batch_size
# Learning rate schedule that decays by a factor of 0.1 every 7 epochs
lr_schedule = tfk.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=lr,
    decay_steps=7 * steps_per_epoch,  # Decay every 7 epochs
    decay_rate=0.1,
    staircase=True
)
# Hand the schedule to the optimizer at construction time rather than
# overwriting `learning_rate` on an already-built optimizer
optimizer = tfk.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)

# Compile the model with categorical cross-entropy loss and the SGD optimizer
tl_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=['accuracy'])

# Display a summary of the model architecture
#tl_model.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
#tfk.utils.plot_model(tl_model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
from datetime import datetime

# Early stopping monitors validation accuracy with a patience of 20 epochs
# and is configured to restore the best weights
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)

# Train the model and keep only the per-epoch metric history
tl_history = tl_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
).history

# Report the best validation accuracy reached during training, as a percentage
best_val_accuracy = max(tl_history['val_accuracy'])
final_val_accuracy = round(best_val_accuracy * 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model, embedding the accuracy and a timestamp in the filename
# NOTE(review): the filename prefix says EfficientNetV2L although the model is
# built on VGG19 - confirm the intended name.
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
model_filename = f'EfficientNetV2L[{str(final_val_accuracy)}][{timestamp}].keras'
tl_model.save(model_filename)

# Free memory by deleting the model instance
#del tl_model

In [None]:
# Two stacked panels: loss on top, accuracy below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 6))

# Each entry: (axis, training key, validation key, training label, validation label, title)
panels = (
    (ax1, 'loss', 'val_loss', 'training loss', 'validation loss', 'Categorical Crossentropy'),
    (ax2, 'accuracy', 'val_accuracy', 'training accuracy', 'validation accuracy', 'Accuracy'),
)

for axis, train_key, val_key, train_label, val_label, title in panels:
    # Training curve is dashed and faint, validation curve is solid
    axis.plot(tl_history[train_key], alpha=0.3, color='#4D61E2', label=train_label, linestyle='--')
    axis.plot(tl_history[val_key], label=val_label, alpha=0.8, color='#4D61E2')
    axis.set_title(title)
    axis.legend(loc='upper left')
    axis.grid(alpha=0.3)

# Adjust layout to prevent label overlap and display the plots
plt.tight_layout()
plt.show()

## 👔 Load a trained model (if needed!)

In [None]:
# NOTE: this rebinds `tl_model` to a previously saved model loaded from disk,
# replacing any model trained earlier in the notebook. Skip this cell to
# evaluate the freshly trained model instead.
tl_model = tf.keras.models.load_model('KaggleEfficientNetV2L85.1241109_182031.keras')

## ✍🏿 Evaluate the Model

In [None]:
##loss, acc = tl_model.evaluate(X_test, y_test, verbose=2)
##print('Model, accuracy: {:5.2f}%'.format(100 * acc))
#
## Predict labels for the entire test set
#predictions = tl_model.predict(X_test, verbose=0)
#
## Display the shape of the predictions
#print("Predictions Shape:", predictions.shape)
#
## Convert predictions to class labels
#pred_classes = np.argmax(predictions, axis=-1)
#
## Extract ground truth classes
#true_classes = np.argmax(y_test, axis=-1)
#
## Calculate and display test set accuracy
#accuracy = accuracy_score(true_classes, pred_classes)
#print(f'Accuracy score over the test set: {round(accuracy, 4)}')
#
## Calculate and display test set precision
#precision = precision_score(true_classes, pred_classes, average='weighted')
#print(f'Precision score over the test set: {round(precision, 4)}')
#
## Calculate and display test set recall
#recall = recall_score(true_classes, pred_classes, average='weighted')
#print(f'Recall score over the test set: {round(recall, 4)}')
#
## Calculate and display test set F1 score
#f1 = f1_score(true_classes, pred_classes, average='weighted')
#print(f'F1 score over the test set: {round(f1, 4)}')
#
## Compute the confusion matrix
#cm = confusion_matrix(true_classes, pred_classes)
#
## Combine numbers and percentages into a single string for annotation
#annot = np.array([f"{num}" for num in cm.flatten()]).reshape(cm.shape)
#
## Plot the confusion matrix
#plt.figure(figsize=(10, 8))
#sns.heatmap(cm.T, annot=annot, fmt='', xticklabels=list(labels.values()), yticklabels=list(labels.values()), cmap='Blues')
#plt.xlabel('True labels')
#plt.ylabel('Predicted labels')
#plt.show()


# Loss and accuracy from the evaluation, if you still want to keep the commented code
# loss, acc = tl_model.evaluate(X_test, y_test, verbose=2)
# print('Model, accuracy: {:5.2f}%'.format(100 * acc))

# Predict class probabilities for the entire test set
predictions = tl_model.predict(X_test, verbose=0)

# Display the shape of the predictions
print("Predictions Shape:", predictions.shape)

# Convert predictions to class labels
pred_classes = np.argmax(predictions, axis=-1)

# Extract ground truth classes from the one-hot test labels
true_classes = np.argmax(y_test, axis=-1)

# Calculate and display test set accuracy as percentage
accuracy = accuracy_score(true_classes, pred_classes)
print(f'Accuracy score over the test set: {round(100 * accuracy, 2)}%')

# Calculate and display test set precision as percentage
precision = precision_score(true_classes, pred_classes, average='weighted')
print(f'Precision score over the test set: {round(100 * precision, 2)}%')

# Calculate and display test set recall as percentage
recall = recall_score(true_classes, pred_classes, average='weighted')
print(f'Recall score over the test set: {round(100 * recall, 2)}%')

# Calculate and display test set F1 score as percentage
f1 = f1_score(true_classes, pred_classes, average='weighted')
print(f'F1 score over the test set: {round(100 * f1, 2)}%')

# Compute the confusion matrix (rows: true class, columns: predicted class).
# Passing the class indices explicitly keeps the matrix aligned with the tick
# labels even if some class never appears among the true or predicted classes.
cm = confusion_matrix(true_classes, pred_classes, labels=list(labels.keys()))

# Express each row as percentages of that true class. Rows without any sample
# are divided by 1 instead of 0 so they show 0% rather than NaN.
row_totals = cm.sum(axis=1)[:, np.newaxis]
row_totals[row_totals == 0] = 1
cm_percentage = cm.astype('float') / row_totals * 100

# Combine numbers and percentages into a single string for annotation
annot = np.array([f"{num}\n({percent:.2f}%)" for num, percent in zip(cm.flatten(), cm_percentage.flatten())]).reshape(cm.shape)

# Plot the confusion matrix with percentages.
# The matrix is transposed so that true labels run along the x axis; the
# annotations must be transposed the same way or the text in each cell would
# describe a different (mirrored) cell than its colour.
plt.figure(figsize=(10, 8))
sns.heatmap(cm_percentage.T, annot=annot.T, fmt='', xticklabels=list(labels.values()), yticklabels=list(labels.values()), cmap='Blues')
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.title('Confusion Matrix (Percentages)')
plt.show()


## 📊 Prepare Your Submission

To prepare your submission, create a `.zip` file that includes all the necessary code to run your model. It **must** include a `model.py` file with the following class:

```python
# file: model.py
class Model:
    def __init__(self):
        """Initialize the internal state of the model."""

    def predict(self, X):
        """Return a numpy array with the labels corresponding to the input X."""
```

The next cell shows an example implementation of the `model.py` file, which includes loading model weights from the `weights.keras` file and conducting predictions on provided input data. The `.zip` file is created and downloaded in the last notebook cell.

❗ Feel free to modify the method implementations to better fit your specific requirements, but please ensure that the class name and method interfaces remain unchanged.

In [None]:
%%writefile model.py
import numpy as np

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl


class Model:
    def __init__(self):
        """
        Set up the internal state of the model. The __init__ method must not
        accept any arguments.

        This example loads a pre-trained Keras model from a file stored
        alongside this script.
        """
        self.neural_network = tfk.models.load_model('KaggleEfficientNetV2L90.46.keras')

    def predict(self, X):
        """
        Predict the labels corresponding to the input X. X is a numpy array
        of shape (n_samples, 96, 96, 3) and the output should be a numpy
        array of shape (n_samples,). Therefore, outputs must not be one-hot
        encoded.

        This example runs the model loaded in __init__ and, when its output
        is two-dimensional (one score per class), reduces it to the index of
        the highest score for every sample.
        """
        preds = self.neural_network.predict(X)
        # Outputs that are not 2-D are returned unchanged
        return np.argmax(preds, axis=1) if preds.ndim == 2 else preds

In [None]:
from datetime import datetime
# Name of the submission archive, suffixed with the current local time (yymmdd_HHMMSS)
filename = f'submission_{datetime.now().strftime("%y%m%d_%H%M%S")}.zip'

# Add files to the zip command if needed
!zip {filename} model.py KaggleEfficientNetV2L90.46.keras