In [None]:
# Mount Google Drive inside the Colab runtime so the project files are reachable.
from google.colab import drive
drive.mount('/gdrive')
# Switch to the homework folder: the relative paths built with Path() further
# down in the notebook are resolved against this working directory.
%cd /gdrive/My Drive/Colab Notebooks/AN2DL/Homework1

# Import libraries

In [None]:
import numpy as np
import os
import random
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
#plt.style.use('ggplot')

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Model

from pathlib import Path

print(tf.__version__)

# Define variables

In [None]:
# Seed shared by TensorFlow and by the directory iterators created later on.
SEED = 42
tf.random.set_seed(SEED)

# Spatial size requested from the generators, and the same size with the
# three colour channels appended.
IMAGE_SHAPE = [96, 96]
INPUT_SHAPE = tuple(IMAGE_SHAPE) + (3,)

# Number of images per batch produced by the generators.
BATCH_SIZE = 16

# Root folder of the dataset, relative to the current working directory.
DATASET_DIR = Path('training_data_final')

In [None]:
# Folder holding every saved model, with one sub-folder per backbone.
MODELS_DIR = Path('models')
MODELS_DIR_RESNET = MODELS_DIR.joinpath('model_resnet50')
MODELS_DIR_VGG = MODELS_DIR.joinpath('model_vgg16')

# Instantiate dataset generators

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator applying random augmentation (rotation, shifts, zoom, brightness,
# shear and flips); it shares the 15% validation split of the plain generator.
train_image_gen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=[0.5, 1.5],
    brightness_range=[0.5, 1.5],
    shear_range=0.2,
    vertical_flip=True,
    horizontal_flip=True,
    fill_mode='reflect',
    validation_split=0.15,
)

# Generator without augmentation, configured with the same validation split.
validation_image_gen = ImageDataGenerator(validation_split=0.15)


def _flow_subset(image_gen, subset):
    """Build an unshuffled directory iterator over one split of DATASET_DIR.

    Args:
        image_gen: The ImageDataGenerator that produces the images.
        subset: Either 'training' or 'validation'.

    Returns:
        The iterator returned by ``image_gen.flow_from_directory``.
    """
    return image_gen.flow_from_directory(
        directory=DATASET_DIR,
        target_size=IMAGE_SHAPE,
        color_mode='rgb',
        classes=None,
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        shuffle=False,  # keep sample order fixed so predictions line up with .labels
        seed=SEED,
        subset=subset,
    )


# Training split with augmentation (used for TTA on the training set).
train_dataset = _flow_subset(train_image_gen, 'training')

# Training split without augmentation (no TTA).
train_dataset_notta = _flow_subset(validation_image_gen, 'training')

# Validation split without augmentation.
validation_dataset = _flow_subset(validation_image_gen, 'validation')

# Validation split drawn from the augmenting generator: train_image_gen is used
# to get augmentation, but only its validation subset is taken.
validation_dataset_tta = _flow_subset(train_image_gen, 'validation')


# Load models

In [None]:
# Load, in order, the fine-tuned VGG16, the fine-tuned ResNet-50 and the
# ensemble model from their saved locations.
model_vgg16, model_resnet50, model_ensemble = (
    keras.models.load_model(saved_model_path)
    for saved_model_path in (
        MODELS_DIR_VGG / '04_finetuning_pass3',
        MODELS_DIR_RESNET / '03_finetuning_pass2',
        MODELS_DIR / 'ensemble_vgg16-resnet50',
    )
)

# Load true labels

In [None]:
# Ground-truth labels exposed by the unshuffled iterators; they are compared
# further down against the argmax of the model predictions.
ytrue_train, ytrue_val = train_dataset.labels, validation_dataset.labels

# Compute predictions

Predictions are computed on both the **train** and the **validation** sets.

In [None]:
# Number of prediction passes over the augmented data that are averaged
# when computing the TTA predictions below.
TTA_STEPS = 10

Predictions for VGG16.

In [None]:
# Predicted class index per validation image (argmax over the class axis).
ypred_vgg_val = np.argmax(model_vgg16.predict(validation_dataset), axis=1)

In [None]:
# Predicted class index per (non-augmented) training image.
ypred_vgg_train = np.argmax(model_vgg16.predict(train_dataset_notta), axis=1)

Predictions for ResNet-50.

In [None]:
# Predicted class index per validation image (argmax over the class axis).
ypred_resnet_val = np.argmax(model_resnet50.predict(validation_dataset), axis=1)

In [None]:
# Predicted class index per (non-augmented) training image.
ypred_resnet_train = np.argmax(model_resnet50.predict(train_dataset_notta), axis=1)

Compute predictions for the ensemble model.

In [None]:
# Predicted class index per validation image (argmax over the class axis).
ypred_ensemble_val = np.argmax(model_ensemble.predict(validation_dataset), axis=1)

In [None]:
# Predicted class index per (non-augmented) training image.
ypred_ensemble_train = np.argmax(model_ensemble.predict(train_dataset_notta), axis=1)

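Compute predictions for the ensemble model using test-time augmentation (TTA): the predictions from several passes over augmented images are averaged before taking the argmax.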

In [None]:
# Run TTA_STEPS prediction passes over the augmented validation iterator,
# average the predictions across passes (axis 0), then take the argmax
# across classes (axis 1).
ypred_ensemble_TTA_val = np.argmax(
    np.mean(
        np.array([
            model_ensemble.predict(validation_dataset_tta)
            for _ in range(TTA_STEPS)
        ]),
        axis=0,
    ),
    axis=1,
)

In [None]:
# Run TTA_STEPS prediction passes over the augmented training iterator,
# average the predictions across passes (axis 0), then take the argmax
# across classes (axis 1).
ypred_ensemble_TTA_train = np.argmax(
    np.mean(
        np.array([
            model_ensemble.predict(train_dataset)
            for _ in range(TTA_STEPS)
        ]),
        axis=0,
    ),
    axis=1,
)

Save all the predictions to disk for future use.

In [None]:
# Output folder for saved predictions and figures; created on demand.
PRED_DIR = Path('report_material')
PRED_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Predictions to persist, in the exact order in which the loading cell
# unpacks them.
ypred = [ypred_vgg_val,
         ypred_vgg_train,
         ypred_resnet_val,
         ypred_resnet_train,
         ypred_ensemble_val,
         ypred_ensemble_train,
         ypred_ensemble_TTA_val,
         ypred_ensemble_TTA_train]

# The validation and training vectors generally differ in length, so the list
# is ragged. Recent NumPy releases raise a ValueError when asked to build an
# array from a ragged sequence implicitly (which np.save would do), so pack the
# vectors one by one into an explicit 1-D object array. The resulting file still
# unpacks into eight separate arrays when loaded with allow_pickle=True.
ypred_packed = np.empty(len(ypred), dtype=object)
for slot, predictions in enumerate(ypred):
    ypred_packed[slot] = predictions

np.save(str(PRED_DIR / 'ypred.npy'), ypred_packed, allow_pickle=True)

Load the saved predictions without re-running inference.

In [None]:
# Restore the eight prediction vectors from the file written by the saving
# cell, unpacking them in the same order in which they were stored.
# NOTE(review): allow_pickle=True makes np.load unpickle the stored objects;
# only load files that you created yourself.
(ypred_vgg_val,
ypred_vgg_train,
ypred_resnet_val,
ypred_resnet_train,
ypred_ensemble_val,
ypred_ensemble_train,
ypred_ensemble_TTA_val,
ypred_ensemble_TTA_train) = np.load(str(PRED_DIR / 'ypred.npy'), allow_pickle=True)

# Compute metrics

## Accuracy

In [None]:
def print_accuracy(y_true, y_pred, model_name='', SIGNIFICANT_FIGURES=4):
    """Print the accuracy of one set of predictions on a single line.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Label shown in the output, left-aligned and padded to
            25 characters.
        SIGNIFICANT_FIGURES: Number of decimals passed to ``np.round``
            (despite its name, it is a decimal count).
    """
    accuracy = accuracy_score(y_true, y_pred)
    rounded_accuracy = np.round(accuracy, SIGNIFICANT_FIGURES)
    print(f'Accuracy for model {model_name:<25}: {rounded_accuracy}')

# (true labels, predicted labels, description) for every model/split pair,
# reported in this order.
for labels_true, labels_pred, run_name in (
    (ytrue_val, ypred_vgg_val, 'VGG16 on val'),
    (ytrue_train, ypred_vgg_train, 'VGG16 on train'),
    (ytrue_val, ypred_resnet_val, 'ResNet-50 on val'),
    (ytrue_train, ypred_resnet_train, 'ResNet-50 on train'),
    (ytrue_val, ypred_ensemble_val, 'ensemble on val'),
    (ytrue_train, ypred_ensemble_train, 'ensemble on train'),
    (ytrue_val, ypred_ensemble_TTA_val, 'ensemble + TTA on val'),
    (ytrue_train, ypred_ensemble_TTA_train, 'ensemble + TTA on train'),
):
    print_accuracy(labels_true, labels_pred, run_name)

## F1-scores

In [None]:
def print_f1score(y_true, y_pred, model_name='', SIGNIFICANT_FIGURES=3):
    """Print the per-class F1-scores of one set of predictions on a single line.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Label shown in the output, left-aligned and padded to
            25 characters.
        SIGNIFICANT_FIGURES: Number of decimals passed to ``np.round``
            (despite its name, it is a decimal count).
    """
    # average=None keeps one score per class instead of a single aggregate.
    per_class_f1 = f1_score(y_true, y_pred, average=None)
    rounded_f1 = np.round(per_class_f1, SIGNIFICANT_FIGURES)
    print(f'F1-score for model {model_name:<25}: {rounded_f1}')

# (true labels, predicted labels, description) for every model/split pair,
# reported in this order.
for labels_true, labels_pred, run_name in (
    (ytrue_val, ypred_vgg_val, 'VGG16 on val'),
    (ytrue_train, ypred_vgg_train, 'VGG16 on train'),
    (ytrue_val, ypred_resnet_val, 'ResNet-50 on val'),
    (ytrue_train, ypred_resnet_train, 'ResNet-50 on train'),
    (ytrue_val, ypred_ensemble_val, 'ensemble on val'),
    (ytrue_train, ypred_ensemble_train, 'ensemble on train'),
    (ytrue_val, ypred_ensemble_TTA_val, 'ensemble + TTA on val'),
    (ytrue_train, ypred_ensemble_TTA_train, 'ensemble + TTA on train'),
):
    print_f1score(labels_true, labels_pred, run_name)

## Confusion matrix

In [None]:
# Display names for the eight classes: Species1 ... Species8.
LABELS = ['Species' + str(species_number) for species_number in range(1, 9)]

# Labels/predictions shown in the confusion matrix: ensemble + TTA on validation.
Y_TRUE, Y_PRED = ytrue_val, ypred_ensemble_TTA_val

In [None]:
# Row-normalised confusion matrix: each row (true class) sums to 1, so the
# diagonal holds the per-class recall. Letting scikit-learn normalise avoids the
# division by zero (NaN entries and a RuntimeWarning) that the manual
# `cm / cm.sum(axis=1)` produces for a class with no true samples; such rows
# are reported as zeros instead.
cm = confusion_matrix(Y_TRUE, Y_PRED, normalize='true')

In [None]:
# Alternative (scikit-learn only):
#   ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=LABELS).plot(cmap=plt.cm.Blues)

# Draw on an explicitly created Axes instead of relying on pyplot's implicit
# "current axes".
fig, ax = plt.subplots()

# annot=True writes each value inside its cell. Passing the class names to
# heatmap itself attaches each label to the tick seaborn actually draws, rather
# than overwriting the tick labels afterwards.
sns.heatmap(cm, annot=True, xticklabels=LABELS, yticklabels=LABELS, ax=ax)

# Axis titles.
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
plt.setp(ax.get_yticklabels(), rotation=0, ha='right', rotation_mode='anchor')

# Make sure the output folder exists so this cell also works on its own.
report_dir = Path() / 'report_material'
report_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(str(report_dir / 'confusion.pdf'), bbox_inches='tight')
plt.show()