# Import Libraries and Configuration

In [None]:
# Import libraries and set default options
import datetime
import calendar
import random
import math
import time
import pandas as pd
import numpy as np
from array import array
import pickle
import requests

import matplotlib.pyplot as plt

import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import NMF, PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.metrics import classification_report

from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.model_selection import cross_validate

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import  datasets, layers, models
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing import text
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, SimpleRNN, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.legacy import Adam

# Notebook display options: do not truncate DataFrame columns, and allow up to 500 rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)

# Class names looked up by integer label id in the scoring/plotting helpers
# (e.g. cifar100_labels[label]).
cifar100_labels = ['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', 'worm']

# Part 2: Transfer Learning and Image Processing

## Utilities

### Load saved models and data

**Note:**
Known TensorFlow bugs on M1-based Macs break the serialising and deserialising of models, which blocks the use of the .keras and .pkl formats and forces the use of the legacy .h5 format. This also appears to force the optimizer to reinitialise, which may lead to different results here than in the training notebooks.

See:\
https://github.com/tensorflow/tensorflow/issues/61915 \
https://github.com/keras-team/tf-keras/issues/46 

In [None]:
def returnBlockTwoSet(block2_classes):
    """Return the CIFAR-100 samples whose fine label is in ``block2_classes``.

    Loads CIFAR-100 with fine labels, keeps only the samples whose label id
    appears in ``block2_classes`` and divides the kept images by 255.0.

    Args:
        block2_classes: Array-like of fine-label ids to keep; passed to
            ``np.isin`` as the set of values to test against.

    Returns:
        Tuple ``(train_images, train_labels, test_images, test_labels)``.
        Images are scaled by 1/255; labels are flattened to 1-D and keep
        their original label ids.
    """
    (train_images, train_labels), (test_images, test_labels) = datasets.cifar100.load_data(label_mode='fine')

    # Flatten the labels so each boolean mask lines up with the first
    # (sample) axis of the corresponding image array.
    train_labels = train_labels.reshape(-1)
    test_labels = test_labels.reshape(-1)

    train_mask2 = np.isin(train_labels, block2_classes)
    test_mask2 = np.isin(test_labels, block2_classes)

    # Select first, then normalise: this avoids allocating a float copy of
    # the images that are about to be discarded. The values are identical to
    # normalising first because the division is element-wise.
    train_images_2 = train_images[train_mask2] / 255.0
    train_labels_2 = train_labels[train_mask2]
    test_images_2 = test_images[test_mask2] / 255.0
    test_labels_2 = test_labels[test_mask2]

    return train_images_2, train_labels_2, test_images_2, test_labels_2


In [None]:
def _downloadFile(url, filename, timeout=60):
    """Download ``url`` and write the response body to ``filename``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here instead of saving an HTML error page under a .h5/.pkl name,
    # which would only surface later as an obscure load error.
    response.raise_for_status()
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)


def loadData():
    """Download the saved Part 2 artefacts, load them and build the data subset.

    Downloads the classifier, the autoencoder and the pickled class list into
    the current working directory, loads them, then calls
    ``returnBlockTwoSet`` with the class list.

    Returns:
        Tuple ``(part2imgclass_bestmodel, part2imgclass_autoencoder,
        block_classes, train_images, train_labels, test_images, test_labels)``.

    Raises:
        requests.RequestException: If any download fails.
    """
    base_url = "https://github.com/smulkerrins/dlassignment/raw/refs/heads/main/"
    bestmodel_filename = 'part2imgclass_bestmodel.h5'
    autoencoder_filename = 'part2imgclass_autoencoder.h5'
    classes_filename = 'part2_block2_classes.pkl'

    print("Part 2: Loading saved models and data")
    for filename in (bestmodel_filename, autoencoder_filename, classes_filename):
        _downloadFile(base_url + filename, filename)
    print("Part 2: Files downloaded")

    part2imgclass_bestmodel = tf.keras.models.load_model(bestmodel_filename)
    part2imgclass_autoencoder = tf.keras.models.load_model(autoencoder_filename)

    # NOTE(security): pickle.load can execute arbitrary code from the file.
    # This is only acceptable while the download source is trusted.
    with open(classes_filename, 'rb') as part2_block2_classes_picklefile:
        block_classes = pickle.load(part2_block2_classes_picklefile)

    print("Part 2: Saved models and data loaded")

    train_images, train_labels, test_images, test_labels = returnBlockTwoSet(block_classes)
    print("Part 2: CIFAR-100 set loaded")

    return part2imgclass_bestmodel, part2imgclass_autoencoder, block_classes, train_images, train_labels, test_images, test_labels

### Scoring and Plotting Methods

In [None]:
def classificationReportAndScore(model, history, test_images, test_labels, block_classes):
    """Print a per-class classification report sorted by F1 and the overall accuracy.

    Args:
        model: Object whose ``predict(test_images)`` returns one row of
            per-class scores per image; the arg-max of each row is used as
            the predicted label id.
        history: Unused; kept so existing call sites keep working.
        test_images: Images passed to ``model.predict``.
        test_labels: Integer label ids (indices into ``cifar100_labels``),
            one per test image.
        block_classes: Label ids whose class names are listed in the report.
    """
    subset_class_names = np.unique([cifar100_labels[i] for i in block_classes])

    # Predicted label id = index of the highest score in each prediction row.
    predictions = model.predict(test_images)
    predicted_labels = np.argmax(predictions, axis=1)

    # Compare on class names rather than numeric ids so the report is readable.
    true_labels_text = np.array([cifar100_labels[label] for label in test_labels])
    predicted_labels_text = np.array([cifar100_labels[label] for label in predicted_labels])

    report = classification_report(true_labels_text, predicted_labels_text, labels=subset_class_names, output_dict=True)

    # One row per class/average, best F1 first.
    df = pd.DataFrame(report).transpose()
    df = df.sort_values(by='f1-score', ascending=False)
    print(df.to_markdown(numalign="left", stralign="left"))

    # sklearn only emits an 'accuracy' entry when `labels` covers every class
    # present in the true and predicted labels; if the model predicts a class
    # outside the subset it reports 'micro avg' instead. Fall back to the
    # plain fraction of correct predictions so this never raises KeyError.
    overall_accuracy = report.get('accuracy')
    if overall_accuracy is None:
        overall_accuracy = float(np.mean(true_labels_text == predicted_labels_text))
    print(f"Overall accuracy: {overall_accuracy:.4f}")

In [None]:
def confusionMatrix(model, history, test_images, test_labels, block_classes):
    """Plot the confusion matrix of ``model`` on the test set for ``block_classes``.

    Predictions are the arg-max of ``model.predict(test_images)``. True and
    predicted label ids are mapped to class names through ``cifar100_labels``
    and the matrix is restricted to the names of ``block_classes``.
    ``history`` is accepted but not used.
    """
    from sklearn.metrics import confusion_matrix

    # Axis order of the matrix: the unique class names of the requested block.
    axis_names = np.unique([cifar100_labels[class_id] for class_id in block_classes])

    class_scores = model.predict(test_images)
    predicted_ids = np.argmax(class_scores, axis=1)

    actual_names = np.array([cifar100_labels[class_id] for class_id in test_labels])
    predicted_names = np.array([cifar100_labels[class_id] for class_id in predicted_ids])

    matrix = confusion_matrix(actual_names, predicted_names, labels=axis_names)

    # Draw the matrix as a heat map with a colour scale.
    plt.figure(figsize=(10, 10))
    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    # One tick per class on both axes, labelled with the class name.
    positions = np.arange(len(axis_names))
    plt.xticks(positions, axis_names, rotation=45, ha='right')
    plt.yticks(positions, axis_names)

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

In [None]:
def plotTrainingAndValidationLoss(train_loss, val_loss):
    """Plot the training and validation loss series on one chart and show it."""
    curves = (
        (train_loss, 'Training Loss'),
        (val_loss, 'Validation Loss'),
    )
    for values, caption in curves:
        plt.plot(values, label=caption)

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Validation Loss')
    plt.show()
    

def plotTrainingAndValidationAccuracy(train_acc, val_acc):
    """Plot the training and validation accuracy series on one chart and show it."""
    curves = (
        (train_acc, 'Training Accuracy'),
        (val_acc, 'Validation Accuracy'),
    )
    for values, caption in curves:
        plt.plot(values, label=caption)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Training and Validation Accuracy')
    plt.show()
    
def plotModel(history):
    """Plot the loss curves, then the accuracy curves, stored in ``history.history``.

    Reads the ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``
    entries before drawing anything.
    """
    wanted_keys = ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    # Look up every series up front so a missing key fails before any plot is shown.
    loss, validation_loss, accuracy, validation_accuracy = [
        history.history[key] for key in wanted_keys
    ]

    plotTrainingAndValidationLoss(loss, validation_loss)
    plotTrainingAndValidationAccuracy(accuracy, validation_accuracy)

def plotAutoencoder(history):
    """Plot the ``'loss'`` and ``'val_loss'`` series stored in ``history.history``."""
    loss, validation_loss = [history.history[key] for key in ('loss', 'val_loss')]
    plotTrainingAndValidationLoss(loss, validation_loss)

In [None]:
def evaluateAutoencoder(autoencoder, test_images):
    """Evaluate ``autoencoder`` with ``test_images`` as both input and target and print the result.

    The printed value is whatever ``autoencoder.evaluate`` returns; it is
    labelled "Mean Squared Error", which presumably matches the loss the
    model was compiled with (not visible here).
    """
    # An autoencoder is scored against its own input: the target is the input.
    reconstruction_score = autoencoder.evaluate(test_images, test_images)
    print("Mean Squared Error:", reconstruction_score)

In [None]:
def visualTestAutoencoder(autoencoder, test_images, num_images=5):
    """Show the first test images side by side with their reconstructions.

    Args:
        autoencoder: Object whose ``predict(images)`` returns one
            reconstructed image per input image.
        test_images: Sliceable, sized collection of images that
            ``plt.imshow`` can display.
        num_images: Maximum number of image pairs to show (default 5). Fewer
            are shown when ``test_images`` is shorter.
    """
    sample_count = min(num_images, len(test_images))
    if sample_count <= 0:
        # Nothing to display; avoid calling predict on an empty batch.
        return

    # Only reconstruct the images that are actually displayed instead of
    # running the autoencoder over the whole test set.
    originals = test_images[:sample_count]
    reconstructed_images = autoencoder.predict(originals)

    for original, reconstructed in zip(originals, reconstructed_images):
        plt.figure(figsize=(6, 3))

        plt.subplot(1, 2, 1)
        plt.imshow(original)
        plt.title("Original")
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(reconstructed)
        plt.title("Reconstructed")
        plt.axis('off')

        plt.show()

In [None]:
def evaluateAndAnalyse(model, history, test_images, test_labels, block_classes):
    """Plot the confusion matrix, then print classification reports and overall accuracy.

    Args:
        model: Object whose ``predict(test_images)`` returns one row of
            per-class scores per image; the arg-max of each row is used as
            the predicted label id.
        history: Unused; kept so existing call sites keep working.
        test_images: Images passed to ``model.predict``.
        test_labels: Integer label ids (indices into ``cifar100_labels``),
            one per test image.
        block_classes: Label ids whose class names define the confusion
            matrix axes and the rows of the named report.
    """
    from sklearn.metrics import confusion_matrix

    subset_class_names = np.unique([cifar100_labels[i] for i in block_classes])

    # Predicted label id = index of the highest score in each prediction row.
    predictions = model.predict(test_images)
    predicted_labels = np.argmax(predictions, axis=1)

    # Compare on class names rather than numeric ids so the output is readable.
    true_labels_text = np.array([cifar100_labels[label] for label in test_labels])
    predicted_labels_text = np.array([cifar100_labels[label] for label in predicted_labels])

    cm = confusion_matrix(true_labels_text, predicted_labels_text, labels=subset_class_names)

    # Draw the confusion matrix as a heat map with a colour scale.
    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    # One tick per class on both axes, labelled with the class name.
    tick_marks = np.arange(len(subset_class_names))
    plt.xticks(tick_marks, subset_class_names, rotation=45, ha='right')
    plt.yticks(tick_marks, subset_class_names)

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    # Plain-text report keyed by numeric label id (reuses the arg-max above).
    print(classification_report(test_labels, predicted_labels))

    report = classification_report(true_labels_text, predicted_labels_text, labels=subset_class_names, output_dict=True)

    # One row per class/average, best F1 first.
    df = pd.DataFrame(report).transpose()
    df = df.sort_values(by='f1-score', ascending=False)
    print(df.to_markdown(numalign="left", stralign="left"))

    # sklearn only emits an 'accuracy' entry when `labels` covers every class
    # present in the true and predicted labels; if the model predicts a class
    # outside the subset it reports 'micro avg' instead. Fall back to the
    # plain fraction of correct predictions so this never raises KeyError.
    overall_accuracy = report.get('accuracy')
    if overall_accuracy is None:
        overall_accuracy = float(np.mean(true_labels_text == predicted_labels_text))
    print(f"Overall accuracy: {overall_accuracy:.4f}")

## Image Classification Model

In [None]:
# Download and load the saved classifier, autoencoder and class list, plus the
# CIFAR-100 train/test subset filtered to those classes (see loadData).
model, autoencoder, block_classes, train_images, train_labels, test_images, test_labels = loadData()

### Training
N/A: the training history could not be serialised/deserialised because of the bug described above.

In [None]:
# plotModel(model.history)

### Classification and Scoring
N/A: the classification report and accuracy score could not be serialised/deserialised because of the bug described above.

In [None]:
# classificationReportAndScore(model, model.history, test_images, test_labels, block_classes)

### Confusion Matrix
N/A: the confusion matrix could not be serialised/deserialised because of the bug described above.

In [None]:
# confusionMatrix(model, model.history, test_images, test_labels, block_classes)

## Autoencoder

In [None]:
### Visual Comparison

In [None]:
# Show original test images next to the loaded autoencoder's reconstructions.
visualTestAutoencoder(autoencoder, test_images)