# NEURAL NETWORKS AND DEEP LEARNING

---
A.A. 2021/22 (6 CFU) - Dr. Alberto Testolin, Dr. Umberto Michieli
---


# Homework 2 - Unsupervised Deep Learning

### Author: Michele Guadagnini - Mt.1230663

In [None]:
### ADDITIONAL LIBRARIES THAT NEED INSTALLATION (uncomment if needed)

#!pip install optuna
#!pip install pytorch-lightning

### the following packages are required to plot and save figures of the optuna study
#!pip install plotly
#!pip install kaleido

### the following one is required to print a model summary
#!pip install torchinfo

In [None]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.data import random_split
import torchvision
from torchvision import transforms

# python imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import copy
import logging
import datetime
import json

# additional libraries
import plotly.express as px
import plotly.graph_objects as go
import torchinfo
import optuna
import pytorch_lightning as pl

# reduce verbosity 
logging.getLogger("optuna").setLevel(logging.ERROR)
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)

In [None]:
# to have reproducible results: 
from pytorch_lightning.utilities.seed import seed_everything

### 'seed_everything' internally calls the following:
#    random.seed(seed)
#    np.random.seed(seed)
#    torch.manual_seed(seed)
#    torch.cuda.manual_seed_all(seed)

In [None]:
# load user settings from file 'settings.py'
import settings

<a name="toc"></a>
# Table of contents:  
---

1. [**Autoencoder**](#autoencoder)
    1. [Hyper-parameters optimization with Optuna](#optunaAE)
    1. [Model training](#trainingAE)
    1. [Model testing and analysis](#testingAE)   

1. [**Transfer Learning**](#transfer_learning)
    1. [Encoder fine-tuning for classification](#finetuning)
    1. [Model testing and analysis](#finetesting)
    
1. [**Denoising autoencoder**](#denoising_autoencoder)
    1. [Model training](#trainingDAE)
    1. [Model testing and analysis](#testingDAE)

1. [**Variational Autoencoder ($\beta$-VAE)**](#VAE)
    1. [Model training](#trainingVAE)
    1. [Model testing and analysis](#testingVAE)
    
---

<a name="autoencoder"></a> 
# Autoencoder 
     
[Table of contents](#toc) 

<a name="optunaAE"></a> 
## Hyper-parameters optimization with Optuna

In [None]:
# set random state
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from data_management.fashion_mnist import FashionMNISTDataModule

datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,
                                    random_state = settings.MAGIC_NUM,
                                   )

In [None]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder, SymmetricAutoencoderHPS

# Candidate convolutional architectures: one entry per architecture, each with
# three convolutional layers.
### each row is: kernel size, stride, padding
proposed_conv = [ [[3, 2, 1], # shape through network: 28   -> 14 ->  7 -> 5
                   [3, 2, 1],
                   [3, 1, 0],
                  ],
                  [[5, 2, 1], # shape through network: 28   -> 13 ->  6 -> 6
                   [5, 2, 1],
                   [3, 1, 1],
                  ],
                  [[7, 2, 1], # shape through network: 28   -> 12 ->  5 -> 5
                   [5, 2, 1],
                   [3, 1, 1],
                  ],
                ]

# Candidate channel configurations (one value per convolutional layer)
proposed_channels = [[16,32,32],
                     [16,32,64],
                     [32,32,64],
                    ]

# Hyper-parameters space dictionary
# Notes:
### 1. no batch norm, to ensure that the encodings of different images are independent;
###    instance norm is used instead.
### 2. dropout is applied only after a linear layer; some tests showed that a small dropout rate
###    helps the model learn a better clustered latent space.
### 3. we will use the pytorch lightning functionality "auto_lr_find" for the learning rate.
### Entries marked with #FIXED contain a single value (or min == max), so they are not searched.

hps_dict = dict(conv_configs        = proposed_conv,
                channels_configs    = proposed_channels,
                n_linear            = [1],                # number of linear layers #FIXED
                linear_units_range  = [128, 256, 8],      # min, max, step
                latent_space_range  = [10, 40, 2],        # min, max, step
                instance_norm       = [True, False],      # instance norm
                Pdropout_range      = [0.16, 0.16],       # dropout                 #FIXED
                activations         = ["leaky_relu"],                               #FIXED
                optimizers          = ["adam", "sgd", "adamax"],
                learning_rate_range = [1., 1.],           # we will use "auto_lr_find" functionality of Pytorch Lightning
                L2_penalty_range    = [1e-7, 1e-4],
                momentum_range      = [0.91, 0.91],       # momentum parameter (used only with sgd)  #FIXED
               )

hp_space = SymmetricAutoencoderHPS(hps_dict)

In [None]:
from utilities.train_tools import Objective

### optuna study objective function
objective = Objective(model_class = SymmetricAutoencoder,
                      datamodule  = datamodule,
                      hp_space    = hp_space,
                      max_epochs  = 60,
                      use_gpu     = settings.USE_GPU,
                      early_stop_patience = 10,
                     )

### MedianPruner
pruner = optuna.pruners.MedianPruner(n_startup_trials = 10,    # trials to complete before starting to prune
                                     n_warmup_steps   = 20,    # steps to take before evaluating pruning
                                     interval_steps   = 10,    # steps between trial pruning checks
                                    )

# TPE sampler.
# NOTE: the 'seed' argument is commented out, so the sampler is NOT seeded here;
#       uncomment it to make the sampler behave in a deterministic way.
sampler = optuna.samplers.TPESampler(n_startup_trials = 30,    # use random sampling at beginning
                                     #seed = settings.MAGIC_NUM,
                                    )
### create study
# make sure that the folder hosting the sqlite database exists
os.makedirs(settings.autoencoder.OPTUNA_DIR, exist_ok=True)

# the study is stored in '<OPTUNA_DIR>/<study_name>.db' and loaded from there if it already exists
study_name = settings.autoencoder.OPTUNA_STUDY_NAME + "_local"
study = optuna.create_study(study_name = study_name,
                            direction  = "minimize",
                            pruner     = pruner,
                            sampler    = sampler,
                            storage    = "sqlite:///"+settings.autoencoder.OPTUNA_DIR+"/"+study_name+".db",
                            load_if_exists = True,
                           )

In [None]:
### run optimization
# route the warnings through the logging system while the study is running
logging.captureWarnings(True)

# NOTE: these two values are only used by the commented-out 'study.optimize' call below
Ntrials = 5
MaxTime = None

### In the following cells we will load a database with the results of the study run on COLAB with
###   GPU support. To run the study instead, uncomment the lines below.

#print("Starting study '"+study.study_name+f"' with n_trials={Ntrials} and timeout={MaxTime}")
#study.optimize(objective,
#               n_trials       = Ntrials,
#               timeout        = MaxTime, # timeout in seconds
#               gc_after_trial = True,    # run garbage collection
#              )

# restore the default handling of warnings
logging.captureWarnings(False)

### Study results analysis

In [None]:
### load the COLAB study database (change 'study_name' to load a different study)
study_name = settings.autoencoder.OPTUNA_STUDY_NAME + "_COLAB"

study = optuna.load_study(study_name, 
                          storage = "sqlite:///"+settings.autoencoder.OPTUNA_DIR+"/"+study_name+".db",
                         )

In [None]:
# show the K best trials of the study (lowest objective value first)
K = 10

# columns that are not useful in the printed table
unused_columns = ["user_attrs_hypers", "datetime_complete"]

study_df = (study.trials_dataframe()
                 .drop(columns=unused_columns)
                 .sort_values(by="value")
           )

study_df.head(K)

In [None]:
from utilities.train_tools import OptimizationInspector

optuna_inspector = OptimizationInspector(study, settings.autoencoder.OPTUNA_DIR, figsize=(900,500))

In [None]:
# parameters sets for parallel plots 
parallel_sets = [["architecture", #name suffix
                  "channels_config_id","conv_config_id","linear_units_0","latent_space_dim",
                 ],
                 ["optimization", #name suffix
                  "optimizer","L2_penalty","learning_rate","instance_norm",#"Pdropout",#"momentum",
                 ],
                ]

# parameters sets for contour plots
contour_sets = [["channels_config_id","conv_config_id"],
                ["linear_units_0", "latent_space_dim"],
                ["learning_rate","L2_penalty"],
               ]

# parameters sets for slice plots
slice_sets   = [["conv_config_id","channels_config_id","linear_units_0","latent_space_dim","optimizer"],
               ]

# parameters to use for importance plot
importance_params = ["conv_config_id","channels_config_id","linear_units_0","latent_space_dim",
                     "optimizer","L2_penalty","instance_norm",
                    ]

In [None]:
optuna_inspector.plot_all(parallel_sets     = parallel_sets,
                          contour_sets      = contour_sets,
                          slice_sets        = slice_sets,
                          importance_params = importance_params,
                          save = True,
                          show = "111111111", #"110001000",    # show options
                         )

In [None]:
optuna_inspector.print_summary()
optuna_inspector.save_best_hypers_json(settings.autoencoder.BEST_HYPERS_FILE)

<a name="trainingAE"></a> 
## Model training

In [None]:
# set random state
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
import json

# read best hyper-parameters from file
with open(settings.autoencoder.BEST_HYPERS_FILE, "r") as file:
    best_hypers = json.load(file)

In [None]:
from data_management.fashion_mnist import FashionMNISTDataModule

### define datamodule
datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,
                                    random_state = settings.MAGIC_NUM,
                                   )

The cell below shows a summary of the model that will be trained.

In [None]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder

# instantiate the network with the best hyper-parameters read from file
shape = datamodule.get_sample_size()
net_kwargs = {name: best_hypers[name]
              for name in ("params", "optimizer", "learning_rate", "L2_penalty", "momentum")}
net = SymmetricAutoencoder(shape, **net_kwargs)

# size of a dummy data batch: 256 samples, each with the shape of a dataset sample
dummy_batch = [256, *shape]

# print a layer-by-layer summary of the network
summary_columns = ("output_size","num_params","mult_adds",)  #"input_size",
torchinfo.summary(net,
                  dummy_batch,
                  depth        = 4,
                  col_names    = summary_columns,
                  col_width    = 20,
                  row_settings = ("var_names",),
                 )

In the following cell we define some callbacks that will be useful during and after training.

In [None]:
from utilities.train_tools import LossesTracker
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from utilities.encoder_tools import ImageReconstruction

### callbacks
# track the losses (train and valid) during training
losses_tracker = LossesTracker()

# early stopping on the validation loss
early_stop = EarlyStopping(monitor   = "val_loss",
                           min_delta = 0.0001,
                           patience  = 10,
                           verbose   = False,
                           check_on_train_epoch_end=False, # check early_stop at end of validation epoch
                          )

# checkpoint the model if the monitored value improved
checkpoint = ModelCheckpoint(dirpath  = settings.CHECKPOINT_DIR,
                             filename = "checkpt_{epoch}_{val_loss:.2f}",
                             monitor  = "val_loss",
                            )

# reconstruction of a test sample at every epoch
sample_id = 34    # bag
dataset = torchvision.datasets.FashionMNIST(settings.DATASETS_DIR,
                                            train    = False,
                                            download = True,
                                           )
# convert the selected image to a tensor and add a leading (batch) dimension
sample = transforms.functional.to_tensor(dataset[sample_id][0]).unsqueeze(dim=0)
# move the sample to the GPU when training on GPU
if settings.USE_GPU:
    sample = sample.to("cuda")

rec_callback = ImageReconstruction(sample,
                                   to_show   = False,
                                   save_path = settings.autoencoder.RECONSTRUCTIONS_DIR,
                                  )

In [None]:
### define model and hyper-parameters
model = SymmetricAutoencoder(input_size    = datamodule.get_sample_size(),
                             params        = best_hypers["params"],
                             optimizer     = best_hypers["optimizer"],
                             learning_rate = best_hypers["learning_rate"],
                             L2_penalty    = best_hypers["L2_penalty"],
                             momentum      = best_hypers["momentum"],
                            )

### define trainer
trainer = pl.Trainer(logger     = False,
                     max_epochs = 200,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [losses_tracker, early_stop, checkpoint, rec_callback],
                     val_check_interval   = 1.,
                     enable_model_summary = False,
                     num_sanity_val_steps = 0,     # disable validation sanity check before training
                     auto_lr_find = False,
                    )

In [None]:
# format shared by the start/end timestamps
stamp_format = "%d/%m/%Y %H:%M:%S"

print( "Training started at:", datetime.datetime.now().strftime(stamp_format) )

# run the training and measure its wall-clock duration
fit_begin = time.time()
trainer.fit(model, datamodule=datamodule)
fit_time = time.time() - fit_begin

print( "Training ended at:", datetime.datetime.now().strftime(stamp_format) )
print("Fit time:", str(datetime.timedelta(seconds=fit_time)) )

In [None]:
from utilities.plot_tools import plot_history

save_path = settings.autoencoder.ROOT_DIR + "/train_history.pdf"

plot_history(losses_tracker.train, losses_tracker.valid, ylog=True, save_path=save_path)

In [None]:
import shutil

# copy best model checkpoint into the results directory
shutil.copy(checkpoint.best_model_path, settings.autoencoder.BEST_MODEL_CKPT_FILE)

<a name="testingAE"></a> 
## Model testing and analysis

In [None]:
# set random state
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from data_management.fashion_mnist import FashionMNISTDataModule
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder

### define datamodule
datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,
                                    random_state = settings.MAGIC_NUM,
                                   )

### load model from checkpoint
model = SymmetricAutoencoder.load_from_checkpoint( settings.autoencoder.BEST_MODEL_CKPT_FILE )
model.freeze()

In [None]:
from utilities.encoder_tools import EncodedRepresentation

# callback to save the encoded representations of the test samples
encoded_test_repr = EncodedRepresentation()

trainer = pl.Trainer(logger     = False,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [encoded_test_repr],
                    )

result = trainer.test(model, datamodule=datamodule, verbose=False)
test_loss = result[0]["test_loss"]
print("TEST LOSS: ", test_loss)

### Latent Space exploration

In [None]:
from utilities.encoder_tools import LatentSpaceAnalyzer

# test dataset encoded samples
Latent_analyzer = LatentSpaceAnalyzer(encoded_test_repr.encoded_samples,
                                      encoded_test_repr.labels,
                                      label_names = datamodule.get_label_names(),
                                      save_path = settings.autoencoder.ROOT_DIR,
                                     )

In [None]:
Latent_analyzer.PCA_reduce(n_components=2, filename="PCA_reduced_space.pdf")

In [None]:
Latent_analyzer.TSNE_reduce(n_components=2, perplexity=80, filename="TSNE_reduced_space.pdf")

In [None]:
Latent_analyzer.Isomap_reduce(n_components=2, filename="Isomap_reduced_space.pdf")

### Image generation from latent codes

In [None]:
# overall range spanned by the encoded test samples (over all latent dimensions)
test_encodings_df = pd.DataFrame(encoded_test_repr.encoded_samples)
test_enc_max = max(test_encodings_df.max())
test_enc_min = min(test_encodings_df.min())

print(f"Range of test dataset encoded values: [{test_enc_min}, {test_enc_max}]")

# latent dimension
latent_dim = model.enc_hp["latent_space_dim"]

### generate some examples of images from random latent codes,
### drawn uniformly from 60% of the observed range
examples  = 20
sampling_low  = test_enc_min*0.6
sampling_high = test_enc_max*0.6

images    = []
encodings = []
model.eval()
with torch.no_grad():
    for _ in range(examples):
        # randomly sample a code from latent space and store it
        latent_code = np.random.uniform(sampling_low, sampling_high, latent_dim)
        encodings.append(latent_code)
        # decode the code (with a leading batch dimension) into an image
        encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
        images.append(model.decoder(encoded_sample))

encodings_df = pd.DataFrame(encodings)
encodings_df

In [None]:
from utilities.plot_tools import plot_img_grid

_ = plot_img_grid((4,5), images, to_show=True, axis_off=False, figsize=(10,8), 
                  folder_path = settings.autoencoder.ROOT_DIR,
                  filename    = "generated_images.pdf",
                 )

In [None]:
# overlap sampled points to the PCA plot by applying to them the same transformation
Latent_analyzer.PCA_overlap_points(encodings, to_show=True, filename=None)

### Latent space path along centroids 

In [None]:
### Compute centroids of clusters in latent space for each label and generate images
examples = len(datamodule.get_label_names())
centroids_imgs = []
centroids      = []

# convert the encoded samples only once: they do not change inside the loop
all_samples = np.array(encoded_test_repr.encoded_samples)

model.eval()
with torch.no_grad():
    for ii in range(examples):
        # boolean mask selecting the samples of the ii-th label
        mask = [(ll == ii) for ll in encoded_test_repr.labels]
        filtered_samples = all_samples[mask]
        # the centroid is the mean of the selected samples along each latent dimension
        latent_code = np.mean(filtered_samples, axis=0)
        # append encoding
        centroids.append(latent_code)
        # generate image (a leading batch dimension is added before decoding)
        encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
        generated = model.decoder(encoded_sample)
        # append image
        centroids_imgs.append(generated)

centroids_df = pd.DataFrame(centroids)
centroids_df

### Plot the images generated from the centroids
#  NB: clusters do not have a particular shape (neither hyper-spherical nor convex in general),
#      so the obtained centroids could still produce a bad representation of the object
from utilities.plot_tools import plot_img_grid

_ = plot_img_grid((2,5), centroids_imgs, to_show=True, axis_off=False, figsize=(10,4.5),
                  folder_path = settings.autoencoder.ROOT_DIR,
                  titles      = datamodule.get_label_names(),
                  filename    = "centroids_images.pdf",
                 )

In [None]:
# gif for path along centroids
order = [7,5,9,8,6,2,4,0,3,1]   # sequence of labels visited by the path
steps = 15                      # number of points sampled between two consecutive centroids
pause = 1                       # number of extra frames spent on each centroid

# sample latent codes on path, starting with a pause on the first centroid
latent_codes = [centroids[order[0]]]*pause
# walk over consecutive pairs of labels: the number of segments follows the
# length of 'order' instead of being hard-coded
for start_id, end_id in zip(order, order[1:]):
    # path between two centroids (both end points included)
    path = np.linspace(centroids[start_id], centroids[end_id], steps)

    # latent codes of intermediate images (one per row of 'path')
    latent_codes.extend(path)

    # repeat the centroid code (like a pause)
    latent_codes.extend([centroids[end_id]]*pause)

# decode into images
path_images = []
model.eval()
with torch.no_grad():
    for code in latent_codes:
        # generate image (a leading batch dimension is added before decoding)
        encoded_sample = torch.tensor(code).float().unsqueeze(dim=0)
        generated = model.decoder(encoded_sample)
        # append image
        path_images.append(generated)

In [None]:
# onward and backward 
path_images = path_images + path_images[::-1]

In [None]:
from PIL import Image

# resize images (4x the 28 pixels used as base size)
bigger_images = [transforms.functional.resize(img=img, size=28*4) for img in path_images]

# convert tensors to PIL (moved to cpu and squeezed first)
imgs = [transforms.functional.to_pil_image(img.cpu().squeeze()) for img in bigger_images]

# write all the frames into a single gif: the first image is saved and the others are appended
# duration is the number of milliseconds between frames
# NOTE: 'save_path' is read again by the cell that displays the animation
save_path = settings.autoencoder.ROOT_DIR + "/centroids_path.gif"
imgs[0].save(save_path, save_all=True, append_images=imgs[1:], duration=80, loop=0)

In [None]:
# load and display the animation
from IPython.display import HTML

# 'gif' is always assigned: when the file is missing it is None, so that the cell
# neither raises a NameError nor shows a stale animation left by a previous run
gif = HTML(f'<img src="{save_path}">') if os.path.isfile(save_path) else None
if gif is None:
    print(f"Animation file not found: {save_path}")

gif

### Visualization of learned manifold reduced with PCA

In the following cells we plot a set of images sampled from the latent space reduced with PCA. We will compare it with the one obtained from the variational model.

In [None]:
# per-component ranges of the PCA-reduced test encodings
PCA_reduced_df = pd.DataFrame(Latent_analyzer.PCA_reduced_samples)
PCA_enc_max = PCA_reduced_df.max()
PCA_enc_min = PCA_reduced_df.min()

# uniform sampling grid covering 60% of the range of each of the two components
samples_h = 20
samples_v = 20
x_latent_code = np.linspace(PCA_enc_min[0]*0.6, PCA_enc_max[0]*0.6, samples_h)
y_latent_code = np.linspace(PCA_enc_min[1]*0.6, PCA_enc_max[1]*0.6, samples_v)

### generate one image per grid point
images    = []
encodings = []
model.eval()
with torch.no_grad():
    # outer loop: y values from the last to the first; inner loop: x values in order
    for y_value in y_latent_code[::-1]:
        for x_value in x_latent_code:
            # 2-dim PCA reduced code of the grid point
            PCA_code = np.array([x_value, y_value])
            # map the reduced code back to the full latent space
            latent_code = Latent_analyzer.pca.inverse_transform(PCA_code)
            encodings.append(latent_code)
            # decode the latent code (with a leading batch dimension) into an image
            encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
            images.append(model.decoder(encoded_sample))


In [None]:
# NOTE(review): the grid shape is assumed to be (rows, columns), as in the other
# plot_img_grid calls of this notebook. The images were generated row by row with
# samples_h images per row, so the grid has samples_v rows and samples_h columns
# (the two orderings only coincide while samples_h == samples_v).
_ = plot_img_grid((samples_v,samples_h), images, to_show=True, axis_off=True, figsize=(12,12),
                  folder_path = settings.autoencoder.ROOT_DIR,
                  filename    = "manifold_images_PCA.pdf",
                 )

---

<a name="transfer_learning"></a>
# Transfer Learning 
     
[Table of contents](#toc) 

In [None]:
# set random state
seed_everything(seed=settings.MAGIC_NUM)

<a name="finetuning"></a> 
## Encoder fine-tuning for classification

In this section we create a simple fully-connected network that takes as input the flattened output of the convolutional part of the encoder trained above and outputs a class label. This approach is equivalent to creating a CNN with frozen parameters in the convolutional layers, but it allows us to run the forward pass of the convolutional layers just once for each sample instead of at every epoch, saving a lot of repeated computation and time.

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder
from data_management.fashion_mnist import FashionMNISTDataModule

### define datamodule
datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,
                                    random_state = settings.MAGIC_NUM,
                                   )
datamodule.prepare_data()
datamodule.setup()

### load model from checkpoint
autoencoder_model = SymmetricAutoencoder.load_from_checkpoint( settings.autoencoder.BEST_MODEL_CKPT_FILE )
autoencoder_model.freeze()

In [None]:
# Check gradient tracking of layers
for param_name, param in autoencoder_model.encoder.named_parameters():
    ww = 28
    print(f"{param_name: <{ww}} : requires_grad={param.requires_grad}")

In the following cells we compute the preprocessed inputs of the train and validation datasets to be passed to the classifier.

In [None]:
from data_management.data_tools import DefaultDataset
from torch.utils.data import DataLoader

# run the convolutional part of the frozen encoder once over the train dataset
# and collect one (flattened features, label) pair per sample
train_encodings = []
with torch.no_grad():
    for data, target in datamodule.train_dataloader():
        features = autoencoder_model.encoder.encoder_cnn(data)
        encoded  = autoencoder_model.encoder.flatten(features)
        train_encodings.extend(zip(encoded, target))

# wrap the pairs into a dataset and serve them in shuffled batches
train_encodings = DefaultDataset(train_encodings)
train_encs = DataLoader(train_encodings, batch_size=256, shuffle=True)

In [None]:
# run the convolutional part of the frozen encoder once over the valid dataset
# and collect one (flattened features, label) pair per sample
val_encodings = []
with torch.no_grad():
    for data, target in datamodule.val_dataloader():
        features = autoencoder_model.encoder.encoder_cnn(data)
        encoded  = autoencoder_model.encoder.flatten(features)
        val_encodings.extend(zip(encoded, target))

# wrap the pairs into a dataset served in fixed order
val_encodings = DefaultDataset(val_encodings)
val_encs = DataLoader(val_encodings, batch_size=500, shuffle=False)

Here we define the classifier to be trained. It is a fully-connected neural network with 2 linear layers. The first layer is cloned from the first linear block of the encoder, and training and gradient tracking are activated on it. The second layer is a simple linear layer that outputs the class label.

In [None]:
from autoencoder.transfer_learning import EncoderClassifier

num_classes = len(datamodule.get_label_names())

# compute flatten layer dimension (number of elements in the output of the convolutional part)
cnn_out = autoencoder_model._compute_shapes()[0]
out_dim = np.prod(cnn_out)

net = EncoderClassifier(input_dim     = out_dim,
                        activation    = "leaky_relu",
                        linear_config = [autoencoder_model.enc_hp["linear_config"][0]],
                        num_classes   = num_classes,
                        optimizer     = "adamax",
                        learning_rate = 1.,      # lr_finder will change it
                        L2_penalty    = 0.,
                       )

# substitute first layer with a CLONE of the pretrained one: a deep copy keeps the
# pretrained weights but does not share them with the frozen autoencoder, which
# would otherwise be modified (and un-frozen) by the fine-tuning
net.classifier[0] = copy.deepcopy(autoencoder_model.encoder.encoder_lin[0])

# activate gradient tracking on first (pretrained) layer
for param in net.classifier[0].parameters():
    param.requires_grad = True

net

In [None]:
# Check gradient tracking of layers
for param_name, param in net.named_parameters():
    ww = 28
    print(f"{param_name: <{ww}} : requires_grad={param.requires_grad}")

In [None]:
from utilities.train_tools import LossesTracker
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

### callbacks
# track the losses (train and valid) during training
losses_tracker = LossesTracker()

# early stopping
early_stop = EarlyStopping(monitor   = "val_loss", 
                           min_delta = 0.0005, 
                           patience  = 5, 
                           verbose   = False, 
                           check_on_train_epoch_end=False, # check early_stop at end of validation epoch
                          )

# checkpoint the model if the monitored value improved
checkpoint = ModelCheckpoint(dirpath  = settings.CHECKPOINT_DIR, 
                             filename = "checkpt_transfer_learning_{epoch}_{val_loss:.2f}", 
                             monitor  = "val_loss",
                            )

# define trainer
trainer = pl.Trainer(logger     = False,
                     default_root_dir = settings.transfer_learning.ROOT_DIR,
                     max_epochs = 50,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [losses_tracker, early_stop, checkpoint],
                     enable_model_summary = False,
                     num_sanity_val_steps = 0,     # disable validation sanity check before training
                     auto_lr_find = True,
                    )

In the cells below we run a `PyTorch-Lightning` utility to estimate a good learning rate. The suggested value is the point of steepest descent in the *loss* vs *learning rate* plot. For reference see:
[learning_rate_finder](https://pytorch-lightning.readthedocs.io/en/stable/advanced/training_tricks.html#learning-rate-finder)

In [None]:
# make sure that the output folder (used below to save the plot) exists
os.makedirs(settings.transfer_learning.ROOT_DIR, exist_ok=True)

# Run learning rate finder
lr_finder = trainer.tune(net, train_dataloaders=train_encs, val_dataloaders=val_encs)["lr_find"]

# plot the loss recorded for each tested learning rate (log scale on the x axis)
fig = px.scatter(lr_finder.results,
                 x="lr", y="loss",
                 labels = {"lr"   :"Learning Rate",
                           "loss" :"Loss",
                          },
                 title = "Learning Rate finder",
                 log_x = True,
                )
# mark the suggested point with a star, when a suggestion is available
# NOTE(review): '_optimal_idx' is a private attribute of the finder object;
#               it may change between library versions
if lr_finder._optimal_idx is not None:
    index = lr_finder._optimal_idx
    fig.add_trace(go.Scatter(x=[lr_finder.results["lr"][index]],
                             y=[lr_finder.results["loss"][index]],
                             mode = 'markers',
                             marker_symbol = 'star',
                             marker_size = 15,
                             showlegend = False,
                             name = "Suggestion",
                 )          )
fig.show()
fig.write_image(settings.transfer_learning.ROOT_DIR + "/lr_finder.pdf")

# NOTE(review): presumably 'net.lr' is the attribute updated by the tuner - TODO confirm
print("Suggested learning rate: ", net.lr)

In [None]:
print( "Training started at:", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S") )

trainer.fit(net, 
            train_dataloaders=train_encs,
            val_dataloaders  =val_encs,
           )

print( "Training ended at:", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S") )

In [None]:
from utilities.plot_tools import plot_history

os.makedirs(settings.transfer_learning.ROOT_DIR, exist_ok=True)
save_path = settings.transfer_learning.ROOT_DIR + "/train_history.pdf"

plot_history(losses_tracker.train, losses_tracker.valid, ylog=False, save_path=save_path)

In [None]:
import shutil

# copy best model checkpoint into the results directory
shutil.copy(checkpoint.best_model_path, settings.transfer_learning.BEST_MODEL_CKPT_FILE)

<a name="finetesting"></a> 
## Model testing and analysis

### Test loss and confusion matrix

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from autoencoder.transfer_learning import EncoderClassifier
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder
from data_management.fashion_mnist import FashionMNISTDataModule

# define datamodule
datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,
                                    random_state = settings.MAGIC_NUM,
                                   )
datamodule.prepare_data()
datamodule.setup()

# load model from checkpoint
autoencoder_model = SymmetricAutoencoder.load_from_checkpoint( settings.autoencoder.BEST_MODEL_CKPT_FILE )
autoencoder_model.freeze()

net = EncoderClassifier.load_from_checkpoint(settings.transfer_learning.BEST_MODEL_CKPT_FILE)
net.freeze()

# define trainer
trainer = pl.Trainer(logger = False,
                     gpus   = 1 if settings.USE_GPU else None,
                    )

In [None]:
from data_management.data_tools import DefaultDataset
from torch.utils.data import DataLoader

# run the convolutional part of the encoder over the test set and pair every
# flattened feature vector with its label
encoder = autoencoder_model.encoder
test_encodings = []
with torch.no_grad():
    for data, target in datamodule.test_dataloader():
        encoded = encoder.flatten(encoder.encoder_cnn(data))
        test_encodings.extend(
            (encoded[idx], target[idx]) for idx in range(len(target))
        )

# wrap the (encoding, label) pairs into a dataset and serve them in fixed order
test_encodings = DefaultDataset(test_encodings)
test_encs = DataLoader(test_encodings, batch_size=500, shuffle=False)

In [None]:
# run the classifier over the encoded test set; the result is iterated batch by
# batch in the next cell, which reads the "outputs", "labels" and "test_loss" keys
predict_output = trainer.predict(net, dataloaders=test_encs, return_predictions=True)

In [None]:
import torchmetrics
accuracy = torchmetrics.Accuracy()

# per-batch accumulators
losses, labels, outputs = [], [], []
predictions, accuracies = [], []

for batch_output in predict_output:
    batch_logits = batch_output["outputs"]
    batch_labels = batch_output["labels"]
    outputs.append(batch_logits)
    labels.append(batch_labels)

    # test loss (cross entropy) of this batch
    losses.append(batch_output["test_loss"])

    # predicted class = index of the largest output
    preds = batch_logits.argmax(dim=1, keepdim=True)
    predictions.append(preds)

    # accuracy of this batch
    accs = accuracy(preds.view_as(batch_labels), batch_labels)
    accuracies.append(accs)

# plain (unweighted) averages over the batches
final_test_loss = np.mean(losses)
final_test_acc  = np.mean(accuracies)
print("FINAL TEST LOSS VALUE: {}".format(final_test_loss))
print("FINAL TEST ACCURACY  : {}".format(final_test_acc ))

In [None]:
from utilities.plot_tools import plot_confusion_matrix

label_names = datamodule.get_label_names()

# plot confusion matrix
# NOTE: `predictions` is rebound from a list of batch tensors to one tensor,
# so this cell cannot be re-run without re-running the previous one first.
true_labels = torch.cat(labels)
predictions = torch.cat(predictions)
plot_confusion_matrix(predictions, 
                      true_labels, 
                      label_names, 
                      figsize   = (8,8), 
                      save_path = settings.transfer_learning.ROOT_DIR + "/confusion_mat.pdf",
                     )

### Convolutional kernels visualization

Here we visualize the trained kernels of the encoder in order to compare them with the ones of the supervised classifier in homework 1.

In [None]:
# display the encoder sub-module of the loaded autoencoder
autoencoder_model.encoder

In [None]:
from utilities.plot_tools import plot_img_grid

# retrieve convolutional layers
ConvLayers = [module for module in autoencoder_model.encoder.modules() if isinstance(module, nn.Conv2d)]
print(f"Model has {len(ConvLayers)} convolutional layers")

# plot filters: one figure per convolutional layer
for it, layer in enumerate(ConvLayers):
    # retrieve kernels from layer (detached CPU copy, safe to modify)
    kernels = layer.weight.detach().cpu().clone()
    
    title = f"Filters of Convolutional layer #{it+1}"
    
    if kernels.size(1) != 1:  # if there is more than 1 input channel, select random channel_id
        channel_id = np.random.randint(layer.in_channels)
        # keep a singleton channel dimension so each kernel stays a 1-channel image
        kernels = kernels[:, channel_id].unsqueeze(dim=1)
        title = title + f" - channel #{channel_id}"
        
    # normalize to range [0,1] for better visualization
    # NOTE(review): divides by zero if all selected weights are equal
    kmin = torch.min(kernels).item()
    kmax = torch.max(kernels).item()
    kernels = (kernels - kmin)/(kmax - kmin)
    
    # plot filters
    # NOTE(review): integer division assumes the number of kernels is a
    # multiple of 16; fewer than 16 kernels gives rows == 0 - confirm
    cols = 16
    rows = kernels.size(0) // cols 
    
    # figure height grows with the number of rows
    figsize = (12, 12*rows/(cols-3)+0.4)
    
    plot_img_grid(grid_shape = (rows,cols), 
                  images     = kernels, 
                  titles     = None, 
                  folder_path= settings.transfer_learning.ROOT_DIR, 
                  filename   = f"conv_filters_layer_{it+1}.pdf", 
                  to_show    = True, 
                  figsize    = figsize, 
                  suptitle   = title, 
                  cmap       = "Greys", 
                  axis_off   = True,
                 )

---

<a name="denoising_autoencoder"></a>
# Denoising Autoencoder

[Table of contents](#toc) 

<a name="trainingDAE"></a> 
## Model training

In [None]:
# set random state (seeds python, numpy and torch generators) so that the
# denoising autoencoder training is reproducible
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from data_management.fashion_mnist import FashionMNISTDataModule

# datamodule used to train the denoising autoencoder
datamodule = FashionMNISTDataModule(data_dir   = settings.DATASETS_DIR, 
                                    batch_size = 256,
                                    Nsamples   = None,
                                    valid_frac = 8800./60000.,   # presumably 8800 of the 60000 training images go to validation - confirm
                                    random_state = settings.MAGIC_NUM,
                                   )

In [None]:
import json

# read best hyper-parameters of the autoencoder model from file (we will use the already optimized 
#   hyper-parameters as a starting point for the denoising autoencoder)
with open(settings.autoencoder.BEST_HYPERS_FILE, "r") as file:
    best_hypers = json.load(file)

# Optional modifications to hyper-parameters (currently disabled)
#best_hypers["params"]["Pdropout"] = 0.
#best_hypers["params"]["instance_norm"] = False

In [None]:
# display the hyper-parameters that will be used
best_hypers

In [None]:
from utilities.train_tools import LossesTracker
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from utilities.encoder_tools import ImageReconstruction

### callbacks
# track the losses (train and valid) during training
losses_tracker = LossesTracker()

# early stopping: stop when "val_loss" has not improved by at least
# min_delta for `patience` consecutive checks
early_stop = EarlyStopping(monitor   = "val_loss", 
                           min_delta = 0.0001, 
                           patience  = 10, 
                           verbose   = False, 
                           check_on_train_epoch_end=False, # False: check after validation, not at the end of the training epoch
                          )

# checkpoint the model if the monitored value improved
checkpoint = ModelCheckpoint(dirpath  = settings.CHECKPOINT_DIR, 
                             filename = "checkpt_denoising_{epoch}_{val_loss:.2f}", 
                             monitor  = "val_loss",
                            )

# reconstruction of a test sample at every epoch
sample_id = 34    # bag
dataset = torchvision.datasets.FashionMNIST(settings.DATASETS_DIR, 
                                            train    = False, 
                                            download = True,
                                           )
# PIL image -> tensor, with a leading batch dimension of size 1
sample = transforms.functional.to_tensor(dataset[sample_id][0]).unsqueeze(dim=0)
if settings.USE_GPU:
    sample = sample.to("cuda")

rec_callback = ImageReconstruction(sample, 
                                   to_show   = False, 
                                   save_path = settings.denoisingAE.RECONSTRUCTIONS_DIR,
                                  )

In [None]:
### define transformation to corrupt the input images
from data_management.data_tools import AddGaussianNoise

corruption_p = 0.5   # probability of a transform to be applied

# sequence of transformations; each one is applied independently with
# probability corruption_p, so an image may receive several or none of them
corruption = transforms.Compose([transforms.RandomHorizontalFlip( p=corruption_p ),
                                 transforms.RandomVerticalFlip( p=corruption_p ),
                                 AddGaussianNoise( p=corruption_p, mean=0, std=0.4 ),     # custom
                                 transforms.RandomErasing( p=corruption_p ),
                                ])

In [None]:
from autoencoder.denoising_autoencoder import DenoisingAutoencoder

### define model and hyper-parameters
# architecture and optimizer settings come from the hyper-parameters read from
# file; `corruption` is the transform pipeline defined in the previous cell
model = DenoisingAutoencoder(input_size    = datamodule.get_sample_size(),
                             params        = best_hypers["params"],
                             optimizer     = best_hypers["optimizer"],
                             learning_rate = best_hypers["learning_rate"],
                             L2_penalty    = best_hypers["L2_penalty"],
                             momentum      = best_hypers["momentum"],
                             corruption    = corruption,
                            )
### define trainer
trainer = pl.Trainer(logger     = False,
                     max_epochs = 200,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [losses_tracker, early_stop, checkpoint, rec_callback],
                     val_check_interval   = 1.,    # validate once per training epoch
                     enable_model_summary = False,
                     num_sanity_val_steps = 0,     # disable validation sanity check before training
                     auto_lr_find = True,          # enables the learning rate finder run by trainer.tune
                    )

In [None]:
# results folder must exist before the figure is written
os.makedirs(settings.denoisingAE.ROOT_DIR, exist_ok=True)

# Run learning rate finder
lr_finder = trainer.tune(model, datamodule=datamodule)["lr_find"]

# plot the loss measured at each tested learning rate (log-scaled x axis)
fig = px.scatter(lr_finder.results, 
                 x="lr", y="loss",
                 labels = {"lr"   :"Learning Rate",
                           "loss" :"Loss",
                          },
                 title = "Learning Rate finder",
                 log_x = True,
                )
# mark the suggested point with a star, when the finder produced one
# NOTE(review): `_optimal_idx` is a private attribute of the finder object
if lr_finder._optimal_idx is not None:
    index = lr_finder._optimal_idx
    fig.add_trace(go.Scatter(x=[lr_finder.results["lr"][index]], 
                             y=[lr_finder.results["loss"][index]], 
                             mode = 'markers',
                             marker_symbol = 'star',
                             marker_size = 15,
                             showlegend = False,
                             name = "Suggestion",
                 )          )
fig.show()
fig.write_image(settings.denoisingAE.ROOT_DIR + "/lr_finder.pdf")  

# presumably `model.lr` was updated by trainer.tune - confirm in the model class
print("Suggested learning rate: ", model.lr)

In [None]:
# timestamp layout shared by the start/end messages
stamp_format = "%d/%m/%Y %H:%M:%S"

print( "Training started at:", datetime.datetime.now().strftime(stamp_format) )

# time the whole fit
fit_begin = time.time()
trainer.fit(model, datamodule=datamodule)
fit_time = time.time() - fit_begin

print( "Training ended at:", datetime.datetime.now().strftime(stamp_format) )
elapsed = datetime.timedelta(seconds=fit_time)
print("Fit time:", str(elapsed) )

In [None]:
from utilities.plot_tools import plot_history

save_path = settings.denoisingAE.ROOT_DIR + "/train_history.pdf"

# train/validation losses recorded by the LossesTracker callback, log-scaled y axis
plot_history(losses_tracker.train, losses_tracker.valid, ylog=True, save_path=save_path)

In [None]:
import shutil

# copy best model checkpoint into the results directory
# (`checkpoint` is the ModelCheckpoint callback passed to the trainer)
shutil.copy(checkpoint.best_model_path, settings.denoisingAE.BEST_MODEL_CKPT_FILE)

<a name="testingDAE"></a> 
## Model testing and analysis

In [None]:
# set random state again so the testing section is reproducible on its own
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from data_management.fashion_mnist import FashionMNISTDataModule
from autoencoder.denoising_autoencoder import DenoisingAutoencoder

### datamodule built with the same options used for training
datamodule_options = dict(
    data_dir     = settings.DATASETS_DIR,
    batch_size   = 256,
    Nsamples     = None,
    valid_frac   = 8800./60000.,
    random_state = settings.MAGIC_NUM,
)
datamodule = FashionMNISTDataModule(**datamodule_options)

### restore the best denoising autoencoder and freeze it
denoising_ckpt = settings.denoisingAE.BEST_MODEL_CKPT_FILE
model = DenoisingAutoencoder.load_from_checkpoint(denoising_ckpt)
model.freeze()

In [None]:
from utilities.encoder_tools import EncodedRepresentation

# callback to save the encoded representations of the test samples
# (its `encoded_samples` and `labels` attributes are read in the cells below)
encoded_test_repr = EncodedRepresentation()

trainer = pl.Trainer(logger     = False,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [encoded_test_repr],
                    )

# trainer.test returns a list with one metrics dict per test dataloader
result = trainer.test(model, datamodule=datamodule, verbose=False)
test_loss = result[0]["test_loss"]
print("TEST LOSS: ", test_loss)

### Latent Space exploration

In [None]:
from utilities.encoder_tools import LatentSpaceAnalyzer

# analyzer built on the encoded test samples and their labels;
# figures are presumably written under `save_path` - confirm in LatentSpaceAnalyzer
Latent_analyzer = LatentSpaceAnalyzer(encoded_test_repr.encoded_samples,
                                      encoded_test_repr.labels,
                                      label_names = datamodule.get_label_names(),
                                      save_path = settings.denoisingAE.ROOT_DIR,
                                     )

In [None]:
# 2-component PCA of the latent codes; the fitted `pca` and `PCA_reduced_samples`
# attributes of the analyzer are used by later cells
Latent_analyzer.PCA_reduce(n_components=2, filename="PCA_reduced_space.pdf")

In [None]:
# 2-component t-SNE of the latent codes (perplexity 80)
Latent_analyzer.TSNE_reduce(n_components=2, perplexity=80, filename="TSNE_reduced_space.pdf")

In [None]:
# 2-component Isomap of the latent codes
Latent_analyzer.Isomap_reduce(n_components=2, filename="Isomap_reduced_space.pdf")

### Image generation from latent codes

In [None]:
# overall value range spanned by the encoded test samples (all latent dimensions)
test_encodings_df = pd.DataFrame(encoded_test_repr.encoded_samples)
test_enc_min = min(test_encodings_df.min())
test_enc_max = max(test_encodings_df.max())

print(f"Range of test dataset encoded values: [{test_enc_min}, {test_enc_max}]")

# latent dimension
latent_dim = model.enc_hp["latent_space_dim"]

### decode a few latent codes drawn uniformly inside that range
examples  = 20
images    = []
encodings = []
model.eval()
for ii in range(examples):
    # one uniform draw per latent dimension
    latent_code = np.random.uniform(test_enc_min, test_enc_max, latent_dim)
    encodings.append(latent_code)
    # float tensor with a leading batch dimension of size 1
    encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    images.append(generated)

encodings_df = pd.DataFrame(encodings)
encodings_df

In [None]:
from utilities.plot_tools import plot_img_grid

# 4x5 grid for the 20 images generated in the previous cell
_ = plot_img_grid((4,5), images, to_show=True, axis_off=False, figsize=(10,8), 
                  folder_path = settings.denoisingAE.ROOT_DIR,
                  filename    = "generated_images.pdf",
                 )

### Visualization of learned manifold reduced with PCA

In [None]:
# per-component extremes of the PCA-reduced test encodings
PCA_reduced_df = pd.DataFrame(Latent_analyzer.PCA_reduced_samples)
PCA_enc_max = PCA_reduced_df.max()
PCA_enc_min = PCA_reduced_df.min()

# uniform sampling grid covering 60% of each component's range
samples_h = 20
samples_v = 20
x_latent_code = np.linspace(PCA_enc_min[0]*0.6, PCA_enc_max[0]*0.6, samples_h)
y_latent_code = np.linspace(PCA_enc_min[1]*0.6, PCA_enc_max[1]*0.6, samples_v)

### decode every grid point, row by row, starting from the largest y value
images    = []
encodings = []
model.eval()
for y_code in y_latent_code[::-1]:
    for x_code in x_latent_code:
        # 2-dim PCA reduced code
        PCA_code = np.array([x_code, y_code])
        # map it back to the full latent space
        latent_code = Latent_analyzer.pca.inverse_transform(PCA_code)
        encodings.append(latent_code)
        # float tensor with a leading batch dimension of size 1
        encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
        with torch.no_grad():
            generated = model.decoder(encoded_sample)
        images.append(generated)



In [None]:
# The images were generated row by row: samples_v rows of samples_h images each.
# The grid shape is (rows, cols), so it must be (samples_v, samples_h); the two
# only coincide while both values are equal.
# NOTE(review): assumes plot_img_grid fills the grid in row-major order - confirm
_ = plot_img_grid((samples_v,samples_h), images, to_show=True, axis_off=True, figsize=(12,12), 
                  folder_path = settings.denoisingAE.ROOT_DIR,
                  filename    = "manifold_images_PCA.pdf",
                 )

### Latent space path along centroids 

In [None]:
### Compute centroids of clusters for each label and generate images
from IPython.display import display
from utilities.plot_tools import plot_img_grid

class_names = datamodule.get_label_names()

# convert the encoded samples once, instead of once per label
encoded_array = np.array(encoded_test_repr.encoded_samples)

# generate some examples of images from centroids of latent space clusters
examples = len(class_names)
centroids_imgs = []
centroids      = []
model.eval()
for ii in range(examples):
    # compute centroid for the i-th label
    mask = [(ll == ii) for ll in encoded_test_repr.labels]
    filtered_samples = encoded_array[mask]
    latent_code = np.mean(filtered_samples, axis=0)
    # append encoding
    centroids.append(latent_code)
    # generate image (float tensor with a leading batch dimension of size 1)
    encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    # append image
    centroids_imgs.append(generated)

# a bare expression is only rendered when it is the last statement of a cell,
# so the table has to be displayed explicitly here
centroids_df = pd.DataFrame(centroids)
display(centroids_df)

### Plot the images generated from the centroids
_ = plot_img_grid((2,5), centroids_imgs, to_show=True, axis_off=False, figsize=(10,4.5), 
                  folder_path = settings.denoisingAE.ROOT_DIR,
                  titles      = class_names,
                  filename    = "centroids_images.pdf",
                 )

In [None]:
# gif for path along centroids
order = [7,5,9,8,6,2,4,0,3,1]   # label ids, in the order they are visited
steps = 15                      # interpolation points between two centroids (endpoints included)
pause = 1                       # extra repetitions of each centroid (like a pause)

# sample latent codes on path
latent_codes = [centroids[order[0]]]*pause
# walk over consecutive pairs of `order`, whatever its length
for start, stop in zip(order, order[1:]):
    # straight path between two centroids; each row is one latent code
    path = np.linspace(centroids[start], centroids[stop], steps)
    latent_codes.extend(path)

    # repeat the centroid code (like a pause)
    latent_codes.extend([centroids[stop]]*pause)

# decode into images
path_images = []
model.eval()
for code in latent_codes:
    # float tensor with a leading batch dimension of size 1
    encoded_sample = torch.tensor(code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    # append image
    path_images.append(generated)

In [None]:
# onward and backward: append the reversed sequence so the animation loops smoothly
# NOTE: re-running this cell doubles the list again
path_images = path_images + path_images[::-1]

In [None]:
from PIL import Image

# resize images (28*4 = 112 pixels)
bigger_images = [transforms.functional.resize(img=img, size=28*4) for img in path_images]

# convert tensors to PIL (moved to CPU, singleton dimensions dropped)
imgs = [transforms.functional.to_pil_image(img.cpu().squeeze()) for img in bigger_images]

# duration is the number of milliseconds between frames; loop=0 repeats forever
save_path = settings.denoisingAE.ROOT_DIR + "/centroids_path.gif"
imgs[0].save(save_path, save_all=True, append_images=imgs[1:], duration=80, loop=0)

In [None]:
from IPython.display import HTML

# Build the <img> tag only when the GIF exists. Otherwise reset `gif`, so that a
# missing file neither raises a NameError nor shows a value left by another cell.
if os.path.isfile(save_path):
    gif = HTML(f'<img src="{save_path}">')
else:
    gif = None
    print(f"GIF not found: {save_path}")

gif

### Denoising capability test

In [None]:
### define transformation to corrupt the input images
### (same pipeline and parameters as the one defined for training)
from data_management.data_tools import AddGaussianNoise

corruption_p = 0.5   # probability of a transform to be applied

# sequence of transformations; each one is applied independently with
# probability corruption_p, so an image may receive several or none of them
corruption = transforms.Compose([transforms.RandomHorizontalFlip( p=corruption_p ),
                                 transforms.RandomVerticalFlip( p=corruption_p ),
                                 AddGaussianNoise( p=corruption_p, mean=0, std=0.4 ),     # custom
                                 transforms.RandomErasing( p=corruption_p ),
                                ])

In [None]:
dataset = torchvision.datasets.FashionMNIST(settings.DATASETS_DIR, 
                                            train    = False, 
                                            download = True,
                                           )
# select samples at random (indices drawn over the actual dataset size)
Nsamples   = 8
sample_ids = np.random.randint(0, len(dataset), Nsamples)

images = []
titles = []
model.eval()
for idx in sample_ids:
    # PIL image -> tensor, with a leading batch dimension of size 1
    sample = transforms.functional.to_tensor(dataset[idx][0]).unsqueeze(dim=0)
    # Keep the input on the device where the model actually lives: after
    # trainer.test the module may be back on the CPU even when USE_GPU is set,
    # and a hard-coded "cuda" would then cause a device mismatch.
    sample = sample.to(model.device)
    # apply corruption
    noisy = corruption(sample)
    # reconstruct image
    with torch.no_grad():
        generated = model(noisy)
    # store results: original, corrupted and reconstructed image of each sample
    images += [sample, noisy, generated]
    titles += [f"original (id: {idx})", "corrupted", "reconstr."]



In [None]:
from utilities.plot_tools import plot_img_grid

# 4x6 grid = 24 images: (original, corrupted, reconstructed) for each of the
# 8 samples selected in the previous cell
_ = plot_img_grid((4,6), images, to_show=True, axis_off=True, figsize=(10,8), 
                  titles      = titles,
                  folder_path = settings.denoisingAE.ROOT_DIR,
                  filename    = "denoising_images.pdf",
                 )

### Visualize convolutional layers kernels

In [None]:
# retrieve convolutional layers
ConvLayers = [module for module in model.encoder.modules() if isinstance(module, nn.Conv2d)]
print(f"Model has {len(ConvLayers)} convolutional layers")

# plot filters: one figure per convolutional layer
for it, layer in enumerate(ConvLayers):
    # retrieve kernels from layer (detached CPU copy, safe to modify)
    kernels = layer.weight.detach().cpu().clone()
    
    title = f"Filters of Convolutional layer #{it+1}"
    
    if kernels.size(1) != 1:  # if there is more than 1 input channel, select random channel_id
        channel_id = np.random.randint(layer.in_channels)
        # keep a singleton channel dimension so each kernel stays a 1-channel image
        kernels = kernels[:, channel_id].unsqueeze(dim=1)
        title = title + f" - channel #{channel_id}"
        
    # normalize to range [0,1] for better visualization
    # NOTE(review): divides by zero if all selected weights are equal
    kmin = torch.min(kernels).item()
    kmax = torch.max(kernels).item()
    kernels = (kernels - kmin)/(kmax - kmin)
    
    # plot filters
    # NOTE(review): integer division assumes the number of kernels is a
    # multiple of 16; fewer than 16 kernels gives rows == 0 - confirm
    cols = 16
    rows = kernels.size(0) // cols 
    
    # figure height grows with the number of rows
    figsize = (12, 12*rows/(cols-3)+0.4)
    
    plot_img_grid(grid_shape = (rows,cols), 
                  images     = kernels, 
                  titles     = None, 
                  folder_path= settings.denoisingAE.ROOT_DIR, 
                  filename   = f"conv_filters_layer_{it+1}.pdf", 
                  to_show    = True, 
                  figsize    = figsize, 
                  suptitle   = title, 
                  cmap       = "Greys", 
                  axis_off   = True,
                 )

---

<a name="VAE"></a>
# Variational Autoencoder ($\beta$-VAE)
     
[Table of contents](#toc) 

With this new model architecture we aim at a more disentangled representation of the dataset.

<a name="trainingVAE"></a> 
## Model training

In [None]:
# set random state (seeds python, numpy and torch generators) so that the
# variational autoencoder training is reproducible
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
import json

# read best hyper-parameters from file (the ones optimized for the plain
# autoencoder are reused for the variational model)
with open(settings.autoencoder.BEST_HYPERS_FILE, "r") as file:
    best_hypers = json.load(file)

In [None]:
# adding the beta parameter of the beta-VAE
# (presumably the weight of the KL term in the loss - confirm in VariationalAutoencoder)
best_hypers["params"]["beta"] = 2.   

In [None]:
# display the hyper-parameters that will be used (beta included)
best_hypers

In [None]:
from data_management.fashion_mnist import FashionMNISTDataModule

### datamodule used to train the variational autoencoder
datamodule_options = dict(
    data_dir     = settings.DATASETS_DIR,
    batch_size   = 256,
    Nsamples     = None,
    valid_frac   = 8800./60000.,
    random_state = settings.MAGIC_NUM,
)
datamodule = FashionMNISTDataModule(**datamodule_options)

In the cell below, a summary of the model to be trained is shown.

In [None]:
from autoencoder.variational_autoencoder import VariationalAutoencoder

# throw-away model instance, built only to print its summary
shape = datamodule.get_sample_size()
net   = VariationalAutoencoder(
    shape,
    params        = best_hypers["params"],
    optimizer     = best_hypers["optimizer"],
    learning_rate = best_hypers["learning_rate"],
    L2_penalty    = best_hypers["L2_penalty"],
    momentum      = best_hypers["momentum"],
)

# input size of a dummy batch: 256 samples followed by the sample shape
dummy_batch = [256, *shape]

# print summary
summary_columns = ("output_size","num_params","mult_adds",)  #"input_size",
torchinfo.summary(
    net,
    dummy_batch,
    col_width    = 20,
    col_names    = summary_columns,
    depth        = 4,
    row_settings = ("var_names",),
)

In the following cell we define some callbacks that will be useful during and after training.

In [None]:
from utilities.train_tools import LossesTracker
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from utilities.encoder_tools import ImageReconstruction

### callbacks
# track the losses (train and valid) during training
losses_tracker = LossesTracker()

# early stopping: stop when "val_loss" has not improved by at least
# min_delta for `patience` consecutive checks
early_stop = EarlyStopping(monitor   = "val_loss", 
                           min_delta = 0.0001, 
                           patience  = 10, 
                           verbose   = False, 
                           check_on_train_epoch_end=False, # False: check after validation, not at the end of the training epoch
                          )

# checkpoint the model if the monitored value improved
checkpoint = ModelCheckpoint(dirpath  = settings.CHECKPOINT_DIR, 
                             filename = "checkpt_variational_{epoch}_{val_loss:.2f}", 
                             monitor  = "val_loss",
                            )

# reconstruction of a test sample at every epoch
sample_id = 34    # bag
dataset = torchvision.datasets.FashionMNIST(settings.DATASETS_DIR, 
                                            train    = False, 
                                            download = True,
                                           )
# PIL image -> tensor, with a leading batch dimension of size 1
sample = transforms.functional.to_tensor(dataset[sample_id][0]).unsqueeze(dim=0)
if settings.USE_GPU:
    sample = sample.to("cuda")

rec_callback = ImageReconstruction(sample, 
                                   to_show   = False, 
                                   save_path = settings.variationalAE.RECONSTRUCTIONS_DIR,
                                  )

In [None]:
### define model and hyper-parameters
# a fresh instance is created here for training
model = VariationalAutoencoder(input_size    = datamodule.get_sample_size(),
                               params        = best_hypers["params"],
                               optimizer     = best_hypers["optimizer"],
                               learning_rate = best_hypers["learning_rate"],
                               L2_penalty    = best_hypers["L2_penalty"],
                               momentum      = best_hypers["momentum"],
                              )

### define trainer
trainer = pl.Trainer(logger     = False,
                     max_epochs = 200,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [losses_tracker, early_stop, checkpoint, rec_callback],
                     val_check_interval   = 1.,    # validate once per training epoch
                     enable_model_summary = False,
                     num_sanity_val_steps = 0,     # disable validation sanity check before training
                     auto_lr_find = False,         # no learning rate finder for this model
                    )

In [None]:
# timestamp layout shared by the start/end messages
stamp_format = "%d/%m/%Y %H:%M:%S"

print( "Training started at:", datetime.datetime.now().strftime(stamp_format) )

# time the whole fit
fit_begin = time.time()
trainer.fit(model, datamodule=datamodule)
fit_time = time.time() - fit_begin

print( "Training ended at:", datetime.datetime.now().strftime(stamp_format) )
elapsed = datetime.timedelta(seconds=fit_time)
print("Fit time:", str(elapsed) )

In [None]:
from utilities.plot_tools import plot_history

# The other sections create their results folder before writing into it; this
# one did not, so the figure could not be saved on a clean run.
os.makedirs(settings.variationalAE.ROOT_DIR, exist_ok=True)
save_path = settings.variationalAE.ROOT_DIR + "/train_history.pdf"

# train/validation losses recorded by the LossesTracker callback, log-scaled y axis
plot_history(losses_tracker.train, losses_tracker.valid, ylog=True, save_path=save_path)

In [None]:
import shutil

# copy best model checkpoint into the results directory
# (`checkpoint` is the ModelCheckpoint callback passed to the trainer)
shutil.copy(checkpoint.best_model_path, settings.variationalAE.BEST_MODEL_CKPT_FILE)

<a name="testingVAE"></a> 
## Model testing and analysis

In [None]:
# set random state again so the testing section is reproducible on its own
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from data_management.fashion_mnist import FashionMNISTDataModule
from autoencoder.variational_autoencoder import VariationalAutoencoder

### datamodule built with the same options used for training
datamodule_options = dict(
    data_dir     = settings.DATASETS_DIR,
    batch_size   = 256,
    Nsamples     = None,
    valid_frac   = 8800./60000.,
    random_state = settings.MAGIC_NUM,
)
datamodule = FashionMNISTDataModule(**datamodule_options)

### restore the best variational autoencoder and freeze it
variational_ckpt = settings.variationalAE.BEST_MODEL_CKPT_FILE
model = VariationalAutoencoder.load_from_checkpoint(variational_ckpt)
model.freeze()

In [None]:
from utilities.encoder_tools import EncodedRepresentation

# callback to save the encoded representations of the test samples
# (its `encoded_samples` and `labels` attributes are read in the cells below)
encoded_test_repr = EncodedRepresentation()

trainer = pl.Trainer(logger     = False,
                     gpus       = 1 if settings.USE_GPU else None,
                     callbacks  = [encoded_test_repr],
                    )

# trainer.test returns a list with one metrics dict per test dataloader
result = trainer.test(model, datamodule=datamodule, verbose=False)
test_loss = result[0]["test_loss"]
print("TEST LOSS: ", test_loss)

### Latent Space exploration

In [None]:
from utilities.encoder_tools import LatentSpaceAnalyzer

# analyzer built on the encoded test samples and their labels;
# figures are presumably written under `save_path` - confirm in LatentSpaceAnalyzer
Latent_analyzer = LatentSpaceAnalyzer(encoded_test_repr.encoded_samples,
                                      encoded_test_repr.labels,
                                      label_names = datamodule.get_label_names(),
                                      save_path = settings.variationalAE.ROOT_DIR,
                                     )

In [None]:
# 2-component PCA of the latent codes; the fitted `pca` and `PCA_reduced_samples`
# attributes of the analyzer are used by later cells
Latent_analyzer.PCA_reduce(n_components=2, filename="PCA_reduced_space.pdf")

In [None]:
# 2-component t-SNE of the latent codes (perplexity 50)
Latent_analyzer.TSNE_reduce(n_components=2, perplexity=50, filename="TSNE_reduced_space.pdf")

In [None]:
# 2-component Isomap of the latent codes
Latent_analyzer.Isomap_reduce(n_components=2, filename="Isomap_reduced_space.pdf")

### Image generation from latent codes

In [None]:
# overall value range spanned by the encoded test samples (all latent dimensions)
test_encodings_df = pd.DataFrame(encoded_test_repr.encoded_samples)
test_enc_min = min(test_encodings_df.min())
test_enc_max = max(test_encodings_df.max())

print(f"Range of test dataset encoded values: [{test_enc_min}, {test_enc_max}]")

# latent dimension
latent_dim = model.enc_hp["latent_space_dim"]

### decode a few latent codes drawn uniformly inside 60% of that range
examples  = 20
images    = []
encodings = []
model.eval()
for ii in range(examples):
    # one uniform draw per latent dimension
    latent_code = np.random.uniform(test_enc_min*0.6, test_enc_max*0.6, latent_dim)
    encodings.append(latent_code)
    # float tensor with a leading batch dimension of size 1
    encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    images.append(generated)

encodings_df = pd.DataFrame(encodings)
encodings_df

In [None]:
from utilities.plot_tools import plot_img_grid

# 4x5 grid for the 20 images generated in the previous cell
_ = plot_img_grid((4,5), images, to_show=True, axis_off=False, figsize=(10,8), 
                  folder_path = settings.variationalAE.ROOT_DIR,
                  filename    = "generated_images.pdf",
                 )

In [None]:
# overlay the randomly sampled latent codes on the PCA plot by applying the same
# transformation to them (shown only: no filename is given)
Latent_analyzer.PCA_overlap_points(encodings, to_show=True, filename=None)

### Visualization of learned manifold reduced with PCA

In [None]:
# per-component extremes of the PCA-reduced test encodings
PCA_reduced_df = pd.DataFrame(Latent_analyzer.PCA_reduced_samples)
PCA_enc_max = PCA_reduced_df.max()
PCA_enc_min = PCA_reduced_df.min()

# uniform sampling grid covering 60% of each component's range
samples_h = 20
samples_v = 20
x_latent_code = np.linspace(PCA_enc_min[0]*0.6, PCA_enc_max[0]*0.6, samples_h)
y_latent_code = np.linspace(PCA_enc_min[1]*0.6, PCA_enc_max[1]*0.6, samples_v)

### decode every grid point, row by row, starting from the largest y value
images    = []
encodings = []
model.eval()
for y_code in y_latent_code[::-1]:
    for x_code in x_latent_code:
        # 2-dim PCA reduced code
        PCA_code = np.array([x_code, y_code])
        # map it back to the full latent space
        latent_code = Latent_analyzer.pca.inverse_transform(PCA_code)
        encodings.append(latent_code)
        # float tensor with a leading batch dimension of size 1
        encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
        with torch.no_grad():
            generated = model.decoder(encoded_sample)
        images.append(generated)



In [None]:
# The images were generated row by row: samples_v rows of samples_h images each.
# The grid shape is (rows, cols), so it must be (samples_v, samples_h); the two
# only coincide while both values are equal.
# NOTE(review): assumes plot_img_grid fills the grid in row-major order - confirm
_ = plot_img_grid((samples_v,samples_h), images, to_show=True, axis_off=True, figsize=(12,12), 
                  folder_path = settings.variationalAE.ROOT_DIR,
                  filename    = "manifold_images_PCA.pdf",
                 )

### Latent space path along centroids 

In [None]:
### Compute centroids of clusters for each label and generate images
from IPython.display import display
from utilities.plot_tools import plot_img_grid

class_names = datamodule.get_label_names()

# convert the encoded samples once, instead of once per label
encoded_array = np.array(encoded_test_repr.encoded_samples)

# generate some examples of images from centroids of latent space clusters
examples = len(class_names)
centroids_imgs = []
centroids      = []
model.eval()
for ii in range(examples):
    # compute centroid for the i-th label
    mask = [(ll == ii) for ll in encoded_test_repr.labels]
    filtered_samples = encoded_array[mask]
    latent_code = np.mean(filtered_samples, axis=0)
    # append encoding
    centroids.append(latent_code)
    # generate image (float tensor with a leading batch dimension of size 1)
    encoded_sample = torch.tensor(latent_code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    # append image
    centroids_imgs.append(generated)

# a bare expression is only rendered when it is the last statement of a cell,
# so the table has to be displayed explicitly here
centroids_df = pd.DataFrame(centroids)
display(centroids_df)

### Plot the images generated from the centroids
_ = plot_img_grid((2,5), centroids_imgs, to_show=True, axis_off=False, figsize=(10,4.5), 
                  folder_path = settings.variationalAE.ROOT_DIR,
                  titles      = class_names,
                  filename    = "centroids_images.pdf",
                 )

In [None]:
# gif for path along centroids
order = [7,5,9,8,6,2,4,0,3,1]   # label ids, in the order they are visited
steps = 15                      # interpolation points between two centroids (endpoints included)
pause = 1                       # extra repetitions of each centroid (like a pause)

# sample latent codes on path
latent_codes = [centroids[order[0]]]*pause
# walk over consecutive pairs of `order`, whatever its length
for start, stop in zip(order, order[1:]):
    # straight path between two centroids; each row is one latent code
    path = np.linspace(centroids[start], centroids[stop], steps)
    latent_codes.extend(path)

    # repeat the centroid code (like a pause)
    latent_codes.extend([centroids[stop]]*pause)

# decode into images
path_images = []
model.eval()
for code in latent_codes:
    # float tensor with a leading batch dimension of size 1
    encoded_sample = torch.tensor(code).float().unsqueeze(dim=0)
    with torch.no_grad():
        generated = model.decoder(encoded_sample)
    # append image
    path_images.append(generated)

In [None]:
# onward and backward: append the reversed sequence so the animation loops smoothly
# NOTE: re-running this cell doubles the list again
path_images = path_images + path_images[::-1]

In [None]:
from PIL import Image

# resize images (28*4 = 112 pixels)
bigger_images = [transforms.functional.resize(img=img, size=28*4) for img in path_images]

# convert tensors to PIL (moved to CPU, singleton dimensions dropped)
imgs = [transforms.functional.to_pil_image(img.cpu().squeeze()) for img in bigger_images]

# duration is the number of milliseconds between frames; loop=0 repeats forever
save_path = settings.variationalAE.ROOT_DIR + "/centroids_path.gif"
imgs[0].save(save_path, save_all=True, append_images=imgs[1:], duration=80, loop=0)

In [None]:
from IPython.display import HTML

# Build the <img> tag only when the GIF exists. Otherwise reset `gif`, so that a
# missing file neither raises a NameError nor shows the GIF left by a previous
# section of the notebook.
if os.path.isfile(save_path):
    gif = HTML(f'<img src="{save_path}">')
else:
    gif = None
    print(f"GIF not found: {save_path}")

gif