# NEURAL NETWORKS AND DEEP LEARNING

---
A.A. 2021/22 (6 CFU) - Dr. Alberto Testolin, Dr. Umberto Michieli
---


# Homework 2 - Unsupervised Deep Learning

### Author: Michele Guadagnini - Mt.1230663

In [1]:
### ADDITIONAL LIBRARIES THAT NEED INSTALLATION (uncomment if needed)

#!pip install optuna
#!pip install pytorch-lightning

### the following packages are required to plot and save figures about the optuna study
#!pip install plotly
#!pip install kaleido

### the following one is required to print a model summary
#!pip install torchinfo

In [2]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.data import random_split
import torchvision
from torchvision import transforms

# python imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import copy
import logging
import datetime
import json

# additional libraries
import plotly.express as px
import torchinfo
import optuna
import pytorch_lightning as pl

# Quieten the third-party loggers: optuna reports errors only,
# pytorch_lightning reports warnings and above.
for _logger_name, _min_level in (
    ("optuna", logging.ERROR),
    ("pytorch_lightning", logging.WARNING),
):
    logging.getLogger(_logger_name).setLevel(_min_level)

In [3]:
# to have reproducible results: 
from pytorch_lightning.utilities.seed import seed_everything

### 'seed_everything' internally calls the following:
#    random.seed(seed)
#    np.random.seed(seed)
#    torch.manual_seed(seed)
#    torch.cuda.manual_seed_all(seed)

In [4]:
# load user settings from file 'settings.py'
import settings

GPU not available
Available CPU cores: 2


# Table of contents:  <a name="toc"></a>

1. [**Autoencoder**](#autoencoder)
    1. Hyper-parameters optimization with Optuna
    1. Study results analysis
    1. Model training
    1. Model testing and analysis
        * Latent Space exploration
        * Image generation from latent codes
        * Convolutional filters visualization **TODO**
        
1. [**Transfer Learning**](#transfer_learning)
    1. fine tune the denoising encoder 
    1. compare results with homework 1

1. [**Denoising autoencoder**](#denoising_autoencoder)
    1. hyper-parameters tuning (with Optuna here too?)
    1. training (with denoising example per epoch)
    1. test the denoising capability

1. [**Variational Autoencoder ($\beta$-VAE)**](#VAE)
    1. tune and train the model
    1. explore latent space
    1. generate samples

# Autoencoder <a name="autoencoder"></a> 
<div style="text-align: right">

[Table of contents](#toc)

</div>

In [5]:
# set random state: seed the generators listed in the 'seed_everything' notes above
# (random, numpy, torch, torch.cuda) with the value stored in the settings module,
# so that the results of the notebook are reproducible
seed_everything(seed=settings.MAGIC_NUM)

23

## Hyper-parameters optimization with Optuna

In [6]:
from data_management.fashion_mnist import FashionMNISTDataModule

# Data module used during the hyper-parameter search.
# NOTE(review): `Nsamples` presumably restricts the search to a subset of the
# dataset, a quarter of which is held out for validation — confirm in FashionMNISTDataModule.
datamodule = FashionMNISTDataModule(
    data_dir=settings.DATASETS_DIR,
    batch_size=256,
    Nsamples=16384,
    valid_frac=0.25,
    random_state=settings.MAGIC_NUM,
)

In [7]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder, SymmetricAutoencoderHPS

# Candidate convolutional stacks (five layers each).
# Every layer is written as [kernel size, stride, padding].
# The trailing comment of each candidate follows the side length of a 28-pixel input
# through the five layers; the values agree with floor((n + 2*padding - kernel) / stride) + 1.
proposed_conv = [
    [[3, 2, 0], [3, 1, 1], [3, 2, 0], [3, 1, 1], [3, 1, 0]],  # 28 -> 13 -> 13 -> 6 -> 6 -> 4
    [[5, 2, 1], [3, 1, 1], [3, 2, 0], [3, 1, 1], [3, 1, 0]],  # 28 -> 13 -> 13 -> 6 -> 6 -> 4
    [[3, 2, 1], [3, 2, 2], [3, 2, 1], [3, 1, 1], [3, 2, 1]],  # 28 -> 14 ->  8 -> 4 -> 4 -> 2
    [[5, 2, 1], [3, 1, 1], [5, 2, 1], [3, 1, 1], [5, 1, 1]],  # 28 -> 13 -> 13 -> 6 -> 6 -> 4
    [[7, 2, 2], [3, 2, 2], [3, 1, 1], [3, 2, 1], [3, 1, 1]],  # 28 -> 13 ->  8 -> 8 -> 4 -> 4
    [[5, 2, 1], [5, 1, 2], [5, 2, 2], [5, 1, 2], [5, 1, 1]],  # 28 -> 13 -> 13 -> 7 -> 7 -> 5
    [[3, 2, 1], [5, 1, 2], [5, 2, 1], [3, 1, 1], [5, 1, 1]],  # 28 -> 14 -> 14 -> 6 -> 6 -> 4
]

# Candidate channel widths: one value per convolutional layer (five per candidate).
proposed_channels = [
    [16, 32, 32, 64, 64],
    [16, 32, 64, 64, 64],
    [16, 32, 64, 64, 32],
    [16, 32, 64, 128, 64],
    [16, 32, 32, 32, 64],
    [32, 32, 64, 64, 128],
    [32, 32, 32, 64, 64],
]

# Hyper-parameter search space.
# Design notes:
###   1. batch norm is left out so that the encoding of an image does not depend on
###      the other images of the batch; instance norm is offered instead.
###   2. dropout is applied only after a linear layer.
hps_dict = {
    "conv_configs": proposed_conv,
    "channels_configs": proposed_channels,
    "n_linear": [1],                              # number of linear layers
    "linear_units_range": [64, 256, 16],          # min, max, step
    "latent_space_range": [30, 80, 2],            # min, max, step
    "instance_norm": [True, False],               # with / without instance norm
    "Pdropout_range": [0.0, 0.3],                 # dropout
    "activations": ["relu"],
    "optimizers": ["adam", "sgd", "adamax"],
    "learning_rate_range": [5e-5, 1e-1],
    "L2_penalty_range": [1e-6, 1e-4],
    "momentum_range": [0.8, 0.99],                # used with SGD optimizer or RMSprop
}

# wrap the dictionary in the search-space object consumed by the study objective
hp_space = SymmetricAutoencoderHPS(hps_dict)

In [8]:
from utilities.train_tools import Objective

### callable optimized by the optuna study
objective = Objective(
    model_class=SymmetricAutoencoder,
    datamodule=datamodule,
    hp_space=hp_space,
    max_epochs=30,
    early_stop_patience=5,
    use_gpu=settings.USE_GPU,
)

### median pruner for unpromising trials
pruner = optuna.pruners.MedianPruner(
    n_startup_trials=10,  # trials to complete before pruning starts
    n_warmup_steps=20,    # steps a trial takes before it can be pruned
    interval_steps=10,    # steps between two pruning checks
)

### seeded TPE sampler, so that the sampling is deterministic
sampler = optuna.samplers.TPESampler(
    seed=settings.MAGIC_NUM,
    n_startup_trials=20,  # trials sampled at random at the beginning
)
### create the study (or reload the stored one), backed by an SQLite file
### inside the Optuna directory
os.makedirs(settings.autoencoder.OPTUNA_DIR, exist_ok=True)

study_name = settings.autoencoder.OPTUNA_STUDY_NAME
study = optuna.create_study(
    study_name=study_name,
    storage=f"sqlite:///{settings.autoencoder.OPTUNA_DIR}/{study_name}_16384.db",
    sampler=sampler,
    pruner=pruner,
    direction="minimize",
    load_if_exists=True,  # reuse a stored study with the same name
)


In [None]:
### run optimization
Ntrials = 5
MaxTime = None    # timeout in seconds (None: no time limit)

print(f"Starting study '{study.study_name}' with n_trials={Ntrials} and timeout={MaxTime}")

# Redirect warnings to the logging system while the trials run.
# The redirection is undone in `finally`, so it is restored also when the
# optimization raises or the cell is interrupted.
logging.captureWarnings(True)
try:
    study.optimize(objective,
                   n_trials       = Ntrials,
                   timeout        = MaxTime,
                   gc_after_trial = True,    # run garbage collection
                  )
finally:
    logging.captureWarnings(False)

Starting study 'HP_search_autoencoder' with n_trials=5 and timeout=None
Trial [0] started at: 05/04/2022 23:57:55


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

## Study results analysis

In [None]:
# reload the stored study; the study name is also part of the storage file name
study_name = settings.autoencoder.OPTUNA_STUDY_NAME
# `study_name` is passed by keyword: recent Optuna releases no longer accept
# positional arguments in `load_study`
study = optuna.load_study(study_name = study_name,
                          storage    = f"sqlite:///{settings.autoencoder.OPTUNA_DIR}/{study_name}_16384.db",
                         )

In [None]:
# table of the K best trials: lowest objective value first
K = 10

study_df = (
    study.trials_dataframe()
    .drop(columns=["user_attrs_hypers", "datetime_complete"])
    .sort_values(by="value")
)

study_df.head(K)

In [None]:
from utilities.train_tools import OptimizationInspector

# Inspector object used by the following cells to plot and summarize the study.
# NOTE(review): the second argument is presumably the directory where the figures
# are saved, and `figsize` the figure size in pixels — confirm in OptimizationInspector.
optuna_inspector = OptimizationInspector(study, settings.autoencoder.OPTUNA_DIR, figsize=(900,500))

In [None]:
# Parameter groups for the parallel plots.
# The first entry of each group is the name suffix, the others are parameter names
# ("linear_units_1" and "momentum" are left out of the respective groups).
parallel_sets = [
    ["architecture",
     "channels_config_id", "conv_config_id", "latent_space_dim", "linear_units_0"],
    ["optimization",
     "optimizer", "learning_rate", "L2_penalty", "instance_norm", "Pdropout"],
]

# Parameter pairs for the contour plots.
contour_sets = [
    ["learning_rate", "L2_penalty"],
    ["channels_config_id", "conv_config_id"],
    ["linear_units_0", "latent_space_dim"],
]

# Parameter groups for the slice plots.
slice_sets = [
    ["channels_config_id", "conv_config_id", "latent_space_dim", "linear_units_0", "optimizer"],
]

In [None]:
# Draw the study plots for the parameter sets defined in the previous cell,
# with saving enabled (`save = True`).
# NOTE(review): `show` looks like a string of 0/1 flags, one per plot, selecting
# which figures are displayed — confirm in OptimizationInspector.plot_all.
optuna_inspector.plot_all(parallel_sets = parallel_sets,
                          contour_sets  = contour_sets,
                          slice_sets    = slice_sets,
                          save = True,
                          show = "110001000",    # show options
                         )

In [None]:
# print a summary of the study, then save the best hyper-parameters to the JSON
# file that the "Model training" section reads back
optuna_inspector.print_summary()
optuna_inspector.save_best_hypers_json(settings.autoencoder.BEST_HYPERS_FILE)

## Model training

In [None]:
import json

# read best hyper-parameters from file
# (the JSON text is decoded explicitly as UTF-8 instead of relying on the
# platform default encoding)
with open(settings.autoencoder.BEST_HYPERS_FILE, "r", encoding="utf-8") as file:
    best_hypers = json.load(file)

In [None]:
from data_management.fashion_mnist import FashionMNISTDataModule

### data module for the final training: no sample limit (`Nsamples=None`),
### validation fraction of 8800/60000
datamodule = FashionMNISTDataModule(
    data_dir=settings.DATASETS_DIR,
    batch_size=256,
    Nsamples=None,
    valid_frac=8800 / 60000,
    random_state=settings.MAGIC_NUM,
)

The cell below shows a summary of the model to be trained.

In [None]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder

# build the network with the best hyper-parameters read from file
shape = datamodule.get_sample_size()
net = SymmetricAutoencoder(
    shape,
    params=best_hypers["params"],
    optimizer=best_hypers["optimizer"],
    learning_rate=best_hypers["learning_rate"],
    L2_penalty=best_hypers["L2_penalty"],
    momentum=best_hypers["momentum"],
)

# input size of a dummy batch: 256 samples, each of size `shape`
dummy_batch = [256, *shape]

# model summary (last expression of the cell, so it is displayed as its output)
torchinfo.summary(
    net,
    dummy_batch,
    col_names=("input_size", "output_size", "num_params", "mult_adds"),
    col_width=17,
    depth=4,
    row_settings=("var_names",),
)

In the following cell we define some callbacks that will be useful during and after training.

In [None]:
from utilities.train_tools import run_training, LossesTracker
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from utilities.encoder_tools import ImageReconstruction

### callbacks
# records the train and validation losses during training
losses_tracker = LossesTracker()

# early stopping on "val_loss": patience of 5, minimum improvement of 0.0001
early_stop = EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=5,
    verbose=False,
    check_on_train_epoch_end=True,  # run the check at the end of the training epoch
)

# model checkpointing driven by the monitored "val_loss"
checkpoint = ModelCheckpoint(
    dirpath=settings.CHECKPOINT_DIR,
    filename="checkpt_{epoch}_{val_loss:.2f}",
    monitor="val_loss",
)

# a fixed sample of the test split is reconstructed at every epoch
sample_id = 2
dataset = torchvision.datasets.FashionMNIST(
    settings.DATASETS_DIR,
    train=False,
    download=True,
)
sample_image = dataset[sample_id][0]
# convert the image to a tensor and add a leading dimension of size 1
sample = transforms.functional.to_tensor(sample_image).unsqueeze(dim=0)
rec_path = settings.autoencoder.RECONSTRUCTIONS_DIR

rec_callback = ImageReconstruction(sample, to_show=False, save_path=rec_path)

In [None]:
# measure running time with a monotonic clock: unlike time.time(), the
# difference of two perf_counter() readings is not affected by adjustments
# of the system clock
fit_begin = time.perf_counter()

model, trainer, callbacks = run_training(SymmetricAutoencoder,
                                         datamodule = datamodule,
                                         hypers     = best_hypers,
                                         callbacks  = [losses_tracker, early_stop, checkpoint, rec_callback],
                                         max_epochs = 50,
                                         use_gpu    = settings.USE_GPU,
                                        )

# elapsed seconds, printed as H:MM:SS
fit_time = time.perf_counter() - fit_begin
print(f"Fit time: {datetime.timedelta(seconds=fit_time)}")

In [None]:
from utilities.plot_tools import plot_history

# plot the train and validation losses stored by the LossesTracker callback
# NOTE(review): `ylog=False` presumably selects a linear y axis — confirm in plot_history
plot_history(losses_tracker.train, losses_tracker.valid, ylog=False)

In [None]:
import shutil

# `best_model_path` is an empty string when the callback has not saved any
# checkpoint: fail with an explicit message instead of an obscure copy error
if not checkpoint.best_model_path:
    raise FileNotFoundError("No checkpoint has been saved by the ModelCheckpoint callback: nothing to copy")

# make sure that the destination directory exists
best_ckpt_dir = os.path.dirname(settings.autoencoder.BEST_MODEL_CKPT_FILE)
if best_ckpt_dir:
    os.makedirs(best_ckpt_dir, exist_ok=True)

# copy best model checkpoint into the results directory
shutil.copy(checkpoint.best_model_path, settings.autoencoder.BEST_MODEL_CKPT_FILE)

## Model testing and analysis

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
### data module used for testing
datamodule = FashionMNISTDataModule(
    data_dir=settings.DATASETS_DIR,
    batch_size=256,
    Nsamples=None,
    valid_frac=0.125,
    random_state=settings.MAGIC_NUM,
)

### restore the trained model from the best checkpoint copied into the results directory
model = SymmetricAutoencoder.load_from_checkpoint(settings.autoencoder.BEST_MODEL_CKPT_FILE)

In [None]:
from utilities.encoder_tools import EncodedRepresentation

# callback that stores the encoded representations of the test samples
encoded_test_repr = EncodedRepresentation()

# trainer used only to run the test: no logger, one GPU when enabled in the settings
trainer = pl.Trainer(
    callbacks=[encoded_test_repr],
    gpus=1 if settings.USE_GPU else None,
    logger=False,
)

# the test loss is read from the first entry of the returned results
result = trainer.test(model, datamodule=datamodule, verbose=False)
test_loss = result[0]["test_loss"]
print("TEST LOSS: ", test_loss)

### Latent Space exploration

In [None]:
from utilities.encoder_tools import LatentSpaceAnalyzer

# test dataset encoded samples
# The label names are fetched once and then indexed with the label of every
# encoded sample, instead of calling the datamodule again for each sample.
all_label_names = datamodule.get_label_names()
label_names = [all_label_names[ii] for ii in encoded_test_repr.labels]

# analyzer of the latent space: receives the encodings, their labels and label names
Latent_analyzer = LatentSpaceAnalyzer(encoded_test_repr.encoded_samples,
                                      encoded_test_repr.labels,
                                      label_names,
                                      save_path = settings.autoencoder.ROOT_DIR,
                                     )

In [None]:
# reduce the encoded test samples to 2 components with PCA
# NOTE(review): `filename` is presumably the name of the figure saved under the
# analyzer's `save_path` — confirm in LatentSpaceAnalyzer.PCA_reduce
Latent_analyzer.PCA_reduce(n_components=2, filename="PCA_reduced_space.pdf")

In [None]:
# reduce the encoded test samples to 2 components with t-SNE (perplexity 80)
# NOTE(review): `filename` is presumably the name of the figure saved under the
# analyzer's `save_path` — confirm in LatentSpaceAnalyzer.TSNE_reduce
Latent_analyzer.TSNE_reduce(n_components=2, perplexity=80, filename="TSNE_reduced_space.pdf")

### Image generation from latent codes

In [None]:
from utilities.encoder_tools import generate_from_latent_code

# range of the values found in the encodings of the test samples
# (pandas reductions: column-wise first, then across the columns; NaN values are skipped)
test_encodings_df = pd.DataFrame(encoded_test_repr.encoded_samples)
test_enc_max = test_encodings_df.max().max()
test_enc_min = test_encodings_df.min().min()

print(f"Range of test dataset encoded values: [{test_enc_min}, {test_enc_max}]")

# latent dimension
latent_dim = model.enc_hp["latent_space_dim"]

### generate some examples of images from random latent codes
examples  = 20
images    = []
encodings = []
for _ in range(examples):
    # sample every latent coordinate uniformly within the range observed on the test set
    latent_code = np.random.uniform(test_enc_min, test_enc_max, latent_dim)
    encodings.append(latent_code)                                    # append encoding
    images.append(generate_from_latent_code(latent_code, model))     # append image

# table of the sampled latent codes (displayed as the cell output)
encodings_df = pd.DataFrame(encodings)
encodings_df

In [None]:
from utilities.plot_tools import plot_img_grid

# show the 20 generated images in a grid
# NOTE(review): (4,5) is presumably (rows, columns), i.e. 20 cells — confirm in plot_img_grid.
# The return value is bound to `_` so that it is not echoed as the cell output.
_ = plot_img_grid((4,5), images, to_show=True, axis_off=False, figsize=(10,8))

### Convolutional filters visualization

In [None]:
# TODO 

# Transfer Learning <a name="transfer_learning"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>

# Denoising Autoencoder <a name="denoising_autoencoder"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>

# Variational Autoencoder <a name="VAE"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>