# NEURAL NETWORKS AND DEEP LEARNING

---
A.A. 2021/22 (6 CFU) - Dr. Alberto Testolin, Dr. Umberto Michieli
---


# Homework 2 - Unsupervised Deep Learning

### Author: Michele Guadagnini - Mt.1230663

In [None]:
### ADDITIONAL LIBRARIES THAT NEED INSTALLATION (uncomment if needed)

#!pip install optuna
#!pip install pytorch-lightning

### the following packages are required to plot and save figures of the optuna study
#!pip install plotly
#!pip install kaleido

### the following one is required to print a model summary
#!pip install torchinfo

In [None]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.data import random_split
import torchvision
from torchvision import transforms

# python imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import copy
import logging
import datetime
import json

# additional libraries
import plotly.express as px
from torchinfo import summary
import optuna
import pytorch_lightning as pl

# Reduce console noise: each third-party logger is capped at the listed level,
# so only messages at that severity or above are emitted.
for _logger_name, _min_level in (
    ("optuna", logging.ERROR),
    ("pytorch_lightning", logging.WARNING),
):
    logging.getLogger(_logger_name).setLevel(_min_level)

In [None]:
# to have reproducible results: 
from pytorch_lightning.utilities.seed import seed_everything

### 'seed_everything' internally calls the following:
#    random.seed(seed)
#    np.random.seed(seed)
#    torch.manual_seed(seed)
#    torch.cuda.manual_seed_all(seed)

In [None]:
# load user settings from file 'settings.py'
import settings

# Table of contents:  <a name="toc"></a>

1. [**Autoencoder**](#autoencoder)
    1. optuna optimization
    1. training (with reconstruction example per epoch)
    1. analyze the network and the latent space

1. [**Denoising autoencoder**](#denoising_autoencoder)
    1. hyper-parameter tuning (with Optuna here as well?)
    1. training (with denoising example per epoch)
    1. test the denoising capability

1. [**Transfer Learning**](#transfer_learning)
    1. fine tune the denoising encoder 
    1. compare results with homework 1

1. [**Variational Autoencoder ($\beta$-VAE)**](#VAE)
    1. tune and train the model
    1. explore latent space
    1. generate samples

# Autoencoder <a name="autoencoder"></a> 
<div style="text-align: right">

[Table of contents](#toc)

</div>

In [None]:
# Set the random state for this section: seed the random number generators
# (Python `random`, NumPy, PyTorch CPU and CUDA, as listed in the seeding cell)
# with the user-configured value from 'settings.py' for reproducible results.
seed_everything(seed=settings.MAGIC_NUM)

In [None]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoder, SymmetricAutoencoderHPS
from data_management.fashion_mnist import FashionMNISTDataModule

In [None]:
# Data module used by the autoencoder section.
# NOTE(review): `Nsamples` presumably limits how many samples are drawn from the
# dataset and `valid_frac` presumably is the share held out for validation --
# confirm against FashionMNISTDataModule.
datamodule = FashionMNISTDataModule(
    data_dir=settings.DATASETS_DIR,
    batch_size=256,
    Nsamples=16384,
    valid_frac=0.25,
    random_state=settings.MAGIC_NUM,
)

In [None]:
from autoencoder.symmetric_autoencoder import SymmetricAutoencoderHPS

# Hyper-parameter search space definition.
#
# Candidate convolutional stacks: every candidate lists three layers, each
# written as [kernel size, stride, padding].  The trailing comment traces the
# spatial side length through the stack, starting from the 28-pixel input
# (output side = floor((side + 2*padding - kernel) / stride) + 1).
proposed_conv = [
    [[3, 2, 0], [3, 2, 0], [3, 1, 0]],  # 28 -> 13 -> 6 -> 4
    [[5, 2, 1], [3, 2, 0], [3, 1, 0]],  # 28 -> 13 -> 6 -> 4
    [[3, 2, 1], [3, 2, 1], [3, 2, 0]],  # 28 -> 14 -> 7 -> 3
    [[5, 2, 1], [3, 2, 1], [3, 1, 1]],  # 28 -> 13 -> 7 -> 7
    [[7, 2, 2], [3, 2, 2], [3, 1, 1]],  # 28 -> 13 -> 8 -> 8
    [[9, 4, 2], [5, 1, 2], [3, 1, 1]],  # 28 -> 6  -> 6 -> 6
    [[5, 2, 1], [5, 2, 2], [3, 1, 1]],  # 28 -> 13 -> 7 -> 7
]

# One entry per tunable quantity; "*_range" entries hold the bounds (and, where
# three numbers are given, the step) of the interval to search.
hps_dict = {
    "conv_configs": proposed_conv,
    "channels_range": [16, 128, 8],          # min, max, step
    "n_linear": [1],                         # number of linear layers
    "linear_units_range": [32, 256, 16],     # min, max, step
    "latent_space_range": [4, 64, 2],        # min, max, step
    "batch_norm": [True, False],             # batch norm is applied only between conv. layers
    "Pdropout_range": [0.0, 0.3],            # dropout is applied only between linear layers
    "activations": ["relu"],
    "optimizers": ["adam", "sgd", "adamax"],
    "learning_rate_range": [5e-5, 1e-1],
    "L2_penalty_range": [1e-6, 1e-4],
    "momentum_range": [0.6, 0.99],           # used with SGD optimizer
}

# Wrap the search-space dictionary in the project's hyper-parameter-space
# helper; the resulting object is passed to the Optuna objective.
hp_space = SymmetricAutoencoderHPS(hps_dict)

In [None]:
from utilities.train_tools import Objective

### Optuna study objective
# Callable handed to `study.optimize`; it is configured with the model class,
# the data module and the hyper-parameter space defined in the previous cells.
# NOTE(review): `max_epochs` and `early_stop_patience` presumably bound the
# training of each trial -- confirm against utilities.train_tools.Objective.
objective = Objective(
    model_class=SymmetricAutoencoder,
    datamodule=datamodule,
    hp_space=hp_space,
    max_epochs=30,
    use_gpu=settings.USE_GPU,
    early_stop_patience=3,
)

### Pruner: median stopping rule
pruner = optuna.pruners.MedianPruner(
    n_startup_trials=10,  # trials to complete before pruning is enabled
    n_warmup_steps=20,    # steps a trial must take before it can be pruned
    interval_steps=10,    # steps between two consecutive pruning checks
)

### Sampler: TPE, seeded so that the suggested hyper-parameters are deterministic
sampler = optuna.samplers.TPESampler(
    seed=settings.MAGIC_NUM,
    n_startup_trials=10,  # use random sampling for the first trials
)
### create (or resume) the study
# The study is stored in an SQLite database file named after the study inside
# settings.autoencoder.OPTUNA_DIR. With load_if_exists=True, re-running this cell
# reuses a study of the same name already present in that storage.
os.makedirs(settings.autoencoder.OPTUNA_DIR, exist_ok=True)

# Bind the study name once so that the study itself and its storage URL agree
# (the storage URL needs this name, which was not defined anywhere before).
study_name = settings.autoencoder.OPTUNA_STUDY_NAME

study = optuna.create_study(study_name = study_name,
                            direction  = "minimize",
                            pruner     = pruner,
                            sampler    = sampler,
                            storage    = f"sqlite:///{settings.autoencoder.OPTUNA_DIR}/{study_name}.db",
                            load_if_exists = True,
                           )

### run optimization
Ntrials = 10     # number of trials to run in this call
MaxTime = None   # timeout in seconds; None means no time limit

# Redirect warnings issued by the `warnings` module to the logging system while
# the study runs. The try/finally guarantees that the redirection is switched
# off again even if the optimization raises or the cell is interrupted.
logging.captureWarnings(True)
try:
    print(f"Starting study '{study.study_name}' with n_trials={Ntrials} and timeout={MaxTime}")
    study.optimize(objective,
                   n_trials       = Ntrials,
                   timeout        = MaxTime, # timeout in seconds
                   gc_after_trial = True,    # run garbage collection
                  )
finally:
    logging.captureWarnings(False)

# Denoising Autoencoder <a name="denoising_autoencoder"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>

# Transfer Learning <a name="transfer_learning"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>

# Variational Autoencoder <a name="VAE"></a>
<div style="text-align: right">

[Table of contents](#toc)

</div>