# NEURAL NETWORKS AND DEEP LEARNING

## Homework 1 - Supervised Deep Learning

### Regression task

Puppin Michele - 1227474

In [None]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset

# Set the seed
# Seed both NumPy's global RNG (used by np.random.choice in the random
# hyper-parameter search) and PyTorch's default generator so runs are repeatable.
np.random.seed(25)
torch.manual_seed(25)

In [None]:
# Pick the compute device: CUDA when a GPU is visible, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training device: {device}")

## Dataset

In [None]:
# Load and transform dataset
class CsvDataset(Dataset):
    """Map-style dataset backed by a CSV file with 'input' and 'label' columns.

    Args:
        csv_file: Path (or anything accepted by ``pandas.read_csv``) of the CSV file.
        transform: Optional callable applied to each ``(input, label)`` pair
            before it is returned.
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair at row ``idx``, transformed if requested."""
        row = self.data.iloc[idx]
        pair = (row['input'], row['label'])
        if not self.transform:
            return pair
        return self.transform(pair)

# Convert sample to Tensors
class ToTensor(object):
    """Transform turning an ``(x, y)`` pair into two one-element float32 tensors."""

    def __call__(self, sample):
        def as_float_tensor(value):
            # Wrap the scalar in a list so the resulting tensor has shape (1,)
            return torch.tensor([value]).float()

        features, target = sample
        return (as_float_tensor(features), as_float_tensor(target))

In [None]:
# Download dataset
!wget -P regression_dataset https://gitlab.dei.unipd.it/gadaleta/nnld-2020-21-lab-resources/-/raw/master/homework_1_regression_dataset/train_data.csv
!wget -P regression_dataset https://gitlab.dei.unipd.it/gadaleta/nnld-2020-21-lab-resources/-/raw/master/homework_1_regression_dataset/test_data.csv 

In [None]:
# Build the train and test datasets; every sample is converted to a pair of tensors
train_dataset = CsvDataset(
    csv_file='regression_dataset/train_data.csv',
    transform=transforms.Compose([ToTensor()]),
)
test_dataset = CsvDataset(
    csv_file='regression_dataset/test_data.csv',
    transform=transforms.Compose([ToTensor()]),
)

## Early stopping

In [None]:
# Early stopping class definition
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which no longer exists in NumPy 2.0
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one validation loss and update the stopping state.

        An improvement saves a checkpoint of ``model`` and resets the counter;
        anything else increments the counter and sets ``early_stop`` once it
        reaches ``patience``.

        Args:
            val_loss (float): Validation loss of the current epoch (lower is better).
            model (torch.nn.Module): Model whose ``state_dict`` is checkpointed.
        """
        score = -val_loss

        if np.isnan(score):
            # Every comparison with NaN is False, so without this guard a diverged
            # loss would look like an improvement, overwrite the checkpoint and
            # freeze best_score at NaN (which disables early stopping for good).
            improved = False
        elif self.best_score is None:
            improved = True
        else:
            improved = not (score < self.best_score + self.delta)

        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

## Network definition

In [None]:
class Net(nn.Module):
    """Fully connected network with two hidden layers.

    Layout: Linear(Ni, Nh1) -> ReLU -> Dropout -> Linear(Nh1, Nh2) -> ReLU
    -> Dropout -> Linear(Nh2, No).

    Args:
        Ni: Number of input features.
        Nh1: Number of units in the first hidden layer.
        Nh2: Number of units in the second hidden layer.
        No: Number of output units.
        DropProb: Dropout probability applied after each hidden activation.
    """

    def __init__(self, Ni, Nh1, Nh2, No, DropProb = 0.2):
        super().__init__()
        self.fc1 = nn.Linear(in_features = Ni, out_features = Nh1)
        self.fc2 = nn.Linear(in_features = Nh1, out_features = Nh2)
        self.out = nn.Linear(in_features = Nh2, out_features = No)
        self.drp = nn.Dropout(p = DropProb)
        self.act = nn.ReLU()
        print('Network initialized')

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer."""
        x = self.act(self.fc1(x))
        x = self.drp(x)
        x = self.act(self.fc2(x))
        x = self.drp(x)
        x = self.out(x)
        return x

    def _batch_losses(self, loader, loss_func, device):
        """Return the per-batch losses over ``loader`` in eval mode, without gradients."""
        losses = []
        self.eval()
        with torch.no_grad():
            for sample_batched in loader:
                x_batch = sample_batched[0].to(device)
                label_batch = sample_batched[1].to(device)
                out = self(x_batch)
                loss = loss_func(out, label_batch)
                losses.append(loss.detach().cpu().numpy())
        return losses

    def train_nn(self, train_loader, optimizer, loss_func, device):
        """Train for one pass over ``train_loader``.

        Returns:
            list: One loss value (0-d NumPy array) per batch.
        """
        train_loss = []
        self.train()
        for sample_batched in train_loader:
            x_batch = sample_batched[0].to(device)
            label_batch = sample_batched[1].to(device)
            out = self(x_batch)
            loss = loss_func(out, label_batch)
            # Clear gradients left over from the previous batch before backprop
            self.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss.append(loss.detach().cpu().numpy())
        return train_loss

    def validation_nn(self, val_loader, loss_func, device):
        """Evaluate on ``val_loader`` and return the list of per-batch losses."""
        return self._batch_losses(val_loader, loss_func, device)

    def fit(self, train_loader, val_loader, optimizer, loss_func, epochs, device,
            patience = 25, restore_best = False):
        """Train with early stopping on the validation loss.

        Args:
            train_loader: Iterable of ``(input, label)`` batches used for training.
            val_loader: Iterable of ``(input, label)`` batches used for validation.
            optimizer: Optimizer already bound to this network's parameters.
            loss_func: Callable ``loss_func(output, label)`` returning a scalar tensor.
            epochs: Maximum number of epochs.
            device: Device the batches are moved to.
            patience: Epochs without validation improvement tolerated before stopping.
            restore_best: If True, reload the checkpoint with the best validation
                loss once training ends. The default (False) keeps the weights
                of the last epoch that was run.

        Returns:
            tuple: ``(train_loss_log, val_loss_log)``, the mean loss per epoch.
        """
        train_loss_log = []
        val_loss_log = []

        early_stopping = EarlyStopping(patience = patience, verbose = False)

        for _ in range(epochs):
            # Training
            train_loss = self.train_nn(train_loader, optimizer, loss_func, device)
            train_loss_log.append(np.mean(train_loss))
            # Validation
            val_loss = self.validation_nn(val_loader, loss_func, device)
            mean_val_loss = np.mean(val_loss)
            val_loss_log.append(mean_val_loss)

            early_stopping(mean_val_loss, self)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        # best_score stays None when no checkpoint was ever written (e.g. epochs == 0)
        if restore_best and early_stopping.best_score is not None:
            self.load(early_stopping.path)

        return train_loss_log, val_loss_log

    def test(self, test_loader, loss_func, device):
        """Return the mean of the per-batch losses over ``test_loader``.

        The full label batch is compared with the network output; indexing the
        labels with ``[0]`` would broadcast the first label of each batch
        against every prediction.
        """
        return np.mean(self._batch_losses(test_loader, loss_func, device))

    def predict(self, input_loader, device):
        """Return a list with one NumPy output array per batch of ``input_loader``."""
        output = []
        self.eval()
        with torch.no_grad():
            for sample_batched in input_loader:
                x_batch = sample_batched[0].to(device)
                out = self(x_batch)
                output.append(out.cpu().numpy())
        return output

    def save(self, path):
        """Write this network's ``state_dict`` to ``path``."""
        torch.save(self.state_dict(), path)

    def load(self, path):
        """Load a ``state_dict`` from ``path`` into this network."""
        # Map the stored tensors onto the device this network currently lives on,
        # so a checkpoint saved on a GPU can also be loaded on a CPU-only machine.
        target_device = next(self.parameters()).device
        self.load_state_dict(torch.load(path, map_location = target_device))

    def restart(self):
        """Re-initialise the learnable parameters of every layer in place."""
        # Calling self.__init__() with no arguments cannot work because the
        # constructor needs the layer sizes; reset each layer's parameters instead.
        for layer in self.children():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()

## K-fold Cross Validation

In [None]:
# Divide dataset in K folds
def Kfolds_divider(dataset, Kfold=4 ):
    """Split ``dataset`` into ``Kfold`` contiguous, unshuffled folds.

    Every sample ends up in exactly one fold. When ``len(dataset)`` is not a
    multiple of ``Kfold`` the first ``len(dataset) % Kfold`` folds hold one
    extra sample, so no trailing samples are dropped.

    Args:
        dataset: Indexable dataset with a length.
        Kfold (int): Number of folds; must be at least 1.

    Returns:
        list: ``Kfold`` ``Subset`` objects over ``dataset``.

    Raises:
        ValueError: If ``Kfold`` is smaller than 1.
    """
    if Kfold < 1:
        raise ValueError(f"Kfold must be at least 1, got {Kfold}")

    index_chunks = np.array_split(np.arange(len(dataset)), Kfold)
    # Plain Python ints keep Subset indexing independent of NumPy scalar types
    folds = [Subset(dataset, chunk.tolist()) for chunk in index_chunks]

    return folds

In [None]:
# CrossValidation
def CrossValidation(config, dataset, Kfold, rep, device):
    """Random hyper-parameter search evaluated on K-fold splits.

    For each of the ``rep`` repetitions and each fold, one configuration is
    drawn at random from ``config``, a fresh ``Net`` is trained on the other
    folds and validated on the held-out fold.

    Args:
        config (dict): Maps each hyper-parameter name ('Nh', 'LearningRate',
            'Regularization', 'Dropout', 'Epochs', 'Optimizer') to the list of
            candidate values.
        dataset: Dataset split with ``Kfolds_divider``.
        Kfold (int): Number of folds.
        rep (int): Number of passes over the folds.
        device: Device used for training.

    Returns:
        tuple: ``(par_log, train_loss_log, val_loss_log)``. Entry ``i`` of each
        list belongs to the same trained model: the sampled parameters and the
        per-epoch mean training / validation losses returned by ``Net.fit``.

    Raises:
        ValueError: If the sampled 'Optimizer' is neither 'Adam' nor 'SGD'.
    """
    par_log = []
    train_loss_log = []
    val_loss_log = []

    folds = Kfolds_divider(dataset, Kfold)

    for i in range(rep):
        print(i)
        for j, val_fold in enumerate(folds):

            # Train on every fold except the j-th one, which is held out for validation
            train_set = torch.utils.data.ConcatDataset(folds[:j] + folds[j+1:])
            train_load = DataLoader(train_set, batch_size=20, shuffle=True,  num_workers=0)
            val_load   = DataLoader(val_fold,  batch_size=20, shuffle=False, num_workers=0)

            # Random parameter selection
            # NOTE(review): a new configuration is drawn for every fold, so each
            # configuration is scored on a single fold only - confirm this is intended.
            sample_params = {}
            for k, choices in config.items():
                picked = np.random.choice(choices)
                # np.random.choice returns NumPy scalars; log plain Python values
                sample_params[k] = picked.item() if hasattr(picked, 'item') else picked

            par_log.append(sample_params)

            Ni  = 1
            Nh1 = sample_params['Nh']
            Nh2 = sample_params['Nh']*2
            No  = 1
            DropProb = sample_params['Dropout']

            model = Net(Ni, Nh1, Nh2, No, DropProb).to(device)

            loss_func = nn.MSELoss()
            epochs = sample_params['Epochs']

            optimizer_name = sample_params['Optimizer']
            if optimizer_name == 'Adam':
                opt = optim.Adam(model.parameters(), lr = sample_params['LearningRate'], weight_decay = sample_params['Regularization'])
            elif optimizer_name == 'SGD':
                opt = optim.SGD(model.parameters(), lr = sample_params['LearningRate'], weight_decay = sample_params['Regularization'], momentum=0.9)
            else:
                # Fail loudly: otherwise the optimizer left over from the previous
                # iteration (bound to another model) would be reused silently.
                raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")

            # Training & validation
            train_loss, val_loss = model.fit(train_load, val_load, opt, loss_func, epochs, device)

            # Storing train/loss validation
            train_loss_log.append( train_loss )
            val_loss_log.append( val_loss )

    return par_log, train_loss_log, val_loss_log

## Training and Testing the network

### Model selection

In [None]:
# Define parameters range
# Candidate values for the random search performed by CrossValidation;
# one value per key is drawn for every trained model.
dict_params = {
            # Units in the first hidden layer (the second hidden layer uses twice as many)
            'Nh'              : [16, 32, 64, 128],
            # Optimizer learning rate
            'LearningRate'    : [0.1, 0.01, 0.001, 0.0001],
            # Passed to the optimizer as weight_decay
            'Regularization'  : [1e-3, 1e-4, 1e-5, 1e-6],
            # Dropout probability of the network
            'Dropout'         : [0, 0.10, 0.15, 0.20],
            # Maximum number of epochs (early stopping may end training sooner)
            'Epochs'          : [1000],
            # 'SGD' is run with momentum 0.9
            'Optimizer'       : ['SGD', 'Adam']
         }

In [None]:
# Random search: 20 passes over a 4-fold split of the training set
params_list, train_loss_list, val_loss_list = CrossValidation(
    dict_params,
    train_dataset,
    Kfold=4,
    rep=20,
    device=device,
)

In [None]:
# Select best parameters
# Rank every trained model by the validation loss of its last epoch.
final_val_losses = [v[-1] for v in val_loss_list]
# nanargmin skips diverged runs: plain argmin returns the index of a NaN entry,
# which would select a configuration whose training blew up.
best_params = params_list[np.nanargmin(final_val_losses)]
best_params

### Train with best parameters

In [None]:
# Hard-coded configuration used for the final training; it replaces whatever
# best_params was selected from the cross-validation logs in the previous cell.
best_params = {
            'Nh'              : 128,
            'LearningRate'    : 0.0001,
            'Regularization'  : 1e-5,
            'Dropout'         : 0.15,
            'Epochs'          : 1000,
            'Optimizer'       : 'Adam'
         }

In [None]:
# Final training uses the whole training set
train_load = DataLoader(train_dataset, batch_size=20, shuffle=True, num_workers=0 )
# No held-out split here: the "validation" loader is the training loader itself,
# so early stopping monitors the loss on the training data (in eval mode).
val_load = train_load

Ni  = 1
Nh1 = best_params['Nh']
Nh2 = best_params['Nh']*2
No  = 1
DropProb = best_params['Dropout']

model = Net(Ni, Nh1, Nh2, No, DropProb).to(device)

loss_func = nn.MSELoss()
epochs = best_params['Epochs']

optimizer_name = best_params['Optimizer']
if optimizer_name == 'Adam':
    opt = optim.Adam(model.parameters(), lr = best_params['LearningRate'], weight_decay = best_params['Regularization'])
elif optimizer_name == 'SGD':
    opt = optim.SGD(model.parameters(), lr = best_params['LearningRate'], weight_decay = best_params['Regularization'], momentum=0.9)
else:
    # Avoid silently training with a stale `opt` left over from an earlier cell
    raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")

# Training & validation
train_loss, val_loss = model.fit(train_load, val_load, opt, loss_func, epochs, device)

print('Training loss:', train_loss[-1])

In [None]:
# Plot Training and Validation loss
# Both curves hold one mean loss value per epoch, as returned by model.fit
plt.plot(train_loss, label='Training')
plt.plot(val_loss, label='Validation')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Write the figure to disk before plt.show(), then hand the file to the
# browser through the Colab `files` helper
plt.savefig('TrainValLoss_Reg.pdf', bbox_inches='tight')
files.download('TrainValLoss_Reg.pdf')
plt.show()

In [None]:
# Save trained model
# Net.save writes the network's state_dict to the given path
model.save('net_reg_parameters.torch')
# Store the optimizer state as well, in a separate file
torch.save(opt.state_dict(), 'optimizer_reg_state.torch')

### Test the trained model

In [None]:
test_load = DataLoader(test_dataset, batch_size=20, shuffle=True, num_workers=0 )

# Initialization of the net
Ni  = 1
Nh1 = best_params['Nh']
Nh2 = best_params['Nh']*2
No  = 1
DropProb = best_params['Dropout']

# Rebuild the network from scratch and restore the saved weights into it, so the
# evaluation below really goes through the save/load round trip (and this cell
# also works in a fresh session where no trained model is in memory).
model = Net(Ni, Nh1, Nh2, No, DropProb).to(device)

model.load('net_reg_parameters.torch')

loss_func = nn.MSELoss()

test_loss = model.test(test_load, loss_func, device)

print('Test loss:', test_loss)

### Plot predictions

In [None]:
# Build a dense grid of 1000 inputs in [-5, 5]; the label column is a dummy
# filled with ones so the file has the two columns CsvDataset reads
x_vec = np.linspace(-5, 5, 1000).reshape(-1, 1)
lb = np.ones_like(x_vec)
x = pd.DataFrame(np.hstack((x_vec, lb)), columns=['input', 'label'])
x.to_csv('pred_data.csv', index=False)

In [None]:
# Read the grid back from the same relative path it was written to
# (an absolute '/content/...' path only resolves inside Colab)
pred_dataset = CsvDataset('pred_data.csv', transform = transforms.Compose([ToTensor()]))
pred_loader = DataLoader(pred_dataset, batch_size=20, shuffle=False, num_workers=2)

# Run the network to get predictions
# predict() returns one array per batch; concatenate joins them along the batch
# axis and, unlike np.array, also works when the last batch is smaller
y_vec = np.concatenate(model.predict(pred_loader, device)).flatten()

In [None]:
# Reload the raw CSV files as DataFrames to scatter the original points
train_df = pd.read_csv('regression_dataset/train_data.csv')
test_df = pd.read_csv('regression_dataset/test_data.csv')

# Plot output
# Network predictions on the dense grid (dashed line) over the train/test samples
plt.figure(figsize=(12,8))
plt.plot(x['input'], y_vec, 'g--', label='Network output')
plt.scatter(train_df['input'], train_df['label'], label='Training set')
plt.scatter(test_df['input'], test_df['label'], label='Test set')
plt.xlabel('Input', fontsize=20)
plt.ylabel('Label', fontsize=20)
plt.grid(ls='--')
plt.legend(fontsize=18)
plt.tick_params(labelsize=16)

# Save first, then trigger the Colab download, then display
plt.savefig('Predictions_Reg.pdf', bbox_inches='tight')
files.download('Predictions_Reg.pdf')
plt.show()

## Weights Histogram

In [None]:
# Copy the parameters of each linear layer to host memory as NumPy arrays
# First hidden layer
h1_w = model.fc1.weight.data.cpu().numpy()
h1_b = model.fc1.bias.data.cpu().numpy()
# Second hidden layer
h2_w = model.fc2.weight.data.cpu().numpy()
h2_b = model.fc2.bias.data.cpu().numpy()
# Output layer
out_w = model.out.weight.data.cpu().numpy()
out_b = model.out.bias.data.cpu().numpy()

# One histogram (50 bins) of the weight values per layer, stacked vertically
fig, axs = plt.subplots(3, 1, figsize=(12,8))
panels = (
    (h1_w, 'First hidden layer weights'),
    (h2_w, 'Second hidden layer weights'),
    (out_w, 'Output layer weights'),
)
for ax, (weights, title) in zip(axs, panels):
    ax.hist(weights.flatten(), 50)
    ax.set_title(title)
for ax in axs:
    ax.grid()
plt.tight_layout()
plt.savefig('Weights_Reg.pdf', bbox_inches='tight')
files.download('Weights_Reg.pdf')
plt.show()

## Activation Profiles

In [None]:
def get_activation(layer, input, output):
    """Forward hook storing the ReLU of ``output`` in the global ``activation``."""
    global activation
    activation = torch.relu(output)

### Register hook
hook_handle = model.fc2.register_forward_hook(get_activation)

### Analyze activations
model = model.to(device)
model.eval()
try:
    with torch.no_grad():
        # After each forward pass `activation` holds the rectified fc2 output
        x1 = torch.tensor([-5.0]).float().to(device)
        y1 = model(x1)
        z1 = activation
        x2 = torch.tensor([0.0]).float().to(device)
        y2 = model(x2)
        z2 = activation
        x3 = torch.tensor([5.0]).float().to(device)
        y3 = model(x3)
        z3 = activation
finally:
    ### Remove hook
    # Detach the hook even if a forward pass fails, so it cannot linger on the model
    hook_handle.remove()

### Plot activations
# `use_line_collection` is not passed: the keyword was removed from Axes.stem in
# Matplotlib 3.8, and drawing the stems as a line collection is already the default.
fig, axs = plt.subplots(3, 1, figsize=(12,8))
axs[0].stem(z1.cpu().numpy())
axs[0].set_title('Last layer activations for input x=%.2f' % x1.item())
axs[1].stem(z2.cpu().numpy())
axs[1].set_title('Last layer activations for input x=%.2f' % x2.item())
axs[2].stem(z3.cpu().numpy())
axs[2].set_title('Last layer activations for input x=%.2f' % x3.item())
plt.tight_layout()
plt.savefig('Activations_Reg.pdf', bbox_inches='tight')
files.download('Activations_Reg.pdf')
plt.show()