# Running a VAE on Fashion MNIST

### Creating the VAE Model

In [None]:
from psutil import virtual_memory
# Report the machine's memory in decimal gigabytes (bytes / 1e9).
# NOTE(review): the value read is psutil's `.total` field, while the message
# says "available" — confirm which figure is actually wanted.
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

import torch
import torch.nn as nn
from torch.utils.data import random_split

import numpy as np
import torchvision
from tqdm import tqdm
from torchvision.utils import save_image, make_grid

import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old aliases, so pick whichever name this
# installation actually ships instead of hard-coding the legacy one.
_darkgrid_style = (
    'seaborn-v0_8-darkgrid'
    if 'seaborn-v0_8-darkgrid' in plt.style.available
    else 'seaborn-darkgrid'
)
plt.style.use(_darkgrid_style)

# Root directory handed to the torchvision dataset constructors below.
dataset_path = '~/datasets'

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Layer widths: flattened input size, hidden width, latent width.
x_dim, hidden_dim, latent_dim = 28 * 28, 50, 50


#################
# Default training hyper-parameters.
batch_size, lr, epochs = 100, 1e-3, 50
#################

from torchvision.datasets import MNIST, FashionMNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader


# ToTensor converts each dataset image into a torch tensor.
mnist_transform = transforms.Compose([transforms.ToTensor()])

# DataLoader options: one background worker process. Page-locked (pinned) host
# memory only speeds up host-to-GPU copies, and requesting it without CUDA just
# triggers a warning, so enable it only when a GPU is actually present.
kwargs = {'num_workers': 1, 'pin_memory': torch.cuda.is_available()}

train_dataset = FashionMNIST(dataset_path, transform=mnist_transform, train=True, download=True)
test_dataset  = FashionMNIST(dataset_path, transform=mnist_transform, train=False, download=True)

# Number of training samples (60000 for the Fashion-MNIST training split),
# read from the dataset instead of being hard-coded.
train_size = len(train_dataset)


# Training batches are reshuffled every pass; test batches keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader  = DataLoader(dataset=test_dataset,  batch_size=batch_size, shuffle=False, **kwargs)

class Encoder(nn.Module):
    """Two-hidden-layer MLP mapping an input to the parameters of q(z | x).

    Args:
        input_dim: size of each flattened input vector.
        hidden_dim: width of both hidden layers.
        latent_dim: size of the latent space; both outputs have this width.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        self.FC_layer_1 = nn.Linear(input_dim, hidden_dim)
        self.FC_layer_2 = nn.Linear(hidden_dim, hidden_dim)
        # Two parallel heads read the same hidden features.
        self.FC_mean = nn.Linear(hidden_dim, latent_dim)
        self.FC_var = nn.Linear(hidden_dim, latent_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)
        # The train/eval flag (`self.training`) is initialised and managed by
        # nn.Module via .train()/.eval(); it is deliberately not set by hand.

    def forward(self, x):
        """Return ``(mean, log_var)`` of the Gaussian posterior for ``x``.

        Both returned tensors have ``latent_dim`` features per input row.
        """
        # R^{input_dim} \ni x -> LeakyReLU(A_1(x)) = h_1 \in R^{hidden_dim}
        h_1 = self.LeakyReLU(self.FC_layer_1(x))
        # R^{hidden_dim} \ni h_1 -> LeakyReLU(A_2(h_1)) = h_2 \in R^{hidden_dim}
        h_2 = self.LeakyReLU(self.FC_layer_2(h_1))
        # R^{hidden_dim} \ni h_2 -> (A_31(h_2), A_32(h_2)) = (mean, log_var)
        #                           \in R^{latent_dim} x R^{latent_dim}
        mean = self.FC_mean(h_2)
        log_var = self.FC_var(h_2)

        # The encoder outputs the mean and the log of the variance, i.e. the
        # parameters of the Gaussian distribution "q".
        return mean, log_var


class Decoder(nn.Module):
    """Two-hidden-layer MLP mapping a latent vector back to input space.

    Args:
        latent_dim: size of each latent vector fed to the decoder.
        hidden_dim: width of both hidden layers.
        output_dim: size of each reconstructed vector.
    """

    def __init__(self, latent_dim, hidden_dim, output_dim):
        super().__init__()

        self.FC_dec_layer_1 = nn.Linear(latent_dim, hidden_dim)
        self.FC_dec_layer_2 = nn.Linear(hidden_dim, hidden_dim)
        self.FC_output = nn.Linear(hidden_dim, output_dim)

        self.LeakyReLU = nn.LeakyReLU(0.2)

    def forward(self, z):
        """Return the reconstruction ``x_hat`` for ``z``, each entry in (0, 1)."""
        activate = self.LeakyReLU

        # R^{latent_dim} \ni z -> LeakyReLU(B1(z)) \in R^{hidden_dim}
        hidden = activate(self.FC_dec_layer_1(z))
        # R^{hidden_dim} -> LeakyReLU(B2(.)) \in R^{hidden_dim}
        hidden = activate(self.FC_dec_layer_2(hidden))
        # R^{hidden_dim} -> Sigmoid(B3(.)) = x_hat \in R^{output_dim}
        logits = self.FC_output(hidden)
        return torch.sigmoid(logits)


def reparameterization(mean, var):
    """Sample ``mean + var * epsilon`` with ``epsilon ~ N(0, I)``.

    This is the so-called "reparameterization trick": the randomness lives in
    ``epsilon``, so the result stays differentiable w.r.t. ``mean`` and ``var``.

    Args:
        mean: tensor of Gaussian means.
        var: tensor multiplied element-wise with the noise. Despite the name
            it acts as a scale (standard deviation), not a variance.

    Returns:
        A tensor shaped like the broadcast of ``mean`` and ``var``.
    """
    # randn_like already allocates the noise with var's shape, dtype and
    # device, so no explicit device transfer (or global device) is needed.
    epsilon = torch.randn_like(var)
    return mean + var * epsilon
## Now we define the final model


class Model(nn.Module):
    """Variational auto-encoder assembled from an encoder and a decoder module.

    Args:
        Encoder: module returning ``(mean, log_var)`` for an input batch.
        Decoder: module mapping a latent sample to a reconstruction.
    """

    def __init__(self, Encoder, Decoder):
        super().__init__()
        self.Encoder, self.Decoder = Encoder, Decoder

    def forward(self, x):
        """Encode ``x``, sample a latent vector, decode it.

        Returns:
            ``(x_hat, mean, log_var)``: the reconstruction and the encoder
            outputs.
        """
        mean, log_var = self.Encoder(x)
        # exp(0.5 * log_var) turns the log-variance into a standard deviation.
        std = (0.5 * log_var).exp()
        latent = reparameterization(mean, std)
        return self.Decoder(latent), mean, log_var

### Experiment Code

In [None]:

import pickle
from datetime import datetime
from torch.optim import Adam, SGD
def _average_loss(model, loader, loss_function):
    """Return the per-sample loss of ``model`` over every batch of ``loader``."""
    total = 0.0
    n_samples = 0
    # Pure measurement: no gradients are needed, so skip building the graph.
    with torch.no_grad():
        for batch, _ in loader:
            # Use the batch's own length so a short final batch still works.
            batch = batch.view(batch.size(0), x_dim).to(DEVICE)
            batch_hat, mean, log_var = model(batch)
            total += loss_function(batch, batch_hat, mean, log_var).item()
            n_samples += batch.size(0)
    # Divide by the number of samples actually seen (not by the last batch
    # index); an empty loader yields NaN rather than a crash.
    return total / n_samples if n_samples else float("nan")


def experiment_run(algo, eta, gamma, delta, epochs):
    """Train a fresh VAE with one optimiser configuration and record its curves.

    Args:
        algo: optimiser name, case-insensitive. "adam" uses Adam; every other
            value uses plain SGD. "gclip" and "d-gclip" additionally rescale
            the SGD step size each iteration from the gradient norm.
        eta: base learning rate. It is multiplied by 0.1 at the start of the
            100th and 150th epochs.
        gamma: clipping threshold used by "gclip" / "d-gclip".
        delta: lower bound on the step-size scaling factor used by the
            clipping variants; a falsy value (e.g. ``None``) is treated as 0.
        epochs: number of passes over the training data.

    Returns:
        ``(data, model)`` where ``data`` is a dict with keys
        ``"training_loss"`` and ``"testing_loss"`` (per-sample losses measured
        at the start of every epoch, before that epoch's updates) and
        ``"gradient_norms"`` (one gradient norm per optimisation step), and
        ``model`` is the trained network.
    """
    algo = algo.lower()
    delta = delta if delta else 0
    print()
    print("Starting new experimen at", datetime.now())
    print("Parameters:", algo, eta, gamma, delta, epochs)
    print()
    encoder = Encoder(input_dim=x_dim, hidden_dim=hidden_dim, latent_dim=latent_dim)
    decoder = Decoder(latent_dim=latent_dim, hidden_dim=hidden_dim, output_dim=x_dim)
    model = Model(Encoder=encoder, Decoder=decoder).to(DEVICE)

    def loss_function(x, x_hat, mean, log_var):
        # Summed squared reconstruction error over the whole mini-batch.
        MSE_Loss = nn.functional.mse_loss(x_hat, x, reduction='sum')
        # KL divergence between N(mean, diag(exp(log_var))) and N(0, I),
        # summed over every entry of the (minibatch_size, latent_dim) tensors.
        KLD = - 0.5 * torch.sum(1 + log_var - mean.pow(2) - (log_var).exp())
        return MSE_Loss + KLD

    if algo == "adam":
        optimizer = Adam(model.parameters(), lr=eta)
    else:
        optimizer = SGD(model.parameters(), lr=eta)

    uses_clipping = algo in ("gclip", "d-gclip")

    print("Starting VAE training")
    model.train()
    Training_Loss = []
    Risk = []
    grad_norms = []

    for epoch in range(epochs):

        # Optional learning rate scheduling. The new rate must be written into
        # the optimizer as well; changing only the local variable would leave
        # Adam / plain SGD running at the old rate.
        if epoch == 99 or epoch == 149:
            eta *= 0.1
            for group in optimizer.param_groups:
                group["lr"] = eta

        # At the beginning of each epoch, measure the training and test loss
        # at the same model parameters.
        Training_Loss.append(_average_loss(model, train_loader, loss_function))
        Risk.append(_average_loss(model, test_loader, loss_function))

        for x, _ in train_loader:
            # The second element of each mini-batch holds the labels, which
            # the VAE does not use.
            x = x.view(x.size(0), x_dim).to(DEVICE)

            optimizer.zero_grad()

            x_hat, mean, log_var = model(x)
            mini_batch_loss = loss_function(x, x_hat, mean, log_var)
            mini_batch_loss.backward()

            # With an infinite threshold clip_grad_norm_ leaves the gradients
            # unscaled; it is used here only to obtain the total gradient norm.
            norm_grad_f = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=float("inf")).item()
            grad_norms.append(norm_grad_f)

            # If GClip or d-GClip, adjust step size per iteration:
            #   h = min(eta, eta * max(delta, gamma / ||grad||))
            if uses_clipping:
                # A zero gradient makes the ratio unbounded, so the step size
                # falls back to eta instead of raising ZeroDivisionError.
                ratio = gamma / norm_grad_f if norm_grad_f > 0 else float("inf")
                h = min(eta, eta * max(delta, ratio))
                for g in optimizer.param_groups:
                    g["lr"] = h
            optimizer.step()

        print("\tEpoch", epoch + 1, "complete. Latest training and testing losses:", Training_Loss[-1], Risk[-1])

    print("The VAE training is over! Final training loss:", Training_Loss[-1], " test_loss:", Risk[-1])
    data = {"training_loss": Training_Loss, "testing_loss": Risk, "gradient_norms": grad_norms}
    return data, model


### Run Experiments

In [None]:
# One configuration per optimiser; the keys are forwarded to experiment_run
# as keyword arguments.
experiments = [
    {"algo": "adam", "eta": 1e-3, "gamma": 0, "delta": 0},
    {"algo": "gd", "eta": 1e-4, "gamma": 0, "delta": 0},
    {"algo": "gclip", "eta": 1e-3, "gamma": 200, "delta": 0},
    {"algo": "d-gclip", "eta": 1e-3, "gamma": 200, "delta": 0.1},
]

# Every configuration is trained for the same number of epochs; `epoch` is the
# matching 1-based list of epoch indices.
epochs = 200
epoch = list(range(1, epochs + 1))

# Collected as [configuration, recorded curves] pairs, in run order.
experiment_results = []
for exp in experiments:
    data, model = experiment_run(epochs=epochs, **exp)
    print(data)
    experiment_results.append([exp, data])



---

