# Build a Convolutional VAE and train it on Cifar10

As we already mentioned during the lesson, here you are on your own. You will have to build everything from scratch. Your goal is to build a convolutional VAE of your own structure and train it on CIFAR10. Hyperparameters, optimizers, losses are also your own choice. 

Of course you can and should borrow stuff from the lessons and previous Jupyter notebooks.This is especially true for the data loading functionalities. You should use our custom CIFAR10 dataloader, as we did in the previous exercises.

As always, feel free to advice the Pytorch documentation (links in the Appendix). Please make sure to have a working code before you leave. Good luck.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
from torchvision import utils
from torchvision import transforms

In [3]:
from torchvision.datasets import CIFAR10

In [4]:
# Commenting out for local run. This cell is required for executing the notebook in the course website's environment.
# import os
# import sys
# cwd = os.getcwd()
#add CIFAR10 data in the environment
# sys.path.append(cwd + '/../cifar10') 
# from Cifar10Dataloader import CIFAR10

In [5]:
batch_size = 4

In [6]:
def load_data():
    #convert the images to tensor and normalized them
    transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

    trainset = CIFAR10(root='~/data/cifar10',  download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=False, num_workers=1)
    return trainloader

In [7]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.features =16
        # encoder
        self.enc1 = nn.Linear(in_features=3072, out_features=128)
        self.enc2 = nn.Linear(in_features=128, out_features=self.features * 2)

        # decoder
        self.dec1 = nn.Linear(in_features=self.features, out_features=128)
        self.dec2 = nn.Linear(in_features=128, out_features=3072)

    def forward(self, x):
        # encoding
        x = F.relu(self.enc1(x))
        x = self.enc2(x).view(-1, 2, self.features)
        # get `mu` and `log_var`
        mu = x[:, 0, :]  # the first feature values as mean
        log_var = x[:, 1, :]  # the other feature values as variance
        # get the latent vector through reparameterization
        z = self.reparameterize(mu, log_var)

        # decoding
        x = F.relu(self.dec1(z))
        # reconstruction = torch.sigmoid(self.dec2(x))
        reconstruction = self.dec2(x)
        return reconstruction, mu, log_var

    def reparameterize(self, mu, log_var):
        """
        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        """
        std = torch.exp(0.5 * log_var)  # standard deviation
        eps = torch.randn_like(std)  # generate sample of the same size
        sample = mu + (eps * std)  # sampling as if coming from the input space
        return sample

In [8]:
def final_loss(recon_loss, mu, logvar):
    """
    This function will add the reconstruction loss (BCELoss) and the
    KL-Divergence.
    KL-Divergence = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    :param recon_loss: recontruction loss
    :param mu: the mean from the latent vector
    :param logvar: log variance from the latent vector
    """
    RL = recon_loss
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return RL + KLD

In [9]:
def train(model,training_data):

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss(reduction="sum")

    running_loss = 0.0

    for epoch in range(2):  # loop over the dataset multiple times

        for i, data in enumerate(training_data, 0):
            inputs, _ = data
            inputs = inputs.view(inputs.size(0), -1)
            # print(f"min(inputs): {torch.min(input=inputs)} :: max(inputs): {torch.max(input=inputs)}")

            optimizer.zero_grad()
            reconstruction, mu, logvar = model(inputs)
            # print(f"min(reconstruction): {torch.min(input=reconstruction)} :: max(reconstruction): {torch.max(input=reconstruction)}")
            recon_loss = criterion(reconstruction, inputs)
            loss = final_loss(recon_loss, mu, logvar)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 500 == 499:  # print every 500 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 500))
                running_loss = 0.0

    PATH = '../output/cifar_net.pth'
    torch.save(model.state_dict(), PATH)

    print('Finished Training')

In [10]:
def main():
    trainloader = load_data()
    model = VAE()
    train(model=model, training_data=trainloader)

In [11]:
main()

Files already downloaded and verified
[1,   500] loss: 3729.379
[1,  1000] loss: 2751.245
[1,  1500] loss: 2558.947
[1,  2000] loss: 2408.045
[1,  2500] loss: 2369.778
[1,  3000] loss: 2319.809
[1,  3500] loss: 2316.240
[1,  4000] loss: 2320.323
[1,  4500] loss: 2324.318
[1,  5000] loss: 2287.615
[1,  5500] loss: 2266.271
[1,  6000] loss: 2265.530
[1,  6500] loss: 2270.950
[1,  7000] loss: 2244.749
[1,  7500] loss: 2301.104
[1,  8000] loss: 2267.449
[1,  8500] loss: 2182.348
[1,  9000] loss: 2211.504
[1,  9500] loss: 2279.092
[1, 10000] loss: 2169.204
[1, 10500] loss: 2218.209
[1, 11000] loss: 2207.201
[1, 11500] loss: 2198.290
[1, 12000] loss: 2174.549
[1, 12500] loss: 2206.873
[2,   500] loss: 2213.193
[2,  1000] loss: 2211.459
[2,  1500] loss: 2215.658
[2,  2000] loss: 2177.216
[2,  2500] loss: 2185.665
[2,  3000] loss: 2173.165
[2,  3500] loss: 2194.527
[2,  4000] loss: 2188.716
[2,  4500] loss: 2218.004
[2,  5000] loss: 2177.760
[2,  5500] loss: 2158.639
[2,  6000] loss: 2189.698
