Writing a Variational Autoencoder (VAE) in PyTorch
First, let's import all the libraries we need.

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [0]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Loading the data: let's use our favourite dataset, MNIST.

In [7]:
# `Compose` iterates over its argument when a sample is fetched, so it must be
# given a *list* of transforms. Passing a bare `ToTensor()` only fails later,
# with "object is not iterable", the first time a dataset item is accessed.
transformer = transforms.Compose([transforms.ToTensor()])

# Training and test splits of MNIST; both are downloaded into `.data/` if
# they are not already present, and every image is converted to a tensor.
train_data = datasets.MNIST(root='.data/', train=True, download=True, transform=transformer)
test_data = datasets.MNIST(root='.data/', train=False, download=True, transform=transformer)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to .data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:06, 1609323.87it/s]                             


Extracting .data/MNIST/raw/train-images-idx3-ubyte.gz to .data/MNIST/raw


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to .data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 136521.66it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting .data/MNIST/raw/train-labels-idx1-ubyte.gz to .data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to .data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2328553.93it/s]                           
0it [00:00, ?it/s]

Extracting .data/MNIST/raw/t10k-images-idx3-ubyte.gz to .data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to .data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 51434.58it/s]            

Extracting .data/MNIST/raw/t10k-labels-idx1-ubyte.gz to .data/MNIST/raw
Processing...
Done!





Initializing Data Set Iterators for easy and batched access

In [0]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 64

# The training loader reshuffles the data; the test loader keeps the
# DataLoader default of no shuffling.
train_iterator = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_iterator = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
)


Now let's define the hyperparameters of our VAE.
Since a VAE is essentially an autoencoder (an encoder paired with a decoder), let's define what our architecture will look like:

* size of each input
* hidden dimension
* latent vector dimension
* learning rate      

In [0]:
INPUT_DIM = 28 ** 2   # size of each input: a 28 x 28 image flattened to a vector
HIDDEN_DIM = 256      # hidden dimension
LATENT_DIM = 20       # latent vector dimension
lr = 0.001            # learning rate

In a VAE we have an encoder $q_\phi(z | x)$; let's define that first.

In [0]:
class Encoder(nn.Module):
    '''
        Encoder of the VAE.

        One ReLU-activated hidden layer feeds two separate linear heads,
        `mu` and `var`, each producing a `latent_dim`-sized vector.
        Note: `VAE.sampling` computes `exp(var / 2)` as the standard
        deviation, so the `var` head is consumed as a log-variance.
    '''

    def __init__(self, input_dim, hidden_dim, latent_dim):
        '''
            input_dim:  number of features of each input vector
            hidden_dim: width of the hidden layer
            latent_dim: size of each of the two output vectors
        '''
        super().__init__()

        # Layers are created in this order (linear, mu, var) so that seeded
        # initialisation and state_dict keys stay the same.
        self.linear = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.mu = nn.Linear(in_features=hidden_dim, out_features=latent_dim)
        self.var = nn.Linear(in_features=hidden_dim, out_features=latent_dim)

    def forward(self, x):
        '''
            Return the pair (output of the `mu` head, output of the `var` head)
            computed from the shared hidden representation of `x`.
        '''
        features = torch.relu(self.linear(x))
        return self.mu(features), self.var(features)


Now let's code the decoder $p_\theta(x | z)$, which takes a latent vector as input and gives a generated image as output.

In [0]:
class Decoder(nn.Module):
    '''
        Decoder of the VAE.

        Maps a latent vector through one ReLU-activated hidden layer and a
        sigmoid-activated output layer, so every output value lies in [0, 1].
    '''

    def __init__(self, latent_dim, hidden_dim, output_dim):
        '''
            latent_dim: size of the latent input vector
            hidden_dim: width of the hidden layer
            output_dim: number of features of the generated output
        '''
        super().__init__()
        self.linear = nn.Linear(in_features=latent_dim, out_features=hidden_dim)
        self.out = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        '''
            Decode the latent input `x` into an output of size `output_dim`.
        '''
        activated = torch.relu(self.linear(x))
        logits = self.out(activated)
        return logits.sigmoid()

Now that we have both our encoder and decoder, let's write the final architecture of our VAE.


In [0]:
class VAE(nn.Module):
    '''
        Variational autoencoder assembled from an encoder and a decoder.

        The encoder must return a pair `(mu, var)`; a latent sample is drawn
        from them with the reparameterisation trick and passed to the decoder.
    '''

    def __init__(self, enc, dec):
        '''
            enc: callable/module mapping an input to a `(mu, var)` pair
            dec: callable/module mapping a latent sample to a reconstruction
        '''
        # nn.Module must be initialised before sub-modules are assigned,
        # otherwise the attribute assignments below raise AttributeError.
        super().__init__()
        self.encoder = enc
        self.decoder = dec

    def sampling(self, mu, var):
        '''
            Draw `mu + eps * std` with `eps ~ N(0, I)`.

            `var` is interpreted as a log-variance: the standard deviation is
            computed as `exp(var / 2)`.
        '''
        std = torch.exp(var / 2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        '''
            Encode `x`, sample a latent vector and decode it.

            Returns the tuple `(prediction, mu_z, var_z)`, where `mu_z` and
            `var_z` are the encoder outputs.
        '''
        mu_z, var_z = self.encoder(x)

        # `sampling` is a method, so it has to be called through `self`.
        x_sample = self.sampling(mu_z, var_z)

        prediction = self.decoder(x_sample)

        return prediction, mu_z, var_z
        