In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid, save_image

import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

%matplotlib inline

In [2]:
# Prefer the first CUDA device when one is visible; otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print('Running on device:', device)
if use_cuda:
    # Report the name of the GPU that is currently selected.
    print('Using GPU:',
          torch.cuda.get_device_name(torch.cuda.current_device()))

Running on device: cuda:0
Using GPU: NVIDIA TITAN RTX


In [3]:
# Dataset root directory. It can be overridden with the DEVNAGARI_DATA_DIR
# environment variable so the notebook is not tied to a single machine; the
# original location remains the default when the variable is unset.
root = os.environ.get('DEVNAGARI_DATA_DIR',
                      '/home/therock/data2/devnagari_data/')

# Experiment tag used as the prefix of the names derived from it below.
expr_name = 'devnagari_ann_vae'
# File name under which the model weights are stored.
model_name = expr_name + '_PyTorch_model.pt'

In [4]:
batch_size = 256
# each image in dataset is 32x32 pixels
image_dim = 32
learning_rate = 0.001
num_epochs = 100

train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    # Pass an explicit (height, width) pair: a bare int only rescales the
    # shorter edge and keeps the aspect ratio, so a non-square input would not
    # flatten to image_dim * image_dim values later on.
    transforms.Resize((image_dim, image_dim)),
    transforms.ToTensor(),
])


# ImageFolder reads one sub-directory per class under <root>/Train.
train_data = datasets.ImageFolder(os.path.join(root, 'Train'),
                                  transform=train_transform)

# Number of training samples.
train_data_len = len(train_data)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [5]:
# Directory (relative to the working directory) for the generated image grids.
decoded_out_dir = expr_name + '_decoded'
# exist_ok=True removes the check-then-create race of exists() + mkdir() and
# keeps the cell safe to re-run.
os.makedirs(decoded_out_dir, exist_ok=True)


def to_img(x, image_dim=32):
    """Reshape a batch of flattened images into single-channel image tensors.

    Args:
        x: tensor whose first dimension is the batch size and whose remaining
            elements per sample total ``image_dim * image_dim``.
        image_dim: side length of the square images. Defaults to 32, the value
            that was previously hard-coded, so existing one-argument calls
            behave exactly as before.

    Returns:
        A view of ``x`` with shape ``(x.size(0), 1, image_dim, image_dim)``.
    """
    return x.view(x.size(0), 1, image_dim, image_dim)

In [6]:
class VAEEncoder(nn.Module):
    """Fully connected encoder producing two latent vectors per input.

    Layer widths are fixed fractions of ``in_dim`` (truncated to int):
    80% -> 50% -> 30%. Two parallel linear heads (``mu`` and ``var``) map the
    last hidden layer to the latent width.
    """

    def __init__(self, in_dim=10):
        super().__init__()

        self.in_dim = in_dim
        self.fc1_dim, self.fc2_dim, self.latent_dim = (
            int(in_dim * fraction) for fraction in (0.80, 0.50, 0.30)
        )

        # Registration order (fc1, fc2, mu, var) fixes the state_dict layout.
        self.fc1 = nn.Linear(self.in_dim, self.fc1_dim)
        self.fc2 = nn.Linear(self.fc1_dim, self.fc2_dim)
        self.mu = nn.Linear(self.fc2_dim, self.latent_dim)
        self.var = nn.Linear(self.fc2_dim, self.latent_dim)

    def forward(self, x):
        """Return ``(mu_head_output, var_head_output)`` for input ``x``."""
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        return self.mu(hidden), self.var(hidden)


class VAEDecoder(nn.Module):
    """Fully connected decoder mapping a latent vector to ``in_dim`` outputs.

    Layer widths are fixed fractions of ``in_dim`` (truncated to int):
    30% (latent) -> 50% -> 80% -> 100%. The final activation is a sigmoid, so
    every output lies in [0, 1].
    """

    def __init__(self, in_dim=10):
        super().__init__()

        self.latent_dim, self.fc1_dim, self.fc2_dim = (
            int(in_dim * fraction) for fraction in (0.30, 0.50, 0.80)
        )
        self.out_dim = in_dim

        # Registration order (fc1, fc2, out) fixes the state_dict layout.
        self.fc1 = nn.Linear(self.latent_dim, self.fc1_dim)
        self.fc2 = nn.Linear(self.fc1_dim, self.fc2_dim)
        self.out = nn.Linear(self.fc2_dim, self.out_dim)

    def forward(self, x):
        """Decode latent batch ``x`` into sigmoid-activated outputs."""
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        return torch.sigmoid(self.out(hidden))


class VAE(nn.Module):
    """Variational auto-encoder gluing an encoder and a decoder together.

    The encoder must return a pair ``(latent_mu, latent_var)``. The second
    element is used as a log-variance: the standard deviation is computed as
    ``exp(latent_var / 2)``.
    """

    def __init__(self, encd, decd):
        super().__init__()

        self.encoder = encd
        self.decoder = decd

    def forward(self, x):
        """Encode ``x``, draw one latent sample, and decode it.

        Returns:
            Tuple ``(predicted, latent_mu, latent_var)``.
        """
        mu, log_var = self.encoder(x)

        # Reparameterisation: z = mu + sigma * noise, with noise ~ N(0, I).
        sigma = (log_var / 2).exp()
        noise = torch.randn_like(sigma)
        z = noise * sigma + mu

        return self.decoder(z), mu, log_var

In [7]:
# Length of one flattened image vector fed to the fully connected layers.
in_dim = image_dim ** 2

encoder = VAEEncoder(in_dim=in_dim)
decoder = VAEDecoder(in_dim=in_dim)
model = VAE(encoder, decoder)

# Moving to `device` is a no-op on a CPU-only machine, so no guard is needed.
model = model.to(device)
print(model)

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# NOTE(review): `criterion` is not referenced by the visible training cells.
criterion = nn.MSELoss()
# Running minimum of the training loss; starts at +inf so the first
# comparison against it always succeeds.
lowest_loss = float('inf')

VAE(
  (encoder): VAEEncoder(
    (fc1): Linear(in_features=1024, out_features=819, bias=True)
    (fc2): Linear(in_features=819, out_features=512, bias=True)
    (mu): Linear(in_features=512, out_features=307, bias=True)
    (var): Linear(in_features=512, out_features=307, bias=True)
  )
  (decoder): VAEDecoder(
    (fc1): Linear(in_features=307, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=819, bias=True)
    (out): Linear(in_features=819, out_features=1024, bias=True)
  )
)


In [8]:
def count_parameters(model):
    """Print the size of each trainable parameter tensor, then the total.

    One line is printed per parameter that has ``requires_grad`` set, as
    ``<index> : <element count>``, followed by a separator line and the sum.
    Nothing is returned.
    """
    total = 0
    index = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        count = param.numel()
        print(f'{index:2} : {count:}')
        total += count
        index += 1
    print(f'==========\n{total:>6}')


# Print the per-tensor and total trainable parameter counts of `model`.
count_parameters(model)

 0 : 838656
 1 : 819
 2 : 419328
 3 : 512
 4 : 157184
 5 : 307
 6 : 157184
 7 : 307
 8 : 157184
 9 : 512
10 : 419328
11 : 819
12 : 838656
13 : 1024
2991820


In [9]:


def train(image_dim, train_loader, model, e):
    """Run one training epoch of the VAE and return the summed loss.

    Besides its arguments the function relies on notebook globals:
    ``optimizer``, ``device``, ``decoded_out_dir``, ``model_name`` and
    ``lowest_loss`` (read and updated).

    Args:
        image_dim: side length of the square input images; every batch is
            flattened to ``image_dim * image_dim`` features per sample.
        train_loader: iterable yielding ``(images, labels)`` batches; the
            labels are ignored.
        model: module returning ``(reconstruction, latent_mu, latent_var)``.
        e: zero-based epoch index; a reconstruction preview is written when
            ``e % 10 == 0``.

    Returns:
        The sum over all batches of the (summed, not averaged) batch loss.
        The caller divides by the dataset size to get a per-sample value.

    Side effects:
        * Writes ``./<decoded_out_dir>/image_<e>.png`` once per preview epoch,
          holding the reconstruction of the last batch. Previously the same
          file was rewritten after every batch; the final content is the same.
        * Saves ``model.state_dict()`` to ``model_name`` whenever a batch
          reaches a new lowest per-sample loss. The comparison is per sample
          because the batch loss is a sum: a short final batch would otherwise
          look best regardless of model quality. ``lowest_loss`` is therefore
          stored on the per-sample scale.
    """
    global lowest_loss
    # set the train mode
    model.train()

    # loss of the epoch
    train_loss = 0

    save_preview = e % 10 == 0
    last_recon = None

    for x, _ in train_loader:
        # reshape the data into [batch_size, image_dim * image_dim]
        x = x.view(-1, image_dim * image_dim)
        x = x.to(device)

        # Clear gradients before the backward pass so nothing left over from
        # outside this function can leak into the first step.
        optimizer.zero_grad()

        # forward pass
        x_sample, latent_mu, latent_var = model(x)

        # reconstruction loss
        recon_loss = F.binary_cross_entropy(x_sample, x, reduction='sum')
        # kl divergence loss; latent_var is used as a log-variance here
        kl_loss = 0.5 * torch.sum(
            torch.exp(latent_var) + latent_mu**2 - 1.0 - latent_var)

        # total loss
        loss = recon_loss + kl_loss

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss

        if save_preview:
            # Keep only the most recent reconstruction; it is written once
            # after the loop.
            last_recon = x_sample.detach()

        per_sample_loss = batch_loss / x.size(0)
        if per_sample_loss < lowest_loss:
            lowest_loss = per_sample_loss
            torch.save(model.state_dict(), model_name)

    if last_recon is not None:
        # Reshape with the image_dim argument rather than a hard-coded size.
        pic = last_recon.cpu().view(-1, 1, image_dim, image_dim)
        save_image(pic, './{}/image_{}.png'.format(decoded_out_dir, e))

    return train_loss

In [10]:

# Train for num_epochs epochs and report the average loss per training sample.
for e in range(num_epochs):
    # train() returns the loss summed over the whole epoch; normalise it by
    # the dataset size before printing.
    train_loss = train(image_dim, train_loader, model, e) / len(train_data)
    print(f'Epoch {e}, Train Loss: {train_loss}')


Epoch 0, Train Loss: 441.86043563179345
Epoch 1, Train Loss: 385.55062005474747
Epoch 2, Train Loss: 339.82001113930625
Epoch 3, Train Loss: 307.2561562899616
Epoch 4, Train Loss: 287.12344104659525
Epoch 5, Train Loss: 274.1087774736253
Epoch 6, Train Loss: 265.43618648597345
Epoch 7, Train Loss: 259.62570959378996
Epoch 8, Train Loss: 255.52938831222028
Epoch 9, Train Loss: 252.05234045316496
Epoch 10, Train Loss: 248.64785965473146
Epoch 11, Train Loss: 246.40685641783887
Epoch 12, Train Loss: 244.47322355538682
Epoch 13, Train Loss: 242.96627729879316
Epoch 14, Train Loss: 241.57690102501599
Epoch 15, Train Loss: 240.3479739450128
Epoch 16, Train Loss: 239.16503946211637
Epoch 17, Train Loss: 237.9136608106218
Epoch 18, Train Loss: 237.05326389166402
Epoch 19, Train Loss: 236.24863298733217
Epoch 20, Train Loss: 235.65201179367807
Epoch 21, Train Loss: 234.93606530230977
Epoch 22, Train Loss: 234.15479115049553
Epoch 23, Train Loss: 233.3901244804987
Epoch 24, Train Loss: 232.64243

In [11]:
# Rebuild the network, restore the saved weights, and decode random latent
# vectors into image grids.
in_dim = image_dim * image_dim

encoder_test = VAEEncoder(in_dim=in_dim)
decoder_test = VAEDecoder(in_dim=in_dim)
# Read the latent width from the decoder so it cannot drift from the model.
latent_dim = decoder_test.latent_dim

model_test = VAE(encoder_test, decoder_test)
# load_state_dict must be CALLED. Assigning to it (as the cell did before)
# replaces the method and leaves the freshly initialised weights in place.
model_test.load_state_dict(torch.load(model_name, map_location=device))
model_test = model_test.to(device)
model_test.eval()

sample_batches = 20

# Sampling needs no gradients; no_grad avoids building autograd graphs.
with torch.no_grad():
    for sb_ in range(sample_batches):
        # Draw a whole batch of latent vectors from N(0, I) at once instead of
        # decoding one sample per iteration.
        z = torch.randn(batch_size, latent_dim, device=device)
        decoded_data = model_test.decoder(z).cpu().view(
            batch_size, 1, image_dim, image_dim)
        save_image(decoded_data,
                   './{}/image_decoded_{}.png'.format(decoded_out_dir, sb_))
    