In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid, save_image

import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

%matplotlib inline

In [2]:
# Pick the compute device: the first CUDA GPU when one is visible, else the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print('Running on device:', device)
if use_cuda:
    # Report the name of the currently selected GPU.
    print('Using GPU:', torch.cuda.get_device_name(torch.cuda.current_device()))

Running on device: cuda:0
Using GPU: NVIDIA TITAN RTX


In [3]:
# Dataset root directory. Set the DEVNAGARI_DATA_ROOT environment variable to
# point the notebook at another location without editing it; when the variable
# is unset the original hard-coded path is used.
root = os.environ.get('DEVNAGARI_DATA_ROOT',
                      '/home/therock/data2/devnagari_data/')

# Experiment tag from which output names are derived.
expr_name = 'devnagari_ann_ae'
# File name used for the saved model weights.
model_name = expr_name + '_PyTorch_model.pt'

In [4]:
# Training hyper-parameters.
batch_size = 256
# each image in dataset is 32x32 pixels
image_dim = 32
learning_rate = 0.001
num_epochs = 100

# Turn every image into a single-channel tensor of exactly
# image_dim x image_dim pixels. Resize is given an explicit (height, width)
# pair: a bare int only rescales the shorter edge, which would not guarantee
# the image_dim * image_dim feature count the fully connected model expects.
train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((image_dim, image_dim)),
    transforms.ToTensor(),
])


# ImageFolder derives one class per sub-directory of <root>/Train.
train_data = datasets.ImageFolder(os.path.join(root, 'Train'),
                                  transform=train_transform)
train_data_len = len(train_data)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
class_names = train_data.classes
num_of_classes = len(class_names)

print(f'Training images available: {train_data_len}')


Training images available: 78200


In [5]:
# Directory that receives the saved image grids.
decoded_out_dir = expr_name + '_devnagari_decoded'
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the mkdir, and the cell can be re-run safely.
os.makedirs(decoded_out_dir, exist_ok=True)


def to_img(x):
    """Reshape a batch of flattened images into image layout.

    Returns a view of ``x`` with shape ``(x.size(0), 1, image_dim, image_dim)``,
    where ``image_dim`` is the module-level image side length.
    """
    batch = x.size(0)
    side = image_dim
    return x.view(batch, 1, side, side)

class Encoder(nn.Module):
    """Fully connected encoder half of the autoencoder.

    A flattened ``image_dim * image_dim`` input passes through five
    ``Linear`` + in-place ``ReLU`` stages whose output widths are 90%, 80%,
    60%, 40% and 30% of the input size (each truncated to an int).
    """

    def __init__(self, image_dim=32):
        super().__init__()
        in_dim = image_dim * image_dim
        # Output width of each stage as a fraction of the flattened input.
        fractions = (0.90, 0.80, 0.60, 0.40, 0.30)
        widths = [in_dim] + [int(in_dim * frac) for frac in fractions]

        # Consecutive width pairs become Linear layers, each followed by ReLU.
        stages = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Linear(n_in, n_out))
            stages.append(nn.ReLU(True))
        self.encoder = nn.Sequential(*stages)

    def forward(self, x):
        """Return the encoded representation of ``x``."""
        return self.encoder(x)


class Decoder(nn.Module):
    """Fully connected decoder half of the autoencoder.

    Expands a code of width ``int(0.30 * image_dim**2)`` through widths of
    40%, 60%, 80% and 90% of the flattened image size (each truncated to an
    int) back to ``image_dim * image_dim`` values. Hidden stages use in-place
    ``ReLU``; the final stage uses ``Tanh``.

    NOTE(review): ``Tanh`` bounds the output to [-1, 1] -- confirm that this
    matches the value range of the reconstruction targets.
    """

    def __init__(self, image_dim=32):
        super().__init__()
        in_dim = image_dim * image_dim
        # Layer widths from the code size up to the full flattened image.
        fractions = (0.30, 0.40, 0.60, 0.80, 0.90)
        widths = [int(in_dim * frac) for frac in fractions] + [in_dim]

        stages = []
        final_idx = len(widths) - 2
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:])):
            stages.append(nn.Linear(n_in, n_out))
            # Only the last Linear layer is followed by Tanh.
            stages.append(nn.Tanh() if idx == final_idx else nn.ReLU(True))
        self.decoder = nn.Sequential(*stages)

    def forward(self, x):
        """Return the decoded (flattened) output for the code ``x``."""
        return self.decoder(x)


class AutoEncoder(nn.Module):
    """Chains an encoder module and a decoder module into a single model.

    Args:
        encd: module applied first to the input.
        decd: module applied to the encoder's output.
    """

    def __init__(self, encd, decd):
        super().__init__()
        self.encoder, self.decoder = encd, decd

    def forward(self, x):
        """Encode ``x`` and return the decoder's output for that code."""
        return self.decoder(self.encoder(x))


In [6]:
# Build the autoencoder from freshly initialised encoder/decoder halves.
encoder = Encoder(image_dim=image_dim)
decoder = Decoder(image_dim=image_dim)

model = AutoEncoder(encoder, decoder)
# Place the model on `device`, the same object the input batches are moved to,
# so model and data can never end up on different devices (a bare .cuda()
# targets whatever the *current* CUDA device is). On CPU this is a no-op.
model = model.to(device)
print(model)

# Mean squared error serves as the reconstruction loss; Adam optimises every
# parameter of both halves.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate)

AutoEncoder(
  (encoder): Encoder(
    (encoder): Sequential(
      (0): Linear(in_features=1024, out_features=921, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=921, out_features=819, bias=True)
      (3): ReLU(inplace=True)
      (4): Linear(in_features=819, out_features=614, bias=True)
      (5): ReLU(inplace=True)
      (6): Linear(in_features=614, out_features=409, bias=True)
      (7): ReLU(inplace=True)
      (8): Linear(in_features=409, out_features=307, bias=True)
      (9): ReLU(inplace=True)
    )
  )
  (decoder): Decoder(
    (decoder): Sequential(
      (0): Linear(in_features=307, out_features=409, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=409, out_features=614, bias=True)
      (3): ReLU(inplace=True)
      (4): Linear(in_features=614, out_features=819, bias=True)
      (5): ReLU(inplace=True)
      (6): Linear(in_features=819, out_features=921, bias=True)
      (7): ReLU(inplace=True)
      (8): Linear(in_features=92

In [7]:
def count_parameters(model):
    """Print the element count of every trainable parameter tensor.

    One line is printed per parameter that has ``requires_grad`` set, in the
    order given by ``model.parameters()``, followed by a separator line and
    the total. Nothing is returned.
    """
    sizes = []
    for tensor in model.parameters():
        if tensor.requires_grad:
            sizes.append(tensor.numel())

    for idx in range(len(sizes)):
        print(f'{idx:2} : {sizes[idx]}')

    total = sum(sizes)
    print('==========')
    print(f'{total:>6}')


# Print the per-tensor and total trainable parameter counts of the autoencoder.
count_parameters(model)

 0 : 943104
 1 : 921
 2 : 754299
 3 : 819
 4 : 502866
 5 : 614
 6 : 251126
 7 : 409
 8 : 125563
 9 : 307
10 : 125563
11 : 409
12 : 251126
13 : 614
14 : 502866
15 : 819
16 : 754299
17 : 921
18 : 943104
19 : 1024
5160773


In [8]:
# Train the autoencoder to reproduce its own input.
#
# The best checkpoint is selected on the sample-weighted mean loss of a whole
# epoch instead of on a single mini-batch: one batch's loss is noisy, it is
# measured *before* the optimizer step that produced the weights being saved,
# and comparing it on every batch can write the checkpoint to disk many times
# per epoch.
lowest_loss = float("inf")
model.train()
for epoch in range(num_epochs):

    epoch_loss_sum = 0.0
    epoch_samples = 0

    for b, (X_train, _labels) in enumerate(train_loader, start=1):

        # Flatten every image to a vector; the class labels are not used.
        X_train = X_train.reshape(X_train.size(0), -1).to(device)
        n_samples = X_train.size(0)

        # Apply the model
        output = model(X_train)
        loss = criterion(output, X_train)  # check loss with X_train itself

        # Update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate weighted by batch size so a short final batch does not
        # skew the epoch mean.
        batch_loss = loss.item()
        epoch_loss_sum += batch_loss * n_samples
        epoch_samples += n_samples

        if b % 100 == 0:
            # Clamp so the progress counter never exceeds the dataset size.
            seen = min(batch_size * b, train_data_len)
            print(
                f'epoch: {epoch:2} batch: {b:4} [{seen:6}/{train_data_len}]  '
                + f'loss: {batch_loss:.8f}')

    if epoch_samples == 0:
        raise RuntimeError('train_loader yielded no batches')

    # Keep the weights of the epoch with the lowest mean reconstruction loss.
    epoch_loss = epoch_loss_sum / epoch_samples
    if epoch_loss < lowest_loss:
        lowest_loss = epoch_loss
        torch.save(model.state_dict(), model_name)

    if epoch % 10 == 0:
        # Save the reconstructions of the epoch's last batch as an image grid.
        pic = to_img(output.detach().cpu())
        save_image(pic, './{}/image_{}.png'.format(decoded_out_dir, epoch))



epoch:  0 batch:  100 [ 25600/78200]  loss: 0.11276305
epoch:  0 batch:  200 [ 51200/78200]  loss: 0.10967003
epoch:  0 batch:  300 [ 76800/78200]  loss: 0.10803904
epoch:  1 batch:  100 [ 25600/78200]  loss: 0.10070109
epoch:  1 batch:  200 [ 51200/78200]  loss: 0.09205042
epoch:  1 batch:  300 [ 76800/78200]  loss: 0.08648089
epoch:  2 batch:  100 [ 25600/78200]  loss: 0.08245252
epoch:  2 batch:  200 [ 51200/78200]  loss: 0.07728819
epoch:  2 batch:  300 [ 76800/78200]  loss: 0.07226260
epoch:  3 batch:  100 [ 25600/78200]  loss: 0.07143919
epoch:  3 batch:  200 [ 51200/78200]  loss: 0.06807701
epoch:  3 batch:  300 [ 76800/78200]  loss: 0.06620750
epoch:  4 batch:  100 [ 25600/78200]  loss: 0.06423061
epoch:  4 batch:  200 [ 51200/78200]  loss: 0.06100991
epoch:  4 batch:  300 [ 76800/78200]  loss: 0.05948922
epoch:  5 batch:  100 [ 25600/78200]  loss: 0.05794276
epoch:  5 batch:  200 [ 51200/78200]  loss: 0.05698378
epoch:  5 batch:  300 [ 76800/78200]  loss: 0.05547250
epoch:  6 

epoch: 49 batch:  300 [ 76800/78200]  loss: 0.02240000
epoch: 50 batch:  100 [ 25600/78200]  loss: 0.02175862
epoch: 50 batch:  200 [ 51200/78200]  loss: 0.02214536
epoch: 50 batch:  300 [ 76800/78200]  loss: 0.02152604
epoch: 51 batch:  100 [ 25600/78200]  loss: 0.02122615
epoch: 51 batch:  200 [ 51200/78200]  loss: 0.02139453
epoch: 51 batch:  300 [ 76800/78200]  loss: 0.02221837
epoch: 52 batch:  100 [ 25600/78200]  loss: 0.02081285
epoch: 52 batch:  200 [ 51200/78200]  loss: 0.02162021
epoch: 52 batch:  300 [ 76800/78200]  loss: 0.02214222
epoch: 53 batch:  100 [ 25600/78200]  loss: 0.02065838
epoch: 53 batch:  200 [ 51200/78200]  loss: 0.02097954
epoch: 53 batch:  300 [ 76800/78200]  loss: 0.02156075
epoch: 54 batch:  100 [ 25600/78200]  loss: 0.02096589
epoch: 54 batch:  200 [ 51200/78200]  loss: 0.02084119
epoch: 54 batch:  300 [ 76800/78200]  loss: 0.02255558
epoch: 55 batch:  100 [ 25600/78200]  loss: 0.02034922
epoch: 55 batch:  200 [ 51200/78200]  loss: 0.02135447
epoch: 55 

epoch: 99 batch:  200 [ 51200/78200]  loss: 0.01659441
epoch: 99 batch:  300 [ 76800/78200]  loss: 0.01806757


In [12]:
# Generate images by decoding random latent vectors with the saved weights.
in_dim = image_dim * image_dim
# Width of the latent code: must match the decoder's first Linear layer.
latent_dim = int(in_dim * 0.30)

sample_batches = 20

encoder_test = Encoder(image_dim=image_dim)
decoder_test = Decoder(image_dim=image_dim)

model_test = AutoEncoder(encoder_test, decoder_test)
# load_state_dict must be *called*. Assigning to it (as the previous code did)
# only overwrites the method and leaves the model with its random
# initialisation. map_location lets a checkpoint saved on GPU load on CPU.
model_test.load_state_dict(torch.load(model_name, map_location=device))
model_test = model_test.to(device)
model_test.eval()

# No gradients are needed for sampling.
with torch.no_grad():
    for sb_ in range(sample_batches):
        # One latent vector per image; the whole batch is decoded in one pass.
        z = torch.randn(batch_size, latent_dim).to(device)
        decoded_data = model_test.decoder(z).cpu()

        pic = to_img(decoded_data)
        save_image(pic, './{}/image_decoded_{}.png'.format(decoded_out_dir, sb_))
    