In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid, save_image

import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

%matplotlib inline

In [14]:
# Select the compute device: the first CUDA GPU when one is available,
# otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
print('Running on device:', device)
if use_cuda:
    # Report the name of the currently selected GPU.
    active_gpu = torch.cuda.current_device()
    print('Using GPU:', torch.cuda.get_device_name(active_gpu))

Running on device: cuda:0
Using GPU: NVIDIA TITAN RTX


In [15]:
# Dataset root directory. Set the DEVNAGARI_DATA_ROOT environment variable to
# point at the data on another machine; the fallback is the original location.
root = os.environ.get('DEVNAGARI_DATA_ROOT',
                      '/home/therock/data2/devnagari_data/')

# Experiment name, used as the prefix of the files this notebook writes.
expr_name = 'devnagari_cnn_ae'
# File name under which the model state_dict is saved.
model_name = expr_name + '_PyTorch_model.pt'

In [16]:
# Hyperparameters.
batch_size = 256
image_dim = 32  # side length in pixels; each image in the dataset is 32x32
learning_rate = 0.001
num_epochs = 100

# Preprocessing pipeline: single-channel grayscale -> resize -> tensor.
train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(image_dim),
    transforms.ToTensor(),
])

# Training images are read from the 'Train' sub-folder of the dataset root.
train_dir = os.path.join(root, 'Train')
train_data = datasets.ImageFolder(train_dir, transform=train_transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

train_data_len = len(train_data)
class_names = train_data.classes
num_of_classes = len(class_names)

print(f'Training images available: {train_data_len}')

Training images available: 78200


In [17]:
# Directory that receives the image grids written by this notebook.
decoded_out_dir = expr_name + '_decoded'
# exist_ok=True replaces the racy "check, then mkdir" pattern and keeps the
# cell safe to re-run.
os.makedirs(decoded_out_dir, exist_ok=True)


def to_img(x, size=32):
    """Reshape a batch tensor to single-channel square images.

    Args:
        x: Tensor whose first dimension is the batch and whose remaining
            elements total ``size * size`` per sample.
        size: Side length of the square image. Defaults to 32, the value that
            used to be hard-coded here.

    Returns:
        A view of ``x`` with shape ``(x.size(0), 1, size, size)``.
    """
    return x.view(x.size(0), 1, size, size)

In [18]:
def calc_conv_out(n=1, p=1, f=1, s=1):
    """Output size, along one spatial dimension, of a conv or pooling layer.

    Args:
        n: Input size along the dimension.
        p: Padding added on each side.
        f: Kernel (filter) size.
        s: Stride.

    Returns:
        ``(n + 2*p - f) / s + 1`` truncated to an ``int``.
    """
    padded_span = n + 2 * p - f
    strided = padded_span / s
    return int(strided + 1)

def calc_deconv_out(n=1, p=1512, f=1, s=1, output_padding=0):
    """Output size, along one spatial dimension, of a transposed convolution.

    Args:
        n: Input size along the dimension.
        p: Padding.
        f: Kernel (filter) size.
        s: Stride.
        output_padding: Extra size added to one side of the output, as in
            ``nn.ConvTranspose2d(output_padding=...)``. Defaults to 0, which
            reproduces the previous result.

    Returns:
        ``s * (n - 1) + f - 2*p + output_padding`` as an ``int``
        (dilation is assumed to be 1).
    """
    # NOTE(review): the default p=1512 looks like a typo (every call in this
    # notebook passes p explicitly). It is kept unchanged so the signature
    # stays backward-compatible -- confirm and fix separately.
    return int(s * (n - 1) + f - 2 * p + output_padding)

In [19]:
# Trace the spatial size of a 32-pixel side through the conv / max-pool stages.
conv1 = calc_conv_out(n=32, f=3, s=1, p=0)
mp1 = calc_conv_out(n=conv1, f=2, s=2, p=0)
conv2 = calc_conv_out(n=mp1, f=5, s=1, p=0)
mp2 = calc_conv_out(n=conv2, f=2, s=2, p=0)

# Then back up through three transposed-convolution stages.
# Note: these calls do not account for output_padding, which the last
# ConvTranspose2d layer of CNNDecoder sets to 1.
deconv1 = calc_deconv_out(n=mp2, f=3, s=1, p=0)
deconv2 = calc_deconv_out(n=deconv1, f=3, s=2, p=0)
deconv3 = calc_deconv_out(n=deconv2, f=3, s=2, p=0)

stage_sizes = (
    ('1:', conv1),
    ('2:', mp1),
    ('3:', conv2),
    ('4:', mp2),
    ('de 5:', deconv1),
    ('de 6:', deconv2),
    ('de 7:', deconv3),
)
for stage_label, stage_size in stage_sizes:
    print(stage_label, stage_size)


1: 30
2: 15
3: 11
4: 5
de 5: 7
de 6: 15
de 7: 31


In [20]:
class CNNEncoder(nn.Module):
    """Convolutional encoder that maps an image batch to latent vectors.

    Two conv -> ReLU -> max-pool stages feed a linear projection. The linear
    layer expects 8 feature maps of 5x5, which is what a 1x32x32 input
    produces (shape comments below are for that input size).

    Args:
        latent_dim: Size of the latent vector produced per sample.
    """

    def __init__(self, latent_dim=64):
        super().__init__()

        conv_stack = [
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=0),  # b, 16, 30, 30
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                 # b, 16, 15, 15
            nn.Conv2d(16, 8, kernel_size=5, stride=1, padding=0),  # b, 8, 11, 11
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                 # b, 8, 5, 5
        ]
        self.encoder = nn.Sequential(*conv_stack)

        # Projects the flattened 8x5x5 feature maps to the latent space.
        self.l = nn.Linear(8 * 5 * 5, latent_dim)

    def forward(self, x):
        """Encode ``x`` and return a ``(batch, latent_dim)`` tensor."""
        features = self.encoder(x)
        flattened = features.view(features.size(0), -1)
        return self.l(flattened)


class CNNDecoder(nn.Module):
    """Decoder that maps latent vectors back to 1x32x32 images.

    A linear layer expands the latent vector to 8 feature maps of 5x5; three
    transposed convolutions then upsample to a single 32x32 channel, followed
    by ``Tanh``.

    Args:
        latent_dim: Size of the latent vector accepted per sample.
    """

    def __init__(self, latent_dim=64):
        super().__init__()

        # Expands the latent vector to the 8x5x5 feature volume.
        self.l = nn.Linear(latent_dim, 8 * 5 * 5)

        upsample_stack = [
            nn.ConvTranspose2d(8, 4, kernel_size=3, stride=1, padding=0),  # b, 4, 7, 7
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(4, 2, kernel_size=3, stride=2, padding=0),  # b, 2, 15, 15
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(2, 1, kernel_size=3, stride=2, padding=0,
                               output_padding=1),                          # b, 1, 32, 32
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*upsample_stack)

    def forward(self, x):
        """Decode ``x`` of shape ``(batch, latent_dim)`` to ``(batch, 1, 32, 32)``."""
        expanded = self.l(x)
        feature_maps = expanded.view(expanded.size(0), 8, 5, 5)
        return self.decoder(feature_maps)


class CNNAutoencoder(nn.Module):
    """Autoencoder that chains an encoder module and a decoder module.

    Args:
        encd: Module used as the encoder; stored as ``self.encoder``.
        decd: Module used as the decoder; stored as ``self.decoder``.
        latent_dim: Accepted for signature compatibility; not used here.
    """

    def __init__(self, encd, decd, latent_dim=64):
        super().__init__()
        self.encoder = encd
        self.decoder = decd

    def forward(self, x):
        """Return ``decoder(encoder(x))``."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [21]:
# Size of the latent vector shared by the encoder and the decoder.
latent_dim = 256

encoder = CNNEncoder(latent_dim=latent_dim)
decoder = CNNDecoder(latent_dim=latent_dim)

# Moving to `device` is a no-op when it is the CPU, so no CUDA guard is needed.
model = CNNAutoencoder(encoder, decoder, latent_dim=latent_dim).to(device)

# Mean-squared-error loss, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [22]:
def count_parameters(model):
    """Print the element count of each trainable parameter tensor and the total.

    Only parameters with ``requires_grad`` set are listed. One line is printed
    per tensor (index and count), followed by a separator and the sum.

    Args:
        model: Object exposing ``parameters()``, e.g. an ``nn.Module``.
    """
    total = 0
    index = 0
    for tensor in model.parameters():
        if not tensor.requires_grad:
            continue
        count = tensor.numel()
        print(f'{index:2} : {count:}')
        total += count
        index += 1
    print(f'==========\n{total:>6}')


# Print the per-tensor and total trainable parameter counts of the autoencoder.
count_parameters(model)

 0 : 144
 1 : 16
 2 : 3200
 3 : 8
 4 : 51200
 5 : 256
 6 : 51200
 7 : 200
 8 : 288
 9 : 4
10 : 72
11 : 2
12 : 18
13 : 1
106609


In [23]:
# Train the autoencoder to reconstruct its own input.
#
# The state_dict is saved to `model_name` whenever the sample-weighted mean
# loss of an epoch improves on the best seen so far. Checkpointing once per
# epoch avoids keying the saved model on a single noisy minibatch loss and
# avoids writing the file from inside the batch loop.
lowest_loss = float("Inf")
for epoch in range(num_epochs):

    epoch_loss_sum = 0.0
    samples_seen = 0

    # y_train (the class label) is not used: the target is the input itself.
    for b, (X_train, y_train) in enumerate(train_loader, start=1):

        X_train = X_train.to(device)

        # Apply the model
        output = model(X_train)

        loss = criterion(output, X_train)  # check loss with X_train itself

        # Update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once; on a GPU each .item() call synchronises
        # the host with the device.
        batch_loss = loss.item()
        batch_samples = X_train.size(0)
        epoch_loss_sum += batch_loss * batch_samples
        samples_seen += batch_samples

        if b % 100 == 0:
            print(
                f'epoch: {epoch:2} batch: {b:4} [{batch_size*b:6}/{train_data_len}]  '
                + f'loss: {batch_loss:.8f}')

    if samples_seen == 0:
        raise RuntimeError('train_loader yielded no batches; nothing to train on')

    # Keep the weights of the best epoch (by mean training loss) on disk.
    epoch_loss = epoch_loss_sum / samples_seen
    if epoch_loss < lowest_loss:
        torch.save(model.state_dict(), model_name)
        lowest_loss = epoch_loss

    # Every 10th epoch, save the reconstructions of the last batch as an image.
    if epoch % 10 == 0:
        pic = to_img(output.detach().cpu())
        save_image(pic, './{}/image_{}.png'.format(decoded_out_dir, epoch))



epoch:  0 batch:  100 [ 25600/78200]  loss: 0.15014136
epoch:  0 batch:  200 [ 51200/78200]  loss: 0.05642484
epoch:  0 batch:  300 [ 76800/78200]  loss: 0.04489509
epoch:  1 batch:  100 [ 25600/78200]  loss: 0.03840628
epoch:  1 batch:  200 [ 51200/78200]  loss: 0.03448364
epoch:  1 batch:  300 [ 76800/78200]  loss: 0.03077650
epoch:  2 batch:  100 [ 25600/78200]  loss: 0.02967726
epoch:  2 batch:  200 [ 51200/78200]  loss: 0.02822217
epoch:  2 batch:  300 [ 76800/78200]  loss: 0.02762332
epoch:  3 batch:  100 [ 25600/78200]  loss: 0.02659765
epoch:  3 batch:  200 [ 51200/78200]  loss: 0.02664174
epoch:  3 batch:  300 [ 76800/78200]  loss: 0.02561073
epoch:  4 batch:  100 [ 25600/78200]  loss: 0.02554200
epoch:  4 batch:  200 [ 51200/78200]  loss: 0.02487514
epoch:  4 batch:  300 [ 76800/78200]  loss: 0.02328489
epoch:  5 batch:  100 [ 25600/78200]  loss: 0.02338807
epoch:  5 batch:  200 [ 51200/78200]  loss: 0.02256924
epoch:  5 batch:  300 [ 76800/78200]  loss: 0.02318635
epoch:  6 

epoch: 49 batch:  300 [ 76800/78200]  loss: 0.01621786
epoch: 50 batch:  100 [ 25600/78200]  loss: 0.01666618
epoch: 50 batch:  200 [ 51200/78200]  loss: 0.01626888
epoch: 50 batch:  300 [ 76800/78200]  loss: 0.01598421
epoch: 51 batch:  100 [ 25600/78200]  loss: 0.01707932
epoch: 51 batch:  200 [ 51200/78200]  loss: 0.01675444
epoch: 51 batch:  300 [ 76800/78200]  loss: 0.01676785
epoch: 52 batch:  100 [ 25600/78200]  loss: 0.01757481
epoch: 52 batch:  200 [ 51200/78200]  loss: 0.01620903
epoch: 52 batch:  300 [ 76800/78200]  loss: 0.01660142
epoch: 53 batch:  100 [ 25600/78200]  loss: 0.01720817
epoch: 53 batch:  200 [ 51200/78200]  loss: 0.01657968
epoch: 53 batch:  300 [ 76800/78200]  loss: 0.01634727
epoch: 54 batch:  100 [ 25600/78200]  loss: 0.01675836
epoch: 54 batch:  200 [ 51200/78200]  loss: 0.01606390
epoch: 54 batch:  300 [ 76800/78200]  loss: 0.01763296
epoch: 55 batch:  100 [ 25600/78200]  loss: 0.01711484
epoch: 55 batch:  200 [ 51200/78200]  loss: 0.01663565
epoch: 55 

epoch: 99 batch:  200 [ 51200/78200]  loss: 0.01597271
epoch: 99 batch:  300 [ 76800/78200]  loss: 0.01582485


In [25]:
# Generate images by decoding random latent vectors with the saved model.
in_dim = image_dim * image_dim

# Number of image grids to write; each grid holds `batch_size` samples.
sample_batches = 20

encoder_test = CNNEncoder(latent_dim=latent_dim)
decoder_test = CNNDecoder(latent_dim=latent_dim)

model_test = CNNAutoencoder(encoder_test, decoder_test, latent_dim=latent_dim)
# load_state_dict must be *called*. Assigning to it (as this cell did before)
# replaces the method with a dict and leaves the network at its random
# initialisation. map_location lets a GPU-saved checkpoint load on CPU too.
model_test.load_state_dict(torch.load(model_name, map_location=device))
model_test = model_test.to(device)
model_test.eval()

# No gradients are needed for sampling.
with torch.no_grad():
    for sb_ in range(sample_batches):
        # One standard-normal latent vector per image, decoded as one batch.
        z = torch.randn(batch_size, latent_dim, device=device)
        decoded_data = model_test.decoder(z).cpu()

        pic = to_img(decoded_data)
        save_image(pic, './{}/image_decoded_{}.png'.format(decoded_out_dir, sb_))
    