In [0]:
mini_batch_size = 128
first_channel = 64

In [0]:
from torchvision.datasets import utils
import torch.utils.data as data_utils
import torch
import os
import numpy as np
from torch import nn
from torch.distributions.normal import Normal
import torch.distributions as d
import math
from torch.nn.modules import upsampling
from torch.functional import F
from torch.optim import Adam

import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import dataset
from torch.autograd import Variable
from torchvision.utils import save_image
import matplotlib.pyplot as plt
from urllib.request import urlretrieve
from os.path import isfile, isdir
import tqdm
%matplotlib inline

from urllib.request import urlretrieve
from os.path import isfile, isdir

In [0]:
data_dir = '.'

if not isdir(data_dir):
    raise Exception("Data directory doesn't exist!")

class DLProgress(tqdm.tqdm):
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

if not isfile(data_dir + "train_32x32.mat"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
        urlretrieve(
            'http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
            data_dir + 'train_32x32.mat',
            pbar.hook)

if not isfile(data_dir + "test_32x32.mat"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Testing Set') as pbar:
        urlretrieve(
            'http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
            data_dir + 'test_32x32.mat',
            pbar.hook)
        

def get_data_loader(dataset_location, batch_size):

    transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((.5, .5, .5), (.5, .5, .5))])


    trainvalid = torchvision.datasets.SVHN(
        dataset_location, split='train',
        download=True,
        transform=transform
    )

    trainset_size = int(len(trainvalid) * 0.9)
    trainset, validset = dataset.random_split(
        trainvalid,
        [trainset_size, len(trainvalid) - trainset_size]
    )

    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )

    validloader = torch.utils.data.DataLoader(
        validset,
        batch_size=batch_size,
    )

    testloader = torch.utils.data.DataLoader(
        torchvision.datasets.SVHN(
            dataset_location, split='test',
            download=True,
            transform=transform
        ),
        batch_size=batch_size,
    )

    return trainloader, validloader, testloader

# Data specifications: number of channels and datapath
nc = 3
dataset_location = "."

trainloader, validloader, testloader = get_data_loader('.', mini_batch_size)

Using downloaded and verified file: ./train_32x32.mat
Using downloaded and verified file: ./test_32x32.mat


In [0]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3,16,3,2,1),
            nn.LeakyReLU(0.2,inplace=True),nn.Dropout2d(0.25),
            nn.Conv2d(16,32,3,2,1),
            nn.LeakyReLU(0.2,inplace=True),nn.Dropout2d(0.25),
            nn.BatchNorm2d(32,0.8),
            nn.Conv2d(32,64,3,2,1),
            nn.LeakyReLU(0.2,inplace=True),nn.Dropout2d(0.25),
            nn.BatchNorm2d(64,0.8),
            nn.Conv2d(64,128,3,2,1),
            nn.LeakyReLU(0.2,inplace=True),nn.Dropout2d(0.25),
            nn.BatchNorm2d(128,0.8))
        
        self.en_mean = nn.Linear(128*(2*2), 100)
        self.en_log_var = nn.Linear(128*(2*2), 100)
        self.fully = nn.Linear(100, 512*(4*4))
        
        self.decoder = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128,128,3, stride =1,padding =1),
            nn.BatchNorm2d(128,0.8),
            nn.LeakyReLU(0.2,inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128,64,3, stride =1,padding =1),
            nn.BatchNorm2d(64,0.8),
            nn.LeakyReLU(0.2,inplace=True),
            #nn.Upsample(scale_factor=2),
            nn.Conv2d(64,3,3, stride =1,padding =1),
            nn.Tanh()
        )
        

    def encode(self, x):
        x = self.encoder(x)
        x = x.view(-1, 128*(2*2))

        eva = self.en_mean(x)
        log_var = self.en_log_var(x)

        return eva, log_var

    def sampler(self, eva, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)

        return eps * std + eva

    def decode(self, z):
        z = self.fully(z)
        z = z.view(-1, 128,8,8)
        z = self.decoder(z)
        return z

    def forward(self, x):
        eva, log_var = self.encode(x)
        z = self.sampler(eva, log_var)
        return self.decode(z), eva, log_var


model = VAE()

print(model)

if torch.cuda.is_available():
    device = torch.device("cuda")
    use_cuda = True
else:
    device = torch.device("cpu")
    use_cuda = False

print(device)


model = model.to(device)

VAE(
  (encoder): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): Dropout2d(p=0.25)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): LeakyReLU(negative_slope=0.2, inplace)
    (5): Dropout2d(p=0.25)
    (6): BatchNorm2d(32, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (7): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): LeakyReLU(negative_slope=0.2, inplace)
    (9): Dropout2d(p=0.25)
    (10): BatchNorm2d(64, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (12): LeakyReLU(negative_slope=0.2, inplace)
    (13): Dropout2d(p=0.25)
    (14): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
  )
  (en_mean): Linear(in_features=512, out_features=100, bias=True)
  (en_log_var): Linear(in_feature

In [0]:
def loss_function(recon_x, x, eva, log_var):
    
    loss = torch.nn.functional.mse_loss(recon_x, x, reduction='sum')
        
    return loss


def trainVAE(epoch):
    track = [["loss", "eva", "log_var"]]
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(trainloader):
        data = data.to(device)
        optimizer.zero_grad()
        recon_batch, eva, log_var = model(data)
        loss = loss_function(recon_batch, data, eva, log_var)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        track.append([loss, eva, log_var])
    train_loss = - train_loss  / len(trainloader.dataset)
    print('====> Epoch: {} ELBO for training: {:.4f}'.format(
          epoch, train_loss  ))
    return track
    

def testVAE(epoch):
    model.eval()
    test_loss = 0
    for batch_idx, (data, _) in enumerate(validloader):
        data = data.to(device)
        recon_batch, eva, log_var = model(data)
        loss = loss_function(recon_batch, data, eva, log_var)
        test_loss += loss.item()
    test_loss = - test_loss  / len(validloader.dataset)
    print('====> Epoch: {} ELBO for validation: {:.4f}'.format(
          epoch, test_loss))

    
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

. Q3.1

In [0]:
!mkdir Q3_1
!rm -r Q3_1/*

def sample_generator(model, num_samples, latent_dim#, update_d
                     , name):
    noise = torch.randn(num_samples, latent_dim).to('cuda')
    noise.require_grad = False
    gen_samples = model.decode(noise).to('cuda')
    gen_samples = gen_samples.view(-1, 3, 32, 32)
    save_image(gen_samples.data.view(num_samples, 3, 32, 32).cpu(), 'Q3_1/3_1_1_VAE_' + str(name) + '.png', nrow = 10, normalize=True)

mkdir: cannot create directory ‘Q3_1’: File exists


. Q3.2

In [0]:
def disentangled_representation(model, dim, eps):
    z = torch.randn(1, 100)
    sample = model.decode(z).to('cuda')
    sample = sample.view(-1, 3, 32, 32)
    save_image(sample.data.view(-1, 3, 32, 32).cpu(), 'Q3_2/3_1_2_VAE_before.png', nrow = 1, normalize=True)
    z = z.repeat(100, 1)
    for i in range(dim):
        z[i][i] = z[i][i] + eps
    sample = model.decode(z).to('cuda')
    sample = sample.view(-1, 3, 32, 32)
    save_image(sample.data.view(-1, 3, 32, 32).cpu(), 'Q3_2/3_1_2_VAE_afetr.png', nrow = 10, normalize=True)
    
def disentangled_representation(model, dim, eps):
    z = torch.randn(1, 100).to('cuda')
    sample = model.decode(z).to('cuda')
    sample = sample.view(-1, 3, 32, 32)
    save_image(sample.data.view(-1, 3, 32, 32).cpu(), 'Q3_2/3_1_2_VAE_before.png', nrow = 1, normalize=True)
    z1 = z
    z = z.repeat(100, 1)
    for i in range(dim):
        z[i][i] = z[i][i] + eps
    outputs = model.decode(z)
    outputs = outputs.view(-1, 3, 32, 32)
    difference = torch.abs(outputs - sample).view(100,-1)
    #sample = outputs.view(-1, 3, 32, 32)
    #save_image(sample.data.view(-1, 3, 32, 32).cpu(), 'Q3_2/3_1_2_GAN_afetr.png', nrow = 10, normalize=True)

    sum_dif = torch.sum(difference, dim=1).detach().cpu().numpy()
    top_sum_diff_indcs = np.unravel_index(np.argsort(sum_dif, axis=None), sum_dif.shape)[0]
    # top_sum_diff_indcs = [top_sum_diff_indcs[x] for x in range(9, 100, 10)]
    top_sum_diff_indcs = top_sum_diff_indcs[-10:]
    print(top_sum_diff_indcs)
    z = z1.repeat(10,1)
    for i in range(10):
        #if i in top_sum_diff_indcs:
            #z[i][i] = z[i][i] + eps
        a = top_sum_diff_indcs[i]
        z[i][a] = z[i][a] + eps
    outputs = model.decode(z)
    sample = outputs.view(-1, 3, 32, 32)
    save_image(sample.data.view(-1, 3, 32, 32).cpu(), 'Q3_2/3_1_2_VAE_after.png', nrow = 10, normalize=True)
    
    
!mkdir Q3_2
!rm -r Q3_2/*

mkdir: cannot create directory ‘Q3_2’: File exists


. Q3.3

In [0]:
def interpolation(model):
    alphas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    z0 = torch.randn(1, 100).to('cuda')
    z0 = z0.repeat(11, 1)
    z1 = torch.randn(1, 100).to('cuda')
    z1 = z1.repeat(11, 1)
    sample_0 = model.decode(z0).to('cuda').clone().detach()
    sample_1 = model.decode(z1).to('cuda').clone().detach()
    z_inter = torch.zeros(11, 100).to('cuda')
    sample_data_inter = torch.zeros(11, 3, 32, 32).to('cuda')
    for i, alpha in enumerate(alphas):
        z_inter[i-1] = alpha * z0[i-1] + (1 - alpha) * z1[i-1]
        sample_data_inter[i-1] = alpha * sample_0[i-1] + (1 - alpha) * sample_1[i-1]
        
    sample_latent_inter = model.decode(z_inter).to('cuda')
    sample_latent_inter = sample_latent_inter.view(-1, 3, 32, 32)
    save_image(sample_latent_inter.data.view(-1, 3, 32, 32).cpu(), ('Q3_3/3_1_3_VAE_latent_inter.png'), nrow = 11 ,normalize=True)
    save_image(sample_data_inter.data.view(-1, 3, 32, 32).cpu(), ('Q3_3/3_1_3_VAE_data_inter.png'), nrow = 11 ,normalize=True)


!mkdir Q3_3
!rm -r Q3_3/*

. Load "classify_svhn.py" and "svhn_classifier.pt" files

In [0]:
if not isfile(data_dir + "classify_svhn.py"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='classify_svhn.py') as pbar:
        urlretrieve(
            'https://github.com/CW-Huang/IFT6135H19_assignment/blob/master/assignment3/classify_svhn.py',
            'content' + 'classify_svhn.py',
            pbar.hook)
        
if not isfile(data_dir + "svhn_classifier.pt"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='svhn_classifier.pt') as pbar:
        urlretrieve(
            'https://github.com/CW-Huang/IFT6135H19_assignment/blob/master/assignment3/svhn_classifier.pt',
            data_dir + 'svhn_classifier.pt',
            pbar.hook)
        
isfile(data_dir + "classify_svhn.py")

classify_svhn.py: 106kB [00:00, 328kB/s]  


False

. Q3.4

In [0]:
def image_generator(model):
    for i in range(1000):
        z = torch.randn(1, 100).to('cuda')
        sample = model.decode(z).to('cuda')
        save_image(sample.data.view(-1, 3, 32, 32).cpu(), ('Q3_4/subfolder/sample_' +str(i)+'.png'),normalize=True)


!mkdir Q3_4
!mkdir Q3_4/subfolder
!rm -r Q3_4/subfolder/*

import argparse
import os
import torchvision
import torchvision.transforms as transforms
import torch
import classify_svhn
from classify_svhn import Classifier
from scipy import linalg

SVHN_PATH = "svhn"
PROCESS_BATCH_SIZE = 32


def get_sample_loader(path, batch_size):
    """
    Loads data from `[path]/samples`

    - Ensure that path contains only one directory
      (This is due ot how the ImageFolder dataset loader
       works)
    - Ensure that ALL of your images are 32 x 32.
      The transform in this function will rescale it to
      32 x 32 if this is not the case.

    Returns an iterator over the tensors of the images
    of dimension (batch_size, 3, 32, 32)
    """
    data = torchvision.datasets.ImageFolder(
        path,
        transform=transforms.Compose([
            transforms.Resize((32, 32), interpolation=2),
            classify_svhn.image_transform
        ])
    )
    data_loader = torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        num_workers=2,
    )
    return data_loader


def get_test_loader(batch_size):
    """
    Downloads (if it doesn't already exist) SVHN test into
    [pwd]/svhn.

    Returns an iterator over the tensors of the images
    of dimension (batch_size, 3, 32, 32)
    """
    testset = torchvision.datasets.SVHN(
        SVHN_PATH, split='test',
        download=True,
        transform=classify_svhn.image_transform
    )
    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=batch_size,
    )
    return testloader


def extract_features(classifier, data_loader):
    """
    Iterator of features for each image.
    """
    with torch.no_grad():
        for x, _ in data_loader:
            h = classifier.extract_features(x).numpy()
            for i in range(h.shape[0]):
                yield h[i]


def calculate_fid_score(sample_feature_iterator,
                       testset_feature_iterator):
    eps = 1e-2
    sample_feature = []
    testset_feature = []
    for i in sample_feature_iterator:
        sample_feature.append(i)
    mu = np.mean(sample_feature, axis = 0)
    sigma = np.cov(sample_feature, rowvar = False)
    for j in testset_feature_iterator:
        testset_feature.append(j)
    mu1 = np.mean(testset_feature, axis = 0)
    sigma1 = np.cov(testset_feature, rowvar = False)
#    fid = np.linalg.norm(mu1 - mu)**2 + np.trace(sigma + sigma1 - 2 * (sigma * sigma1)**0.5)
    fid = np.linalg.norm(mu1 - mu)**2 + np.trace(sigma + sigma1 - 2 * linalg.sqrtm(sigma.dot(sigma1)+eps*np.eye(len(sigma))))

    #raise NotImplementedError(
        #print("TO BE IMPLEMENTED.Part of Assignment 3 Quantitative Evaluations")
    #)
    return fid

def show_fid(model, directory):
    classifier = torch.load(model, map_location = 'cpu')
    classifier.eval()
    sample_loader = get_sample_loader(directory, PROCESS_BATCH_SIZE)
    sample_f = extract_features(classifier, sample_loader)
    test_loader = get_test_loader(PROCESS_BATCH_SIZE)
    test_f = extract_features(classifier, test_loader)
    fid_score = calculate_fid_score(sample_f, test_f)
    print("FID score:", fid_score)
    return fid_score

mkdir: cannot create directory ‘Q3_4’: File exists
mkdir: cannot create directory ‘Q3_4/subfolder’: File exists


In [0]:
lr = 2*1e-3
epochs = 10
FID = []
for epoch in tqdm.trange(epochs, leave=True):
    track = trainVAE(epoch)
    testVAE(epoch)
    
    sample_generator(model, 100, 100, epoch)
    
    image_generator(model)
    FID.append(show_fid('svhn_classifier.pt', 'Q3_4'))


torch.save(model.state_dict(), 'VAE_model.pt')
    
disentangled_representation(model, 100, 7)
interpolation(model)
print("best FID:", np.min(FID))




====> Epoch: 0 ELBO for training: -100.4334
====> Epoch: 0 ELBO for validation: -72.5513
Using downloaded and verified file: svhn/test_32x32.mat



 10%|█         | 1/10 [01:25<12:48, 85.36s/it][A

FID score: 88911.7870973881
====> Epoch: 1 ELBO for training: -80.3882
====> Epoch: 1 ELBO for validation: -62.4455
Using downloaded and verified file: svhn/test_32x32.mat



 20%|██        | 2/10 [02:52<11:28, 86.02s/it][A

FID score: 87884.0588438147
====> Epoch: 2 ELBO for training: -69.0767
====> Epoch: 2 ELBO for validation: -45.8138
Using downloaded and verified file: svhn/test_32x32.mat



 30%|███       | 3/10 [04:22<10:10, 87.15s/it][A

FID score: 66044.17745614491
====> Epoch: 3 ELBO for training: -58.1784
====> Epoch: 3 ELBO for validation: -45.9656
Using downloaded and verified file: svhn/test_32x32.mat



 40%|████      | 4/10 [05:50<08:44, 87.44s/it][A

FID score: 75265.41285850503
====> Epoch: 4 ELBO for training: -53.6094
====> Epoch: 4 ELBO for validation: -33.4885
Using downloaded and verified file: svhn/test_32x32.mat



 50%|█████     | 5/10 [07:18<07:17, 87.56s/it][A

FID score: 68240.02785578088
====> Epoch: 5 ELBO for training: -50.4425
====> Epoch: 5 ELBO for validation: -32.0367
Using downloaded and verified file: svhn/test_32x32.mat



 60%|██████    | 6/10 [08:46<05:50, 87.58s/it][A

FID score: (73988.90433525061-1.1154312184595864e-17j)
====> Epoch: 6 ELBO for training: -48.3113
====> Epoch: 6 ELBO for validation: -30.2501
Using downloaded and verified file: svhn/test_32x32.mat



 70%|███████   | 7/10 [10:14<04:23, 87.86s/it][A

FID score: 68012.41346068356
====> Epoch: 7 ELBO for training: -46.4180
====> Epoch: 7 ELBO for validation: -28.2485
Using downloaded and verified file: svhn/test_32x32.mat



 80%|████████  | 8/10 [11:42<02:55, 87.85s/it][A

FID score: 71683.41127404131
====> Epoch: 8 ELBO for training: -46.0002
====> Epoch: 8 ELBO for validation: -32.4800
Using downloaded and verified file: svhn/test_32x32.mat



 90%|█████████ | 9/10 [13:08<01:27, 87.24s/it][A

FID score: 75437.92624642575
====> Epoch: 9 ELBO for training: -44.0202
====> Epoch: 9 ELBO for validation: -25.5911
Using downloaded and verified file: svhn/test_32x32.mat



100%|██████████| 10/10 [14:36<00:00, 87.54s/it][A
[A

FID score: 77286.84978076266
[16 62 52 38 22 13 92 27 55 88]
best FID: (66044.17745614491+0j)


In [0]:
#model = torch.load('VAE_model.pt', map_location='gpu')

# Playground

In [0]:
interpolation(model)