In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms
import cnnUtils
import torchvision
import os

Please install sklearn for layer visualization


In [12]:
# MNIST dataset
dataset = datasets.MNIST(root='./data',
                         train=True,
                         transform=transforms.ToTensor(),
                         download=True)

# Data loader
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=100, 
                                          shuffle=True)

# EBA5 cropped mean and std values
#0.544978628454
#0.0564096715989
setMean = [0.544, 0.544, 0.544]
setStd = [0.056, 0.056, 0.056]

baseDirectory = 'g:/Selim/Thesis/Code/'
setDirectory = 'EBA'
setImageSize = 64
setPath = os.path.join(baseDirectory, setDirectory)

dataTransforms = {
    'train': transforms.Compose([
        transforms.Scale(setImageSize),
        transforms.RandomCrop(setImageSize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=setMean, std=setStd)
    ]),
    'test': transforms.Compose([
        transforms.Scale(setImageSize),
        transforms.CenterCrop(setImageSize),
        transforms.ToTensor(),
        transforms.Normalize(mean=setMean, std=setStd)
    ]),
}

dataset = torchvision.datasets.ImageFolder(os.path.join(setPath, 'train'), dataTransforms['train'])

data_loader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=4)

In [13]:
def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

In [14]:
# VAE model
class VAE(nn.Module):
    def __init__(self, image_size=64, h_dim=400, z_dim=20):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(image_size, h_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(h_dim, z_dim*2))  # 2 for mean and variance.
        
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, image_size),
            nn.Sigmoid())
    
    def reparametrize(self, mu, log_var):
        """"z = mean + eps * sigma where eps is sampled from N(0, 1)."""
        eps = to_var(torch.randn(mu.size(0), mu.size(1)))
        z = mu + eps * torch.exp(log_var/2)    # 2 for convert var to std
        return z
                     
    def forward(self, x):
        print(x)
        h = self.encoder(x)
        mu, log_var = torch.chunk(h, 2, dim=1)  # mean and log variance.
        z = self.reparametrize(mu, log_var)
        out = self.decoder(z)
        return out, mu, log_var
    
    def sample(self, z):
        return self.decoder(z)

In [15]:
vae = VAE()

if torch.cuda.is_available():
    vae.cuda()

In [16]:
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)
iter_per_epoch = len(data_loader)
data_iter = iter(data_loader)

# fixed inputs for debugging
fixed_z = to_var(torch.randn(100, 20))
fixed_x, _ = next(data_iter)
torchvision.utils.save_image(fixed_x.cpu(), './data/real_images.png')
fixed_x = to_var(fixed_x.view(fixed_x.size(0), -1))
print(fixed_x)

Variable containing:
-1.2409 -1.2409 -1.2409  ...  -0.6106 -0.6106 -0.6106
-1.0308 -0.8207 -0.5406  ...  -0.1204 -0.3305 -0.4706
-0.8908 -0.8207 -0.6807  ...   0.5098 -0.4706 -0.0504
          ...             ⋱             ...          
 3.1008  2.9608  2.7507  ...   0.7199  0.9300  0.7899
-1.9412 -1.8711 -1.8011  ...  -1.0308 -1.1709 -1.3109
 1.6303  1.6303  1.5602  ...   3.5910  3.2409  3.0308
[torch.cuda.FloatTensor of size 10x12288 (GPU 0)]



In [17]:
for epoch in range(10):
    for i, (images, _) in enumerate(data_loader):
        
        images = to_var(images.view(images.size(0), -1))
        out, mu, log_var = vae(images)
        
        # Compute reconstruction loss and kl divergence
        # For kl_divergence, see Appendix B in the paper or http://yunjey47.tistory.com/43
        reconst_loss = F.binary_cross_entropy(out, images, size_average=False)
        kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var -1))
        
        # Backprop + Optimize
        total_loss = reconst_loss + kl_divergence
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print ("Epoch[%d/%d], Step [%d/%d], Total Loss: %.4f, "
                   "Reconst Loss: %.4f, KL Div: %.7f" 
                   %(epoch+1, 50, i+1, iter_per_epoch, total_loss.data[0], 
                     reconst_loss.data[0], kl_divergence.data[0]))
    
    # Save the reconstructed images
    reconst_images, _, _ = vae(fixed_x)
    reconst_images = reconst_images.view(reconst_images.size(0), 1, 28, 28)
    cnnUtils.ImShow(reconst_images.data.cpu())
    #torchvision.utils.save_image(reconst_images.data.cpu(), 
        #'./data/reconst_images_%d.png' %(epoch+1))

Variable containing:
-1.0308 -1.1008 -1.1709  ...   0.6499  0.7199  0.9300
-0.2605 -0.2605 -0.1905  ...  -0.3305 -0.3305 -0.3305
-1.5210 -1.5210 -1.5210  ...  -1.5210 -1.5210 -1.5210
          ...             ⋱             ...          
-0.9608 -1.1709 -0.8908  ...   0.4398  0.2997  0.2297
-1.8011 -1.5910 -1.4510  ...  -0.8908 -0.8908 -1.1008
-1.8011 -1.7311 -1.9412  ...   0.0896  0.2997  0.1597
[torch.cuda.FloatTensor of size 10x12288 (GPU 0)]



RuntimeError: size mismatch at d:\downloads\pytorch-master-1\torch\lib\thc\generic/THCTensorMathBlas.cu:243