# AutoEncoder (3) Variational AutoEncoder

- [L1aoXingyu@github](https://github.com/L1aoXingyu)の，[Variational AutoEncoderの実装](https://github.com/L1aoXingyu/pytorch-beginner/blob/master/08-AutoEncoder/Variational_autoencoder.py)に基づいている．  
- 本来，`nn.Linear`を使っているものを`nn.Conv2d`を利用したものに直す予定だが，現時点のモデルは暫定的に`nn.Linear`のままである．  

In [1]:
import torch
import os

## create the output folder in advance
## (makedirs also creates the missing parent './data' and tolerates an
## already existing folder, so the cell can be re-run safely)
folder = './data/VAE_img'
os.makedirs(folder, exist_ok=True)

## file the trained weights are written to and read from
model_path = './data/VAE_autoencoder.pth'

## set some constants for learning
num_epochs = 100
batch_size = 128
learning_rate = 1e-3

## (1) Prepare dataset: MNIST hand-written digits

Almost the same as for the simple autoencoder.

In [2]:
from torchvision.datasets import MNIST
from torchvision import transforms

## image to tensor
## ToTensor() already scales pixel values to [0, 1], which is the range of
## the decoder's sigmoid output and of `to_img`'s clamp. No Normalize step is
## applied, because shifting the targets to [-1, 1] would make them
## unreachable for the reconstruction.
img_transform = transforms.Compose([
    transforms.ToTensor(),
])

## tensor to image
def to_img(x):
    """Clip `x` to the range [0, 1] and view it as (batch, 1, 28, 28).

    `x` must hold 784 values per item of its first dimension.
    """
    batch = x.size(0)
    clipped = torch.clamp(x, min=0, max=1)
    return clipped.view(batch, 1, 28, 28)

## training split of MNIST under ./data (downloaded if requested and missing),
## each image converted by `img_transform`
dataset_train = MNIST(
    root='./data',
    train=True,
    transform=img_transform,
    download=True,
)

In [3]:
from torch.utils.data import DataLoader

## mini-batch loader over the training set with shuffling enabled
dataloader = DataLoader(
    dataset=dataset_train,
    shuffle=True,
    batch_size=batch_size,
)

## (2) Prepare model: Variational AutoEncoder

This network follows the fully connected VAE from the reference implementation linked at the top of this notebook.

In [4]:
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

## temporary
class VAE(nn.Module):
    """Fully connected variational autoencoder for flattened 28x28 images.

    The encoder maps a 784-value input through a 400-unit hidden layer to the
    mean and log-variance of a 20-dimensional latent Gaussian. The decoder
    maps a latent vector through a 400-unit hidden layer back to 784 values
    squashed into (0, 1) by a sigmoid.
    """

    def __init__(self):
        super(VAE, self).__init__()

        self.fc1 = nn.Linear(784, 400)   # encoder hidden layer
        self.fc21 = nn.Linear(400, 20)   # latent mean head
        self.fc22 = nn.Linear(400, 20)   # latent log-variance head
        self.fc3 = nn.Linear(20, 400)    # decoder hidden layer
        self.fc4 = nn.Linear(400, 784)   # decoder output layer

    def encode(self, x):
        """Return ``(mu, logvar)`` for an input whose last dimension is 784."""
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        """Draw ``z = mu + eps * std`` with ``eps`` sampled from N(0, 1).

        ``std`` is ``exp(0.5 * logvar)``. The noise is created with
        ``torch.randn_like`` so it takes its device and dtype from ``std``
        itself rather than from whether CUDA happens to be installed.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map a latent vector (last dimension 20) to 784 values in (0, 1)."""
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for the input batch ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar

In [5]:
from torch import optim

## build the network and move it to the GPU when one is available
model = VAE()
if torch.cuda.is_available():
    model = model.cuda()

## squared error summed (not averaged) over every element of the batch
reconstruction_function = nn.MSELoss(reduction='sum')


def loss_function(recon_x, x, mu, logvar):
    """Return the VAE training loss: reconstruction error plus KL divergence.

    recon_x: generated images
    x: original images (same shape as `recon_x`)
    mu: latent mean
    logvar: latent log variance

    Both terms are sums over the whole batch, so the result is a scalar
    tensor that grows with the batch size. The inputs are not modified.
    """
    # summed squared error between reconstruction and target
    recon_loss = reconstruction_function(recon_x, x)

    # KL divergence between N(mu, sigma^2) and N(0, 1):
    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return recon_loss + kld

## Adam optimizer; the step size comes from the `learning_rate` constant
## defined with the other learning constants instead of a repeated literal
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [6]:
from torchsummary import summary

## https://github.com/sksq96/pytorch-summary
## print the per-layer table for one flattened 28x28 input
input_shape = (1, 28 * 28)
summary(model, input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 400]         314,000
            Linear-2                [-1, 1, 20]           8,020
            Linear-3                [-1, 1, 20]           8,020
            Linear-4               [-1, 1, 400]           8,400
            Linear-5               [-1, 1, 784]         314,384
Total params: 652,824
Trainable params: 652,824
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 2.49
Estimated Total Size (MB): 2.51
----------------------------------------------------------------




## (3) Training model

In [None]:
from torchvision.utils import save_image

## training
model.train()

## values that stay fixed for the whole run
use_gpu = torch.cuda.is_available()
num_samples = len(dataloader.dataset)
num_batches = len(dataloader)

for epoch in range(num_epochs):
    train_loss = 0

    for batch_idx, (img, _) in enumerate(dataloader):
        ## flatten every image to one row for `Linear`
        img = Variable(img.view(img.size(0), -1))
        if use_gpu:
            img = img.cuda() ## send to GPU

        ## feed-forward
        recon_batch, mu, logvar = model(img)
        loss = loss_function(recon_batch, img, mu, logvar)

        ## backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ## the loss is summed over the batch; accumulate it for the epoch average
        batch_loss = loss.item()
        train_loss += batch_loss

        ## progress line every 100 batches, with the per-sample loss
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch,
                batch_idx * len(img),
                num_samples,
                100. * batch_idx / num_batches,
                batch_loss / len(img)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / num_samples))

    ## every 10th epoch, store the reconstructions of the last batch
    if epoch % 10 == 0:
        save_image(to_img(recon_batch.cpu().data),
                   '{}/image_{}.png'.format(folder, epoch))

In [None]:
## save trained model (its state dict only) to `model_path`
trained_state = model.state_dict()
torch.save(trained_state, model_path)

## (4) Testing model

In [7]:
## load trained model
## map_location='cpu' lets a checkpoint that was saved from a GPU be read on
## a CPU-only machine; load_state_dict then copies the values into the
## existing parameters of `model`.
checkpoint = torch.load(model_path, map_location='cpu')
model.load_state_dict(checkpoint)
model.eval() ## switch to "evaluate" mode

VAE(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (fc21): Linear(in_features=400, out_features=20, bias=True)
  (fc22): Linear(in_features=400, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=400, bias=True)
  (fc4): Linear(in_features=400, out_features=784, bias=True)
)

In [8]:
from torch.utils.data import DataLoader

## load test data & loader
dataset_test = MNIST(
    root='./data',
    train=False,
    transform=img_transform,
    download=True,
)
testloader = DataLoader(
    dataset=dataset_test,
    shuffle=True,
    batch_size=batch_size,
)

In [9]:
from torch.autograd import Variable
from torchvision.utils import save_image

## reconstruct one test batch and save input and output side by side on disk;
## no_grad() skips building the autograd graph, which inference does not need
with torch.no_grad():
    for test in testloader:
        img, _ = test
        save_image(img, '{0}/test_input.png'.format(folder))

        img = img.view(img.size(0), -1) ## for `Linear`
        if torch.cuda.is_available():
            img = img.cuda() ## send to GPU

        ## feed-forward
        output, _, _ = model(img)
        pic = to_img(output.cpu())
        save_image(pic, '{0}/test_output.png'.format(folder))

        break ## generates one batch

(end)