# AutoEncoder (2) ConvNets

- [L1aoXingyu@github](https://github.com/L1aoXingyu)の，[Conv2Dを使ったAutoEncoderの実装](https://github.com/L1aoXingyu/pytorch-beginner/blob/master/08-AutoEncoder/conv_autoencoder.py)に基づいている．  
- 一部，自前環境のPyTorchで動作しない部分を修正しつつ，結果の可視化等を追加している．  

In [1]:
import torch
import os

## make sure the output folder for the reconstructed images exists
## (makedirs also creates the parent './data' when it is missing, and
## exist_ok avoids the check-then-create race of isdir + mkdir)
folder = './data/dc_img'
os.makedirs(folder, exist_ok=True)

## path the trained weights are saved to / loaded from
model_path = './data/dc_autoencoder.pth'

## set some constants for learning
num_epochs = 100
batch_size = 128
learning_rate = 1e-3

## (1) Prepare dataset: MNIST hand-written digits

Almost the same as for the simple autoencoder.

In [2]:
from torchvision.datasets import MNIST
from torchvision import transforms

## preprocessing applied to every image: convert it to a tensor, then
## normalize with mean 0.5 and std 0.5
img_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])]
)

## tensor to image
def to_img(x):
    """Map a batch of normalized tensors back to displayable image values.

    Computes ``0.5 * (x + 1)``, clamps the result to ``[0, 1]`` and views it
    as ``(N, 1, 28, 28)`` where ``N = x.size(0)``.

    Args:
        x: tensor whose first dimension is the batch and whose remaining
           elements number 28 * 28 per sample.

    Returns:
        A tensor of shape ``(N, 1, 28, 28)`` with values in ``[0, 1]``.
    """
    batch = x.size(0)
    pixels = ((x + 1) * 0.5).clamp(min=0, max=1)
    return pixels.view(batch, 1, 28, 28)

## MNIST training split stored under ./data (download=True), with
## img_transform applied to each image
dataset_train = MNIST(
    root='./data',
    train=True,
    transform=img_transform,
    download=True,
)

In [3]:
from torch.utils.data import DataLoader

## mini-batch iterator over the training set, shuffled
dataloader = DataLoader(
    dataset=dataset_train,
    batch_size=batch_size,
    shuffle=True,
)

## (2) Prepare model: convolutional autoencoder

Convolutional autoencoder built from `Conv2d` and `ConvTranspose2d` layers.  

In [4]:
from torch import nn

class autoencoder(nn.Module):
    """Convolutional autoencoder.

    The encoder stacks two Conv2d + ReLU + MaxPool2d stages; the decoder
    stacks three ConvTranspose2d layers and ends in Tanh. For an input of
    shape (N, 1, 28, 28) the encoder produces (N, 8, 2, 2) and the decoder
    returns a tensor of shape (N, 1, 28, 28).
    """

    def __init__(self):
        super().__init__()

        # Layers are created encoder-first and kept at the same Sequential
        # indices so that state_dict keys (e.g. 'encoder.3.weight') match
        # previously saved checkpoints.
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, stride=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 8, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=5, stride=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 1, kernel_size=2, stride=2, padding=1),
            nn.Tanh(),  # keeps the output in [-1, 1]
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

In [5]:
## use the GPU when one is available, otherwise fall back to the CPU
## (an unconditional .cuda() raises on machines without CUDA)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## instantiate model and move its parameters to the chosen device
model = autoencoder().to(device)

## mean-squared-error loss
criterion = nn.MSELoss()
## Adam optimizer with a small weight decay
optimizer = torch.optim.Adam(
    model.parameters(), lr=learning_rate, weight_decay=1e-5
)

In [6]:
from torchsummary import summary

## print a per-layer table of output shapes and parameter counts for a
## (1, 28, 28) input -- https://github.com/sksq96/pytorch-summary
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 10, 10]             160
              ReLU-2           [-1, 16, 10, 10]               0
         MaxPool2d-3             [-1, 16, 5, 5]               0
            Conv2d-4              [-1, 8, 3, 3]           1,160
              ReLU-5              [-1, 8, 3, 3]               0
         MaxPool2d-6              [-1, 8, 2, 2]               0
   ConvTranspose2d-7             [-1, 16, 5, 5]           1,168
              ReLU-8             [-1, 16, 5, 5]               0
   ConvTranspose2d-9            [-1, 8, 15, 15]           3,208
             ReLU-10            [-1, 8, 15, 15]               0
  ConvTranspose2d-11            [-1, 1, 28, 28]              33
             Tanh-12            [-1, 1, 28, 28]               0
Total params: 5,729
Trainable params: 5,729
Non-trainable params: 0
-----------------------------------

## (3) Training model

If you already have a trained model, you may skip this part.

In [7]:
from torch.autograd import Variable
from torchvision.utils import save_image

## training
model.train()  ## switch to "train" mode

## follow whatever device the model lives on instead of assuming CUDA
model_device = next(model.parameters()).device

for epoch in range(num_epochs):
    for img, _ in dataloader:  ## the labels are not used
        ## plain tensors work with autograd directly; the deprecated
        ## Variable wrapper is not needed
        img = img.to(model_device)

        ## feed-forward: the input itself is the reconstruction target
        output = model(img)
        loss = criterion(output, img)

        ## backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ===================log========================
    ## loss / output here belong to the LAST mini-batch of the epoch
    print('epoch [{0}/{1}], loss:{2:.4f}'
          .format(epoch + 1, num_epochs, loss.item()))
    if epoch % 10 == 0:
        ## every 10th epoch, save that mini-batch's reconstructions;
        ## detach() drops the autograd graph before converting to an image
        pic = to_img(output.detach().cpu())
        save_image(pic, '{0}/image_{1}.png'.format(folder, epoch))

epoch [1/100], loss:0.2373
epoch [2/100], loss:0.1783
epoch [3/100], loss:0.1620
epoch [4/100], loss:0.1594
epoch [5/100], loss:0.1480
epoch [6/100], loss:0.1495
epoch [7/100], loss:0.1341
epoch [8/100], loss:0.1443
epoch [9/100], loss:0.1404
epoch [10/100], loss:0.1210
epoch [11/100], loss:0.1254
epoch [12/100], loss:0.1161
epoch [13/100], loss:0.1195
epoch [14/100], loss:0.1175
epoch [15/100], loss:0.1145
epoch [16/100], loss:0.1224
epoch [17/100], loss:0.1139
epoch [18/100], loss:0.1227
epoch [19/100], loss:0.1157
epoch [20/100], loss:0.1139
epoch [21/100], loss:0.1097
epoch [22/100], loss:0.1173
epoch [23/100], loss:0.1073
epoch [24/100], loss:0.1077
epoch [25/100], loss:0.1077
epoch [26/100], loss:0.1035
epoch [27/100], loss:0.1081
epoch [28/100], loss:0.1092
epoch [29/100], loss:0.1128
epoch [30/100], loss:0.1041
epoch [31/100], loss:0.1060
epoch [32/100], loss:0.1109
epoch [33/100], loss:0.1019
epoch [34/100], loss:0.1085
epoch [35/100], loss:0.1043
epoch [36/100], loss:0.1072
e

In [8]:
## save trained model: only the parameters (state_dict) are written
torch.save(obj=model.state_dict(), f=model_path)

## (4) Testing model

In [9]:
## load trained model
## map_location='cpu' lets a checkpoint that was saved from a CUDA model be
## read on a machine without that GPU; load_state_dict then copies the
## values into the model's existing parameters on whatever device they use
checkpoint = torch.load(model_path, map_location='cpu')
model.load_state_dict(checkpoint)
model.eval() ## switch to "evaluate" mode

autoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(8, 16, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): ConvTranspose2d(16, 8, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): ConvTranspose2d(8, 1, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
    (5): Tanh()
  )
)

In [10]:
from torch.utils.data import DataLoader

## MNIST test split (train=False) with img_transform applied, served in
## shuffled mini-batches of batch_size
dataset_test = MNIST(
    root='./data',
    train=False,
    transform=img_transform,
    download=True,
)
testloader = DataLoader(
    dataset=dataset_test,
    batch_size=batch_size,
    shuffle=True,
)

In [11]:
from torch.autograd import Variable

## reconstruct ONE test batch and save the input / output image grids

## follow whatever device the model lives on instead of assuming CUDA
model_device = next(model.parameters()).device

## only the first batch is needed; the labels are not used
img, _ = next(iter(testloader))

## the batch is normalized by img_transform, so map it back with to_img
## before saving -- otherwise save_image clips every negative value and the
## input grid is not comparable with the de-normalized output grid
save_image(to_img(img), '{0}/test_input.png'.format(folder))
img = img.to(model_device)

## feed-forward; no_grad skips building the autograd graph, which is not
## needed for inference
with torch.no_grad():
    output = model(img)

pic = to_img(output.cpu())
save_image(pic, '{0}/test_output.png'.format(folder))

(end)