# Autoencoder

[Reducing the Dimensionality of Data with Neural Networks](http://science.sciencemag.org/content/313/5786/504)

In [None]:
import os
import numpy as np
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

In [None]:
# Use the GPU whenever PyTorch can see one.
cuda = torch.cuda.is_available()

# Uncomment the next line to force CPU execution, e.g. while the GPU is
# busy with other experiments.
#cuda = False

if cuda:
    print("CUDA is available")

# Training hyperparameters.
num_epochs = 100
batch_size = 128
learning_rate = 0.001

# Directory that receives the artifacts produced by this notebook.
out_dir = './results/autoencoder'

if not os.path.exists(out_dir):
    print("No out_dir exists. Create one.")
    # makedirs also creates the missing parent ('./results'); os.mkdir
    # raises FileNotFoundError when the parent directory does not exist.
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# MNIST images have a single channel, so Normalize takes one mean and one
# std value.  (x - 0.5) / 0.5 maps ToTensor's [0, 1] range onto [-1, 1].
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split, downloaded into ./data on first use.
train_dataset = MNIST('./data', train=True, download=True, transform=img_transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 2-dimensional bottleneck.

    The encoder narrows a flattened 28x28 input through 128, 64 and 12
    units down to a 2-D code; the decoder mirrors those widths back to
    28*28 values and finishes with Tanh.
    """

    def __init__(self):
        super().__init__()
        widths = [28 * 28, 128, 64, 12, 2]
        self.encoder = nn.Sequential(*self._linear_stack(widths))
        # Same widths in reverse; Tanh bounds the reconstruction to [-1, 1].
        self.decoder = nn.Sequential(
            *(self._linear_stack(widths[::-1]) + [nn.Tanh()])
        )

    @staticmethod
    def _linear_stack(widths):
        """Return Linear layers joining consecutive widths, separated by in-place ReLUs."""
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            if layers:
                layers.append(nn.ReLU(True))
            layers.append(nn.Linear(n_in, n_out))
        return layers

    def forward(self, x):
        """Encode ``x`` to the 2-D code and return its reconstruction."""
        code = self.encoder(x)
        return self.decoder(code)

In [None]:
# Build the autoencoder and move it to the GPU when one is in use.
model = Autoencoder()
model = model.cuda() if cuda else model

In [None]:
def to_image(x):
    """Turn flattened reconstructions into a batch of displayable images.

    Values are mapped from [-1, 1] to [0, 1], clipped to that range, and
    every row is reshaped into a single-channel 28x28 image.
    """
    batch = x.size(0)
    unit_range = (0.5 * (x + 1)).clamp(min=0, max=1)
    return unit_range.view(batch, 1, 28, 28)

In [None]:
# Reconstruction loss and optimizer for the MNIST autoencoder.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)

# Loss of every training iteration, kept for plotting afterwards.
loss_list = []

for epoch in range(num_epochs):
    for img, _ in train_loader:
        # Flatten each image to a vector for the fully connected encoder.
        x = img.view(img.size(0), -1)
        if cuda:
            x = x.cuda()

        xhat = model(x)

        # The input itself is the reconstruction target.
        loss = criterion(xhat, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # logging: .item() yields a Python float; indexing the 0-dim loss
        # tensor with [0] raises on current PyTorch releases.
        loss_list.append(loss.item())

    print('epoch [{}/{}], loss: {:.4f}'.format(
        epoch + 1,
        num_epochs,
        loss.item()
    ))

    # Every 10th epoch, save the reconstructions of the last batch.
    if epoch % 10 == 0:
        pic = to_image(xhat.detach().cpu())
        save_image(pic, '{}/image_{}.png'.format(out_dir, epoch))

# Persist the loss history and the trained weights; the plotting and
# evaluation cells load exactly these two files from out_dir.
np.save('{}/loss_list.npy'.format(out_dir), np.array(loss_list))
torch.save(model.state_dict(), '{}/autoencoder.pt'.format(out_dir))

In [None]:
loss_list = np.load('{}/loss_list.npy'.format(out_dir))

import pylab
import matplotlib.pyplot as plt
%matplotlib inline

plt.plot(loss_list)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.grid()

In [None]:
from IPython.display import Image

# Display the reconstruction image written at epoch 0.  The path is built
# from out_dir so it follows the configured output directory, and passing
# filename= explicitly makes a missing file raise instead of the string
# being interpreted as raw image data.
Image(filename='{}/image_0.png'.format(out_dir))

In [None]:
# Restore the trained weights.  map_location keeps the loaded storages on
# the CPU, so the checkpoint can be read on a machine without a GPU too.
model.load_state_dict(torch.load('{}/autoencoder.pt'.format(out_dir),
                                 map_location=lambda storage, loc: storage))

test_dataset = MNIST('./data', download=True, train=False, transform=img_transform)
test_loader = DataLoader(test_dataset, batch_size=10000, shuffle=False)

# Take the first batch (up to 10000 images) and flatten each image.
# next(iter(...)) is the portable spelling; the iterator's .next() alias
# is not available on newer PyTorch releases.
images, labels = next(iter(test_loader))
images = images.view(images.size(0), -1)
print(images.shape)

# Follow the same device choice as the model instead of assuming a GPU.
if cuda:
    images = images.cuda()

# Inference only: no_grad replaces the removed Variable(volatile=True).
with torch.no_grad():
    z = model.encoder(images).cpu().numpy()

# z is now a plain (N, 2) ndarray of latent codes, ready for plotting.
z

In [None]:
# Scatter the 2-D latent codes, coloured by digit label.
# Matplotlib needs plain arrays: a tensor that is still attached to the
# autograd graph (or lives on the GPU) cannot be converted implicitly, so
# convert explicitly and accept an ndarray as well.
latent = z.detach().cpu().numpy() if torch.is_tensor(z) else np.asarray(z)

plt.figure(figsize=(10, 10))
plt.scatter(latent[:, 0], latent[:, 1], marker='.', c=labels.numpy(), cmap=pylab.cm.jet)
plt.colorbar()
plt.grid()

### Test on Montezuma's image

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

In [3]:
class Conv_Autoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    For a 3x210x160 input the encoder produces a 32x10x7 feature map and
    the decoder's transposed convolutions expand it back to 3x210x160.
    """

    def __init__(self):
        super().__init__()

        # Two conv + ReLU + max-pool stages.
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=8, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=5, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )

        # Deconvolution = transposed convolution, see
        # https://pytorch.org/docs/stable/nn.html#torch.nn.ConvTranspose2d
        #
        # The kernel / stride / output_padding values below were chosen
        # only so that the output size matches the input size.
        # TODO: derive them properly (possibly with fewer layers); see
        # https://arxiv.org/abs/1603.07285, section 4.6 "Zero padding,
        # non-unit strides, transposed".
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, kernel_size=5, stride=3, output_padding=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=5, stride=3, output_padding=(0, 2)),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 3, kernel_size=3, stride=2, output_padding=(1, 1)),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (encoder followed by decoder)."""
        features = self.encoder(x)
        return self.decoder(features)

In [4]:
import gym
env = gym.make('MontezumaRevenge-v0').unwrapped

# Number of frames to collect.
NUM_DATASET = 1000
s = env.reset()
memory = []

# collecting the dataset as input for Autoencoder: take random actions
# and store the observation returned by every step.
for i in range(NUM_DATASET):
    action = env.action_space.sample()
    s, r, done, info = env.step(action)
    memory.append(s)
    if done:
        # Stepping an environment after the episode has ended is undefined
        # in gym, so start a new episode before the next step.
        s = env.reset()

[2018-09-12 17:21:54,907] Making new env: MontezumaRevenge-v0


In [5]:
def recon_image(img):
    """Convert a single-image batch tensor into a channels-last ndarray.

    Expected input : tensor of size [1, 3, 210, 160]
    Returned output: ndarray of shape (210, 160, 3)

    The tensor is moved to the CPU, its 0th dimension is squeezed away,
    it is detached from the autograd graph, and the axes are reordered
    from (channel, height, width) to (height, width, channel).
    """
    chw = img.cpu().squeeze(0).detach()
    return chw.numpy().transpose(1, 2, 0)

In [None]:
import cv2

cuda = torch.cuda.is_available()
num_epochs = 100
batch_size = 128
learning_rate = 0.001

# Set to True to preview every reconstruction in an OpenCV window.  Each
# preview blocks until a key is pressed (waitKey(0)), so it must stay off
# for an unattended training run.
show_debug_window = False

model = Conv_Autoencoder()
if cuda:
    print("CUDA is available")
    model.cuda()


criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)

# Loss of every training iteration.
loss_list = []

out_dir = './results/autoencoder'
# Make sure the output directory exists before anything is written to it.
os.makedirs(out_dir, exist_ok=True)

for epoch in range(num_epochs):
    for frame in memory:
        # frame shape is (210, 160, 3); reorder to channels-first and add
        # a leading batch dimension -> (1, 3, 210, 160).
        img = torch.from_numpy(np.transpose(frame, (2, 0, 1))).unsqueeze(0)
        if cuda:
            img = img.cuda()
        img = img.float()

        xhat = model(img)

        if show_debug_window:
            cv2.imshow('test', recon_image(xhat))
            cv2.waitKey(0)

        # NOTE(review): the network input is the raw frame while the target
        # is scaled by 1/255 -- confirm this asymmetry is intended before
        # changing it, since the inference cell feeds raw frames as well.
        loss = criterion(xhat, img / 255)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # logging: .item() yields a Python float; indexing the 0-dim loss
        # tensor with [0] raises on current PyTorch releases.
        loss_list.append(loss.item())

    print('epoch [{}/{}], loss: {:.4f}'.format(
        epoch + 1,
        num_epochs,
        loss.item()
    ))

    # Every 10th epoch, save the reconstruction of the last frame.
    if epoch % 10 == 0:
        save_image(torch.squeeze(xhat, 0), '{}/image_{}.png'.format(out_dir, epoch))

# Persist the loss history and the trained weights.
np.save('{}/loss_list.npy'.format(out_dir), np.array(loss_list))
torch.save(model.state_dict(), '{}/mz_autoencoder.pt'.format(out_dir))


CUDA is available




In [None]:
# Encode one fresh frame and report the size of its latent representation.
img = env.reset()
# (H, W, C) -> (C, H, W), then add the batch dimension the conv layers need;
# see https://discuss.pytorch.org/t/expected-stride-to-be-a-single-integer-value-or-a-list/17612
img = torch.from_numpy(np.transpose(img, (2, 0, 1))).unsqueeze(0)
if cuda:
    img = img.cuda()
img = img.float()

# Inference only, so skip building the autograd graph.
with torch.no_grad():
    z = model.encoder(img).cpu()
z = torch.squeeze(z, 0)
print(z.shape)
print("Original is 210*160*3 = {}, Latent space rep is 32*10*7 = {}".format(210*160*3, 32*10*7))

# Variational Autoencoder

You can find a sample implementation in [pytorch/examples](https://github.com/pytorch/examples/blob/master/vae/main.py)