In [1]:
from __future__ import print_function, division

import os, json, sys
sys.path.append('../..') 

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.utils import save_image


from IPython.display import Image

In [2]:
plt.ion()   # interactive mode
folder_data = '/media/tiago/tiagobotari/data'

In [3]:
# Data augmentation and normalization for training
# Just normalization for validation
_image_size = 64
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256*2),
        transforms.RandomCrop(_image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(.3, .3, .3),
        transforms.ToTensor(),
        transforms.Normalize(_mean, _std)
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(_image_size),
        transforms.ToTensor(),
        transforms.Normalize(_mean, _std)
    ]),
}

In [4]:
ants = False
if ants:
    data_dir = '{:}/hymenoptera_data'.format(folder_data)
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['train', 'val']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                                 shuffle=True, num_workers=4)
                  for x in ['train', 'val']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    class_names = image_datasets['train'].classes

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device

    data_loader = dataloaders['train']

In [5]:
batch_size = 144
data_dir = '{:}/dogscats'.format(folder_data)
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

data_loader = dataloaders['train']

### Visualize

In [6]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updateda

In [7]:
# # Get a batch of training data
# inputs, classes = next(iter(dataloaders['train']))

# # Make a grid from batch
# out = torchvision.utils.make_grid(inputs)
# print([class_names[x] for x in classes])
# imshow(out )

In [8]:
# # Load MNIST Data
# dataset = datasets.MNIST(root=folder_data, train=True, transform=transforms.ToTensor(), download=False)
# data_loader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=True)

In [9]:
# Fixed input for debugging
fixed_x, _ = next(iter(data_loader))
fixed_x = fixed_x.to(device)
print(fixed_x.size())
save_image(fixed_x, '../image.png')

# Image('real_image.png')

torch.Size([144, 3, 64, 64])


# VAE CNN

In [10]:
def save_image(x, path='real_image.png'):
    torchvision.utils.save_image(x, path)
def flatten(x):
    return to_var(x.view(x.size(0), -1))
def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# from density_lime.densities.density_cvae import VAE

class VAE(nn.Module):
    def __init__(self, image_channels=3, kernel_size=3, stride=1, latent_dim=32):
        super().__init__()
        self.verbose = True
#         self.nodes_dim = nodes_dim
        self.latent_dim = latent_dim
        self.chanels = [32, 64, 128, 256, 512]
        # Encoder
        self.layers_encoder = nn.ModuleList(self.create_layers_encoder(image_channels=image_channels)) 
        
        # Mu and log_var
        in_linear_layer = self.chanels[-1]*4
        self.fc_mu = nn.Linear(in_linear_layer, self.latent_dim)
        self.fc_log_var = nn.Linear(in_linear_layer, self.latent_dim)
        
        self.fc_out = nn.Linear(self.latent_dim, in_linear_layer)
        
        # Decoder
        self.layers_decoder = nn.ModuleList(
            self.create_layers_decoder(
                image_channels=image_channels
            )
        )
    
    def encode(self, x):
        x_in = x
#         print('encode out size: ', x_in.size())
        for layer in self.layers_encoder:
            # TODO: doubt!! no functional here, not sure what is the best option
            x_in = layer(x_in)
#           
#             print('##############################')
#             print(layer)
#             print('encode out size: ', x_in.size())
        return x_in

    def decode(self, z):
        x_in = z
#         print('decode out size: ', x_in.size())
        for layer in self.layers_decoder:
            # TODO: doubt!! no functional here, not sure what is the best option
            x_in = layer(x_in)
#             print('##############################')
#             print(layer)
#             print('decode out size: ', x_in.size())
        return x_in
            
    @staticmethod
    def reparameterize(mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps*std    
    
    def forward(self, x):
        x_enconde = self.encode(x)
        x_enconde = torch.flatten(x_enconde, start_dim=1)
#         x_enconde = x_enconde.view(-1, self.nodes_dim)
#         print(x_enconde.size())
        mu, log_var = self.fc_mu(x_enconde), self.fc_log_var(x_enconde)
        z = self.reparameterize(mu, log_var)
        z = self.fc_out(z)
        z = z.view(-1, 512, 2, 2)
        x_out = self.decode(z)
#         print('------------------------------------------------------------')
        return x_out, mu, log_var
    
    def create_layers_encoder(self, image_channels):
        # TODO: Would be nice to have some options here.
        # TODO: doubt!!! I choose to use the Elu layer here. not sure about this.
        # TODO: I am thinking to put a batch normalization between the layers.
        out_chanels = self.chanels
        layers = list()
        for out_chanel in out_chanels:
            layers.append(
                nn.Sequential(
                nn.Conv2d(image_channels, out_chanel, kernel_size=3, stride=2, padding=1)
                , nn.BatchNorm2d(out_chanel)
                , nn.LeakyReLU()   
                )
            )
            image_channels = out_chanel
        return layers
    
    def create_layers_decoder(self, image_channels):
        # TODO: Would be nice to have some options here.
        # TODO: doubt!!! I choose to use the Elu layer here. not sure about this.
        # TODO: I am thinking to put a batch normalization between the layers.
        out_chanels = self.chanels
        out_chanels.reverse()
        print(out_chanels)
        print(out_chanels[:-1])
        layers = list()
        for in_chanel, out_chanel in zip(out_chanels[:-1], out_chanels[1:]) :
            layers.append(
                nn.Sequential(
                 nn.ConvTranspose2d(
                     in_chanel, out_chanel, kernel_size=3, stride=2, padding=1, output_padding=1)
                , nn.BatchNorm2d(out_chanel)
                , nn.LeakyReLU()   
                )
            )
                
        layers.append(
            nn.Sequential(
                            nn.ConvTranspose2d(out_chanels[-1],
                                               out_chanels[-1],
                                               kernel_size=3,
                                               stride=2,
                                               padding=1,
                                               output_padding=1),
                            nn.BatchNorm2d(out_chanels[-1]),
                            nn.LeakyReLU(),
                            nn.Conv2d(out_chanels[-1], out_channels=3,
                                      kernel_size=3, padding=1),
                            nn.Sigmoid()
            )
        )
        return layers

In [12]:
torch.cuda.empty_cache()
image_channels = 3
vae = VAE(image_channels=image_channels, latent_dim=256*2)
if torch.cuda.is_available():
    vae.cuda()
vae

[512, 256, 128, 64, 32]
[512, 256, 128, 64]


VAE(
  (layers_encoder): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (4): Se

In [13]:
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)

In [14]:
def loss_fn(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x, size_average=False)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu**2 -  logvar.exp())
    return BCE + KLD

In [15]:
def train_model(epochs=100):
    for epoch in range(epochs):
        for idx, (images, _) in enumerate(data_loader):
            images = images.to(device)
            recon_images, mu, logvar = vae(images)
            loss = loss_fn(recon_images, images, mu, logvar)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if idx%100 == 0:
                print("Epoch[{}/{}] Loss: {:.3f}".format(epoch+1, epochs, loss.data.item()/batch_size))

                recon_x, _, _ = vae(fixed_x)
                save_image(recon_x.view(recon_x.size(0), image_channels, _image_size, _image_size).data.cpu(), f'{folder_data}/reco/i_{epoch}_{idx}.png')

In [16]:
train_model()



Epoch[1/100] Loss: 9889.415
Epoch[1/100] Loss: -193697.417
Epoch[2/100] Loss: -177460.736
Epoch[2/100] Loss: -208981.653
Epoch[3/100] Loss: -206675.681
Epoch[3/100] Loss: -221137.972
Epoch[4/100] Loss: -260147.889
Epoch[4/100] Loss: -244643.639
Epoch[5/100] Loss: -274592.750
Epoch[5/100] Loss: -259257.833
Epoch[6/100] Loss: -255504.611
Epoch[6/100] Loss: -265043.806
Epoch[7/100] Loss: -237246.222
Epoch[7/100] Loss: -262987.861
Epoch[8/100] Loss: -263546.417
Epoch[8/100] Loss: -273784.639
Epoch[9/100] Loss: -244083.944
Epoch[9/100] Loss: -257447.972
Epoch[10/100] Loss: -240269.528
Epoch[10/100] Loss: -275587.528
Epoch[11/100] Loss: -274824.361
Epoch[11/100] Loss: -256969.222
Epoch[12/100] Loss: -252895.028
Epoch[12/100] Loss: -261698.889
Epoch[13/100] Loss: -245230.722
Epoch[13/100] Loss: -295422.556
Epoch[14/100] Loss: -257509.028
Epoch[14/100] Loss: -294517.222
Epoch[15/100] Loss: -282032.944
Epoch[15/100] Loss: -307787.972
Epoch[16/100] Loss: -273506.139
Epoch[16/100] Loss: -262998.2

In [17]:
Hin = 5
padding=1
output_padding = 0
dilation=1
kernel_size=6
stride=2 
print('CNN', (Hin + 2*padding - dilation *(kernel_size-1)-1)/stride + 1)
print('CNN T', (Hin-1)*stride - 2*padding+dilation*(kernel_size-1) + output_padding + 1 )


CNN 1.5
CNN T 12


In [18]:
train_model()

Epoch[1/100] Loss: -300958.139
Epoch[1/100] Loss: -240112.222
Epoch[2/100] Loss: -330360.222
Epoch[2/100] Loss: -301407.111
Epoch[3/100] Loss: -324812.889
Epoch[3/100] Loss: -290236.556
Epoch[4/100] Loss: -335506.028
Epoch[4/100] Loss: -312936.972
Epoch[5/100] Loss: -415193.778
Epoch[5/100] Loss: -356691.139
Epoch[6/100] Loss: -264551.500
Epoch[6/100] Loss: -371432.417
Epoch[7/100] Loss: -249702.722
Epoch[7/100] Loss: -275366.278
Epoch[8/100] Loss: -235963.611
Epoch[8/100] Loss: -275456.389
Epoch[9/100] Loss: -297124.111
Epoch[9/100] Loss: -367607.472
Epoch[10/100] Loss: -288968.556
Epoch[10/100] Loss: -291702.500
Epoch[11/100] Loss: -339982.167
Epoch[11/100] Loss: -245203.000
Epoch[12/100] Loss: -256960.389
Epoch[12/100] Loss: -342855.444
Epoch[13/100] Loss: -286562.750
Epoch[13/100] Loss: -304461.056
Epoch[14/100] Loss: -305016.306
Epoch[14/100] Loss: -332169.806
Epoch[15/100] Loss: -336871.417
Epoch[15/100] Loss: -264312.306
Epoch[16/100] Loss: -276752.889
Epoch[16/100] Loss: -29231

In [24]:
path_model_cvae = '../models/mode_cvae.pth'
torch.save(vae, path_model_cvae)

  "type " + obj.__name__ + ". It won't be checked "


In [25]:
model = torch.load(path_model_cvae)
model.eval()

VAE(
  (layers_encoder): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (4): Se