In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import shutil

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms

In [None]:
# Model size: a 2-d latent space, with a parameter count in the same order of
# magnitude as the original VAE paper (the paper's model has about 3x as many).
latent_dims = 2
capacity = 64  # base channel count `c` used by the encoder/decoder conv layers

# Optimisation settings.
learning_rate = 1e-3
num_epochs = 20  # originally 100
# NOTE(review): batch_size is not referenced by the visible cells; the data
# loader is built with its own BATCH_SIZE constant.
batch_size = 128

# Weight of the KL term in vae_loss, and whether to try to run on CUDA.
variational_beta = 1
use_gpu = True

In [None]:
# Make the dataset

# ImageFolder expects the layout root/<class_dir>/<images>, so the Monet JPEGs
# are copied into a dedicated root that contains exactly one class directory.
MONET_SRC = '../input/gan-getting-started/monet_jpg'
DATA_PATH = '../working/actual_monet_jpg'
MONET_DST = os.path.join(DATA_PATH, 'monet_jpg_inner')

# Copy only once. Re-running a plain `cp -R` into an existing destination would
# nest a second copy inside it, and ImageFolder (which scans class directories
# recursively) would then see every image twice. copytree also creates the
# missing parent directory, and raises if the source is absent instead of
# failing silently.
if not os.path.isdir(MONET_DST):
    shutil.copytree(MONET_SRC, MONET_DST)

# Define parameters and transformation for data augmentation.
# NOTE(review): the loader uses this BATCH_SIZE, not the `batch_size` value
# from the hyper-parameter cell.
BATCH_SIZE = 10
myTransforms = transforms.Compose([
    transforms.ColorJitter(hue=.05, saturation=.05),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

# Make the dataset and data loader. The dataset is concatenated with itself 32
# times, so one pass over the loader visits each image 32 times, each time
# with freshly sampled augmentations.
train_data = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=myTransforms)
train_data = torch.utils.data.ConcatDataset([train_data] * 32)
train_data_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
print("Done")

In [None]:
class Encoder(nn.Module):
    """Convolutional encoder producing the latent mean and log-variance.

    Four strided convolutions shrink the input before two linear heads map the
    flattened features to `latent_dims` values each. The linear heads are sized
    for a 7 x 7 final feature map, which is what a 256 x 256 input yields
    (256 -> 85 -> 28 -> 14 -> 7); the shape comments below assume that input size.
    """

    def __init__(self):
        super().__init__()
        c = capacity
        flat_features = c * 2 * 7 * 7
        self.conv00 = nn.Conv2d(3, 16, kernel_size=4, stride=3, padding=0)     # out: 16 x 85 x 85
        self.conv01 = nn.Conv2d(16, 32, kernel_size=4, stride=3, padding=0)    # out: 32 x 28 x 28
        self.conv1 = nn.Conv2d(32, c, kernel_size=4, stride=2, padding=1)      # out: c x 14 x 14
        self.conv2 = nn.Conv2d(c, c * 2, kernel_size=4, stride=2, padding=1)   # out: 2c x 7 x 7
        self.fc_mu = nn.Linear(flat_features, latent_dims)
        self.fc_logvar = nn.Linear(flat_features, latent_dims)

    def forward(self, x):
        """Encode a batch of 3-channel images.

        Returns:
            (x_mu, x_logvar): two tensors of shape (batch, latent_dims).
        """
        # Every convolution is followed by a ReLU.
        for conv in (self.conv00, self.conv01, self.conv1, self.conv2):
            x = F.relu(conv(x))
        # Flatten the multi-channel feature maps into one vector per sample.
        features = x.view(x.size(0), -1)
        return self.fc_mu(features), self.fc_logvar(features)

class Decoder(nn.Module):
    """Transposed-convolution decoder mapping latent vectors to images.

    A linear layer expands each latent vector to a (2 * capacity) x 7 x 7
    feature map, which four transposed convolutions upsample
    (7 -> 14 -> 28 -> 85 -> 256) to a 3-channel output.
    """

    def __init__(self):
        super().__init__()
        c = capacity
        self.fc = nn.Linear(latent_dims, c * 2 * 7 * 7)
        self.conv2 = nn.ConvTranspose2d(c * 2, c, kernel_size=4, stride=2, padding=1)   # out: c x 14 x 14
        self.conv1 = nn.ConvTranspose2d(c, 32, kernel_size=4, stride=2, padding=1)      # out: 32 x 28 x 28
        self.conv01 = nn.ConvTranspose2d(32, 16, kernel_size=4, stride=3, padding=0)    # out: 16 x 85 x 85
        self.conv00 = nn.ConvTranspose2d(16, 3, kernel_size=4, stride=3, padding=0)     # out: 3 x 256 x 256

    def forward(self, x):
        """Decode a (batch, latent_dims) tensor into a batch of images in [0, 1]."""
        # Un-flatten the feature vectors into multi-channel 7 x 7 feature maps.
        x = self.fc(x).view(-1, capacity * 2, 7, 7)
        for deconv in (self.conv2, self.conv1, self.conv01):
            x = F.relu(deconv(x))
        # Sigmoid on the last layer: the reconstruction loss is binary
        # cross-entropy, which needs outputs in [0, 1].
        return torch.sigmoid(self.conv00(x))
    
class VariationalAutoencoder(nn.Module):
    """Encoder/decoder pair joined by a sampled latent vector."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        """Encode `x`, draw a latent vector and decode it.

        Returns:
            (x_recon, latent_mu, latent_logvar)
        """
        latent_mu, latent_logvar = self.encoder(x)
        x_recon = self.decoder(self.latent_sample(latent_mu, latent_logvar))
        return x_recon, latent_mu, latent_logvar

    def latent_sample(self, mu, logvar):
        """Return a latent vector for the distribution described by `mu` and `logvar`.

        In eval mode the mean is returned unchanged. In training mode a sample
        is drawn with the reparameterization trick: mu + eps * std, with eps
        drawn from a standard normal and std = exp(0.5 * logvar).
        """
        if not self.training:
            return mu
        std = torch.exp(0.5 * logvar)
        eps = torch.empty_like(std).normal_()
        return mu + eps * std
    
def vae_loss(recon_x, x, mu, logvar):
    """Return the VAE training loss: summed reconstruction BCE plus weighted KL.

    Args:
        recon_x: reconstructed batch with values in [0, 1], same shape as `x`.
        x: target batch with values in [0, 1].
        mu: latent means produced by the encoder.
        logvar: latent log-variances produced by the encoder.

    Returns:
        A scalar tensor: BCE summed over every element of the batch, plus
        `variational_beta` times the KL divergence summed over the batch.
    """
    # recon_x is the probability of a multivariate Bernoulli distribution p.
    # -log(p(x)) is then the pixel-wise binary cross-entropy.
    # Averaging or not averaging the binary cross-entropy over all pixels here
    # is a subtle detail with big effect on training, since it changes the weight
    # we need to pick for the other loss term by several orders of magnitude.
    # Not averaging is the direct implementation of the negative log likelihood,
    # but averaging makes the weight of the other loss term independent of the image resolution.
    #
    # reduction='sum' adds up every element regardless of shape, so no
    # flattening is needed; this also keeps the function usable with
    # non-contiguous tensors and with inputs that are not 4-D.
    recon_loss = F.binary_cross_entropy(recon_x, x, reduction='sum')

    # KL-divergence between the prior distribution over latent vectors
    # (the one we are going to sample from when generating new images)
    # and the distribution estimated by the encoder for the given image.
    kldivergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return recon_loss + variational_beta * kldivergence
    
    
# Run on the first CUDA device only when requested and available; otherwise
# stay on the CPU.
wants_cuda = use_gpu and torch.cuda.is_available()
device = torch.device("cuda:0" if wants_cuda else "cpu")

# Build the model and move its parameters to the chosen device.
vae = VariationalAutoencoder().to(device)

# Count the trainable parameters only.
num_params = 0
for param in vae.parameters():
    if param.requires_grad:
        num_params += param.numel()
print('Number of parameters: %d' % num_params)

In [None]:
optimizer = torch.optim.Adam(vae.parameters(), lr=learning_rate, weight_decay=1e-5)

# Training mode makes latent_sample draw a random latent instead of returning
# the mean.
vae.train()

# One entry per epoch: the mean over batches of the summed per-batch loss.
train_loss_avg = []

print('Training ...')
for epoch in range(num_epochs):
    train_loss_avg.append(0)
    num_batches = 0

    # The class labels from ImageFolder are ignored.
    for num_batches, (image_batch, _) in enumerate(train_data_loader, start=1):
        image_batch = image_batch.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass followed by the combined reconstruction + KL loss.
        recon_batch, mu, logvar = vae(image_batch)
        loss = vae_loss(recon_batch, image_batch, mu, logvar)

        # Backpropagate and take one optimizer step.
        loss.backward()
        optimizer.step()

        train_loss_avg[-1] += loss.item()

    train_loss_avg[-1] /= num_batches
    print('Epoch [%d / %d] average reconstruction error: %f' % (epoch+1, num_epochs, train_loss_avg[-1]))

In [None]:
# Plot the per-epoch training loss collected in train_loss_avg.

import matplotlib.pyplot as plt
plt.ion()

fig = plt.figure()
ax = fig.gca()
ax.plot(train_loss_avg)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
plt.show()

In [None]:
def to_img(x):
    """Return `x` with every element limited to the range [0, 1]."""
    return x.clamp(min=0, max=1)

def show_image(img):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: a 3-d tensor whose first axis is moved last before plotting
            (presumably channels x height x width, as returned by
            torchvision.utils.make_grid). Values are clamped to [0, 1].
    """
    img = to_img(img)
    # Tensor.numpy() only works for CPU tensors that do not require grad;
    # detach().cpu() makes the helper safe for GPU / grad-tracking inputs and
    # leaves plain CPU tensors unchanged.
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

vae.eval()

# Number of images to generate, and how many latent vectors to decode per
# forward pass. Decoding every latent in a single call would hold the
# activations for the whole set on the device at once; decoding in chunks
# keeps peak device memory bounded by the chunk size.
num_generated = 7000
decode_batch_size = 500

with torch.no_grad():

    # sample latent vectors from the normal distribution
    latent = torch.randn(num_generated, latent_dims, device=device)

    # Decode chunk by chunk into one preallocated CPU tensor. The output shape
    # is taken from the first decoded chunk, and writing into a preallocated
    # tensor avoids a second full-size copy that a final torch.cat would need.
    img_recon = None
    for start in range(0, num_generated, decode_batch_size):
        decoded = vae.decoder(latent[start:start + decode_batch_size]).cpu()
        if img_recon is None:
            img_recon = torch.empty((num_generated,) + tuple(decoded.shape[1:]), dtype=decoded.dtype)
        img_recon[start:start + decoded.size(0)] = decoded

    # Preview the first nine generated images in a 3-column grid.
    fig, ax = plt.subplots(figsize=(5, 5))
    show_image(torchvision.utils.make_grid(img_recon[:9], 3))
    plt.show()

In [None]:
from zipfile import ZipFile

# Save every generated image as a JPEG and collect them in images.zip.
# The `with` block closes the archive even if saving one image fails, and each
# temporary JPEG is removed right after it is archived, so only files created
# here are deleted (a blanket `rm *.jpg` would also remove unrelated JPEGs in
# the working directory).
with ZipFile('images.zip', 'w') as zipObj:
    for i, img in enumerate(img_recon):
        fp = 'img' + str(i) + '.jpg'
        torchvision.utils.save_image(img, fp)
        zipObj.write(fp)
        os.remove(fp)
print("done")