In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
from PIL import Image
import pytorch_lightning as pl
import torchvision.transforms as T
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from pytorch_lightning.core.lightning import LightningModule
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
# Suppress every warning for the rest of the session (this also hides deprecation
# and DataLoader/Lightning warnings that may point at real problems).
warnings.filterwarnings("ignore")

In [None]:
# Load the metadata table and order it by name so rows can be paired with image files.
df = pd.read_csv('../input/pokemon-images-and-types/pokemon.csv')
df2 = df.sort_values(by=['Name'], ascending=True).reset_index(drop=True)

# os.listdir() returns entries in arbitrary order, so sort explicitly: otherwise the
# row/file pairing and the positional train/val split built from it change between runs.
# Sorting on the extension-less stem mirrors the Name ordering above
# (assumes each file stem equals the corresponding Name -- TODO confirm).
image_names = sorted(
    os.listdir('../input/pokemon-images-and-types/images/images/'),
    key=lambda name: os.path.splitext(name)[0],
)
df2['Image'] = image_names

# Valid is 0 when the second-to-last character of the file name is 'p'
# (true for a '.jpg' suffix, false for '.png'), and 1 otherwise.
df2['Valid'] = df2['Image'].apply(lambda x: 0 if x[-2]=='p' else 1)

In [None]:
# Keep only the rows whose 'Valid' flag is 1; the original index labels are retained.
df3 = df2[df2['Valid']==1]

In [None]:
# NOTE: data_size counts every row of df2, while the split below slices the filtered df3.
data_size = len(df2)
print(data_size)
# Positional split of df3: the first 621 rows train, rows 621..720 validate.
# Both frames get a fresh 0..n-1 index.
df_train = df3.iloc[:621,:].reset_index(drop=True)
df_val = df3.iloc[621:721,:] .reset_index(drop=True)

In [None]:
# Display the (rows, columns) shape of the training frame.
df_train.shape

## Data Loader

In [None]:
class DatasetPokemon(Dataset):
    """Dataset that yields each image as both the model input and the reconstruction target.

    Args:
        df: DataFrame with an 'Image' column holding image file names.
        image_dir: Directory that contains the files named in ``df['Image']``.
        transform: Stored on the instance; ``__getitem__`` does not apply it.
    """

    def __init__(self, df,image_dir,transform=None):
        self.transform = transform
        self.df = df
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load the image at positional ``index``.

        Returns:
            ``(image, label)``: two float32 tensors laid out (C, H, W) with values
            scaled to [0, 1]. They hold identical pixel data in separate storage.
        """
        # Positional lookup agrees with __len__ even when df's index is not 0..n-1
        # (label-based df['Image'][index] raises KeyError or picks another row there).
        file_name = self.df['Image'].iloc[index]
        img_path = os.path.join(self.image_dir, file_name)

        # Decode the file once; the context manager closes the file handle.
        with Image.open(img_path) as handle:
            pixels = np.asarray(handle.convert('RGB'), dtype=np.float32) / 255

        # (H, W, C) -> (C, H, W), the layout the convolutional layers consume.
        image = torch.from_numpy(pixels).permute(2, 0, 1)
        # The autoencoder target is the input itself; clone so the two tensors
        # do not share memory.
        label = image.clone()
        return image, label

In [None]:
# Open one sample image (converted to RGB) for a visual sanity check.
img = Image.open('../input/pokemon-images-and-types/images/images/brionne.jpg').convert('RGB')
# img.crop(l,t,r,b)
# The bare expression below is not the cell's last line, so it produces no output.
img.size
img

In [None]:
# Convert the preview image to a tensor. T.ToTensor() is called directly because the
# shared `transform` pipeline is only defined in a later cell, so referencing it here
# raises NameError on a fresh Restart & Run All.
im_transform = T.ToTensor()(img)
# Show the entries that are strictly below 1.
im_transform[im_transform<1]

In [None]:
# Preprocessing pipeline: a single ToTensor step (no normalization is applied).
transform = T.Compose([T.ToTensor()])

In [None]:
file_path = '../input/pokemon-images-and-types/pokemon.csv'
image_dir = '../input/pokemon-images-and-types/images/images/'
batch_size = 8

# Training batches are reshuffled every epoch.
train_dataset = DatasetPokemon(df_train,image_dir,transform=transform)
train_loader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=3, pin_memory=True)

# Validation keeps a fixed order: shuffling gives no benefit for evaluation and makes
# per-batch validation numbers incomparable between epochs.
val_dataset = DatasetPokemon(df_val,image_dir,transform=transform)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False, num_workers=3, pin_memory=True)

In [None]:
# Pull a single batch from the training loader, print the input tensor's size, then stop.
for x,y in train_loader:
    print(x.size())
    break

In [None]:
# Module-level history: the VAE's training/validation epoch-end hooks each append
# one loss dict per epoch to the matching list.
model_params ={'train_loss':[],'val_loss':[]}

In [None]:
class VAE(LightningModule):
    """Convolutional variational autoencoder trained to reconstruct its input.

    The encoder applies three stride-2 convolutions (32, 64, 128 channels) and two
    linear heads that produce the mean and log-variance of the latent Gaussian.
    The decoder mirrors it with three stride-2 transposed convolutions followed by
    a 3-channel convolution and a sigmoid. The linear layers are sized for a
    15x15 encoder feature map, which is what a 120x120 input produces.

    Args:
        in_channels: Number of channels of the input images.
        latent_dim: Size of the latent vector.
    """

    # Spatial side of the encoder's final feature map (120 -> 60 -> 30 -> 15).
    _FEATURE_SIDE = 15

    @staticmethod
    def _zero_totals():
        """Return a fresh accumulator for the per-epoch loss sums."""
        return {'loss': 0, 'Reconstruction_Loss':0, 'KLD':0}

    def __init__(self,in_channels,latent_dim):
        super().__init__()
        hidden_dims = [32, 64, 128]
        self.latent_dim = latent_dim
        # Channel count at the bottleneck; decode() reshapes to it.
        self.bottleneck_channels = hidden_dims[-1]
        flat_features = self.bottleneck_channels * self._FEATURE_SIDE ** 2

        modules=[]
        for h_dim in hidden_dims:
            modules.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, out_channels=h_dim,
                              kernel_size= 3, stride= 2, padding  = 1),
                    nn.BatchNorm2d(h_dim),
                    nn.LeakyReLU())
            )
            in_channels = h_dim

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(flat_features, latent_dim)
        self.fc_var = nn.Linear(flat_features, latent_dim)

        # Running sums of the per-step losses for the current epoch, plus the number
        # of steps that contributed, so the epoch-end hooks can print true means.
        self.avg_train_loss = self._zero_totals()
        self.avg_val_loss = self._zero_totals()
        self.train_steps = 0
        self.val_steps = 0
        # Loss dict of the most recent training / validation step.
        self.temp_train = {}
        self.temp_val = {}

        hidden_dims.reverse()
        modules = []
        for i in range(len(hidden_dims) - 1):
            modules.append(
                nn.Sequential(
                    nn.ConvTranspose2d(hidden_dims[i],
                                       hidden_dims[i + 1],
                                       kernel_size=3,
                                       stride = 2,
                                       padding=1,
                                       output_padding=1),
                    nn.BatchNorm2d(hidden_dims[i + 1]),
                    nn.LeakyReLU())
            )

        self.decoder_input = nn.Linear(latent_dim, flat_features)

        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(
                            nn.ConvTranspose2d(hidden_dims[-1],
                                               hidden_dims[-1],
                                               kernel_size=3,
                                               stride=2,
                                               padding=1,
                                               output_padding=1),
                            nn.BatchNorm2d(hidden_dims[-1]),
                            nn.LeakyReLU(),
                            nn.Conv2d(hidden_dims[-1], out_channels= 3,
                                      kernel_size= 3, padding= 1),
                            nn.Sigmoid())

    def train_dataloader(self):
        """Return the module-level ``train_loader``."""
        return train_loader

    def val_dataloader(self):
        """Return the module-level ``val_loader``."""
        return val_loader

    def encode(self,input):
        """Encode a batch of images into ``(mu, log_var)`` of the latent Gaussian."""
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)

        mu = self.fc_mu(result)
        log_var = self.fc_var(result)
        return mu, log_var

    def decode(self,z):
        """Map latent vectors ``z`` back to image space (values in [0, 1] via sigmoid)."""
        result = self.decoder_input(z)
        # Undo the flatten performed in encode().
        result = result.view(-1, self.bottleneck_channels,
                             self._FEATURE_SIDE, self._FEATURE_SIDE)
        result = self.decoder(result)
        result = self.final_layer(result)
        return result

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` so sampling stays differentiable."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, input):
        """Return ``(reconstruction, mu, log_var)`` for a batch of images."""
        mu,log_var = self.encode(input)
        z = self.reparameterize(mu, log_var)
        return  self.decode(z), mu, log_var

    def loss_function(self,recons,label,mu,log_var,stage='train'):
        """Compute the VAE loss: mean-squared reconstruction error plus KL divergence.

        Args:
            recons: Reconstructed batch.
            label: Target batch.
            mu: Latent means.
            log_var: Latent log-variances.
            stage: ``'train'`` (default) stores the step's loss dict in
                ``self.temp_train`` and logs it as ``train_loss``; any other value
                stores it in ``self.temp_val`` and logs nothing, so validation
                steps no longer pollute the training metrics.

        Returns:
            The scalar loss tensor to back-propagate.
        """
        recons_loss =F.mse_loss(recons, label,reduction='mean')
        # KL term: summed over latent dimensions, averaged over the batch.
        # NOTE(review): the reconstruction term is a per-element mean while the KL term
        # is a per-sample sum, so the two are on very different scales.
        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        loss = recons_loss + kld_loss
        # 'KLD' is recorded with its sign flipped, as in the original bookkeeping.
        loss_dict = {'loss': loss.item(), 'Reconstruction_Loss':recons_loss.item(), 'KLD':-kld_loss.item()}
        if stage == 'train':
            self.temp_train = loss_dict
            self.log('train_loss', loss_dict, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        else:
            self.temp_val = loss_dict
        return loss

    def sample(self,
               num_samples,
               current_device):
        """Decode ``num_samples`` latent vectors drawn from a standard normal.

        ``current_device`` is accepted for interface compatibility but not used:
        the latent batch is created on the device (and with the dtype) of the
        model's own parameters so decode() never sees mismatched devices.
        """
        reference = self.decoder_input.weight
        z = torch.randn(num_samples,
                        self.latent_dim,
                        device=reference.device,
                        dtype=reference.dtype)
        return self.decode(z)

    def generate(self, x):
        """Return only the reconstruction of ``x``."""
        return self.forward(x)[0]

    def training_step(self, batch, batch_idx):
        """Run one optimisation step and add its losses to the epoch totals."""
        real_img, label = batch
        self.curr_device = real_img.device

        recons,mu,log_var = self.forward(real_img)
        train_loss = self.loss_function(recons,label,mu,log_var)

        for key in self.avg_train_loss:
            self.avg_train_loss[key] += self.temp_train[key]
        self.train_steps += 1
        return train_loss

    def training_epoch_end(self,outputs):
        """Print the epoch's mean training losses and record the totals in ``model_params``."""
        # Divide by the number of steps actually run instead of a hard-coded batch count.
        size = max(self.train_steps, 1)
        print('Train:','Total Loss: ',self.avg_train_loss['loss']/size,' Recon Loss: ',self.avg_train_loss['Reconstruction_Loss']/size,
             'KLD: ',self.avg_train_loss['KLD']/size)
        # The stored entry holds the epoch sums (unchanged from the previous behaviour).
        model_params['train_loss'].append(self.avg_train_loss)
        self.avg_train_loss = self._zero_totals()
        self.train_steps = 0

    def validation_step(self,batch,batch_idx):
        """Evaluate one validation batch, accumulate its losses and log ``val_loss``."""
        real_img, label = batch
        self.curr_device = real_img.device
        recons,mu,log_var = self.forward(real_img)
        val_loss = self.loss_function(recons,label,mu,log_var,stage='val')

        # Every component accumulates into the validation totals (the reconstruction
        # term previously read from the training accumulator by mistake).
        for key in self.avg_val_loss:
            self.avg_val_loss[key] += self.temp_val[key]
        self.val_steps += 1

        self.log('val_loss', val_loss.item(), on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return val_loss

    def validation_epoch_end(self,outputs):
        """Print the epoch's mean validation losses and record the totals in ``model_params``."""
        # The real step count also keeps short runs (e.g. a sanity check) correct.
        size = max(self.val_steps, 1)
        print('Val:','Total Loss: ',self.avg_val_loss['loss']/size,' Recon Loss: ',self.avg_val_loss['Reconstruction_Loss']/size,
             'KLD: ',self.avg_val_loss['KLD']/size)

        model_params['val_loss'].append(self.avg_val_loss)
        self.avg_val_loss = self._zero_totals()
        self.val_steps = 0

    def configure_optimizers(self):
        """Use Adam with its default hyper-parameters over all model parameters."""
        return optim.Adam(self.parameters())

In [None]:
# Seed Python, NumPy and PyTorch so weight initialisation, batch shuffling and
# latent sampling are repeatable between runs.
pl.seed_everything(42)

# 3 input channels, 512-dimensional latent space; the loaders come from the model's
# own train_dataloader()/val_dataloader() hooks.
model = VAE(3,512)
trainer = pl.Trainer(max_epochs=5)
trainer.fit(model)

In [None]:
# Import under an alias: binding this class to the bare name `Image` would replace
# PIL's `Image` module, which the notebook still needs for Image.open / Image.fromarray.
from IPython.display import Image as IPyImage

In [None]:
# Number of training-epoch entries recorded so far.
len(model_params['train_loss'])

## VAE

In [None]:
def show_image(x):
    """Convert the first image of a batch into an RGB PIL image.

    Args:
        x: Indexable batch whose first element is a tensor laid out (C, H, W).
            Values are scaled by 255 and cast to uint8, so they are expected to
            lie in [0, 1]; anything outside that range is clipped.

    Returns:
        The image as a PIL image in RGB mode.
    """
    # Imported locally under an alias so this helper does not depend on what the
    # module-level name `Image` is currently bound to (PIL's module or
    # IPython.display.Image).
    from PIL import Image as PILImage

    # .cpu() is a no-op for CPU tensors and makes accelerator tensors convertible.
    img = x[0].detach().cpu().numpy()
    # (C, H, W) -> (H, W, C), the layout PIL expects.
    img = img.transpose((1, 2, 0))
    # Clip before the cast: out-of-range floats would otherwise wrap around in uint8.
    pixels = np.clip(img * 255, 0, 255).astype(np.uint8)
    return PILImage.fromarray(pixels).convert('RGB')

In [None]:
# Decode 8 random latent vectors (the second argument is the sample() device parameter).
x = model.sample(8,0)
print(x)
# show_image() renders only the first image of the batch.
im = show_image(x)
im

In [None]:
# Load one image, turn it into a batch of size 1 and reconstruct it with the model.
# NOTE: `Image` must refer to PIL's Image module here for Image.open to exist.
img = Image.open('../input/pokemon-images-and-types/images/images/abomasnow.png').convert('RGB')
# img.crop(l,t,r,b)
# The bare expression below is not the cell's last line, so it produces no output.
img.size
# unsqueeze(0) adds the leading batch dimension.
im_transform = transform(img).unsqueeze(0)

# generate() returns only the reconstruction from forward().
x = model.generate(im_transform)

In [None]:
# Select the entries of x that are greater than 0.9.
x[x>.9]

In [None]:
# Select the entries of the input tensor that are greater than 0.9, for comparison with x.
im_transform[im_transform>.9]

In [None]:
# Convert the first image in x to a PIL image (nothing is displayed: the cell ends in an assignment).
im = show_image(x)

In [None]:
# Decode a single random latent vector.
x = model.sample(1,0)

In [None]:
# Display the sample scaled by 255.
x*255

In [None]:
# `img` is a PIL image, which exposes .size but has no .shape attribute;
# view it as an array to get (height, width, channels).
np.asarray(img).shape

In [None]:

# `img` is a PIL image, which exposes .size but has no .shape attribute;
# view it as an array to get (height, width, channels).
np.asarray(img).shape

In [None]:
# Display the image currently bound to `img`.
img

In [None]:
# Per-channel constants; no cell in this notebook applies a Normalize transform with them.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
# NOTE(review): `img` was last assigned from Image.open(...).convert('RGB'), i.e. it was never
# normalized -- presumably this un-normalization is a leftover; confirm before relying on the plot.
img_norm = std * img + mean
# Clamp to the [0, 1] range before plotting.
img_norm = np.clip(img_norm, 0, 1)
plt.imshow(img_norm)

In [None]:
# matplotlib.pyplot is already imported as plt in the first cell; this re-import is redundant but harmless.
import matplotlib.pyplot as plt
# Plot the image currently bound to `img`.
plt.imshow(img)

## Hierarchical VAE