In [21]:
import pandas as pd
import os
from PIL import Image
from fastprogress import progress_bar
import random
import numpy as np

import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.io import read_image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [22]:
config = {}

config['project_path'] = '/kaggle/input/celeba-dataset'
config['img_dir'] = os.path.join(config['project_path'],'img_align_celeba','img_align_celeba')
config['working'] = '/kaggle/working'
config['lr'] = 1e-3
config['batch_size'] = 128
config['num_attr'] = 2
config['epochs'] = 10
config['enc_size'] = 100
config['num_workers'] = 8
config['img_size'] = 64

In [23]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Create custom Dataset

In [24]:
class celebDataset(Dataset):
    
    def __init__(self, img_dir, df, num_attr, transform=None):
        """
        img_dir: is the directory to image folder
        df: is the dataframe annotation image and attribute
        num_attr: number of attribute to random a list of selection attribute
        transform: transform augmentation
        """
        self.img_dir = img_dir
        self.df = df
        self.num_attr = num_attr
        
        self.transform = transform
    
        self.img_name = list(self.df['image_id'])
                
        self.attribute = random.sample(list(self.df.columns)[1:], num_attr)
        
        print('Selected attributes: ', self.attribute)
        
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_name[idx])
        
        attr = torch.tensor(self.df[self.attribute].iloc[idx])
        attr = torch.relu(attr).float()
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
        
        return img, attr
    
    def __len__(self):
        return len(self.df)

## Build Generator and Discriminator models
![Screen Shot 2021-09-05 at 14.05.58.png](https://s3.us-west-2.amazonaws.com/secure.notion-static.com/1c6682b0-a715-4363-89a2-693ae3d18a8e/Screen_Shot_2021-09-05_at_14.05.58.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAT73L2G45O3KS52Y5%2F20210905%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20210905T085415Z&X-Amz-Expires=86400&X-Amz-Signature=794ac9751b5f96d6d278f39418de1e144df5946c5cb516413e932aece881a7b2&X-Amz-SignedHeaders=host&response-content-disposition=filename%20%3D%22Screen%2520Shot%25202021-09-05%2520at%252014.05.58.png%22)

In [25]:
class Generator(nn.Module):
    """
    in_channels (int) – Number of channels in the input image
    out_channels (int) – Number of channels produced by the convolution
    kernel_size (int or tuple) – Size of the convolving kernel
    stride (int or tuple, optional) – Stride of the convolution. Default: 1
    padding (int or tuple, optional) – dilation * (kernel_size - 1) - padding zero-padding 
        will be added to both sides of each dimension in the input. Default: 0
    """
    def __init__(self, num_attr):
        super(Generator, self).__init__()
        ## deconvolution 1 of image
        self.deconv1_1 = nn.ConvTranspose2d(in_channels = 100, 
                                            out_channels = 512,
                                            kernel_size = 4, 
                                            stride = 1, 
                                            padding = 0)
        self.deconv1_1_bn = nn.BatchNorm2d(512)
        
        ## deconvolution 2 of label
        ## in_channels is number of labels where it is number of desired attributes
        
        self.deconv1_2 = nn.ConvTranspose2d(in_channels = num_attr, 
                                            out_channels = 512,
                                            kernel_size = 4, 
                                            stride = 1, 
                                            padding = 0)
        self.deconv1_2_bn = nn.BatchNorm2d(512)
        
        ## the size*2 for concat between vector images and vector attributes
        self.deconv2 = nn.ConvTranspose2d(  in_channels = 1024, 
                                            out_channels = 512,
                                            kernel_size = 4, 
                                            stride = 2, 
                                            padding = 1)
        self.deconv2_bn = nn.BatchNorm2d(512)
        
        
        self.deconv3 = nn.ConvTranspose2d(  in_channels = 512, 
                                            out_channels = 256,
                                            kernel_size = 4, 
                                            stride = 2, 
                                            padding = 1)
        self.deconv3_bn = nn.BatchNorm2d(256)
        
        self.deconv4 = nn.ConvTranspose2d(  in_channels = 256, 
                                            out_channels = 128,
                                            kernel_size = 4, 
                                            stride = 2, 
                                            padding = 1)
        
        self.deconv4_bn = nn.BatchNorm2d(128)
        
        ## out_channels = 3 is the image RGB
        ## size 128 with kernel_size =4 and stride =2 --> generating image 64x64
        self.deconv5 = nn.ConvTranspose2d(  in_channels = 128, 
                                            out_channels = 3,
                                            kernel_size = 4, 
                                            stride = 2, 
                                            padding = 1)

    def forward(self, img, attr):
        img = F.leaky_relu(self.deconv1_1_bn(self.deconv1_1(img)), 0.2)

        attr = F.leaky_relu(self.deconv1_2_bn(self.deconv1_2(attr)), 0.2)
        
        img = torch.cat([img, attr], 1)
        
        img = F.leaky_relu(self.deconv2_bn(self.deconv2(img)), 0.2)
        img = F.leaky_relu(self.deconv3_bn(self.deconv3(img)), 0.2)
        img = F.leaky_relu(self.deconv4_bn(self.deconv4(img)), 0.2)
        img = torch.tanh(self.deconv5(img))
        
        return img

![Untitle](https://s3.us-west-2.amazonaws.com/secure.notion-static.com/e96e7a9d-7e8f-4b4d-a2c5-0be80443dcc1/Screen_Shot_2021-09-05_at_14.05.00.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAT73L2G45O3KS52Y5%2F20210905%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20210905T094004Z&X-Amz-Expires=86400&X-Amz-Signature=070ba6164e6693ba887115113f20e8b96fa98a57378b6e351da8edc1aa4280bf&X-Amz-SignedHeaders=host&response-content-disposition=filename%20%3D%22Screen%2520Shot%25202021-09-05%2520at%252014.05.00.png%22)

In [26]:
class Discriminator(nn.Module):
    """
    in_channels (int) – Number of channels in the input image
    out_channels (int) – Number of channels produced by the convolution
    kernel_size (int or tuple) – Size of the convolving kernel
    stride (int or tuple, optional) – Stride of the convolution. Default: 1
    padding (int, tuple or str, optional) – Padding added to all four sides of the input. Default: 0
    """
    def __init__(self, num_attr):
        super(Discriminator, self).__init__()
        
        self.conv1_1 = nn.Conv2d(in_channels = 3, 
                                 out_channels = 64, 
                                 kernel_size = 4, 
                                 stride = 2, 
                                 padding = 1)
        
        self.conv1_2 = nn.Conv2d(in_channels = num_attr, 
                                 out_channels = 64, 
                                 kernel_size = 4, 
                                 stride = 2, 
                                 padding = 1)
        
        self.conv2 = nn.Conv2d(in_channels = 128, 
                               out_channels = 256, 
                               kernel_size = 4, 
                               stride = 2, 
                               padding = 1)
        
        self.conv2_bn = nn.BatchNorm2d(256)
        
        self.conv3 = nn.Conv2d(in_channels = 256, 
                               out_channels = 512, 
                               kernel_size = 4, 
                               stride = 2, 
                               padding = 1)
        
        self.conv3_bn = nn.BatchNorm2d(512)
        
        self.conv4 = nn.Conv2d(in_channels = 512, 
                               out_channels = 1024, 
                               kernel_size = 4, 
                               stride = 2, 
                               padding = 1)
        
        self.conv4_bn = nn.BatchNorm2d(1024)
        
        self.conv5 = nn.Conv2d(in_channels = 1024, 
                               out_channels = 1, 
                               kernel_size = 4, 
                               stride = 1, 
                               padding = 0)

    def forward(self, img, attr):
        img = F.leaky_relu(self.conv1_1(img), 0.2)
        attr = F.leaky_relu(self.conv1_2(attr), 0.2)
        
        img = torch.cat([img, attr], 1)
        
        img = F.leaky_relu(self.conv2_bn(self.conv2(img)), 0.2)
        img = F.leaky_relu(self.conv3_bn(self.conv3(img)), 0.2)
        img = F.leaky_relu(self.conv4_bn(self.conv4(img)), 0.2)
        img = torch.sigmoid(self.conv5(img))
        
        return img

In [28]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
                    transforms.Resize((config['img_size'],config['img_size'])),
                    transforms.RandomHorizontalFlip(),
                    transforms.ColorJitter(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean, std)
                    ])

criterion = torch.nn.BCELoss()

model_g = Generator(num_attr = config['num_attr'])
model_d = Discriminator(num_attr = config['num_attr'])

optimizer_g = optim.Adam(model_g.parameters(), lr=config['lr'])
optimizer_d = optim.Adam(model_d.parameters(), lr=config['lr'])

df_ = pd.read_csv('../input/celeba-dataset/list_attr_celeba.csv')[:20000]

celeb_ds = celebDataset(config['img_dir'] , df_, config['num_attr'], train_transforms)
celeb_dl = DataLoader(celeb_ds, config['batch_size'], config['num_workers'])

Selected attributes:  ['Pointy_Nose', 'No_Beard']


In [None]:
for epoch in range(config['epochs']):
    
    print('Current epoch: {}'.format(str(epoch)))
    
    model_d = model_d.to(device)
    model_g = model_g.to(device)
    
    dis_losses = []
    dis_accs = []
    
    gen_losses = []

    
    for imgs, attrs in progress_bar(celeb_dl):
        imgs, attrs = imgs.to(device), attrs.to(device)
        
        y_real = torch.full((imgs.shape[0],), 1, dtype=torch.float, device=device)
        y_fake = torch.full((imgs.shape[0],), 0, dtype=torch.float, device=device)
        
        mask_attr = torch.zeros(imgs.shape[0], 
                                config['num_attr'], 
                                config['img_size'], 
                                config['img_size'], 
                                device = device)
        
        ## attrs.shape [batch_size, num_attr, 1, 1]
        
        g_attrs = attrs.unsqueeze(-1).unsqueeze(-1).to(device)
        d_attrs = mask_attr + g_attrs
        

        ###################################################################################
        ########### (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) ###########
        ###################################################################################
        
        model_d.zero_grad()

        ## train with real

        y_dis_real = model_d(imgs, d_attrs).squeeze()
        real_loss_d = criterion(y_dis_real, y_real)
        real_acc = torch.mean(1 - torch.abs(y_dis_real - y_real)).item()

        ## train with fake
        
        init_encode_vector = torch.randn(imgs.shape[0], 
                            config['enc_size'],
                            device = device).unsqueeze(-1).unsqueeze(-1)
        y_gen = model_g(init_encode_vector, g_attrs)
        y_dis_fake = model_d(y_gen, d_attrs).squeeze()
        fake_loss_d = criterion(y_dis_fake, y_fake)
        fake_acc = torch.mean(1 - torch.abs(y_dis_fake - y_fake)).item()

        ## add losses and backprop
        
        loss_dis = real_loss_d + fake_loss_d
        loss_dis.backward()
        optimizer_d.step()

        ## recording for metrics
        dis_losses.append(loss_dis.item())
        dis_accs.append((real_acc, fake_acc))
        
        ###################################################################################
        ################### (2) Update G network: maximize log(D(G(z))) ###################
        ###################################################################################
        
        model_g.zero_grad()
        
        ## get discriminator predictions on faked images, and take loss between real y

        y_gen = model_g(init_encode_vector, g_attrs)
        y_dis_fake = model_d(y_gen, d_attrs).squeeze()
        fake_loss_d = criterion(y_dis_fake, y_real)

        ## backprop and record metric
        
        fake_loss_d.backward()
        optimizer_g.step()
        gen_losses.append(fake_loss_d.item())
    
    print('Loss Discriminator: {}'.format(np.mean(dis_losses)))
    print('Loss Generator: {}'.format(np.mean(gen_losses)))

Current epoch: 0


Loss Discriminator: 0.9109365285617436
Loss Generator: 5.913375919791544
Current epoch: 1


Loss Discriminator: 0.11114628543516704
Loss Generator: 6.647100164632129
Current epoch: 2


Loss Discriminator: 0.15624066807976983
Loss Generator: 8.11368818875331
Current epoch: 3


Loss Discriminator: 0.004982543427961605
Loss Generator: 9.348012006966172
Current epoch: 4


Loss Discriminator: 0.007591987693657102
Loss Generator: 9.996645982098428
Current epoch: 5


Loss Discriminator: 0.0004969004221956815
Loss Generator: 9.93841126618112
Current epoch: 6


Loss Discriminator: 0.00019158709367086878
Loss Generator: 11.156585265117087
Current epoch: 7
