In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Parameter Setting

In [None]:
# Hyper-parameters and runtime configuration for the DCGAN experiment.
import torch

# Dataset path
data_path = '../input/celeba-dataset/img_align_celeba/'

# Default device: use the GPU when one is visible, otherwise fall back to the
# CPU so that every later `.to(device)` call still works without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Batch size during training
batch_size = 256

# Spatial size of training images. All images will be resized to this
#   size using a transformer.
image_size = 64

# Number of channels in the training images. For color images this is 3
nc = 3

# Size of z latent vector (i.e. size of generator input)
nz = 100

# Size of feature maps in generator
ngf = 64

# Size of feature maps in discriminator
ndf = 64

# Number of training epochs
num_epochs = 5

# Learning rate for optimizers
lr = 0.0002

# Prepare Dataset

In [None]:
from torchvision import transforms
from torchvision.datasets import ImageFolder

In [None]:
# Pre-processing pipeline applied to every image when it is loaded:
# resize, centre-crop to image_size x image_size, convert to a tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Image dataset rooted at data_path; each sample goes through `transform`.
dataset = ImageFolder(data_path, transform=transform)

## Quiz 1
**Why should we use these transformations for pre-processing?**

**Answer:**


In [None]:
from torch.utils.data import DataLoader

In [None]:
# Wrap the dataset in a shuffling, batching loader (4 worker processes)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [None]:
from matplotlib import pyplot as plt

import torchvision.utils as vutils

In [None]:
# Plot some training images: the first 64 images of one batch, tiled in a grid.
real_batch = next(iter(dataloader))

# matplotlib needs the pixels on the CPU, so build the grid straight from the
# loaded batch instead of copying every image to `device` and back again.
preview_grid = vutils.make_grid(real_batch[0][:64], padding=2, normalize=True)

plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
# make_grid returns (channels, height, width); imshow expects channels last.
plt.imshow(preview_grid.permute(1, 2, 0).numpy())
plt.show()

# Define Networks

In [None]:
# Custom weight initialisation, applied to the generator and the discriminator
def weights_init(m):
    """Re-initialise a single layer in place, selected by its class name.

    Meant to be passed to ``Module.apply``, which calls it on every sub-module.

    * class name contains ``Conv``: weights drawn from N(mean=0.0, std=0.02)
    * class name contains ``BatchNorm``: weights drawn from N(mean=1.0, std=0.02)
      and the bias set to 0
    * anything else is left untouched
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)

## Quiz 2
**Why should we use weight initialization?**

**Answer:**


## Generator

In [None]:
import torch
import torch.nn as nn

In [None]:
class Generator(nn.Module):
    """DCGAN generator: turns latent vectors into images.

    Input:  tensor of shape (batch_size, nz, 1, 1).
    Output: tensor of shape (batch_size, nc, 64, 64) with values in [-1, 1]
            (the final layer is a Tanh).

    The first transposed convolution expands the 1x1 input to 4x4; each of the
    following four doubles the spatial size (kernel 4, stride 2, padding 1)
    while the channel count shrinks from ngf*8 down to nc.
    """

    def __init__(self):
        super(Generator, self).__init__()
        self.layers = nn.Sequential(
            # input is Z, going into a convolution
            # the shape of the input is (batch_size, nz, 1, 1)
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        """Run a batch of latent vectors through the layer stack."""
        return self.layers(input)

In [None]:
# Build the generator on the target device and re-initialise every layer with
# weights_init (normal distributions with stdev=0.02).
# Module.apply returns the module itself, so the calls can be chained.
generator = Generator().to(device).apply(weights_init)

# Show the layer structure
print(generator)

In [None]:
# ---------------------------- TEST ----------------------------
# A batch of latent vectors must come out as a batch of
# nc x image_size x image_size images.
out_ = generator(torch.randn(batch_size, nz, 1, 1).to(device))
assert tuple(out_.shape) == (batch_size, nc, image_size, image_size)

## Discriminator

In [None]:
class Discriminator(nn.Module):
    """DCGAN discriminator: scores images as real or generated.

    Input:  tensor of shape (batch_size, nc, 64, 64).
    Output: tensor of shape (batch_size, 1, 1, 1) with values in (0, 1)
            (the final layer is a Sigmoid).

    Four strided convolutions (kernel 4, stride 2, padding 1) halve the spatial
    size each time, 64 -> 4, while the channel count grows from ndf to ndf*8;
    the last convolution collapses the 4x4 map to a single value per image.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        self.layers = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),
            # state size. 1 x 1 x 1
            nn.Sigmoid()
        )

    def forward(self, input):
        """Run a batch of images through the layer stack."""
        return self.layers(input)

In [None]:
# Build the discriminator on the target device and re-initialise every layer
# with weights_init (normal distributions with stdev=0.02).
# Module.apply returns the module itself, so the calls can be chained.
discriminator = Discriminator().to(device).apply(weights_init)

# Show the layer structure
print(discriminator)

In [None]:
# ---------------------------- TEST ----------------------------
# A batch of images must be reduced to one score per image.
out_ = discriminator(torch.randn(batch_size, nc, image_size, image_size).to(device))
assert tuple(out_.shape) == (batch_size, 1, 1, 1)

# Losses and Optimizers

In [None]:
# Initialize BCELoss function (binary cross-entropy on the sigmoid output of D)
criterion = nn.BCELoss()

# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# Establish convention for real and fake labels during training
real_label = 1.
fake_label = 0.

# Setup Adam optimizers for both G and D.
# beta1 is lowered from Adam's default 0.9 to 0.5, as recommended in the DCGAN
# paper: the default momentum makes training oscillate at this learning rate.
adam_betas = (0.5, 0.999)
optimizerD = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=adam_betas)
optimizerG = torch.optim.Adam(generator.parameters(), lr=lr, betas=adam_betas)

# Training

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Adversarial training loop.
# Every iteration first updates the discriminator on one real batch and one
# generated batch, then updates the generator against the refreshed
# discriminator. Losses are recorded every iteration, and a grid of images
# generated from `fixed_noise` is stored every 500 iterations and at the very end.

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
# For each epoch
for epoch in tqdm(range(num_epochs)):
    # For each batch in the dataloader (the dataset's class label is not used)
    for i, (data, _) in enumerate(dataloader):

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        discriminator.zero_grad()

        # Format batch
        data = data.to(device)
        # The last batch of an epoch can be smaller than batch_size, so size
        # the labels and the noise from the batch actually received.
        b_size = data.size(0)

        # Construct pseudo labels (one target per image)
        real_targets = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        fake_targets = torch.full((b_size,), fake_label, dtype=torch.float, device=device)

        # Forward pass real batch through D; flatten (B, 1, 1, 1) -> (B,)
        output = discriminator(data).view(-1)

        # Calculate loss on all-real batch
        errD_real = criterion(output, real_targets)

        # Calculate gradients for D in backward pass
        errD_real.backward()

        D_x = output.mean().item()

        ## Train with all-fake batch

        # Generate batch of random noise
        noise = torch.randn(b_size, nz, 1, 1, device=device)

        # Generate fake image batch with G
        fake = generator(noise)

        # Classify all fake batch with D. detach() keeps this backward pass
        # from computing gradients for G during the D update.
        output = discriminator(fake.detach()).view(-1)

        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, fake_targets)

        # Calculate the gradients for this batch (accumulated onto the
        # gradients of the all-real batch)
        errD_fake.backward()

        D_G_z1 = output.mean().item()

        # Total discriminator loss over the all-real and all-fake batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()


        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################

        generator.zero_grad()

        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = discriminator(fake).view(-1)

        # Calculate G's loss based on this output.
        # Fake labels are real for generator cost: G wants D to answer "real".
        errG = criterion(output, real_targets)

        # Calculate gradients for G
        errG.backward()

        D_G_z2 = output.mean().item()

        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            # no_grad already prevents graph construction, so no detach() is needed
            with torch.no_grad():
                fake = generator(fixed_noise).cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

        iters += 1

## Quiz 3
**What is the output of the discriminator when equilibrium is reached?**

**Answer:**


# Results

In [None]:
# Side-by-side comparison of real images and the most recent generated grid.

# Grab a batch of real images from the dataloader
real_batch = next(iter(dataloader))

# matplotlib needs the pixels on the CPU, so build the grid straight from the
# loaded batch instead of copying every image to `device` and back again.
real_grid = vutils.make_grid(real_batch[0][:64], padding=5, normalize=True)

fig, (ax_real, ax_fake) = plt.subplots(1, 2, figsize=(15, 15))

# Plot the real images (grids are channels-first; imshow expects channels last)
ax_real.axis("off")
ax_real.set_title("Real Images")
ax_real.imshow(real_grid.permute(1, 2, 0).numpy())

# Plot the fake images from the last epoch
ax_fake.axis("off")
ax_fake.set_title("Fake Images")
ax_fake.imshow(img_list[-1].permute(1, 2, 0).numpy())
plt.show()