# Assignment 6: Generative Adversarial Networks

Submitted by: **Muhammad Ibrhaim Afsar Khan**


Task 1:

- Implement a conditional DCGAN model (https://arxiv.org/abs/1411.1784)
- Train the model for conditional generation on the SVHN dataset
- Requirements:
    - Use Tensorboard, WandDB or some other experiment tracker
    - Show the capabilities of the model to generate data based on given label

Task 2:
- Implement a fully convolutional DCGAN-like model (https://arxiv.org/abs/1511.06434)
- Train the model on the CelebA dataset to generate new faces
- Requirements:
    - Use Tensorboard, WandDB or some other experiment tracker
    - Show the capabilities of your model to generate images
    - Evaluate and track during training using one quantitative metric (e.g. FID)

## Prelimenaries

In [40]:
import numpy as np
import os
import shutil

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

from tqdm import tqdm

In [41]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [42]:
TBOARD_LOGS = os.path.join(os.getcwd(), "tboard_logs", "conditional_DCGAN")
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer = SummaryWriter(TBOARD_LOGS)

## Dataset

In [43]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load SVHN dataset
train_dataset = datasets.SVHN(root='../data', split='train', download=True, transform=transform)
test_dataset = datasets.SVHN(root='../data', split='test', download=True, transform=transform)

Using downloaded and verified file: ../data/train_32x32.mat
Using downloaded and verified file: ../data/test_32x32.mat


In [44]:
BATCH_SIZE = 256

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True) 
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Model

In [45]:
IMAGE_SIZE = 32
LATENT_DIM = 100
NUM_CLASSES = 10
EPOCHS = 50
LEARNING_RATE = 3e-4
BETA1 = 0.5

### Generator

In [46]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.label_emb = nn.Embedding(NUM_CLASSES, NUM_CLASSES)

        self.init_size = IMAGE_SIZE // 4
        self.l1 = nn.Sequential(nn.Linear(LATENT_DIM + NUM_CLASSES, 128 * self.init_size ** 2))

        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 3, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, noise, labels):
        gen_input = torch.cat((self.label_emb(labels), noise), -1)
        out = self.l1(gen_input)
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img

### Discriminator

In [33]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.label_emb = nn.Embedding(NUM_CLASSES, NUM_CLASSES)

        self.model = nn.Sequential(
            nn.Conv2d(3 + 1, 64, 3, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.ZeroPad2d((0, 1, 0, 1)),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.Conv2d(128, 256, 3, stride=2, padding=1),
            nn.BatchNorm2d(256, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.Conv2d(256, 512, 3, stride=2, padding=1),
            nn.BatchNorm2d(512, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout2d(0.25),
            nn.Conv2d(512, 1, 3, stride=1, padding=1),
        )

    def forward(self, img, labels):
        labels = self.label_emb(labels).view(labels.shape[0], 1, 32, 32)
        d_in = torch.cat((img, labels), 1)
        validity = self.model(d_in)
        return validity.view(validity.shape[0], -1)

## Training

In [39]:
# Initialize models
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.9))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.9))

# Loss function
adversarial_loss = torch.nn.BCEWithLogitsLoss()

for epoch in range(EPOCHS):
    for i, (imgs, labels) in enumerate(train_loader):

        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = torch.ones(batch_size, 1, requires_grad=False).to(device)
        fake = torch.zeros(batch_size, 1, requires_grad=False).to(device)

        # Configure input
        real_imgs = imgs.to(device)
        labels = labels.to(device)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Sample noise as generator input
        z = torch.randn(batch_size, LATENT_DIM).to(device)

        # Generate a batch of images
        gen_imgs = generator(z, labels)

        # Loss measures generator's ability to fool the discriminator
        g_loss = adversarial_loss(discriminator(gen_imgs, labels), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Loss for real images
        real_loss = adversarial_loss(discriminator(real_imgs, labels), valid)

        # Loss for fake images
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach(), labels), fake)

        # Total discriminator loss
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(f"[Epoch {epoch}/{EPOCHS}] [Batch {i}/{len(train_loader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]")

        # Logging to Tensorboard
        writer.add_scalar('Loss/Discriminator', d_loss.item(), epoch * len(train_loader) + i)
        writer.add_scalar('Loss/Generator', g_loss.item(), epoch * len(train_loader) + i)

    # Save generator's output on fixed noise
    if epoch % 10 == 0:
        fixed_noise = torch.randn(10, LATENT_DIM).to(device)
        fixed_labels = torch.tensor([num for num in range(10)]).to(device)
        with torch.no_grad():
            fixed_gen_imgs = generator(fixed_noise, fixed_labels).cpu()
        grid = torchvision.utils.make_grid(fixed_gen_imgs, nrow=5, normalize=True)
        writer.add_image('Generated Images', grid, epoch)

writer.close()


RuntimeError: shape '[256, 1, 32, 32]' is invalid for input of size 2560