# Assignment 6: Generative Adversarial Networks

Submitted by: **Muhammad Ibrahim Afsar Khan**


Task 2:
- Implement a fully convolutional DCGAN-like model (https://arxiv.org/abs/1511.06434)
- Train the model on the CelebA dataset to generate new faces
- Requirements:
    - Use Tensorboard, WandDB or some other experiment tracker
    - Show the capabilities of your model to generate images
    - Evaluate and track during training using one quantitative metric (e.g. FID)

## Prelimenaries

In [31]:
import numpy as np
import os
import shutil

from PIL import Image

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt

from tqdm import tqdm

In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
TBOARD_LOGS = os.path.join(os.getcwd(), "tboard_logs", "fully_convolutional_DCGAN")
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer = SummaryWriter(TBOARD_LOGS)

## Dataset

In [34]:
dataset_dir = './img_align_celeba/img_align_celeba'

In [35]:
class CelebADataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = [os.path.join(root_dir, fname) for fname in os.listdir(root_dir) if fname.endswith('.jpg')]
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        # print(f"Image shape (with batch dim): {image.shape}")  # Debug print

        return image

In [36]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

dataset = CelebADataset(root_dir=dataset_dir, transform=transform)

In [37]:
BATCH_SIZE = 32

data_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True) 

## Model

### Generator

In [39]:
class Generator(nn.Module):
    """
    A fully convolutional generator using ReLU activations. 
    Takes as input a latent vector and outputs a fake sample.
       (B, latent_dim, 1, 1)  --> (B, num_channels, 64, 64)
    """
    def __init__(self, latent_dim=128, num_channels=3, base_channels=64):
        """ Model initializer """
        super().__init__()

        layers = []
        for i in range(4):
            layers.append(
                ConvTransposeBlock(
                        in_channels=latent_dim if i == 0 else base_channels * 2 ** (3-i+1),
                        out_channels=base_channels * 2 ** (3-i),
                        kernel_size=4,
                        stride=1 if i == 0 else 2,
                        add_norm=True,
                        activation="ReLU"
                    )
                )
        layers.append(
            ConvTransposeBlock(
                    in_channels=base_channels,
                    out_channels=num_channels,
                    kernel_size=4,
                    stride=2,
                    add_norm=False,
                    activation="Tanh"
                )
            )
        
        self.model = nn.Sequential(*layers)
        return
    
    def forward(self, x):
        """ Forward pass through generator """
        y = self.model(x)
        return y

### Discriminator

In [40]:
class Discriminator(nn.Module):
    """ A fully convolutional discriminator using LeakyReLU activations. 
    Takes as input either a real or fake sample and predicts its authenticity.
       (B, num_channels, 64, 64)  -->  (B, 1, 1, 1)
    """
    def __init__(self, in_channels=3, out_dim=1, base_channels=64, dropout=0.3):
        """ Module initializer """
        super().__init__()  
        
        layers = []
        for i in range(4):
            layers.append(
                ConvBlock(
                        in_channels=in_channels if i == 0 else base_channels * 2 ** i,
                        out_channels=base_channels * 2 ** (i + 1),
                        kernel_size=4,
                        add_norm=True,
                        activation="LeakyReLU",
                        dropout=dropout,
                        stride=2
                    )
                )
        layers.append(
                ConvBlock(
                        in_channels=base_channels * 16,
                        out_channels=out_dim,
                        kernel_size=4,
                        stride=4,
                        add_norm=False,
                        activation="Sigmoid"
                    )
                )
        
        self.model = nn.Sequential(*layers)
        return
      
    def forward(self, x):
        """ Forward pass """
        y = self.model(x)
        return y

## Training

In [41]:
generator = Generator()
discriminator = Discriminator()

In [42]:
generator

Generator(
  (model): Sequential(
    (0): ConvTransposeBlock(
      (block): Sequential(
        (0): ConvTranspose2d(128, 512, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
    (1): ConvTransposeBlock(
      (block): Sequential(
        (0): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
    (2): ConvTransposeBlock(
      (block): Sequential(
        (0): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
    (3): ConvTransposeBlock(
      (block): Sequential(
        (0): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))

## Training

In [56]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
import os
from PIL import Image

# Assuming CelebADataset and DataLoader are already defined as provided
# Assuming Generator and Discriminator classes are defined as previously provided

# Hyperparameters
batch_size = 32
learning_rate = 0.0002
num_epochs = 50
latent_dim = 128
sample_interval = 500  # Interval to save generated images

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize models
generator = Generator(latent_dim=latent_dim, num_channels=3).to(device)
discriminator = Discriminator(in_channels=3).to(device)

# Loss function
criterion = nn.BCELoss()

# Optimizers
optimizer_G = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))

# Directory to save generated images
os.makedirs("generated_images", exist_ok=True)

# Training loop
for epoch in range(num_epochs):
    for i, imgs in enumerate(data_loader):
        
        # Adversarial ground truths
        valid = torch.ones((imgs.size(0), 1), requires_grad=False).to(device)
        fake = torch.zeros((imgs.size(0), 1), requires_grad=False).to(device)
        
        # Configure input
        real_imgs = imgs.to(device)
        
        # -----------------
        #  Train Generator
        # -----------------
        
        optimizer_G.zero_grad()
        
        # Sample noise as generator input
        z = torch.randn((imgs.size(0), latent_dim, 1, 1)).to(device)
        
        # Generate a batch of images
        gen_imgs = generator(z)
        
        # Loss measures generator's ability to fool the discriminator
        g_loss = criterion(discriminator(gen_imgs).view(-1, 1), valid)
        
        g_loss.backward()
        optimizer_G.step()
        
        # ---------------------
        #  Train Discriminator
        # ---------------------
        
        optimizer_D.zero_grad()
        
        # Measure discriminator's ability to classify real from generated samples
        real_loss = criterion(discriminator(real_imgs).view(-1, 1), valid)
        fake_loss = criterion(discriminator(gen_imgs.detach()).view(-1, 1), fake)
        d_loss = (real_loss + fake_loss) / 2
        
        d_loss.backward()
        optimizer_D.step()
        
        # Output training stats
        if i % sample_interval == 0:
            print(f"[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(data_loader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]")
            
            # Save some generated images
            save_image(gen_imgs.data[:25], f"generated_images/{epoch}_{i}.png", nrow=5, normalize=True)


ValueError: Using a target size (torch.Size([32, 1])) that is different to the input size (torch.Size([128, 1])) is deprecated. Please ensure they have the same size.