## E0270 Machine Learning Course Project
# DCGAN implementation
Name : Mohit Kumar<br>
M. Tech in Artificial Intelligence<br>
SR No. : 04-01-03-10-51-21-1-19825<br>
email : mohitk2@iisc.ac.in<br>

I learned to use the PyTorch library from the [PyTorch Tutorials by Aladdin Persson](https://www.youtube.com/playlist?list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz).

To implement the DCGAN, I followed the YouTube video [DCGAN implementation from scratch](https://www.youtube.com/watch?v=IZtv9s_Wx9I&list=PLhhyoLH6IjfwIp8bZnzX8QR30TRcHO8Va&index=3)<br>
and the official DCGAN paper [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/abs/1511.06434) by Alec Radford, Luke Metz and Soumith Chintala
  

In [None]:
# imports

from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torchvision.models as models
import matplotlib.animation as animation
from IPython.display import HTML
import pandas as pd
import torchvision
import sys
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import time
from google.colab import drive

# Mount Google Drive inside the Colab runtime at /content/drive/.
drive.mount('/content/drive/')
# Notebook magics (not plain Python): make the project folder on Drive the
# working directory, so the relative paths used below (./checkpoint, ./outputs,
# dataset/) resolve inside it, then list its contents.
%cd /content/drive/'My Drive'/GAN/
!ls

Mounted at /content/drive/
/content/drive/My Drive/GAN
checkpoint  dataset  outputs


## FID implementation
Code taken from [FID score for PyTorch](https://github.com/mseitzer/pytorch-fid)<br>
<img src='https://drive.google.com/uc?id=11r3lyk-PUkOzSV5LFZeeZ1AYGVB7QMDa'  height='450' width='700' ><br>
[image source](https://www.kaggle.com/code/ibtesama/gan-in-pytorch-with-fid/notebook)

In [None]:
class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning intermediate feature maps.

    The layers of torchvision's Inception-v3 are regrouped into up to four
    sequential blocks. ``forward`` runs the input through the blocks in order
    and returns the output of every block listed in ``output_blocks``.

    NOTE(review): the weights are the stock torchvision ImageNet weights, so
    the resulting scores are presumably not comparable with FID values
    produced by tools that ship their own Inception weights -- confirm before
    comparing against published numbers.
    """

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,   # First max pooling features
        192: 1,  # Second max pooling features
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    def __init__(self,
                 output_blocks=(DEFAULT_BLOCK_INDEX,),
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False):
        """Build the feature extractor.

        Parameters
        ----------
        output_blocks : sequence of int
            Indices (0-3) of the blocks whose outputs ``forward`` returns.
            The default is an immutable tuple so that the default value can
            never be shared and mutated between instances.
        resize_input : bool
            If true, bilinearly resize the input to 299x299 before the
            first block.
        normalize_input : bool
            If true, rescale the input from (0, 1) to (-1, 1).
        requires_grad : bool
            Value assigned to ``requires_grad`` of every parameter.

        Raises
        ------
        AssertionError
            If the largest requested block index exceeds 3.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(self.output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; check the installed version before switching.
        inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        # Freeze (or unfreeze) every parameter in one go.
        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps

        Parameters
        ----------
        inp : torch.Tensor
            Input tensor of shape Bx3xHxW. Values are expected to be in
            range (0, 1); inputs in another range (e.g. tanh outputs in
            (-1, 1)) must be rescaled by the caller first.

        Returns
        -------
        list of torch.Tensor
            Outputs of the selected blocks, sorted ascending by block index.
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(x,
                              size=(299, 299),
                              mode='bilinear',
                              align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            # Later blocks are not needed; stop early.
            if idx == self.last_needed_block:
                break

        return outp
    
# FID feature extractor: select the block that yields the 2048-dimensional
# pooled features, build the network up to that block and move it to the GPU.
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
model = InceptionV3(output_blocks=[block_idx]).cuda()

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

In [None]:
def calculate_activation_statistics(images,model,batch_size=128, dims=2048,
                    cuda=False):
    """Return the mean and covariance of the model's features for `images`.

    Parameters
    ----------
    images : torch.Tensor
        Batch of images, passed to `model` in a single forward pass.
    model : torch.nn.Module
        Feature extractor whose call returns a list; the first element is
        used and must be a 4-D tensor (it is indexed on dimensions 2 and 3).
    batch_size : int
        Unused; kept so existing callers keep working.
    dims : int
        Unused; kept so existing callers keep working.
    cuda : bool
        If true, move `images` to the GPU before the forward pass.

    Returns
    -------
    mu : numpy.ndarray
        Per-feature mean over the batch.
    sigma : numpy.ndarray
        Feature covariance matrix (features as columns).
    """
    model.eval()

    batch = images.cuda() if cuda else images

    # Only the activations are needed, so skip building an autograd graph.
    with torch.no_grad():
        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

    # Flatten to (number of samples, number of features).
    act = pred.detach().cpu().numpy().reshape(pred.size(0), -1)

    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma

In [None]:
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Parameters
    ----------
    mu1, mu2 : array_like
        Mean vectors of the two distributions (same length).
    sigma1, sigma2 : array_like
        Covariance matrices of the two distributions (same shape).
    eps : float
        Value added to the diagonal of both covariances when the matrix
        square root of their product is not finite.

    Returns
    -------
    numpy.floating
        The squared Frechet distance d^2.

    Raises
    ------
    AssertionError
        If the means or the covariances have mismatching shapes.
    ValueError
        If the matrix square root has a non-negligible imaginary diagonal.
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Call sqrtm without the `disp` argument: it is deprecated in recent SciPy
    # and only the matrix itself is needed here. Both calls below now use the
    # same form and always return just the matrix.
    covmean = linalg.sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error can leave a small imaginary component; drop it only
    # when it is negligible on the diagonal (which is all the trace uses).
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) +
            np.trace(sigma2) - 2 * tr_covmean)

In [None]:
def calculate_fretchet(images_real,images_fake,model,cuda=True):
    """Return the Frechet distance between real and generated image batches.

    Feature statistics (mean and covariance) of both batches are computed
    with `calculate_activation_statistics` and then compared with
    `calculate_frechet_distance`.

    Parameters
    ----------
    images_real, images_fake : torch.Tensor
        Batches of real and generated images fed to `model`.
    model : torch.nn.Module
        Feature extractor handed to `calculate_activation_statistics`.
    cuda : bool
        Forwarded to `calculate_activation_statistics`; defaults to True,
        which matches the previous hard-coded behaviour.

    Returns
    -------
    The value returned by `calculate_frechet_distance`.
    """
    mu_real, sigma_real = calculate_activation_statistics(images_real, model, cuda=cuda)
    mu_fake, sigma_fake = calculate_activation_statistics(images_fake, model, cuda=cuda)

    return calculate_frechet_distance(mu_real, sigma_real, mu_fake, sigma_fake)

Functions for loading and saving checkpoints

In [None]:
# function for saving the state of the trained discriminator and generator
def save_checkpoint(state, filename="cifar10_dcgan.pth.tar"):
    """Serialize `state` with torch.save to ./checkpoint/dcgan/<filename>.

    Parameters
    ----------
    state : object
        Object to serialize (the training script passes a dict holding the
        generator and discriminator state dicts).
    filename : str
        File name inside the checkpoint directory.
    """
    checkpoint_dir = "./checkpoint/{}/".format('dcgan')
    # exist_ok avoids the check-then-create race and a redundant lookup.
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("=> Saving checkpoint....")
    torch.save(state, os.path.join(checkpoint_dir, filename))

# function for restoring the generator and discriminator from a checkpoint
def load_checkpoint(checkpoint, gen, disc):
    """Load the 'gen' and 'disc' entries of `checkpoint` into the two networks.

    `checkpoint` is indexed with the keys 'gen' and 'disc'; each entry is
    passed to the matching network's ``load_state_dict``.
    """
    print("=> Loading checkpoint....")
    for network, key in ((gen, 'gen'), (disc, 'disc')):
        network.load_state_dict(checkpoint[key])

## Definition of the Discriminator and Generator Networks
<img src='https://drive.google.com/uc?id=1AfOR3_glHI_WHJ3HJ1wglH7Te5R6NHvP' height='200' width='600'><br>[image source](https://arxiv.org/abs/1511.06434)

In [None]:
# discriminator network
class Discriminator(nn.Module):
    """Convolutional critic mapping an image batch to one probability each.

    Five stride-2 convolutions halve the spatial size at every stage
    (64 -> 32 -> 16 -> 8 -> 4 -> 1 for 64 x 64 inputs); the three middle
    stages add batch normalisation, and a sigmoid squashes the final score.
    """

    def __init__(self, img_channels, disc_features):
        super().__init__()
        width = disc_features
        # Input: N x img_channels x 64 x 64
        stages = [
            nn.Conv2d(img_channels, width, kernel_size=4, stride=2, padding=1),  # 32 X 32
            nn.LeakyReLU(0.2),
            self._block(width, width * 2, 4, 2, 1),  # 16 X 16
            self._block(width * 2, width * 4, 4, 2, 1),  # 8 X 8
            self._block(width * 4, width * 8, 4, 2, 1),  # 4 X 4
            nn.Conv2d(width * 8, 1, kernel_size=4, stride=2, padding=0),  # 1 X 1
            nn.Sigmoid(),
        ]
        self.d = nn.Sequential(*stages)

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Conv -> BatchNorm -> LeakyReLU(0.2); the conv carries no bias."""
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(conv, norm, nn.LeakyReLU(0.2))

    def forward(self, x):
        """Run `x` through the stack and return the sigmoid scores."""
        return self.d(x)

# generator network
class Generator(nn.Module):
    """Transposed-convolution decoder turning latent vectors into images.

    A latent tensor of shape N x z_dim x 1 x 1 is upsampled through
    4 -> 8 -> 16 -> 32 -> 64 pixels; the last layer has no batch
    normalisation and ends in tanh, so outputs lie in [-1, 1].
    """

    def __init__(self, z_dim, img_channels, gen_features):
        super().__init__()
        width = gen_features
        # Input: N x z_dim x 1 x 1
        stages = [
            self._block(z_dim, width * 16, 4, 1, 0),  # 4 x 4
            self._block(width * 16, width * 8, 4, 2, 1),  # 8 x 8
            self._block(width * 8, width * 4, 4, 2, 1),  # 16 x 16
            self._block(width * 4, width * 2, 4, 2, 1),  # 32 x 32
            nn.ConvTranspose2d(width * 2, img_channels, kernel_size=4, stride=2, padding=1),
            # Output: N x img_channels x 64 x 64
            nn.Tanh(),
        ]
        self.g = nn.Sequential(*stages)

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """ConvTranspose -> BatchNorm -> ReLU; the conv carries no bias."""
        upconv = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_channels)
        return nn.Sequential(upconv, norm, nn.ReLU())

    def forward(self, x):
        """Decode the latent batch `x` into images."""
        return self.g(x)

# function for initializing weights of the neural network layers
def initialize_weights(model):
    """Apply DCGAN-style initialisation to every layer of `model` in place.

    * Conv2d / ConvTranspose2d weights are drawn from N(0, 0.02).
    * BatchNorm2d scales are drawn from N(1, 0.02) and shifts set to zero.
      Centring the scale at 1 rather than 0 keeps normalised activations
      at unit magnitude; a zero-centred scale would shrink every
      batch-normalised feature map to almost nothing at the start of
      training.

    Layers of any other type, and BatchNorm2d layers without affine
    parameters, are left untouched.
    """
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.normal_(m.weight, 0.0, 0.02)
        elif isinstance(m, nn.BatchNorm2d) and m.weight is not None:
            nn.init.normal_(m.weight, 1.0, 0.02)
            nn.init.zeros_(m.bias)

## Training of the DCGAN
<img src='https://drive.google.com/uc?id=1exRpZ8b74TjlTEe5wU5RIwJU2WWqIK68' width='600' height='210'><br>
[image source](https://cedar.buffalo.edu/~srihari/CSE676/index.html)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Train on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- data ---
channels_img, img_size, batch_size = 3, 64, 128

# --- network widths and latent size ---
disc_features = gen_features = 64
z_dim = 100

# --- optimisation: the discriminator uses a 4x larger learning rate ---
num_epochs = 100
disc_lr, gen_lr = 4e-4, 1e-4

# Pre-processing: resize to img_size, convert to a tensor and map each channel
# from [0, 1] to [-1, 1] (mean 0.5, std 0.5).
# The pipeline is bound to `transform` (singular) so that the imported
# `transforms` module is not shadowed by the Compose object.
transform = transforms.Compose(
    [
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * channels_img, [0.5] * channels_img),
    ]
)

# CIFAR-10 training split, downloaded into dataset/ when it is not there yet.
dataset = datasets.CIFAR10(root="dataset/", train=True, transform=transform, download=True)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Build both networks on the selected device and print their layouts.
gen = Generator(z_dim, channels_img, gen_features).to(device)
print(gen)
disc = Discriminator(channels_img, disc_features).to(device)
print(disc)

initialize_weights(disc)
initialize_weights(gen)

# Adam with beta1 = 0.5 for both networks; each uses its own learning rate.
opt_gen = optim.Adam(gen.parameters(), lr=gen_lr, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=disc_lr, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# Resume from an earlier run when a checkpoint exists. map_location remaps
# the stored tensors onto the current device, so a checkpoint written on a
# GPU can also be restored on a CPU-only machine.
# NOTE: only the network weights are restored; the optimizer states are not
# part of the checkpoint and start fresh.
checkpoint_path = os.path.join("./checkpoint/{}/".format('dcgan'), "cifar10_dcgan.pth.tar")
if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    load_checkpoint(checkpoint, gen, disc)

step = 0

gen.train()
disc.train()

# Metrics sampled every 500 iterations (see below).
Gen_losses = []
Disc_losses = []
fretchet_distances=[]
iter_list = []

# Create the output folders once, up front; makedirs also creates the parent
# ./outputs/dcgan/ directory that the CSV is written to.
image_dir = "./outputs/{}/generated_images/".format('dcgan')
os.makedirs(image_dir, exist_ok=True)

print("Starting Training Loop....")

for epoch in range(num_epochs):
    # Periodic checkpoint at the start of every 5th epoch.
    if epoch % 5 == 0:
        checkpoint = {'gen' : gen.state_dict(), 'disc' : disc.state_dict()}
        save_checkpoint(checkpoint)

    for batch_index, (real, _) in enumerate(loader):
        real = real.to(device)
        # Match the number of fakes to the real batch (the last batch of an
        # epoch can be smaller than batch_size) and sample on the device.
        noise = torch.randn(real.size(0), z_dim, 1, 1, device=device)
        fake = gen(noise)

        # Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        # detach(): the discriminator update must not backpropagate into the
        # generator, which also removes the need for retain_graph.
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        # Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses 
        if batch_index % 100 == 0:
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_index}/{len(loader)} \
                  Disc Loss : {loss_disc:.4f}, Gen loss : {loss_gen:.4f}"
            )
        
        # save images generated by the generator, generator and discriminator losses and fid score every 500 iterations
        if step % 500 == 0 and step > 0:
            with torch.no_grad():
                fake = gen(noise)
                img_grid_fake = torchvision.utils.make_grid(fake[:64], normalize=True)
                torchvision.utils.save_image(img_grid_fake, "./outputs/{}/generated_images/img_{}.png".format('dcgan', str(step)),)
                # Both batches live in [-1, 1] (Normalize(0.5, 0.5) on the
                # real data, tanh on the generator) whereas the Inception
                # wrapper expects inputs in [0, 1], so rescale before scoring.
                # NOTE(review): statistics from a single mini-batch are a
                # noisy estimate; treat the curve as a trend, not as a
                # reportable FID.
                fretchet_distance=calculate_fretchet((real + 1) / 2, (fake + 1) / 2, model)
            iter_list.append(step)
            Gen_losses.append(loss_gen.item())
            Disc_losses.append(loss_disc.item())
            fretchet_distances.append(fretchet_distance.item())

            losses = pd.DataFrame({"Iteration No.": iter_list, "Generator Loss": Gen_losses, "Discriminator Loss": Disc_losses,"FID Score": fretchet_distances})

            # Rewrite the whole log each time so a partial run still leaves a usable CSV.
            losses.to_csv("./outputs/{}/output_data.csv".format('dcgan'))

        step += 1

# Persist the final weights: the periodic checkpoint above fires only at the
# start of every 5th epoch, so the last epochs would otherwise be lost.
save_checkpoint({'gen' : gen.state_dict(), 'disc' : disc.state_dict()})

Files already downloaded and verified
Generator(
  (g): Sequential(
    (0): Sequential(
      (0): ConvTranspose2d(100, 1024, kernel_size=(4, 4), stride=(1, 1), bias=False)
      (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): ConvTranspose2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): Sequential(
      (0): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (3): Sequential(
      (0): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
   

In [None]:
# Read the metrics log (output_data.csv) back in and pull out the four
# series used by the plots below.
df = pd.read_csv("./outputs/{}/output_data.csv".format('dcgan'))
Iterations, Gen_Loss, Disc_Loss, FID = (
    df[column]
    for column in ('Iteration No.', 'Generator Loss', 'Discriminator Loss', 'FID Score')
)

In [None]:
# Generator loss against training iteration
plt.ylabel('Generator Loss')
plt.xlabel('Iterations')
plt.plot(Iterations, Gen_Loss)

In [None]:
# Discriminator loss against training iteration
plt.ylabel('Discriminator Loss')
plt.xlabel('Iterations')
plt.plot(Iterations, Disc_Loss)

In [None]:
# FID score against training iteration
plt.ylabel('FID Score')
plt.xlabel('Iterations')
plt.plot(Iterations, FID)