# DCGAN with CIFTAR10

## Import

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from torchvision.models import inception_v3
from scipy import linalg
import tensorflow as tf
import numpy as np
import os
import math
import random
import torch.nn.functional as F

2023-04-24 19:29:07.694169: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Setup

In [2]:
# Set random seed for reproducibility
manual_seed = 42
torch.manual_seed(manual_seed)

# Hyperparameters
batch_size = 64
nz = 100 # Size of latent vector
ngf = 64 # Size of generator feature maps
ndf = 64 # Size of discriminator feature maps
nc = 3
learning_rate = 0.0002
num_epochs = 2
lr = 0.0002 # Learning rate
beta1 = 0.5 # Adam optimizer beta1
fid_sample_size = 10000 # Number of samples to compute FID score
fid_batch_size = 50 # Batch size for computing FID score

## Data Load

In [3]:
# Load dataset
transform = transforms.Compose([transforms.Resize(64), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = dsets.CIFAR10(root='./data', train=True, download=True, transform=transform)    #tf.keras.datasets.cifar10.load_data()
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)

Files already downloaded and verified


## Models

### Generator

In [4]:
class Generator(nn.Module):
    def __init__(self, latent_dim=100, img_channels=3, output_size=32):
        super(Generator, self).__init__()

        self.output_size = output_size
        self.fc = nn.Linear(latent_dim, 512 * 4 * 4, bias=False)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv1 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv2 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv3 = nn.ConvTranspose2d(128, img_channels, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv4 = nn.ConvTranspose2d(img_channels, img_channels, kernel_size=3, stride=1, padding=1, bias=False)


    def forward(self, z):
        x = self.fc(z)
        x = x.view(-1, 512, 4, 4)
        x = F.relu(self.bn1(x))
        x = F.relu(self.bn2(self.deconv1(x)))
        x = F.relu(self.bn3(self.deconv2(x)))
        x = torch.tanh(self.deconv3(x))
        x = self.deconv4(x)
        x = F.interpolate(x, size=self.output_size, mode='bilinear', align_corners=True)
        return x

### Discriminator

In [5]:
class Discriminator(nn.Module):
    def __init__(self, img_channels=3):
        super(Discriminator, self).__init__()

        self.conv1 = nn.Conv2d(img_channels, 64, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(512)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False)

    def forward(self, x):
        x = F.leaky_relu(self.conv1(x), negative_slope=0.2)
        x = F.leaky_relu(self.bn2(self.conv2(x)), negative_slope=0.2)
        x = F.leaky_relu(self.bn3(self.conv3(x)), negative_slope=0.2)
        x = F.leaky_relu(self.bn4(self.conv4(x)), negative_slope=0.2)
        x = self.conv5(x)
        return x.view(1, -1)    #x.view(-1, 1)    #(x.size(0), -1)


## Helpers


In [6]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


In [7]:
def calculate_fid(real_images, generated_images, device, batch_size=50, dims=2048):
    """
    Calculates the Fréchet Inception Distance (FID) between the real images and the generated images.
    """
    # Load the Inception v3 model pre-trained on ImageNet
    inception_model = inception_v3(pretrained=True, transform_input=False).to(device)
    inception_model.eval()
    
    # Calculate the mean and covariance matrix of the real images
    real_images = np.transpose(real_images, (0, 3, 1, 2))
    mu_real, sigma_real = calculate_activation_statistics(real_images, inception_model, device, batch_size, dims)
    
    # Calculate the mean and covariance matrix of the generated images
    generated_images = np.transpose(generated_images, (0, 3, 1, 2))
    mu_gen, sigma_gen = calculate_activation_statistics(generated_images, inception_model, device, batch_size, dims)
    
    # Calculate the FID between the real and generated images
    fid_score = calculate_frechet_distance(mu_real, sigma_real, mu_gen, sigma_gen)
    
    return fid_score

In [8]:
def calculate_inception_score(images, net_inception, batch_size, splits):
    # Get predictions for each batch of images
    preds = []
    num_batches = int(math.ceil(float(images.shape[0]) / float(batch_size)))
    for i in range(num_batches):
        batch = images[i*batch_size:(i+1)*batch_size]
        with torch.no_grad():
            batch = F.interpolate(batch, size=(299, 299), mode='bilinear', align_corners=True)
            pred = F.softmax(net_inception(batch), dim=1).cpu().numpy()
        preds.append(pred)

    # Calculate Inception score
    preds = np.concatenate(preds, axis=0)
    scores = []
    for i in range(splits):
        part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))

    return np.mean(scores), np.std(scores)

## Training

In [9]:
def train(netG, netD, train_loader, num_epochs, optimizerG, optimizerD, criterion, device):
    # Lists to keep track of losses over time
    G_losses = []
    D_losses = []

    # Loop over epochs
    for epoch in range(num_epochs):
        # Loop over batches in the dataset
        for i, (real_images, _) in enumerate(train_loader):
            real_images = real_images.to(device)

            # Update discriminator network
            netD.zero_grad()
            real_labels = torch.full((real_images.size(0),), 1, device=device)
            fake_labels = torch.full((real_images.size(0),), 0, device=device)
            fake_labels = fake_labels.unsqueeze(1)
            real_labels = real_labels.unsqueeze(1) #added
            real_labels.flatten()
 
            # Train discriminator on real images
            real_output = netD(real_images)
            real_loss = criterion(real_output, real_labels)
            real_loss.backward()

            # Train discriminator on fake images
            noise = torch.randn(real_images.size(0), 100, 1, 1, device=device)
            fake_images = netG(noise)
            fake_output = netD(fake_images.detach())
            fake_loss = criterion(fake_output, fake_labels)
            fake_loss.backward()

            # Update discriminator parameters
            optimizerD.step()

            # Update generator network
            netG.zero_grad()
            noise = torch.randn(real_images.size(0), 100, 1, 1, device=device)
            fake_images = netG(noise)
            output = netD(fake_images)
            loss = criterion(output, real_labels)
            loss.backward()

            # Update generator parameters
            optimizerG.step()

            # Keep track of losses
            G_losses.append(loss.item())
            D_losses.append(real_loss.item() + fake_loss.item())

        # Print progress
        print('Epoch [{}/{}], Step [{}/{}], D_loss: {:.4f}, G_loss: {:.4f}'
              .format(epoch + 1, num_epochs, i + 1, len(train_loader), D_losses[-1], G_losses[-1]))

    return G_losses, D_losses


## Images

In [10]:
def generate_images(netG, num_images, device):
    # Generate noise
    noise = torch.randn(num_images, 100, 1, 1, device=device)

    # Generate fake images
    fake_images = netG(noise).detach().cpu()

    # Rescale pixel values from [-1, 1] to [0, 1]
    fake_images = (fake_images + 1) / 2

    # Plot images
    fig, ax = plt.subplots(1, num_images, figsize=(20, 5))
    for i in range(num_images):
        ax[i].imshow(np.transpose(fake_images[i], (1, 2, 0)))
        ax[i].axis('off')
    plt.show()


In [11]:
def plot_images(G_losses, D_losses, fake_images, real_images):
    # Plot losses over time
    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(G_losses, label="Generator")
    plt.plot(D_losses, label="Discriminator")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

    # Plot real and fake images
    fig, ax = plt.subplots(1, 12, figsize=(20, 5))
    for i in range(12):
        ax[i].imshow(np.transpose(real_images[i], (1, 2, 0)))
        ax[i].axis('off')
    plt.show()

    fig, ax = plt.subplots(1, 12, figsize=(20, 5))
    for i in range(12):
        ax[i].imshow(np.transpose(fake_images[i], (1, 2, 0)))
        ax[i].axis('off')
    plt.show()


## Main

In [12]:
def main():
    # Set random seed for reproducibility
    random.seed(1234)
    torch.manual_seed(1234)

    # Set device (CPU or GPU)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Create generator and discriminator networks
    netG = Generator(nz, ngf, nc).to(device)
    netD = Discriminator(nc).to(device)

    # Initialize weights of neural networks
    netG.apply(weights_init)
    netD.apply(weights_init)

    # Set loss function and optimizer
    criterion = nn.BCELoss()
    optimizerG = optim.Adam(netG.parameters(), lr=learning_rate, betas=(beta1, 0.999))
    optimizerD = optim.Adam(netD.parameters(), lr=learning_rate, betas=(beta1, 0.999))

    # Train the DCGAN
    G_losses, D_losses = train(netG, netD, train_loader, num_epochs, optimizerG, optimizerD, criterion, device)

    # Generate sample images and plot them
    generated_images = generate_images(netG, device)
    plot_images(generated_images)

    # Calculate FID and Inception Score of generated images
    fid_score = calculate_fid(test_loader.dataset.data[:10000], generated_images, device)
    is_score = calculate_inception_score(generated_images, device)

    # Print FID and Inception Score
    print('FID score:', fid_score)
    print('Inception Score:', is_score)


In [13]:
main()

tensor([[-0.3639, -0.5641, -0.2319, -0.0533, -0.8460, -0.2053, -1.2609, -0.4069,
         -1.4518, -1.0236,  0.1786, -0.6088,  0.5581, -3.5042,  0.8576, -0.0637,
          0.7037, -0.4831, -1.2059, -1.0361, -0.7458, -0.7315, -1.2684, -2.3548,
          0.6333,  0.1499,  0.5496, -0.4615,  0.7192,  0.3913, -1.5533, -1.1736,
         -2.4295, -0.5262, -0.4120, -2.0019,  0.8015, -0.3588, -0.4224, -0.9425,
         -1.5760, -0.6769, -0.6101, -0.7147, -0.8330, -1.1415, -2.4340, -1.4335,
          0.8336, -0.8447,  1.0018, -1.3018, -0.3290,  0.7490, -1.7046, -1.8792,
          1.1098,  0.0252, -1.1534, -0.8696,  0.2536, -1.1089, -0.5838,  0.4491]],
       grad_fn=<ViewBackward0>)
tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
      

ValueError: Using a target size (torch.Size([64, 1])) that is different to the input size (torch.Size([1, 64])) is deprecated. Please ensure they have the same size.