### Import statements

In [1]:
import torch
from torch import nn
from torch.autograd import Variable
from torch.optim import Adam
from torchvision import transforms, datasets

### Define discriminator and generator networks

Discriminator --> just a CNN 

Generator --> basically a reverse CNN

The DCGAN network from the paper by Radford et al. is implemented here.
Note:

_"The first layer of the GAN, which takes a uniform noise distribution Z as input, could be called fully connected as it is just a matrix multiplication, but the result is reshaped into a 4-dimensional tensor and used as the start of the convolution stack. For the discriminator, the last convolution layer is flattened and then fed into a single sigmoid output. See Fig. 1 for a visualization of an example model architecture."_

<img src="figures/Generator_network.png" width="800">



In [2]:
class Generator(torch.nn.Module):
    """
    This generator network is based on the original DCGAN paper by Radford et al.
    The generator takes as input a batch of nz-dimensional noise vectors (z) and maps it to the
    data space (which in this case is the image space) via a linear projection followed by a
    series of 'deconvolution'* blocks. Hence, from the input random noise, the generator
    outputs an image.

    Every transposed convolution uses kernel_size=4, stride=2, padding=1, which doubles the
    spatial size, so the feature map grows 4 -> 8 -> 16 -> 32 -> 64 and the output has shape
    (batch, 3, 64, 64) with values in [-1, 1] (tanh).

    *Note: These 'deconvolution' blocks do not perform the mathematical deconvolution operator;
    they are transposed convolutions, i.e. the reverse of the convolution operation used in
    deep learning models.

    Args:
        nz: dimensionality of the input noise vector (defaults to 100).
    """

    def __init__(self, nz=100):
        super(Generator, self).__init__()
        self.nz = nz
        # Projection of the noise vector onto a 1024 x 4 x 4 feature map.
        # (The original code referenced the undefined bare name `nz` here.)
        self.linear = torch.nn.Linear(self.nz, 1024*4*4)

        # first 'deconvolution' block: 1024 x 4 x 4 -> 512 x 8 x 8
        self.Conv1 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        )

        # second 'deconvolution' block: 512 x 8 x 8 -> 256 x 16 x 16
        self.Conv2 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )

        # third 'deconvolution' block: 256 x 16 x 16 -> 128 x 32 x 32
        self.Conv3 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )

        # fourth 'deconvolution' block: 128 x 32 x 32 -> 3 x 64 x 64 (no batch norm / ReLU here)
        self.Conv4 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=3, kernel_size=4, stride=2, padding=1, bias=False)
        )
        self.out = torch.nn.Tanh()

    def forward(self, z):
        """
        Perform forward calculation for generator output, given random noise input z.

        Args:
            z: tensor of shape (batch, nz).

        Returns:
            Tensor of shape (batch, 3, 64, 64) with values in [-1, 1].
        """
        # Project and reshape
        X = self.linear(z)
        X = X.view(X.shape[0], 1024, 4, 4)
        # conv blocks
        X = self.Conv1(X)
        X = self.Conv2(X)
        X = self.Conv3(X)
        X = self.Conv4(X)
        # tanh activation
        return self.out(X)

In [3]:
class Discriminator(torch.nn.Module):
    """
    This discriminator network is based on the original DCGAN paper by Radford et al.
    The discriminator is a CNN which takes as input a 3-channel image data (i.e. RGB image)
    and outputs a probability, p(real), that the image is from the real dataset.

    Every convolution uses kernel_size=4, stride=2, padding=1, which halves the spatial size.
    The final linear layer expects a 1024 x 4 x 4 feature map, so the input images must be
    3 x 64 x 64 (64 -> 32 -> 16 -> 8 -> 4).
    """
    def __init__(self):
        super(Discriminator, self).__init__()
        # Conv block 1: 3 x 64 x 64 -> 128 x 32 x 32 (no batch norm on the first block)
        self.Conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Conv block 2: 128 x 32 x 32 -> 256 x 16 x 16
        self.Conv2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Conv block 3: 256 x 16 x 16 -> 512 x 8 x 8
        self.Conv3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Conv block 4: 512 x 8 x 8 -> 1024 x 4 x 4
        self.Conv4 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Classification head: flattened features -> single sigmoid probability
        self.Out = nn.Sequential(
            nn.Linear(1024*4*4, 1),
            nn.Sigmoid(),
        )

    def forward(self, I):
        """
        Compute p(real) for a batch of images.

        Args:
            I: tensor of shape (batch, 3, 64, 64).

        Returns:
            Tensor of shape (batch, 1) with values in [0, 1].
        """
        # Convolutional layers
        X = self.Conv1(I)
        X = self.Conv2(X)
        X = self.Conv3(X)
        X = self.Conv4(X)
        # Flatten per sample. Keeping the batch dimension fixed means a wrongly sized input
        # fails in the linear layer instead of being silently folded into the batch dimension.
        X = X.view(X.size(0), -1)
        # The head is registered as `self.Out` (capital O); the original called `self.out`.
        X = self.Out(X)
        return X

### Training of DCGAN 
The discriminator and generator are like two agents playing a minimax game with value function V(G, D):

$$\min_{G}\max_{D}V(G, D)  = \textbf{E}_{x\sim p_{data}(x)}[\log{D(x)}] + \textbf{E}_{z\sim p_{z}(z)}[\log{(1-D(G(z)))}]$$

That is, the generator tries to fool the discriminator by maximizing the probability that its output is recognized as real, which is equivalent to minimizing $\log(1-D(G(z)))$. Meanwhile, the discriminator tries to increase its accuracy in distinguishing between real and fake data by maximizing $\log D(x) + \log(1-D(G(z)))$, i.e. by maximizing the value function above.

In [None]:
def train(discriminator, generator, d_optimizer, g_optimizer, real_data, fake_data,real_target, fake_target, loss):
    """
    Run one discriminator update followed by one generator update.

    Args:
        discriminator: module mapping a batch of samples to predictions.
        generator: kept for interface compatibility; not used here because the fake batch
            is supplied by the caller through `fake_data`.
        d_optimizer: optimizer over the discriminator parameters.
        g_optimizer: optimizer over the generator parameters.
        real_data: batch of real samples.
        fake_data: batch of generated samples. It must still be attached to the generator's
            autograd graph (i.e. not detached by the caller), otherwise the generator step
            has no gradients to apply.
        real_target: callable taking a batch size and returning the targets for "real".
        fake_target: callable taking a batch size and returning the targets for "fake".
        loss: callable `loss(prediction, target)` returning a scalar tensor.

    Returns:
        Tuple `(d_error, d_prediction_real, d_prediction_fake, g_error)` where `d_error` is
        the sum of the discriminator losses on the real and on the fake batch.
    """
    #===============================================================
    #1. Train Discriminator by maximizing log(D(x)) + log(1 - D(G(z)))
    #===============================================================
    # Reset gradients
    d_optimizer.zero_grad()

    # 1.1 Train on Real Data
    d_prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    d_error_real = loss(d_prediction_real, real_target(real_data.size(0)))
    d_error_real.backward()

    # 1.2 Train on Fake Data
    # Detach so this backward pass stops at the discriminator: the generator graph must stay
    # intact (and its gradients untouched) for the generator step below.
    d_prediction_fake = discriminator(fake_data.detach())
    # Calculate error and backpropagate; targets are sized by the fake batch itself
    d_error_fake = loss(d_prediction_fake, fake_target(fake_data.size(0)))
    d_error_fake.backward()

    # 1.3 Update weights with gradients
    d_optimizer.step()

    #===============================================================
    #2. Train Generator by making D label G(z) as real
    #   (with a binary cross-entropy loss this presumably corresponds to the
    #   non-saturating objective -log(D(G(z))) -- confirm against the loss passed in)
    #===============================================================
    # Reset gradients
    g_optimizer.zero_grad()
    # Re-score the same fake batch with the freshly updated discriminator
    g_prediction = discriminator(fake_data)
    # Calculate error against the *real* targets and backpropagate into the generator.
    # This also accumulates gradients in the discriminator, but only g_optimizer steps here
    # and d_optimizer.zero_grad() clears them at the start of the next call.
    g_error = loss(g_prediction, real_target(g_prediction.size(0)))
    g_error.backward()
    # Update weights with gradients
    g_optimizer.step()

    # Return error
    return d_error_real + d_error_fake, d_prediction_real, d_prediction_fake, g_error

In [26]:
def noise(s, nz=100):
    """
    Generate a batch of s noise vectors, each of dimension nz, drawn from a standard normal
    distribution (mean zero, std one).

    Args:
        s: number of noise vectors (batch size).
        nz: dimensionality of each noise vector (defaults to 100).

    Returns:
        Tensor of shape (s, nz); moved to the GPU when CUDA is available.
    """
    # Plain tensors replace the deprecated Variable wrapper. Sampling stays on the CPU and is
    # moved afterwards, so the values drawn for a given CPU seed are unchanged.
    z = torch.randn(s, nz)
    if torch.cuda.is_available():
        return z.cuda()
    return z

In [30]:
def weights_init(m):
    """
    Re-initialise the parameters of a single layer in place, chosen by its class name.

    - Class name contains 'Conv': weights drawn from N(0.0, 0.02).
    - Class name contains 'BatchNorm': weights drawn from N(1.0, 0.02), bias set to 0.
    - Anything else is left untouched.

    Presumably meant to be passed to `Module.apply` -- confirm against the caller.
    """
    layer_name = m.__class__.__name__

    # Convolution-like layers (checked first, exactly as the class-name test dictates)
    if 'Conv' in layer_name:
        m.weight.data.normal_(0.0, 0.02)
        return

    # Batch-norm layers: scale centred on 1, shift reset to 0
    if 'BatchNorm' in layer_name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)