### Import statements

In [None]:
!pip3 install torch torchvision

In [None]:
from __future__ import division
from IPython.display import clear_output
import torch
from torch import nn
from torch.autograd import Variable
from torch.optim import Adam
from torchvision import transforms, datasets
import torchvision.utils as vutils

from google.colab import files
import os
import pickle

In [None]:
!pip install Pillow==4.0.0

In [None]:
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
# Authenticate the current Colab user first, then reuse the resulting
# application-default credentials for PyDrive's GoogleAuth object.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the module-level client that the folder-creation code and
# save_file() rely on.
drive = GoogleDrive(gauth)

In [None]:
# Build the Drive folder tree that receives the training artefacts:
#   results/
#       real/  fake/  loss_logs/  models/
def _make_drive_folder(title, parent_id=None):
    """Create a Drive folder called `title` and return the uploaded PyDrive file.

    When `parent_id` is omitted no 'parents' entry is sent, so the folder goes
    to the root ('My Drive'); otherwise it is created inside that parent.
    """
    metadata = {'title': title,
                "mimeType": "application/vnd.google-apps.folder"}
    if parent_id is not None:
        metadata['parents'] = [{'id': parent_id}]
    folder = drive.CreateFile(metadata)
    # The folder only exists on Drive (and has an 'id') after Upload().
    folder.Upload()
    return folder


# Top-level folder; its ID is the parent of every sub-folder below.
results = _make_drive_folder('results')
result_id = results['id']

# Sub-folders, one per artefact type.
real = _make_drive_folder('real', result_id)
real_id = real['id']

fake = _make_drive_folder('fake', result_id)
fake_id = fake['id']

loss_logs = _make_drive_folder('loss_logs', result_id)
loss_id = loss_logs['id']

models = _make_drive_folder('models', result_id)
models_id = models['id']

### Define discriminator and generator networks

Discriminator --> just a CNN 

Generator --> basically a reverse CNN

This notebook implements the DCGAN architecture from the paper by Radford et al. 
Note:

_"The first layer of the GAN, which takes a uniform noise distribution Z as input, could be called fully connected as it is just a matrix multiplication, but the result is reshaped into a 4-dimensional tensor and used as the start of the convolution stack. For the discriminator, the last convolution layer is flattened and then fed into a single sigmoid output. See Fig. 1 for a visualization of an example model architecture."_

<img src="figures/Generator_network.png" width="800">



In [None]:
class Generator(torch.nn.Module):
    """
    This generator network is based on the original DCGAN paper by Radford et al.
    The generator takes as input a noise vector (z) of length `nz` and maps it to the
    data space (which in this case is the image space) via a linear projection
    followed by a series of 'deconvolution'* blocks.
    Hence, from the input random noise, the generator outputs an image.

    Each block uses kernel_size=4, stride=2, padding=1 and therefore doubles the
    spatial size: 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64. The final tanh keeps the
    3-channel output in [-1, 1].

    *Note: These 'deconvolution' blocks do not perform the mathematical deconvolution operator,
    it is just a reverse operation for the convolution operation used in deep learning models.

    Args:
        nz: length of the input noise vector. Defaults to 100, the value that was
            previously hard-coded, so ``Generator()`` behaves as before.
    """

    def __init__(self, nz=100):
        super(Generator, self).__init__()
        self.nz = nz
        # Projects z onto the 1024x4x4 tensor that seeds the convolution stack.
        self.linear = torch.nn.Linear(self.nz, 1024*4*4)

        #first 'deconvolution' block: 1024x4x4 -> 512x8x8
        self.Conv1 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        )

        #second 'deconvolution' block: 512x8x8 -> 256x16x16
        self.Conv2 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )

        #third 'deconvolution' block: 256x16x16 -> 128x32x32
        self.Conv3 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )

        #fourth 'deconvolution' block: 128x32x32 -> 3x64x64 (no batch norm / ReLU)
        self.Conv4 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=3, kernel_size=4, stride=2, padding=1, bias=False)
        )
        self.out = torch.nn.Tanh()

    def forward(self, z):
        """
        Perform forward calculation for generator output, given random noise input z.

        Args:
            z: tensor of shape (batch, nz).

        Returns:
            Tensor of shape (batch, 3, 64, 64) with values in [-1, 1].
        """
        # Project and reshape
        X = self.linear(z)
        X = X.view(X.shape[0], 1024, 4, 4)
        # conv blocks
        X = self.Conv1(X)
        X = self.Conv2(X)
        X = self.Conv3(X)
        X = self.Conv4(X)
        # tanh activation
        return self.out(X)

In [None]:
class Discriminator(torch.nn.Module):
    """
    This discriminator network is based on the original DCGAN paper by Radford et al.
    The discriminator is a CNN which takes as input a 3-channel image data (i.e. RGB image)
    and outputs a probability, p(real), that the image is from the real dataset.

    Every conv block uses kernel_size=4, stride=2, padding=1 and therefore halves
    the spatial size. The final linear layer expects 1024*4*4 features, so the
    input images must be 64x64 (64 -> 32 -> 16 -> 8 -> 4).
    """
    def __init__(self):
        super(Discriminator, self).__init__()
        #Conv block 1 (no batch norm on the first block)
        self.Conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True)
        )
        #Conv block 2
        self.Conv2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        #Conv block 3
        self.Conv3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        #Conv block 4
        self.Conv4 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Flattened features -> single sigmoid probability
        self.Out = nn.Sequential(
            nn.Linear(1024*4*4, 1),
            nn.Sigmoid(),
        )

    def forward(self, I):
        """
        Score a batch of images.

        Args:
            I: tensor of shape (batch, 3, 64, 64).

        Returns:
            Tensor of shape (batch, 1) holding p(real) for every image.

        Raises:
            RuntimeError: if the spatial size of `I` does not yield 1024*4*4
                features per sample (raised by the linear layer).
        """
        # Convolutional layers
        X = self.Conv1(I)
        X = self.Conv2(X)
        X = self.Conv3(X)
        X = self.Conv4(X)
        # Flatten per sample. Keeping the batch dimension explicit (instead of
        # view(-1, 1024*4*4)) stops a wrongly sized input from being silently
        # regrouped into fewer rows; it fails in the linear layer instead.
        X = X.view(X.size(0), -1)
        # apply linear layer + sigmoid activation
        X = self.Out(X)
        return X

### Training of DCGAN 
The discriminator and generator are like two agents playing a minimax game with value function V(G, D):

$$\min_{G}\max_{D}V(G, D)  = \textbf{E}_{x\sim p_{data}(x)}[\log{D(x)}] + \textbf{E}_{z\sim p_{z}(z)}[\log{(1-D(G(z)))}]$$

That is, the generator tries to fool the discriminator by maximizing the probability that its output is recognized as real, which corresponds to minimizing $\log(1-D(G(z)))$. Meanwhile, the discriminator tries to increase its accuracy in distinguishing between real and fake data by maximizing both $\log D(x)$ and this same quantity, $\log(1-D(G(z)))$, i.e. by maximizing the value function above. In the code below, the generator step is implemented as maximizing $\log D(G(z))$ (fake samples scored against the "real" target), which pursues the same goal.

In [None]:
def train(discriminator, generator, d_optimizer, g_optimizer, real_data, fake_data, real_target, fake_target, loss):
    """
    Run one adversarial update: a discriminator step followed by a generator step.

    Args:
        discriminator: network that scores a batch of samples.
        generator: accepted for symmetry; it is not called here because
            `fake_data` is expected to already be the generator's output.
        d_optimizer: optimizer over the discriminator parameters.
        g_optimizer: optimizer over the generator parameters.
        real_data: batch of real samples.
        fake_data: batch of generated samples, still attached to the
            generator's graph so that the generator step can backpropagate.
        real_target: target tensor used for "real" predictions.
        fake_target: target tensor used for "fake" predictions.
        loss: criterion called as ``loss(prediction, target)``.

    Returns:
        Tuple ``(d_error, d_prediction_real, d_prediction_fake, g_error)`` where
        `d_error` is the sum of the discriminator's real and fake losses.
    """
    def _score_and_backprop(batch, target):
        # Forward through the discriminator, compute the loss, backpropagate.
        scores = discriminator(batch)
        err = loss(scores, target)
        err.backward()
        return scores, err

    # ---- 1. Discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
    d_optimizer.zero_grad()
    real_scores, real_err = _score_and_backprop(real_data, real_target)
    # detach() keeps this backward pass out of the generator.
    fake_scores, fake_err = _score_and_backprop(fake_data.detach(), fake_target)
    d_optimizer.step()

    # ---- 2. Generator: maximize log(D(G(z))) ----
    g_optimizer.zero_grad()
    # Score the (non-detached) fakes against the "real" target.
    _, gen_err = _score_and_backprop(fake_data, real_target)
    g_optimizer.step()

    return real_err + fake_err, real_scores, fake_scores, gen_err

In [None]:
def train_discriminator(optimizer, real_data, fake_data):
    """
    Perform one discriminator update on a real batch and a fake batch.

    Uses the module-level `netD`, `loss`, `real_data_target` and
    `fake_data_target`.

    Args:
        optimizer: optimizer over the discriminator's parameters.
        real_data: batch of real samples.
        fake_data: batch of generated samples; it is detached here, so the
            caller does not have to detach it first.

    Returns:
        Tuple ``(error, prediction_real, prediction_fake)`` where `error` is the
        sum of the losses on the real and on the fake batch.
    """
    # Reset gradients
    optimizer.zero_grad()

    # 1.1 Train on Real Data
    prediction_real = netD(real_data)
    # Calculate error and backpropagate
    error_real = loss(prediction_real, real_data_target(prediction_real.size(0)))
    error_real.backward()

    # 1.2 Train on Fake Data
    # detach() keeps this backward pass out of the generator's graph
    # (a no-op when the caller already detached the batch).
    prediction_fake = netD(fake_data.detach())
    # Size the target from the fake predictions themselves, so that a fake
    # batch whose size differs from the real batch still gets a matching target.
    error_fake = loss(prediction_fake, fake_data_target(prediction_fake.size(0)))
    error_fake.backward()

    # 1.3 Update weights with gradients
    optimizer.step()

    # Return error
    return error_real + error_fake, prediction_real, prediction_fake

def train_generator(optimizer, fake_data):
    """
    Perform one generator update.

    The fake batch is scored by the module-level `netD` and compared against
    the "real" target, so the loss is low when the discriminator is fooled.

    Args:
        optimizer: optimizer over the generator's parameters.
        fake_data: generated batch, still attached to the generator's graph.

    Returns:
        The generator loss for this step.
    """
    optimizer.zero_grad()
    # Discriminator's verdict on the generated samples.
    scores = netD(fake_data)
    wanted = real_data_target(scores.size(0))
    g_loss = loss(scores, wanted)
    # Backpropagate through the discriminator into the generator, then step.
    g_loss.backward()
    optimizer.step()
    return g_loss

In [None]:
def noise(s, nz=100):
    """
    Sample a batch of noise vectors from a standard normal distribution
    (mean zero, std one).

    Args:
        s: number of noise vectors to draw (the batch size).
        nz: length of each noise vector. Defaults to 100, the value that was
            previously hard-coded.

    Returns:
        Tensor of shape (s, nz); moved to the GPU when CUDA is available.
    """
    # The deprecated Variable wrapper is not needed: plain tensors carry
    # autograd state themselves, and this tensor does not require gradients.
    z = torch.randn(s, nz)
    if torch.cuda.is_available():
        return z.cuda()
    return z

In [None]:
# def weights_init(m):
#     classname = m.__class__.__name__
#     if classname.find('Conv') != -1:
#         m.weight.data.normal_(0.0, 0.02)
#     elif classname.find('BatchNorm') != -1:
#         m.weight.data.normal_(1.0, 0.02)
#         m.bias.data.fill_(0)
        
def weights_init(m):
    """
    Re-initialise the weights of convolution and batch-norm modules in place.

    Meant to be passed to ``module.apply``. Any module whose class name
    contains 'Conv' or 'BatchNorm' has its ``weight`` redrawn from a normal
    distribution with mean 0 and std 0.02; all other modules are left untouched.
    """
    layer_kind = type(m).__name__
    if any(tag in layer_kind for tag in ('Conv', 'BatchNorm')):
        m.weight.data.normal_(0.00, 0.02)

In [None]:
#load cifar 10 data
def cifar_data():
    """
    Return the CIFAR-10 training split, downloaded into ./data/.

    Every image is resized to 64 pixels, converted to a tensor and normalised
    with mean 0.5 and std 0.5 on each of the three channels.
    """
    preprocessing = [
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize((.5, .5, .5), (.5, .5, .5)),
    ]
    return datasets.CIFAR10(
        root='./data/',
        train=True,
        transform=transforms.Compose(preprocessing),
        download=True,
    )

In [None]:
# Build the CIFAR-10 dataset and wrap it in a shuffling mini-batch loader.
data = cifar_data()
# Mini-batch size; the training loop also uses it as its logging interval.
batch_size = 100
data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)
# Number of mini-batches the loader yields per epoch.
num_batches = len(data_loader)

In [None]:
# create network instances
netG = Generator()
netD = Discriminator()

#initialize weights
# apply() calls weights_init on every sub-module of each network.
netD.apply(weights_init)
netG.apply(weights_init)

#use cuda if available
if torch.cuda.is_available():
    netG.cuda()
    netD.cuda()

#generate samples for testing
# NOTE(review): `test_noise` is assigned again with noise(20) in the
# hyperparameter cell, so this 16-sample batch is presumably not the one the
# training loop ends up using - confirm which size is intended.
num_test_samples = 16
test_noise = noise(num_test_samples)

In [None]:
# learning rate
lr = 0.0002

# Number of training epochs
num_epochs = 200

#setup optimizers
# One Adam optimizer per network, so each step only updates that network's parameters.
optD = Adam(netD.parameters(), lr=lr, betas=(0.5, 0.999))
optG = Adam(netG.parameters(), lr=lr, betas=(0.5, 0.999))

#test noise
# NOTE(review): this replaces the `test_noise` created with num_test_samples
# in the previous cell; the saved sample images are generated from these 20 vectors.
test_noise = noise(20)

# loss function
# Binary cross-entropy between the discriminator's output and the 0/1 targets.
loss = nn.BCELoss()

# NOTE(review): real_label, fake_label and device are not referenced anywhere
# else in the visible code - confirm whether they are still needed.
real_label = 1
fake_label = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def real_data_target(size):
    '''
    Target for "real" predictions: a tensor of ones with shape (size, 1),
    moved to the GPU when CUDA is available.
    '''
    ones = Variable(torch.ones(size, 1))
    return ones.cuda() if torch.cuda.is_available() else ones

def fake_data_target(size):
    '''
    Target for "fake" predictions: a tensor of zeros with shape (size, 1),
    moved to the GPU when CUDA is available.
    '''
    zeros = Variable(torch.zeros(size, 1))
    return zeros.cuda() if torch.cuda.is_available() else zeros

In [None]:
def save_file(fname, file, id_):
    """
    Upload a local file to a Google Drive folder through the module-level
    `drive` client and print a confirmation line.

    Args:
        fname: title the file gets on Drive.
        file: local path whose content is uploaded.
        id_: ID of the Drive folder that becomes the file's parent.
    """
    metadata = {'title': fname, 'parents':[{'id': id_}]}
    remote = drive.CreateFile(metadata)
    # Attach the local content, then push it to Drive.
    remote.SetContentFile(file)
    remote.Upload()
    print('File %s uploaded'%fname)

In [None]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
# Query the available GPUs once, when this cell runs.
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
# NOTE(review): GPUs[0] raises IndexError when the list is empty (no GPU assigned).
gpu = GPUs[0]
def printm():
    """
    Print the memory footprint of this process and of the GPU.

    The first line reports the free system RAM and the resident size of the
    current process; the second reports free/used/total memory and the memory
    utilisation read from the module-level `gpu` object.

    The function used to end with an indented ``printm()`` call, which made it
    call itself without bound (RecursionError). That call is removed; invoke
    ``printm()`` from outside the function whenever a report is wanted.
    """
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    # NOTE(review): `gpu` was captured when the cell ran; whether its fields
    # reflect current usage or that earlier moment is not visible here.
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

In [None]:
# Local output folders used by the training loop. -p creates missing parent
# directories and does not fail when the cell is re-run and they already exist.
!mkdir -p results/models results/loss_logs results/fake

In [None]:
# Main training loop. For every mini-batch: one discriminator step, then one
# generator step. Every `batch_size`-th mini-batch, sample images, loss logs
# and model checkpoints are written locally (images are also uploaded to
# Drive). At the end of each epoch the loss logs and checkpoints are written
# again and uploaded.
for epoch in range(num_epochs):
    # Loss histories, restarted at the beginning of every epoch.
    dloss_log = []
    gloss_log = []
    # NOTE: the loop variable `data` rebinds the module-level `data` (the dataset object).
    for i, data in enumerate(data_loader, 0):
        # Only element 0 of the loaded batch (the images) is used.
        real_data = Variable(data[0])        
        # 1. Train Discriminator
        if torch.cuda.is_available(): real_data = real_data.cuda()
        # Generate fake data (detached: the discriminator step must not reach the generator)
        fake_data = netG(noise(real_data.size(0))).detach()
        # Train D
        d_error, d_pred_real, d_pred_fake = train_discriminator(optD, 
                                                                real_data, fake_data)

        # 2. Train Generator
        # Generate a fresh fake batch, this time kept attached to the generator's graph
        fake_data = netG(noise(real_data.size(0)))
        # Train G
        g_error = train_generator(optG, fake_data)


        # Progress line: [epoch/total epochs][batch/total batches] and both losses
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G:%.4f'%
              (epoch, num_epochs, i, len(data_loader), d_error.item(), g_error.item()))

        dloss_log.append(d_error.item())
        gloss_log.append(g_error.item())
        
        
        # `batch_size` doubles as the logging interval here (i = 0, batch_size, 2*batch_size, ...).
        if i%int(batch_size) == 0: 
            # The real batch is saved directly under results/ (there is no local 'real' sub-folder).
            fname_real = 'real_samples_epoch_%03d_%03d.png'% (epoch, i)
            real_path = 'results/' + fname_real
            
            vutils.save_image(real_data,real_path, normalize = True)
            # Samples from the fixed `test_noise`. NOTE: `fake` rebinds the module-level
            # name that held the 'fake' Drive folder object; `fake_id` is unaffected.
            # NOTE(review): this forward pass runs with autograd enabled; only the
            # saved tensor is detached.
            fake = netG(test_noise)
            fname_fake = 'fake_samples2_epoch_%03d_%03d.png'% (epoch, i)
            fake_path = 'results/fake/' + fname_fake
            
            vutils.save_image(fake.detach(), fake_path, normalize=True)
            # Upload both images to their Drive folders.
            save_file(fname_fake, fake_path, fake_id)
            save_file(fname_real, real_path, real_id)

            # Clear the accumulated cell output, except on the first batch of an epoch.
            if i != 0:
                clear_output()

            
            # Intermediate snapshot of this epoch's losses so far; it uses the same
            # file names as the end-of-epoch dump below, which overwrites it.
            with open("results/loss_logs/dloss2_%03d.pickle"%(epoch), "wb") as output_file:
                pickle.dump(dloss_log, output_file)
    
            with open("results/loss_logs/gloss2_%03d.pickle"%(epoch), "wb") as output_file:
                pickle.dump(gloss_log, output_file)
 
            # Intermediate local checkpoints (not uploaded here).
            torch.save(netG.state_dict(), 'results/models/netG2_epoch_%d.pth' % (epoch))
            torch.save(netD.state_dict(), 'results/models/netD2_epoch_%d.pth' % (epoch))
    
    #log loss per epoch of training
    dlogs_fname = 'dloss2_%03d.pickle'%epoch
    glogs_fname = 'gloss2_%03d.pickle'%epoch
    
    with open("results/loss_logs/"+dlogs_fname, "wb") as output_file:
        pickle.dump(dloss_log, output_file)
    
    with open('results/loss_logs/'+glogs_fname, "wb") as output_file:
        pickle.dump(gloss_log, output_file)
        
    # Upload the complete per-epoch loss logs to the Drive 'loss_logs' folder.
    save_file(dlogs_fname, "results/loss_logs/"+dlogs_fname, loss_id)
    save_file(glogs_fname, "results/loss_logs/"+glogs_fname, loss_id)
    
    #save model state per epoch
    Dstate = 'netD2_epoch_%d.pth' % (epoch)
    Gstate = 'netG2_epoch_%d.pth' % (epoch)
    torch.save(netG.state_dict(), 'results/models/'+Gstate)
    torch.save(netD.state_dict(), 'results/models/'+Dstate)

    # Upload the end-of-epoch checkpoints to the Drive 'models' folder.
    save_file(Dstate, "results/models/"+Dstate, models_id)
    save_file(Gstate, "results/models/"+Gstate, models_id)