<a href="https://colab.research.google.com/github/rajeshsahu09/CS69002_9A_18CS60R19/blob/master/18CS60R19_Assignment5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading the MNIST Dataset

In [0]:
!pip install mxnet-cu100
from __future__ import print_function
import os
import matplotlib as mpl
import tarfile
import matplotlib.image as mpimg
from matplotlib import pyplot as plt

import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet.gluon import nn, utils
from mxnet import autograd
import numpy as np

In [3]:
mnist_train = mx.gluon.data.vision.datasets.MNIST(train=True)
mnist_test = mx.gluon.data.vision.datasets.MNIST(train=False)

x_train = np.zeros((70000, 28, 28))
for i, (data, label) in enumerate(mnist_train):
    x_train[i] = data.asnumpy()[:,:,0]
for i, (data, label) in enumerate(mnist_test):
    x_train[len(mnist_train)+i] = data.asnumpy()[:,:,0]
x_train[0].shape, x_train.shape

((28, 28), (70000, 28, 28))

In [4]:
# shuffle the dataset
#Use a seed so that we get the same random permutation each time
np.random.seed(42)
x_train = x_train[np.random.permutation(x_train.shape[0])]
x_train[0].shape, x_train.shape

((28, 28), (70000, 28, 28))

In [5]:
'''DCGAN take 64*64 image as input so reshaped the train data'''
import cv2
x_train = np.asarray([cv2.resize(x, (64,64)) for x in x_train])
x_train[0].shape, x_train.shape

((64, 64), (70000, 64, 64))

In [6]:
'''Each pixel in the 64x64 image is represented by a number between 0-255, that represents the intensity of the pixel. 
However, we want to input numbers between -1 and 1 into the DCGAN. To rescale the pixel values, we will 
divide it by (255/2). This changes the scale to 0-2. We then subtract by 1 to get them in the range of -1 to 1. '''

x_train = x_train.astype(np.float32, copy=False)/(255.0/2) - 1.0
x_train[0].shape, x_train.shape

((64, 64), (70000, 64, 64))

In [0]:
'''Ultimately, images are fed into the neural net through a 70000x3x64x64 array 
but they are currently in a 70000x64x64 array. We need to add 3 channels to the images. 
Typically, when we are working with the images, the 3 channels represent 
the red, green, and blue (RGB) components of each image. Since the MNIST dataset is grayscale, 
we only need 1 channel to represent the dataset. We will pad the other channels with 0's:'''

x_train = x_train.reshape((70000, 1, 64, 64))
x_train = np.tile(x_train, (1, 3, 1, 1))

In [8]:
x_train[0].shape, x_train.shape

((3, 64, 64), (70000, 3, 64, 64))

In [0]:
epochs = 20 # Set low by default for tests, set higher when you actually run this code.
batch_size = 64
latent_z_size = 100

ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()

lr = 0.0002
beta1 = 0.5

In [0]:
train_data = mx.io.NDArrayIter(x_train, batch_size=batch_size)

## Define the network

In [0]:
from datetime import datetime
import time

real_label = nd.ones((batch_size,), ctx=ctx)
fake_label = nd.zeros((batch_size,),ctx=ctx)

def facc(label, pred):
    pred = pred.ravel()
    label = label.ravel()
    return ((pred > 0.5) == label).mean()
metric = mx.metric.CustomMetric(facc)

In [0]:
def visualize(img_arr):
    plt.imshow(((img_arr.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8))
    plt.axis('off')

## Task-3

In [0]:
# build the generator
nc = 3
ngf = 64
netG = nn.Sequential()
with netG.name_scope():
    # input is Z, going into a convolution
    netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 4 x 4
    netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 8 x 8
    netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 16 x 16
    netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, use_bias=False))
    netG.add(nn.BatchNorm())
    netG.add(nn.Activation('relu'))
    # state size. (ngf*8) x 32 x 32
    netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, use_bias=False))
    netG.add(nn.Activation('tanh'))
    # state size. (nc) x 64 x 64

# build the discriminator
ndf = 64
netD = nn.Sequential()
with netD.name_scope():
    # input is (nc) x 64 x 64
    netD.add(nn.Conv2D(ndf, 4, 2, 1, use_bias=False))
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 32 x 32
    netD.add(nn.Conv2D(ndf * 2, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 16 x 16
    netD.add(nn.Conv2D(ndf * 4, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 8 x 8
    netD.add(nn.Conv2D(ndf * 8, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 4 x 4
    netD.add(nn.Conv2D(1, 4, 1, 0, use_bias=False))

In [0]:
# loss
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# initialize the generator and the discriminator
netG.initialize(mx.init.Normal(0.02), ctx=ctx)
netD.initialize(mx.init.Normal(0.02), ctx=ctx)

### ADAM

In [0]:
# trainer for the generator and the discriminator
trainerG = gluon.Trainer(netG.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})
trainerD = gluon.Trainer(netD.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta1})

In [0]:
for epoch in range(epochs):
    print ('epoch:'+str(epoch+1))
    tic = time.time()
    btic = time.time()
    train_data.reset()
    iter = 0
    for batch in train_data:
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        data = batch.data[0].as_in_context(ctx)
        latent_z = mx.nd.random_normal(0, 1, shape=(batch_size, latent_z_size, 1, 1), ctx=ctx)

        with autograd.record():
            # train with real image
            output = netD(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label,], [output,])

            # train with fake image
            fake = netG(latent_z)
            output = netD(fake.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label,], [output,])

        trainerD.step(batch.data[0].shape[0])

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        with autograd.record():
            fake = netG(latent_z)
            output = netD(fake).reshape((-1, 1))
            errG = loss(output, real_label)
            errG.backward()

        trainerG.step(batch.data[0].shape[0])

        # Print log infomation every ten batches
        if iter % 100 == 0:
            name, acc = metric.get()
            print('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
            print('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                     %(nd.mean(errD).asscalar(),
                       nd.mean(errG).asscalar(), acc, iter, epoch))
        iter = iter + 1
        btic = time.time()

    name, acc = metric.get()
    metric.reset()
    print('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
    print('time: %f' % (time.time() - tic))

epoch:1
speed: 95.76186840832261 samples/s
discriminator loss = 1.440271, generator loss = 5.971055, binary training acc = 0.578125 at iter 0 epoch 0
speed: 832.1386797278237 samples/s
discriminator loss = 0.046643, generator loss = 7.193783, binary training acc = 0.937809 at iter 100 epoch 0
speed: 844.107455402486 samples/s
discriminator loss = 0.117289, generator loss = 3.946844, binary training acc = 0.933652 at iter 200 epoch 0
speed: 801.3716332805923 samples/s
discriminator loss = 0.143659, generator loss = 4.830194, binary training acc = 0.935605 at iter 300 epoch 0
speed: 799.4170625005584 samples/s
discriminator loss = 0.033756, generator loss = 7.072913, binary training acc = 0.937286 at iter 400 epoch 0
speed: 782.31871488191 samples/s
discriminator loss = 0.346571, generator loss = 2.709261, binary training acc = 0.930748 at iter 500 epoch 0
speed: 796.0695727474118 samples/s
discriminator loss = 0.892779, generator loss = 2.766939, binary training acc = 0.933600 at iter 6

In [0]:
num_image = 8
for i in range(num_image):
    latent_z = mx.nd.random_normal(0, 1, shape=(1, latent_z_size, 1, 1), ctx=ctx)
    img = netG(latent_z)
    plt.subplot(2,4,i+1)
    visualize(img[0])
plt.show()

In [0]:
num_image = 12
latent_z = mx.nd.random_normal(0, 1, shape=(1, latent_z_size, 1, 1), ctx=ctx)
step = 0.05
for i in range(num_image):
    img = netG(latent_z)
    plt.subplot(3,4,i+1)
    visualize(img[0])
    latent_z += 0.05
plt.show()

### SGD

In [0]:
# trainer for the generator and the discriminator
trainerG = gluon.Trainer(netG.collect_params(), 'sgd', {'learning_rate': lr, 'beta1': beta1})
trainerD = gluon.Trainer(netD.collect_params(), 'sgd', {'learning_rate': lr, 'beta1': beta1})

In [0]:
for epoch in range(epochs):
    print ('epoch:'+str(epoch+1))
    tic = time.time()
    btic = time.time()
    train_data.reset()
    iter = 0
    for batch in train_data:
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        data = batch.data[0].as_in_context(ctx)
        latent_z = mx.nd.random_normal(0, 1, shape=(batch_size, latent_z_size, 1, 1), ctx=ctx)

        with autograd.record():
            # train with real image
            output = netD(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label,], [output,])

            # train with fake image
            fake = netG(latent_z)
            output = netD(fake.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label,], [output,])

        trainerD.step(batch.data[0].shape[0])

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        with autograd.record():
            fake = netG(latent_z)
            output = netD(fake).reshape((-1, 1))
            errG = loss(output, real_label)
            errG.backward()

        trainerG.step(batch.data[0].shape[0])

        # Print log infomation every ten batches
        if iter % 100 == 0:
            name, acc = metric.get()
            print('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
            print('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
                     %(nd.mean(errD).asscalar(),
                       nd.mean(errG).asscalar(), acc, iter, epoch))
        iter = iter + 1
        btic = time.time()

    name, acc = metric.get()
    metric.reset()
    print('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
    print('time: %f' % (time.time() - tic))

In [0]:
num_image = 8
for i in range(num_image):
    latent_z = mx.nd.random_normal(0, 1, shape=(1, latent_z_size, 1, 1), ctx=ctx)
    img = netG(latent_z)
    plt.subplot(2,4,i+1)
    visualize(img[0])
plt.show()

In [0]:
num_image = 12
latent_z = mx.nd.random_normal(0, 1, shape=(1, latent_z_size, 1, 1), ctx=ctx)
step = 0.05
for i in range(num_image):
    img = netG(latent_z)
    plt.subplot(3,4,i+1)
    visualize(img[0])
    latent_z += 0.05
plt.show()