#have removed transform function, changed to relu6, iterations are 100, leak is 0.3 instead of 0.2

In [27]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.contrib.slim as slim
import os
import scipy.misc
import scipy
from tensorflow.examples.tutorials.mnist import input_data

The following cell contains all the helper functions needed to save the images into the specified folder. 

In [28]:
def save_images(images, size, image_path):
    """Write a batch of images to ``image_path`` as one picture.

    Convenience entry point: the three arguments are forwarded unchanged
    to ``imsave`` and its return value is handed back to the caller.

    Args:
        images: batch of images, passed through to ``imsave``.
        size: grid layout, passed through to ``imsave``.
        image_path: destination file path.
    """
    outcome = imsave(images, size, image_path)
    return outcome

def imsave(images, size, path):
    """Merge ``images`` into a single grid and save it to ``path``.

    The grid is built with ``merge(images, size)``. When the installed
    SciPy still provides ``scipy.misc.imsave`` it is used exactly as
    before. That function was removed from newer SciPy releases, so when
    it is missing the grid is written with ``matplotlib.pyplot.imsave``
    using a grayscale colormap instead of raising ``AttributeError``.

    Args:
        images: batch of images accepted by ``merge``.
        size: ``[rows, cols]`` grid layout accepted by ``merge``.
        path: destination file path.

    Returns:
        Whatever the underlying writer returns.
    """
    grid = merge(images, size)

    # Look the legacy writer up defensively: depending on the SciPy
    # version either the function or the whole ``misc`` namespace may be
    # unavailable.
    legacy_writer = getattr(getattr(scipy, "misc", None), "imsave", None)
    if legacy_writer is not None:
        return legacy_writer(path, grid)

    # NOTE(review): the fallback scales the data between its min and max
    # like the legacy writer did, but the file is written through a
    # colormap rather than as a single-channel image - confirm this is
    # acceptable for downstream consumers of the saved figures.
    return plt.imsave(path, grid, cmap="gray")

def merge(images, size):
    """Tile a batch of equally sized 2-D images into one large 2-D array.

    Images are placed left-to-right, top-to-bottom on a grid with
    ``size[0]`` rows and ``size[1]`` columns. Grid cells that receive no
    image stay zero.

    Args:
        images: array whose axes are (image index, height, width).
        size: two-element sequence ``[rows, cols]``.

    Returns:
        A float array of shape ``(height * rows, width * cols)``.
    """
    tile_h = images.shape[1]
    tile_w = images.shape[2]
    n_rows, n_cols = size[0], size[1]
    canvas = np.zeros((tile_h * n_rows, tile_w * n_cols))

    for position, tile in enumerate(images):
        # Row-major placement: the quotient selects the row, the
        # remainder selects the column.
        row, col = divmod(position, n_cols)
        top = row * tile_h
        left = col * tile_w
        canvas[top:top + tile_h, left:left + tile_w] = tile

    return canvas

The following cell defines the generator and discriminator functions. Both functions use TF-Slim, a lightweight library for TensorFlow that makes defining, training and evaluating models easier. With TF-Slim, a complex layer can be defined in a single call that takes all the required parameters into consideration.

The generator function first projects the input vector through a fully connected layer whose normalization function is batch_norm. Normalization is used to prevent neurons from saturating when inputs have varying scale and to help generalization. The activation function is ReLU6, i.e. a Rectified Linear Unit capped at 6, which computes min(max(features,0),6). In general, ReLU-style activations are often preferred over functions such as sigmoid because they are computationally simpler in terms of forward and backward passes. Also, plain ReLU saturates only when the input is less than 0 (ReLU6 additionally saturates when the input exceeds 6). After this, the generator uses 4 consecutive 2D transposed convolutions to generate the final image from the given random vectors. The first three use a kernel size of [5,5] and keep reducing the number of outputs (64, 32, 16); the last one uses a [32,32] kernel with a tanh activation to produce the single-channel output image.

The discriminator function aims at producing probabilities from the given input images. The first 3 layers use convolutional 2d networks and the last layer uses a fully connected layer and sigmoid function to flatten out the answer. The ReLU function used here is the leaky relu which is a modified version of ReLU using a non-zero gradient for negative input. It helps to eliminate saturation which tends to hamper learning in deep networks. 

In [29]:
def generator(z):
    """Build the generator network that maps ``z`` to an image tensor.

    Layers, in creation order (scope names in quotes):
      * 'g_project': fully connected layer with 4*4*256 outputs, batch
        norm and ReLU6, reshaped to ``[-1, 4, 4, 256]``.
      * 'g_conv1'..'g_conv3': stride-2 transposed convolutions with 5x5
        kernels, batch norm and ReLU6, producing 64, 32 and 16 channels.
      * 'g_out': transposed convolution with a 32x32 kernel, one output
        channel, no bias and a tanh activation.

    All weights use the module-level ``initializer``.

    Args:
        z: input tensor fed to the fully connected projection.

    Returns:
        The output tensor of the 'g_out' layer.
    """
    # Settings shared by the projection and by every up-sampling layer.
    bn_relu6 = dict(
        normalizer_fn=slim.batch_norm,
        activation_fn=tf.nn.relu6,
        weights_initializer=initializer,
    )

    projected = slim.fully_connected(z, 4*4*256, scope='g_project', **bn_relu6)
    features = tf.reshape(projected, [-1, 4, 4, 256])

    # Three identical up-sampling stages that differ only in channel count.
    for stage, channels in enumerate((64, 32, 16), start=1):
        features = slim.convolution2d_transpose(
            features,
            num_outputs=channels,
            kernel_size=[5, 5],
            stride=[2, 2],
            padding="SAME",
            scope='g_conv%d' % stage,
            **bn_relu6
        )

    return slim.convolution2d_transpose(
        features,
        num_outputs=1,
        kernel_size=[32, 32],
        padding="SAME",
        biases_initializer=None,
        activation_fn=tf.nn.tanh,
        scope='g_out',
        weights_initializer=initializer,
    )


def discriminator(bottom, reuse=False):
    """Build the discriminator network that scores ``bottom``.

    Layers, in creation order (scope names in quotes):
      * 'd_conv1': 4x4 stride-2 convolution, 16 channels, no bias,
        leaky ReLU.
      * 'd_conv2', 'd_conv3': 4x4 stride-2 convolutions with 32 and 64
        channels, batch norm and leaky ReLU.
      * 'd_out': fully connected layer on the flattened features with a
        single sigmoid output.

    All weights use the module-level ``initializer``.

    Args:
        bottom: input image tensor.
        reuse: forwarded to every layer so that a second call can share
            the variables created by the first one.

    Returns:
        The sigmoid output tensor of the 'd_out' layer.
    """
    # Settings shared by all three convolutional layers.
    conv_common = dict(
        kernel_size=[4, 4],
        stride=[2, 2],
        padding="SAME",
        activation_fn=lrelu,
        reuse=reuse,
        weights_initializer=initializer,
    )

    hidden = slim.convolution2d(
        bottom, 16, biases_initializer=None, scope='d_conv1', **conv_common)
    hidden = slim.convolution2d(
        hidden, 32, normalizer_fn=slim.batch_norm, scope='d_conv2', **conv_common)
    hidden = slim.convolution2d(
        hidden, 64, normalizer_fn=slim.batch_norm, scope='d_conv3', **conv_common)

    flat = slim.flatten(hidden)
    return slim.fully_connected(
        flat,
        1,
        activation_fn=tf.nn.sigmoid,
        reuse=reuse,
        scope='d_out',
        weights_initializer=initializer,
    )


def lrelu(x, leak=0.3, name="lrelu"):
    """Leaky ReLU written as a blend of ``x`` and ``abs(x)``.

    The expression evaluates to ``x`` where ``x`` is non-negative and to
    ``leak * x`` where it is negative.

    Args:
        x: input value or tensor.
        leak: slope applied to negative inputs.
        name: variable scope the ops are created under.

    Returns:
        The activated value.
    """
    with tf.variable_scope(name):
        identity_weight = 0.5 * (1 + leak)
        magnitude_weight = 0.5 * (1 - leak)
        return identity_weight * x + magnitude_weight * abs(x)

This GAN will be using the MNIST dataset containing images of handwritten digits in black and white. It has 60000 training and 10000 testing examples. The original images are 28 x 28 but have been resized to 32 x 32 for this network. An initial vector of size 100 is used for generating images. The optimization objective of the GAN is taken as negative because the loss has to be reduced.

The Adam optimizer is being used here to train the GAN. This optimizer performs much better than other optimizers such as Gradient Descent. There are many reasons for this. It uses moving averages of the parameters enabling it to use larger effective step size and the algorithm (Kingma and Ba's Adam) will converge to this step size without fine tuning. Gradients are then calculated and applied to the generator and discriminator networks respectively. 

In [30]:
# Load the MNIST dataset (labels are read as plain integers).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)

# Start from an empty graph so re-running this cell does not pile up
# duplicate variables.
tf.reset_default_graph()

z_size = 100  # Length of the random input vector fed to the generator.

# Weight initializer shared by every layer of both networks.
initializer = tf.truncated_normal_initializer(stddev=0.02)

z_in = tf.placeholder(shape=[None,z_size],dtype=tf.float32) #Random vector
real_in = tf.placeholder(shape=[None,32,32,1],dtype=tf.float32) #Real images

Gz = generator(z_in)                # Generated images.
Dx = discriminator(real_in)         # Discriminator output on real images.
Dg = discriminator(Gz,reuse=True)   # Same discriminator weights on generated images.

# Both objectives are negated so that they can be minimized.
d_loss = -tf.reduce_mean(tf.log(Dx) + tf.log(1.-Dg))
g_loss = -tf.reduce_mean(tf.log(Dg))

tvars = tf.trainable_variables()

# Split the trainable variables by the scope prefix each network uses
# ('g_...' for the generator, 'd_...' for the discriminator) instead of by
# position in the list, so the split stays correct if a layer is added,
# removed or reconfigured.
g_vars = [v for v in tvars if v.name.startswith('g_')]
d_vars = [v for v in tvars if v.name.startswith('d_')]

trainerD = tf.train.AdamOptimizer(learning_rate=0.0002,beta1=0.5)
trainerG = tf.train.AdamOptimizer(learning_rate=0.0002,beta1=0.5)
d_grads = trainerD.compute_gradients(d_loss,d_vars)
g_grads = trainerG.compute_gradients(g_loss,g_vars)

update_D = trainerD.apply_gradients(d_grads)
update_G = trainerG.apply_gradients(g_grads)

# Loss histories filled in by the training loop.
loss_gen1 = []
loss_dis1 = []

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


The following cell runs the actual program and ensures that the generator and discriminator functions are called appropriately. It runs for 20000 iterations, at the end of which the generated images are discernible and can be recognized as digits. A batch size of 128 is used, which means 128 random vectors are created in every iteration. Every tenth iteration stores the generated images and every 1000th iteration stores the updated model, which can be used in later stages.

In every iteration a batch of 'batch_size' images is taken; the images are rescaled to the range [-1, 1], reshaped, and padded to make them of size 32 x 32. Then the discriminator is updated once and the generator twice to make the GAN more robust. On running, it can be seen that the generator loss fluctuates slightly while the discriminator loss decreases. This means the model is learning to generate better and better images. This is also seen from the images that are created. The first few images are extremely random and hazy with absolutely no sign of digits. They are just greyish pixels. As the iterations increase, digits begin to be seen and ultimately, at the end of all iterations, the images that result are very good in terms of digit visibility. Thus, the GAN seems to have learnt to generate images of handwritten digits!

The progression of the images can be seen below.
Iteration 1
<img src="fig0.png">    
Iteration 700
<img src="fig700.png">
Iteration 12360
<img src="fig12360.png">
Iteration 16760
<img src="fig16760.png">
Iteration 18860
<img src="fig18860.png">
Iteration 20000
<img src="fig20000.png">

In [31]:
batch_size = 128
iterations = 20000
sample_directory = './figures'
model_directory = './trainedModels'

# tf.initialize_all_variables() is deprecated; this is the replacement the
# deprecation warning itself points to.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        # Random input vectors for this step.
        zs = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)

        # Real images: reshape to 28x28x1, rescale with (x - 0.5) * 2, then
        # pad 2 pixels on each spatial side with -1 to reach 32x32.
        xs,_ = mnist.train.next_batch(batch_size)
        xs = (np.reshape(xs,[batch_size,28,28,1]) - 0.5) * 2.0
        xs = np.lib.pad(xs, ((0,0),(2,2),(2,2),(0,0)),'constant', constant_values=(-1, -1))

        # One discriminator update followed by two generator updates on the
        # same random vectors; gLoss keeps the value from the second update.
        _,dLoss = sess.run([update_D,d_loss],feed_dict={z_in:zs,real_in:xs})
        for _ in range(2):
            _,gLoss = sess.run([update_G,g_loss],feed_dict={z_in:zs})

        if i % 10 == 0:
            # Record the losses and save a 6x6 grid of freshly generated samples.
            loss_gen1.append(float(gLoss))
            loss_dis1.append(float(dLoss))
            z2 = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)
            newZ = sess.run(Gz,feed_dict={z_in:z2})
            if not os.path.exists(sample_directory):
                os.makedirs(sample_directory)
            save_images(np.reshape(newZ[0:36],[36,32,32]),[6,6],sample_directory+'/fig'+str(i)+'.png')
        if i % 1000 == 0 and i != 0:
            # Checkpoint the model every 1000 iterations (skipping iteration 0).
            if not os.path.exists(model_directory):
                os.makedirs(model_directory)
            saver.save(sess,model_directory+'/model-'+str(i)+'.cptk')
print ("Done")

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Done
