# A Simple GAN with Keras

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from keras.layers import Input
from keras.models import Model, Sequential
from keras.layers.core import Dense, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.datasets import mnist
from keras.optimizers import Adam
from keras import initializers
from keras import backend as K

from tensorflow.python.client import device_lib

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Check whether TensorFlow can see a GPU: print every local device
# (CPU and GPU entries) reported by TensorFlow's device_lib.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9643144250863969060
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1472135168
locality {
  bus_id: 1
  links {
  }
}
incarnation: 18234107187098113454
physical_device_desc: "device: 0, name: GeForce GTX 660, pci bus id: 0000:01:00.0, compute capability: 3.0"
]


In [3]:
# Check which GPUs the Keras TensorFlow backend reports as available.
# NOTE(review): `_get_available_gpus` is an underscore-prefixed (private)
# backend helper, so it may not exist in other Keras versions -- confirm
# before relying on it.
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [4]:
# Use Tensorflow for Keras backend
# NOTE(review): keras is already imported in the first cell (which printed
# "Using TensorFlow backend."), so this assignment probably comes too late to
# influence backend selection -- confirm, and move it above the keras imports
# if it is meant to take effect.
os.environ["KERAS_BACKEND"] = "tensorflow"

# Make sure we can reproduce the experiment and get the same result
# NOTE(review): this seeds NumPy's global generator only, which this notebook
# uses for the noise vectors and for sampling image batches. Whether the
# Keras/TensorFlow weight initialisation is reproducible as well is not
# established here -- TODO confirm.
np.random.seed(10)

# The dimension of our random noise vector (input size of the generator)
random_dim = 100

## Gathering and preprocessing MNIST data

In [5]:
def load_mnist_data():
    """Load MNIST and preprocess the training images for the GAN.

    The training images are rescaled from the pixel range [0, 255] to
    [-1, 1] (matching the generator's tanh output) and flattened so that each
    image becomes one row of pixels.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` where ``x_train`` is a
        float32 array of shape ``(num_images, 784)`` with values in [-1, 1].
        ``y_train``, ``x_test`` and ``y_test`` are returned exactly as provided
        by ``mnist.load_data()`` (the test images are NOT normalized or
        flattened).
    """
    # load the data
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # normalize our inputs to be in the range [-1, 1]
    # remember that the value of each attribute (pixel) ranges from 0 to 255
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5
    # flatten each 28x28 image into a single row: (60000, 28, 28) -> (60000, 784).
    # The sizes are derived from the array itself instead of being hard-coded,
    # so the function keeps working if the number of samples differs.
    x_train = x_train.reshape(x_train.shape[0], -1)
    return (x_train, y_train, x_test, y_test)

## Defining our optimizer
We will use Adam, a stochastic gradient-based optimizer.

In [6]:
# Adam optimizer
def get_optimizer():
    """Create a new Adam optimizer with lr=0.0002 and beta_1=0.5."""
    adam_settings = {'lr': 0.0002, 'beta_1': 0.5}
    return Adam(**adam_settings)

## Defining our generator and discriminator

The generator and the discriminator are each represented by a neural network with three hidden layers. Both are built from fully connected (Dense) layers followed by LeakyReLU activations; the discriminator additionally applies dropout with a rate of 0.3 after each hidden layer.

In [7]:
# generator
def get_generator(optimizer):
    """Build and compile the generator network.

    The network maps a noise vector of length ``random_dim`` to a flattened
    image of 784 values through three Dense + LeakyReLU(0.2) hidden layers of
    256, 512 and 1024 units and a tanh output layer.

    Args:
        optimizer: Keras optimizer used to compile the model.

    Returns:
        The compiled Sequential model (loss: binary cross-entropy).
    """
    net = Sequential()

    # first hidden layer: the only one with an explicit weight initializer,
    # and the one that fixes the input size
    net.add(Dense(256, input_dim=random_dim,
                  kernel_initializer=initializers.RandomNormal(stddev=0.02)))
    net.add(LeakyReLU(0.2))

    # second and third hidden layers
    for units in (512, 1024):
        net.add(Dense(units))
        net.add(LeakyReLU(0.2))

    # output layer: one unit per pixel (784), tanh instead of LeakyReLU
    net.add(Dense(784, activation='tanh'))

    # compiling needs a loss function and an optimizer
    net.compile(loss='binary_crossentropy', optimizer=optimizer)

    return net

# discriminator
def get_discriminator(optimizer):
    """Build and compile the discriminator network.

    The network takes a flattened image of 784 values and outputs a single
    sigmoid value. It has three hidden blocks of Dense -> LeakyReLU(0.2) ->
    Dropout(0.3) with 1024, 512 and 256 units.

    Args:
        optimizer: Keras optimizer used to compile the model.

    Returns:
        The compiled Sequential model (loss: binary cross-entropy).
    """
    net = Sequential()

    # first hidden block: fixes the input size and is the only Dense layer
    # with an explicit weight initializer
    net.add(Dense(1024, input_dim=784,
                  kernel_initializer=initializers.RandomNormal(stddev=0.02)))
    net.add(LeakyReLU(0.2))
    net.add(Dropout(0.3))

    # remaining hidden blocks
    for units in (512, 256):
        net.add(Dense(units))
        net.add(LeakyReLU(0.2))
        net.add(Dropout(0.3))

    # single sigmoid output unit
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer=optimizer)
    return net

# The hidden-layer widths of the two networks mirror each other:
# the generator grows 256 -> 512 -> 1024, the discriminator shrinks 1024 -> 512 -> 256.

In [8]:
def get_gan_network(discriminator, random_dim, generator, optimizer):
    """Chain generator and discriminator into one compiled model.

    The combined model takes a noise vector, feeds it through the generator
    and passes the generated image on to the discriminator.

    Args:
        discriminator: discriminator model; its ``trainable`` attribute is
            set to False by this function.
        random_dim: length of the noise vector fed to the generator.
        generator: generator model.
        optimizer: Keras optimizer used to compile the combined model.

    Returns:
        The compiled combined model (loss: binary cross-entropy).
    """
    # Mark the discriminator as non-trainable before building the combined
    # model, since we only want to train either the generator or the
    # discriminator at a time
    discriminator.trainable = False

    # noise input -> generated image -> discriminator's output for that image
    noise_in = Input(shape=(random_dim,))
    fake_image = generator(noise_in)
    verdict = discriminator(fake_image)

    combined = Model(inputs=noise_in, outputs=verdict)
    combined.compile(loss='binary_crossentropy', optimizer=optimizer)
    return combined

In [9]:
# Create a wall of generated MNIST images
def plot_generated_images(epoch, generator, examples=100, dim=(10, 10), figsize=(10, 10)):
    """Generate images from random noise and save them as one grid figure.

    Args:
        epoch: value inserted into the output file name.
        generator: model whose ``predict`` turns noise into flattened images.
        examples: number of images to generate.
        dim: (rows, columns) of the subplot grid.
        figsize: size of the matplotlib figure.

    The figure is written to ``gan_generated_image_epoch_<epoch>.png`` in the
    current working directory.
    """
    # sample one noise vector per example and turn each into a 28x28 image
    latent = np.random.normal(0, 1, size=[examples, random_dim])
    pictures = generator.predict(latent).reshape(examples, 28, 28)

    n_rows, n_cols = dim[0], dim[1]
    plt.figure(figsize=figsize)
    # subplot positions are 1-based, hence start=1
    for position, picture in enumerate(pictures, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.imshow(picture, interpolation='nearest', cmap='gray_r')
        plt.axis('off')
    plt.tight_layout()

    out_name = 'gan_generated_image_epoch_{0}.png'.format(epoch)
    plt.savefig(out_name)

## Training function

`train_on_batch` runs a single gradient update on one batch of data. By calling it once per mini-batch we run the optimization algorithm (Adam, in this case) in a mini-batch manner, while deciding ourselves which network is updated at each step.

In [10]:
def train(epochs=1, batch_size=128):
    """Train the GAN on MNIST and periodically save sample images.

    For every batch the discriminator is first updated on a mix of real and
    generated images, then the generator is updated through the combined GAN
    model so that its images are more likely to be classified as real.

    Args:
        epochs: number of passes; each pass runs ``len(x_train) // batch_size``
            update steps.
        batch_size: number of real images (and of generated images) per step.

    A grid of generated images is saved after the first epoch and after every
    20th epoch (see ``plot_generated_images``).
    """
    # Only the training images are needed; labels and the test split are unused
    x_train, _, _, _ = load_mnist_data()
    # Number of update steps per epoch
    batch_count = x_train.shape[0] // batch_size

    # Build our GAN network.
    # Each compiled model gets its OWN optimizer instance: an optimizer keeps
    # internal state (e.g. its iteration counter), so sharing a single
    # instance between separately compiled models mixes up that state.
    generator = get_generator(get_optimizer())
    discriminator = get_discriminator(get_optimizer())
    gan = get_gan_network(discriminator, random_dim, generator, get_optimizer())

    # Labels for the discriminator step: the first half of each batch is real,
    # the second half is fake (0).
    # One-sided label smoothing: real images are labelled 0.9 instead of 1.0,
    # a common trick to keep the discriminator from becoming over-confident.
    # The labels are the same for every step, so they are built once.
    y_dis = np.zeros(2*batch_size)
    y_dis[:batch_size] = 0.9

    # Labels for the generator step: the output is defined as 'real' (y = 1).
    # We want to minimize the difference between the discriminator predictions
    # on the generated images and the probability of them being real.
    # That is, we want to train the GAN so that the generated images
    # are more likely to be classified as real by the discriminator
    y_gen = np.ones(batch_size)

    for e in range(1, epochs+1):
        print('-'*15, 'Epoch', e, '-'*15)
        for _ in tqdm(range(batch_count)):
            # Get a random set of input noise and images
            noise = np.random.normal(0, 1, size=[batch_size, random_dim])
            image_batch = x_train[np.random.randint(0, x_train.shape[0], size=batch_size)]

            # Generate fake MNIST images
            generated_images = generator.predict(noise)
            X = np.concatenate([image_batch, generated_images])

            # Train discriminator on batch
            discriminator.trainable = True
            discriminator.train_on_batch(X, y_dis)

            # Train generator on batch (fresh noise, discriminator frozen)
            noise = np.random.normal(0, 1, size=[batch_size, random_dim])
            discriminator.trainable = False
            gan.train_on_batch(noise, y_gen)

        if e == 1 or e % 20 == 0:
            plot_generated_images(e, generator)