# Homework 11 - Generative Models

In this homework you will implement a simple generative adversarial network to generate new samples for the MNIST dataset. 

On the technical side, you will learn about TensorBoard and how to use it to log your metrics as well as the generated images.

In [None]:
%load_ext tensorboard
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime
%matplotlib notebook

In [None]:
# Load the MNIST dataset. Training data is enough. Normalize to [-1,1]. Add channel dimension of depth 1.
# Format to float32.
### YOUR CODE HERE ###
# Only the training split is needed; the test split is discarded.
(train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data()
# Map pixel values from [0, 255] to [-1, 1] so real images share the range
# of the generator's tanh output.
train_images = (train_images/255.0)*2 - 1
# Add a trailing channel axis of depth 1. The shape is passed positionally
# because the `newshape` keyword of np.reshape is deprecated in recent NumPy.
train_images = np.reshape(train_images, (-1, 28, 28, 1))
# The division above promotes to float64; store as float32 for TensorFlow.
train_images = train_images.astype(np.float32)
######################

In [None]:
# Define the generator:
# Dense layer: 7*7*64 neurons + BatchNorm + LeakyReLU
# Reshape to (batch size, 7, 7, 64)
# Transpose Convolutional Layer: 32 kernels of size (5,5) with strides (1,1) + "same" padding + BatchNorm + LeakyReLU
# Transpose Convolutional Layer: 16 kernels of size (5,5) with strides (2,2) + "same" padding + BatchNorm + LeakyReLU
# Transpose Convolutional Layer: 1 kernel of size (5,5) with strides (2,2) + "same" padding + TanH
# All layers in the generator don't use biases! Set parameter use_bias=False.
# NOTE: Go back to lecture 7 to make sure you use BatchNorm in combination with an 
# activation function in the correct way.

### YOUR CODE HERE ###
class Generator(tf.keras.layers.Layer):
    """Generator network: latent vectors in, single-channel images out.

    A bias-free dense layer produces 7*7*64 features that are reshaped to a
    7x7x64 map. Three bias-free 5x5 transposed convolutions with "same"
    padding follow (32 filters / stride 1, 16 filters / stride 2,
    1 filter / stride 2). Every stage except the last is followed by batch
    normalization and then LeakyReLU; the last stage applies tanh.
    """

    def __init__(self):
        super().__init__()
        # Settings common to all three transposed convolutions.
        upsample_args = dict(kernel_size=5, padding='SAME', use_bias=False)

        # NOTE: layers are created in the same order in which they are applied.
        self.dense = tf.keras.layers.Dense(units=7*7*64, activation=None, use_bias=False)
        self.batchnorm_1 = tf.keras.layers.BatchNormalization()
        self.convT_1 = tf.keras.layers.Conv2DTranspose(
            filters=32, strides=1, activation=None, **upsample_args
        )
        self.batchnorm_2 = tf.keras.layers.BatchNormalization()
        self.convT_2 = tf.keras.layers.Conv2DTranspose(
            filters=16, strides=2, activation=None, **upsample_args
        )
        self.batchnorm_3 = tf.keras.layers.BatchNormalization()
        self.convT_3 = tf.keras.layers.Conv2DTranspose(
            filters=1, strides=2, activation=tf.nn.tanh, **upsample_args
        )

    def call(self, x, is_training):
        """Run the generator on a batch of latent vectors.

        Args:
            x: Batch of latent vectors fed to the dense layer.
            is_training: Forwarded as `training` to every batch-norm layer.

        Returns:
            The tanh-activated output of the last transposed convolution.
        """
        # Linear projection -> BatchNorm -> LeakyReLU, then fold into a feature map.
        projected = tf.nn.leaky_relu(
            self.batchnorm_1(self.dense(x), training=is_training)
        )
        feature_map = tf.reshape(projected, shape=(-1, 7, 7, 64))

        # The two hidden upsampling stages share one pattern:
        # transposed conv -> BatchNorm -> LeakyReLU.
        hidden_stages = (
            (self.convT_1, self.batchnorm_2),
            (self.convT_2, self.batchnorm_3),
        )
        for upsample, normalize in hidden_stages:
            feature_map = tf.nn.leaky_relu(
                normalize(upsample(feature_map), training=is_training)
            )

        # Output stage: tanh is built into the layer, no batch norm.
        return self.convT_3(feature_map)
########################

In [None]:
# Define the discriminator:
# Conv layer: 8 kernels of size (5,5) with strides (2,2) + "same" padding + LeakyReLU
# Conv layer: 16 kernels of size (5,5) with strides (2,2) + "same" padding + LeakyReLU
# Flatten
# Dense layer: 1 unit, no activation + Sigmoid activation

### YOUR CODE HERE ###
class Discriminator(tf.keras.layers.Layer):
    """Discriminator network: images in, one sigmoid probability per image out.

    Two 5x5 stride-2 convolutions with "same" padding and LeakyReLU
    (8 filters, then 16 filters) are followed by a flatten and a single-unit
    dense layer with sigmoid activation.
    """

    def __init__(self):
        super().__init__()
        # Settings common to both convolutions.
        conv_args = dict(
            kernel_size=5,
            strides=2,
            padding="SAME",
            activation=tf.nn.leaky_relu,
        )
        self.conv_1 = tf.keras.layers.Conv2D(filters=8, **conv_args)
        self.conv_2 = tf.keras.layers.Conv2D(filters=16, **conv_args)
        self.flatten = tf.keras.layers.Flatten()
        self.output_layer = tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid)

    def call(self, x, is_training):
        """Score a batch of images.

        Args:
            x: Batch of images fed to the first convolution.
            is_training: Not used by any layer here; accepted so the call
                signature matches Generator.call.

        Returns:
            The sigmoid output of the final dense layer, one value per image.
        """
        features = self.conv_2(self.conv_1(x))
        return self.output_layer(self.flatten(features))
########################

In [None]:
# Define the dataset. Shuffle.
# We will train with batches of 64 (32 real images, 32 fake images).
# So batch the real images in groups of 32.
### YOUR CODE HERE ###
# Input pipeline: one element per image -> shuffle through a 10000-element
# buffer -> group into batches of 32 real images.
dataset = (
    tf.data.Dataset.from_tensor_slices(train_images)
    .shuffle(buffer_size=10000)
    .batch(batch_size=32)
)
########################

In [None]:
# Define the losses. It makes sense to first program the training loop and then come back here!

# Define the loss for the generator.
def generator_loss(probabilities, num_fake=32):
    """Return the binary cross-entropy loss used to train the generator.

    The generator wants the discriminator to rate its fake images as real,
    so the discriminator outputs for the fake images are compared against
    an all-ones target.

    Args:
        probabilities: Discriminator outputs for a merged batch in which the
            rows belonging to the fake images come first.
        num_fake: Number of leading rows that belong to fake images.
            Defaults to 32, the batch size used so far.

    Returns:
        Scalar loss tensor.
    """
    ### YOUR CODE HERE ###
    # Get only the output probabilities for the fake images.
    probabilities_fake = probabilities[:num_fake]
    # Create the label vector indicating that the images are correct (=1).
    # ones_like follows the shape and dtype of the predictions, so no
    # hard-coded batch size or float64 NumPy array is needed.
    labels_one = tf.ones_like(probabilities_fake)
    # Use binary cross entropy loss to compute the loss. tf.keras.losses.BinaryCrossentropy()
    binary = tf.keras.losses.BinaryCrossentropy()
    loss = binary(labels_one, probabilities_fake)
    ########################
    return loss

# Define the loss for the discriminator.
def discriminator_loss(probabilities, num_fake=32):
    """Return the binary cross-entropy loss used to train the discriminator.

    The discriminator should output 0 for fake images and 1 for real images.

    Args:
        probabilities: Discriminator outputs for a merged batch in which the
            rows belonging to the fake images come first and the rows
            belonging to the real images follow.
        num_fake: Number of leading rows that belong to fake images.
            Defaults to 32, the batch size used so far. All remaining rows
            are treated as real images.

    Returns:
        Scalar loss tensor.
    """
    ### YOUR CODE HERE ###
    # Create the label vector indicating which images are real and which are fake.
    # The label shapes follow the predictions, so the number of real images
    # does not have to be exactly 32.
    labels_zero = tf.zeros_like(probabilities[:num_fake])
    labels_one = tf.ones_like(probabilities[num_fake:])
    labels = tf.concat([labels_zero, labels_one], axis=0)
    # Use binary cross entropy loss to compute the loss.
    binary = tf.keras.losses.BinaryCrossentropy()
    loss = binary(labels, probabilities)
    ########################
    return loss

In [None]:
!rm -rf ./logs/ 
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

# For me the in-notebook tensorboard sometimes doesn't work. In this case maybe just use your terminal
# if you work on your own machine. (comment out the following line)
%tensorboard --logdir logs/

tf.keras.backend.clear_session()

# Initialzing generator and discriminator.
generator = Generator()
discriminator = Discriminator()
# Define size of latent variable vector.
z_dim = 50
# Define random seed to use for generating 8 images for supervision.
seed = tf.random.normal(shape=[8,z_dim])

# Initialize two optimizers (one for generator, one for discriminator).
# During training you will have to do every step twice (computing loss, computing gradients,
# applying gradients, storing loss in summary). Namely once for generator and once for discriminator.
gen_optimizer = tf.keras.optimizers.Adam(1e-4)
dis_optimizer = tf.keras.optimizers.Adam(1e-4)

step=0

for epochs in range(6):
    for real in dataset:
        
        with tf.GradientTape() as gen_tape, tf.GradientTape() as dis_tape:
            
            ### YOUR CODE HERE ###
            # Generate random noise vector. tf.random.normal()
            noise = tf.random.normal(shape=[32, z_dim])
            # Generate fake images with generator.
            fake = generator(noise, is_training=True)
            # Merge fake and real images to a long vector. tf.concat()
            images = tf.concat([fake, real], axis=0)
            # Compute output from discriminator.
            probs = discriminator(images, is_training=True)
            
            # Compute loss, compute gradients, apply gradients, store summaries.
            gen_loss = generator_loss(probs)
            dis_loss = discriminator_loss(probs)
            gen_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
            dis_gradients = dis_tape.gradient(dis_loss, discriminator.trainable_variables)

            gen_optimizer.apply_gradients(zip(gen_gradients, generator.trainable_variables))
            dis_optimizer.apply_gradients(zip(dis_gradients, discriminator.trainable_variables))

            with train_summary_writer.as_default():
                tf.summary.scalar('generator_loss', gen_loss, step=step)
                tf.summary.scalar('discriminator_loss', dis_loss, step=step)
            ########################
            
        # Every 100 steps generate images from the defined seed and 
        # store to supervise how well the generator works.
        if step % 100 == 0:
            ### YOUR CODE HERE ###
            fake = generator(seed, is_training=False)
            with test_summary_writer.as_default():
                tf.summary.image('fake_images', fake, step=step, max_outputs=8)
            ########################
            
        step += 1