<a href="https://colab.research.google.com/github/sayakpaul/TF-2.0-Hacks/blob/master/GANs_with_TF_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook follows [this amazing tutorial on GANs](https://medium.com/@devnag/generative-adversarial-networks-gans-in-50-lines-of-code-pytorch-e81b79659e3f) and tries to port the code to TensorFlow 2.0. 


## Install `Tensorflow 2.0`

In [0]:
!pip install tensorflow-gpu==2.0.0-beta1

## Imports

In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow import keras

%matplotlib inline

## Helper function to generate a distribution (normal) for the real data

In [0]:
def get_distribution_sampler(mu, sigma):
  return lambda n: np.random.normal(mu, sigma, (1, n))

## Helper function to generate a uniform distribution for the generator network

In [0]:
def get_generator_input_sampler():
  return lambda m, n: np.random.rand(m, n)

## The Generator network class

In [0]:
class Generator(keras.Model):
  def __init__(self, input_size, hidden_size, output_size):
    super(Generator, self).__init__()
    self.map1 = keras.layers.Dense(hidden_size, input_shape=input_size, activation='tanh')
    self.map2 = keras.layers.Dense(hidden_size, activation='tanh')
    self.map3 = keras.layers.Dense(output_size, activation='linear')
  
  def call(self, inputs):
    x = self.map1(inputs)
    x = self.map2(x)
    x = self.map3(x)
    return x

## The Discriminator network class

In [0]:
class Discriminator(keras.Model):
  def __init__(self, input_size, hidden_size, output_size):
    super(Discriminator, self).__init__()
    self.map1 = keras.layers.Dense(hidden_size, input_shape=input_size, activation='sigmoid')
    self.map2 = keras.layers.Dense(hidden_size, activation='sigmoid')
    self.map3 = keras.layers.Dense(output_size, activation='sigmoid')
  
  def call(self, inputs):
    x = self.map1(inputs)
    return self.map3(self.map2(x))

In [0]:
def get_moments(d):
    # https://stats.stackexchange.com/questions/126346/why-kurtosis-of-a-normal-distribution-is-3-instead-of-0
    # Return the first 4 moments of the data provided
    mean = np.mean(d)
    diffs = (d - mean)
    var = np.mean(np.power(diffs, 2.0))
    std = np.power(var, 0.5)
    zscores = diffs / std
    skews = np.mean(np.power(zscores, 3.0))
    kurtoses = np.mean(np.power(zscores, 4.0)) - 3.0  # excess kurtosis, should be 0 for Gaussian
    final = np.concatenate((mean.reshape(1,), std.reshape(1,), skews.reshape(1,), kurtoses.reshape(1,)))
    return final

## Model hyperparameters and other constants

In [0]:
# Model parameters
g_input_size = 1      # Random noise dimension coming into generator, per output vector
g_hidden_size = 5     # Generator complexity
g_output_size = 1     # Size of generated output vector
d_input_size = 500    # Minibatch size - cardinality of distributions
d_hidden_size = 10    # Discriminator complexity
d_output_size = 1     # Single dimension for 'real' vs. 'fake' classification
minibatch_size = d_input_size

d_learning_rate = 1e-3
g_learning_rate = 1e-3
sgd_momentum = 0.9

num_epochs = 5000
print_interval = 100
d_steps = 20
g_steps = 20

dfe, dre, ge = 0, 0, 0
d_real_data, d_fake_data, g_fake_data = None, None, None

## Data generation parameters

In [0]:
data_mean = 4
data_stddev = 1.25

d_sampler = get_distribution_sampler(data_mean, data_stddev)
gi_sampler = get_generator_input_sampler()

# d_input_func = lambda x: 4

## Initialize the networks

In [0]:
G = Generator(input_size=(500, 1),
                  hidden_size=g_hidden_size,
                  output_size=g_output_size)

D = Discriminator(input_size=(1,4),
                  hidden_size=d_hidden_size,
                  output_size=d_output_size)

## Declare the loss and optimizers

In [0]:
criterion = tf.keras.losses.BinaryCrossentropy()  
d_optimizer = tf.keras.optimizers.SGD(learning_rate=d_learning_rate)
g_optimizer = tf.keras.optimizers.SGD(learning_rate=g_learning_rate)

## One forward and backward pass with the Discriminator network with real data

*We do not update the parameters with these gradients.*

In [29]:
d_real_data = d_sampler(d_input_size)

with tf.GradientTape() as tape:
  d_real_decision = D(get_moments(d_real_data).reshape((1,4)))
  d_real_error = criterion(d_real_decision, np.ones((1,1)))  # ones = true
d_real_grads = tape.gradient(d_real_error, D.trainable_weights) # compute/store gradients, but don't change params
d_real_grads[0].numpy()

array([[-1.15983969e-02, -8.16648348e-02, -2.09742039e-01,
        -3.17376896e-01, -9.84524109e-02,  2.58932419e-01,
        -7.56928926e-02, -4.94383546e-02, -1.92833462e-01,
        -3.26418538e-01],
       [-3.65464409e-03, -2.57325136e-02, -6.60895215e-02,
        -1.00005165e-01, -3.10222632e-02,  8.15893648e-02,
        -2.38507602e-02, -1.55779796e-02, -6.07616448e-02,
        -1.02854178e-01],
       [-1.37570703e-04, -9.68641511e-04, -2.48778860e-03,
        -3.76446530e-03, -1.16776202e-03,  3.07124469e-03,
        -8.97807215e-04, -5.86397348e-04, -2.28723289e-03,
        -3.87170986e-03],
       [-3.10193484e-04, -2.18408629e-03, -5.60944880e-03,
        -8.48809072e-03, -2.63306184e-03,  6.92502161e-03,
        -2.02436960e-03, -1.32220475e-03, -5.15723715e-03,
        -8.72990503e-03]])

## One forward and backward pass with the Discriminator network with the fake data

In [30]:
d_gen_input = gi_sampler(minibatch_size, g_input_size)
with tf.GradientTape() as tape:
  with tape.stop_recording():
    d_fake_data = G(d_gen_input)
  d_fake_decision = D(get_moments(d_fake_data.numpy().T).reshape((1,4)))
  d_fake_error = criterion(d_fake_decision, np.zeros((1,1)))
d_fake_grads = tape.gradient(d_fake_error, D.trainable_weights) 
print(d_fake_grads[0].numpy())
d_optimizer.apply_gradients(zip(d_fake_grads, D.trainable_weights)) # Only optimizes D's parameters

[[-0.00117246  0.00736158  0.01094385  0.03456392  0.00493063 -0.01111478
   0.00517153  0.00062399  0.01326515  0.0260264 ]
 [-0.00058004  0.00364196  0.00541419  0.01709964  0.00243931 -0.00549876
   0.00255849  0.0003087   0.0065626   0.01287591]
 [ 0.00237863 -0.01493484 -0.02220239 -0.07012174 -0.01000305  0.02254916
  -0.01049177 -0.00126592 -0.02691175 -0.05280121]
 [ 0.00688943 -0.04325699 -0.06430658 -0.20309929 -0.02897264  0.06531097
  -0.03038816 -0.00366659 -0.07794668 -0.15293242]]


<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=3>

## One forward and backward pass with the Generator network

In [32]:
gen_input = gi_sampler(minibatch_size, g_input_size)
with tf.GradientTape() as tape:
  g_fake_data = G(gen_input)
  dg_fake_decision = D(get_moments(g_fake_data.numpy().T).reshape((1,4)))
  g_error = criterion(dg_fake_decision, np.ones((1,1)))
tape.gradient(g_error, G.trainable_weights)

[None, None, None, None, None, None]

In [128]:
criterion(dg_fake_decision, np.ones((1,1)))

<tf.Tensor: id=7911, shape=(), dtype=float64, numpy=3.5222349166870117>