In [1]:
#Variational Autoencoder w/ ReLUs and Adam optimizer

In [2]:
#imports
from __future__ import division
from __future__ import print_function
import os.path

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
# Dataset: MNIST, loaded through the TensorFlow tutorial helper. The archives
# are read from the local 'MNIST' directory (relative to the working directory).
mnist = input_data.read_data_sets(train_dir='MNIST')

Extracting MNIST/train-images-idx3-ubyte.gz
Extracting MNIST/train-labels-idx1-ubyte.gz
Extracting MNIST/t10k-images-idx3-ubyte.gz
Extracting MNIST/t10k-labels-idx1-ubyte.gz


In [4]:
# Network dimensions and the L2 regularisation weight.
input_dim = 28 * 28  # width of one input row fed to the network (784)
hidden_encoder_dim = hidden_decoder_dim = 400  # hidden-layer widths (encoder, decoder)
latent_dim = 20  # the latent variable is restricted to an R^20 representation
lam = 0  # multiplier applied to the summed L2 penalty; 0 makes the penalty vanish


#constructors
def weight_variable(shape):
  """Build a tf.Variable holding a weight tensor of the requested shape.

  The initial value comes from tf.truncated_normal with stddev=0.001.

  Args:
    shape: shape passed straight through to tf.truncated_normal.

  Returns:
    The tf.Variable wrapping that initial value.
  """
  return tf.Variable(tf.truncated_normal(shape, stddev=0.001))

def bias_variable(shape):
  """Build a tf.Variable of the requested shape with every entry set to 0.

  Args:
    shape: shape passed straight through to tf.constant.

  Returns:
    The tf.Variable wrapping the all-zero constant.
  """
  return tf.Variable(tf.constant(0., shape=shape))

In [5]:
# Input batch: each row holds input_dim values; the batch size is left open.
x = tf.placeholder("float", shape=[None, input_dim])
# Running total of tf.nn.l2_loss over the weight matrices (biases are not included).
l2_loss = tf.constant(0.0)

#################
#### ENCODER ####
#################

# Parameters: input -> hidden layer
W_encoder_input_hidden = weight_variable([input_dim, hidden_encoder_dim])
b_encoder_input_hidden = bias_variable([hidden_encoder_dim])

# Parameters: hidden layer -> latent mean (mu)
W_encoder_hidden_mu = weight_variable([hidden_encoder_dim, latent_dim])
b_encoder_hidden_mu = bias_variable([latent_dim])

# Parameters: hidden layer -> latent log-variance
W_encoder_hidden_logvar = weight_variable([hidden_encoder_dim, latent_dim])
b_encoder_hidden_logvar = bias_variable([latent_dim])

# Forward pass: one ReLU hidden layer, then two linear heads that share it.
hidden_encoder = tf.nn.relu(
    tf.matmul(x, W_encoder_input_hidden) + b_encoder_input_hidden
)
mu_encoder = tf.matmul(hidden_encoder, W_encoder_hidden_mu) + b_encoder_hidden_mu
# This head outputs the log-variance (not sigma itself); it is left unconstrained.
logvar_encoder = (
    tf.matmul(hidden_encoder, W_encoder_hidden_logvar) + b_encoder_hidden_logvar
)

# Add the encoder weight matrices to the L2 total, in creation order.
l2_loss = (
    l2_loss
    + tf.nn.l2_loss(W_encoder_input_hidden)
    + tf.nn.l2_loss(W_encoder_hidden_mu)
    + tf.nn.l2_loss(W_encoder_hidden_logvar)
)

In [6]:
##########################
### ENCODER -> DECODER ###
##########################

# Reparameterisation: z = mu + sigma * epsilon, so the randomness lives in
# epsilon and gradients can flow through mu_encoder and logvar_encoder.

# Noise tensor with the same (dynamic) shape as the encoder's log-variance output.
epsilon = tf.random_normal(tf.shape(logvar_encoder), name='epsilon')

# The encoder emits log(sigma^2); exp(0.5 * log(sigma^2)) recovers sigma.
std_encoder = tf.exp(0.5 * logvar_encoder)

# Element-wise product via the overloaded `*` operator. The original used
# `tf.mul`, which TensorFlow 1.0 removed (renamed to `tf.multiply`); the
# operator form builds the same multiplication on old and new releases alike.
z = mu_encoder + std_encoder * epsilon

In [7]:
#################
#### DECODER ####
#################

# Parameters: latent sample z -> decoder hidden layer
W_decoder_z_hidden = weight_variable([latent_dim, hidden_decoder_dim])
b_decoder_z_hidden = bias_variable([hidden_decoder_dim])
l2_loss = l2_loss + tf.nn.l2_loss(W_decoder_z_hidden)

# Decoder hidden layer: affine map of z followed by a ReLU.
hidden_decoder = tf.nn.relu(
    tf.matmul(z, W_decoder_z_hidden) + b_decoder_z_hidden
)

# Parameters: decoder hidden layer -> reconstruction of width input_dim.
# Only the variables are created here; they are not applied within this cell.
W_decoder_hidden_reconstruction = weight_variable([hidden_decoder_dim, input_dim])
b_decoder_hidden_reconstruction = bias_variable([input_dim])
l2_loss = l2_loss + tf.nn.l2_loss(W_decoder_hidden_reconstruction)

In [8]:
#loss

# Decoder output (latent decoded back into the input domain). These are raw
# logits: no sigmoid is applied here, the cross-entropy op below takes logits.
x_hat = (
    tf.matmul(hidden_decoder, W_decoder_hidden_reconstruction)
    + b_decoder_hidden_reconstruction
)

# Reconstruction term: sigmoid cross-entropy between x_hat and x, summed over
# axis 1 to give one value per example.
# NOTE(review): the arguments are positional, in the pre-1.0 (logits, targets)
# order. TensorFlow 1.0+ requires labels=/logits= keywords here -- confirm the
# target TF version before changing this call.
BCE = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(x_hat, x),
    reduction_indices=1,
)

# KL term, per example: closed-form KL divergence of the diagonal Gaussian
# N(mu_encoder, exp(logvar_encoder)) from a standard normal, summed over axis 1.
KLD = -0.5 * tf.reduce_sum(
    1 + logvar_encoder - tf.pow(mu_encoder, 2) - tf.exp(logvar_encoder),
    reduction_indices=1,
)

# Mean over the batch of (reconstruction + KL).
loss = tf.reduce_mean(BCE + KLD)

# Objective actually minimised: the loss above plus lam times the L2 total.
regularized_loss = loss + lam * l2_loss

# One Adam update on the regularised objective, learning rate 0.01.
train_step = tf.train.AdamOptimizer(learning_rate=0.01).minimize(regularized_loss)

In [None]:
#graph var initializer
# Op returned by tf.global_variables_initializer(). It is only constructed
# here; nothing is initialised until a session runs it (the training cell
# calls sess.run(init) before its first step).
init = tf.global_variables_initializer()

In [None]:
n_steps = int(1e6)  # total number of optimisation steps to run
batch_size = 100    # examples drawn from the training set per step

with tf.Session() as sess:
  # Variables must be initialised inside the session before any training op runs.
  sess.run(init)

  # Steps are numbered 1..n_steps inclusive. The previous range(1, n_steps)
  # stopped one short, running only n_steps - 1 updates and never reaching
  # (or logging) step n_steps.
  for step in range(1, n_steps + 1):
    # next_batch returns a tuple; only element 0 is fed to the graph.
    batch = mnist.train.next_batch(batch_size)
    feed_dict = {x: batch[0]}
    # Run one optimiser update and fetch the unregularised loss for logging.
    _, cur_loss = sess.run([train_step, loss], feed_dict=feed_dict)

    # Report every 50th step to keep the cell output manageable.
    if step % 50 == 0:
      print("Step {0} | Loss: {1}".format(step, cur_loss))

Step 50 | Loss: 206.6797332763672
Step 100 | Loss: 188.53404235839844
Step 150 | Loss: 228.7799072265625
Step 200 | Loss: 159.44052124023438
Step 250 | Loss: 161.36410522460938
Step 300 | Loss: 144.46897888183594
Step 350 | Loss: 151.40940856933594
Step 400 | Loss: 150.08047485351562
Step 450 | Loss: 140.50794982910156
Step 500 | Loss: 144.87803649902344
Step 550 | Loss: 148.7763214111328
Step 600 | Loss: 139.9831085205078
Step 650 | Loss: 135.39344787597656
Step 700 | Loss: 141.51634216308594
Step 750 | Loss: 133.27589416503906
Step 800 | Loss: 130.71604919433594
Step 850 | Loss: 133.49606323242188
Step 900 | Loss: 134.733154296875
Step 950 | Loss: 128.15646362304688
Step 1000 | Loss: 130.33447265625
Step 1050 | Loss: 127.86308288574219
Step 1100 | Loss: 123.37059783935547
Step 1150 | Loss: 124.92028045654297
Step 1200 | Loss: 122.23834228515625
Step 1250 | Loss: 125.19023132324219
Step 1300 | Loss: 125.96185302734375
Step 1350 | Loss: 125.10791015625
Step 1400 | Loss: 125.79914855957