# Variational Auto Encoder

In [1]:
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras.losses import binary_crossentropy
from keras import backend as K
from keras.datasets import mnist
from keras.callbacks import TensorBoard

import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.


## Let's use the MNIST dataset because this is just an example showcase

In [2]:
# Load the MNIST train/test split. The labels (y_train, y_test) are not used
# by any of the cells shown in this part of the notebook.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
# Flatten every square image into a single vector and rescale the pixel
# values by 1/255, stored as float32.
# NOTE: x_train / x_test are rebound in place, so this cell is not safe to
# re-run on its own -- re-run the data-loading cell first.
image_size = x_train.shape[1]
original_dim = image_size * image_size

x_train = x_train.reshape(-1, original_dim).astype('float32') / 255
x_test = x_test.reshape(-1, original_dim).astype('float32') / 255

In [4]:
# Sanity check: training set shape after flattening, (samples, original_dim).
x_train.shape

(60000, 784)

In [5]:
# Sanity check: test set shape after flattening, (samples, original_dim).
x_test.shape

(10000, 784)

## Network hyperparameters

In [6]:
input_shape = (original_dim, ) # shape of one flattened input sample
batch_size = 128 # samples per batch, passed to vae.fit
latent_dim = 2 # width of z_mean / z_log_sigma / z and of the decoder input
epochs = 50 # number of training epochs, passed to vae.fit

## The encoder model

In [7]:
# Encoder body: one 512-unit ReLU layer feeding two parallel Dense heads
# (no activation specified) of width latent_dim.
inputs = Input(shape=input_shape,name='encoder_inputs')
x = Dense(512,activation='relu')(inputs)

z_mean = Dense(latent_dim, name='z_mean')(x) # mean of the latent Gaussian
# Despite its name, this head is consumed as the log-VARIANCE: sampling()
# uses exp(0.5 * z_log_sigma) as the standard deviation and the KL term
# uses exp(z_log_sigma) as the variance.
z_log_sigma = Dense(latent_dim, name='z_log_sigma')(x) # log-variance (not the standard deviation)

## Because a VAE is a generative model, we can use these parameters to sample new, similar points from the latent space

In [8]:
# reparameterization trick
# z = z_mean + exp(0.5 * z_log_sigma) * eps   (z_log_sigma is used as the log-variance)
def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Args:
        args: a pair ``(z_mean, z_log_sigma)`` of backend tensors. The second
            element is used as a log-variance: its standard deviation is
            taken as ``exp(0.5 * z_log_sigma)``.

    Returns:
        ``z_mean + exp(0.5 * z_log_sigma) * eps``, where ``eps`` comes from
        ``K.random_normal`` (default parameters) with the same
        ``(batch, dim)`` shape as ``z_mean``.
    """
    mean, log_var = args

    # The batch size is only known at run time, so it is read from the
    # dynamic shape; the latent width is read from the static shape.
    n_samples = K.shape(mean)[0]
    n_latent = K.int_shape(mean)[1]

    noise = K.random_normal(shape=(n_samples, n_latent))
    std = K.exp(0.5 * log_var)
    return mean + std * noise

In [9]:
# Wrap sampling() in a Lambda layer so the sampled z is part of the model
# graph; it takes the two encoder heads and has width latent_dim.
z = Lambda(sampling,output_shape=(latent_dim,), name='z')([z_mean,z_log_sigma])

## Encoding inputs to latent space

In [10]:
# The encoder exposes three outputs, in this order:
#   [0] z_mean, [1] z_log_sigma, [2] z (the sampled latent vector).
encoder = Model(inputs,[z_mean,z_log_sigma,z], name='encoder')
encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     (None, 784)          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 512)          401920      encoder_inputs[0][0]             
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 2)            1026        dense_1[0][0]                    
__________________________________________________________________________________________________
z_log_sigma (Dense)             (None, 2)            1026        dense_1[0][0]                    
__________________________________________________________________________________________________
z (Lambda)

## The decoder model

In [11]:
# Decoder body: latent vector -> 512-unit ReLU layer -> original_dim sigmoid
# outputs.
# NOTE: `x` is rebound here (it previously held the encoder's hidden layer),
# and `outputs` is rebound again when the encoder and decoder are chained.
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(512, activation='relu')(latent_inputs)
outputs = Dense(original_dim,activation='sigmoid')(x)

## Decoding latent space samples to outputs

In [12]:
# Stand-alone decoder model: latent_inputs -> reconstructed flattened image.
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 2)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               1536      
_________________________________________________________________
dense_3 (Dense)              (None, 784)               402192    
Total params: 403,728
Trainable params: 403,728
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Chain the two models end to end. This rebinds `outputs`, which until now
# referred to the decoder's own output tensor.
outputs = decoder(encoder(inputs)[2]) # index 2 of the encoder outputs is z, the sample produced by sampling()

## The VAE model

In [14]:
# Full autoencoder: encoder input -> sampled z -> decoder reconstruction.
vae = Model(inputs, outputs, name='vae') # encoder + decoder

## Now we have to build the VAE's most important feature, its loss function, which is a combination of the reconstruction loss and the KL divergence

In [15]:
# Reconstruction term: the binary cross-entropy between the input and its
# reconstruction, scaled by the number of pixels (original_dim) so that it
# covers every pixel rather than an average over them.
reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim

In [16]:
# KL term, summed over the latent dimensions:
#   -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
# with z_log_sigma playing the role of log_var.
kl_loss = -0.5 * K.sum(
    1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma),
    axis=-1,
)

In [17]:
# Total loss: reconstruction + KL terms added together, then reduced to a
# single scalar with K.mean.
vae_loss = K.mean(reconstruction_loss + kl_loss)

In [18]:
# Attach the loss tensor directly to the model; compile() is then called
# with no `loss` argument and fit() with no targets.
vae.add_loss(vae_loss)

## Compile VAE model using ADAM optimizer

In [19]:
# Only the optimizer is given here: the loss was already attached with
# vae.add_loss.
vae.compile(optimizer='adam')
vae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_inputs (InputLayer)  (None, 784)               0         
_________________________________________________________________
encoder (Model)              [(None, 2), (None, 2), (N 403972    
_________________________________________________________________
decoder (Model)              (None, 784)               403728    
Total params: 807,700
Trainable params: 807,700
Non-trainable params: 0
_________________________________________________________________


## Training VAE using MNIST data

In [20]:
# Only inputs are passed (no targets, and None as the validation target)
# because the loss was attached with vae.add_loss and is computed from the
# model's own tensors.
# NOTE(review): no random seed is set anywhere in the cells shown, so runs
# are presumably not reproducible -- confirm whether that matters here.
vae.fit(x_train,epochs=epochs,batch_size=batch_size,validation_data=(x_test,None),verbose=False)

Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fceb7396080>