In [40]:
import keras
from keras import backend as K
from keras.layers import Input, Lambda, Dense, Conv2D, Conv2DTranspose, MaxPool2D, Flatten, Reshape
from keras.models import Model
from keras import metrics
from keras.datasets import mnist
import tensorflow as tf
from tensorflow.python import debug as tf_debug
import numpy as np

In [41]:
batch_size = 128
epochs = 50
image_size = (28,28,1)
latent_dimension = 3 ## to view representation clusters in 3 dimensions

In [42]:
## defining the input for mnist images
input_image = Input(shape=image_size)

In [43]:
## defining the inference network
## this is the network that will produce a latent space representation of the original image
## 5 layer convolutional network
encoder = Conv2D(16, (3,3), activation='relu', padding='same')(input_image)
encoder = MaxPool2D((2,2), padding="same")(encoder)
encoder = Conv2D(8, (3,3), activation='relu', padding='same')(encoder)
encoder = MaxPool2D((2,2), padding="same")(encoder)
encoder = Conv2D(4, (3,3), activation='relu', padding='same')(encoder)
encoder_shape = K.int_shape(encoder)

encoder = Flatten()(encoder) ## turns output to size of (None, 112)
# testing without further reduction of dimensions
#encoder = Dense(32)(encoder)
z_mean = Dense(latent_dimension)(encoder)
z_var = Dense(latent_dimension)(encoder)

In [44]:
print(encoder_shape)

(None, 7, 7, 4)


In [45]:
## defining the sampling method for the generator network
def normal_sample(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dimension),
                              mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var) * epsilon
z = Lambda(normal_sample)([z_mean, z_var])
sample_shape = K.int_shape(z)
print(sample_shape)

(None, 3)


In [46]:
## defining the generator network
## this is the network that takes the latent space representation and upsamples it to create a reconstruction
## 5 layer convolutional upsampling network
decoder_input = Input(shape=sample_shape[1:])
decoder = Dense(np.prod(encoder_shape[1:]), activation="relu")(decoder_input)
decoder = Reshape(encoder_shape[1:])(decoder)
print(decoder.shape)
## figure out the shaping problem
decoder  = Conv2DTranspose(32, 3,
                           padding='same', activation='relu',
                           strides=(2, 2))(decoder)
print(decoder.shape)
decoder = Conv2D(1, 3, padding='same', activation='sigmoid')(decoder)
print(decoder.shape)
decoder = Model(decoder_input, decoder)

(?, 7, 7, 4)
(?, ?, ?, 32)
(?, ?, ?, 1)


In [47]:
## variational layer for reconstruction loss
class CustomVariationalLayer(keras.layers.Layer):

    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        xent_loss = keras.metrics.binary_crossentropy(x, z_decoded)
        kl_loss = -5e-4 * K.mean(
            1 + z_var - K.square(z_mean) - K.exp(z_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        # We don't use this output.
        return x

In [48]:
## reconstruct the sample taken from the latent space
decoded_sample = decoder(z) ## generated sample
y = CustomVariationalLayer()([input_image, decoded_sample]) ## reconstruction loss applied

In [34]:
from keras.datasets import mnist

vae = Model(input_image, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()

# Train the VAE on MNIST digits
(x_train, _), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype('float32') / 255.
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype('float32') / 255.
x_test = x_test.reshape(x_test.shape + (1,))

vae.fit(x=x_train, y=None,
        shuffle=True,
        epochs=10,
        batch_size=batch_size,
        validation_data=(x_test, None))

  after removing the cwd from sys.path.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_9 (Conv2D)               (None, 28, 28, 16)   160         input_5[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D)  (None, 14, 14, 16)   0           conv2d_9[0][0]                   
__________________________________________________________________________________________________
conv2d_10 (Conv2D)              (None, 14, 14, 8)    1160        max_pooling2d_5[0][0]            
__________________________________________________________________________________________________
max_poolin

ValueError: Tensor("Square:0", shape=(3, 3, 1, 16), dtype=float32) must be from the same graph as Tensor("training/RMSprop/sub:0", shape=(), dtype=float32).