
Example of an autoencoder model on the MNIST dataset<br>
This autoencoder has a modular design. The encoder, decoder and autoencoder<br>
are 3 models that share weights. For example, after training the<br>
autoencoder, the encoder can be used to generate latent vectors<br>
of input data for low-dimensional visualization such as PCA or t-SNE.<br>


In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [None]:
from keras.layers import Dense, Input
from keras.layers import Conv2D, Flatten
from keras.layers import Reshape, Conv2DTranspose
from keras.models import Model
from keras.datasets import mnist
from keras.utils import plot_model
from keras import backend as K

In [None]:
import numpy as np
import matplotlib.pyplot as plt

load MNIST dataset

In [None]:
# Keep only the image arrays; the second element of each (train, test)
# pair returned by load_data() is discarded into `_` because the
# autoencoder below is fit with the images as both input and target.
(x_train, _), (x_test, _) = mnist.load_data()

reshape to (28, 28, 1) and normalize input images

In [None]:
def _to_unit_images(images, size):
    """Return `images` shaped (-1, size, size, 1) with pixels scaled to [0, 1].

    Integer-typed pixel data is converted to float32 and divided by 255.
    Data that is already floating point is assumed to have been scaled by a
    previous run of this cell and is only reshaped, so re-running the cell
    does not divide by 255 a second time.
    """
    images = np.reshape(images, [-1, size, size, 1])
    if np.issubdtype(images.dtype, np.integer):
        images = images.astype('float32') / 255
    return images


# the images are square, so one side length describes both spatial axes
image_size = x_train.shape[1]
x_train = _to_unit_images(x_train, image_size)
x_test = _to_unit_images(x_test, image_size)

network parameters

In [None]:
# --- data ---
# single-channel square images
input_shape = (image_size, image_size) + (1,)

# --- architecture ---
# side length of every convolution kernel
kernel_size = 3
# number of filters in each CNN layer; the encoder walks this list
# front to back and the decoder walks it back to front
layer_filters = [32, 64]
# length of the latent vector produced by the encoder
latent_dim = 16

# --- training ---
batch_size = 32

build the autoencoder model<br>
first build the encoder model

In [None]:
inputs = Input(shape=input_shape, name='encoder_input')

# Settings shared by every encoder convolution; only the filter count
# changes from layer to layer.
encoder_conv_args = dict(kernel_size=kernel_size,
                         strides=2,
                         padding='same',
                         activation='relu')

# Conv2D(32) followed by Conv2D(64), each applied with stride 2
x = inputs
for n_filters in layer_filters:
    x = Conv2D(filters=n_filters, **encoder_conv_args)(x)

save the shape of the encoder's last feature map; it is needed to build<br>
the decoder model, so we don't have to compute it by hand<br>
the input to the decoder's first<br>
Conv2DTranspose will have this shape<br>
the feature map shape is (7, 7, 64), which is processed by<br>
the decoder back to (28, 28, 1)

In [None]:
# Record the shape of the last encoder conv output before it is flattened;
# entries 1..3 are reused later to size the decoder's Dense and Reshape layers.
shape = K.int_shape(x)

generate latent vector

In [None]:
# Flatten the conv feature map into a vector, then project it to
# `latent_dim` values with a Dense layer (no activation is specified).
x = Flatten()(x)
latent_layer = Dense(units=latent_dim, name='latent_vector')
latent = latent_layer(x)

instantiate encoder model

In [None]:
# The encoder maps an input image to its latent vector.
encoder = Model(inputs=inputs, outputs=latent, name='encoder')
encoder.summary()
# also write a diagram of the layer graph, annotated with tensor shapes
plot_model(encoder, show_shapes=True, to_file='encoder.png')

build the decoder model

In [None]:
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')

# Reuse the feature-map shape saved from the encoder (entry 0 is skipped).
rows, cols, channels = shape[1:]
# expand the latent vector to as many values as the feature map holds ...
x = Dense(rows * cols * channels)(latent_inputs)
# ... and fold that vector back into a feature map for the transposed convs
x = Reshape((rows, cols, channels))(x)

stack of Conv2DTranspose(64)-Conv2DTranspose(32)

In [None]:
# Settings shared by every decoder transposed convolution; only the
# filter count changes from layer to layer.
decoder_conv_args = dict(kernel_size=kernel_size,
                         strides=2,
                         padding='same',
                         activation='relu')

# walk the encoder's filter list in reverse order
for n_filters in reversed(layer_filters):
    x = Conv2DTranspose(filters=n_filters, **decoder_conv_args)(x)

reconstruct the input

In [None]:
# Final layer: a single filter with a sigmoid activation. No stride is
# given here, unlike the stride-2 layers of the decoder stack.
output_layer = Conv2DTranspose(filters=1,
                               kernel_size=kernel_size,
                               padding='same',
                               activation='sigmoid',
                               name='decoder_output')
outputs = output_layer(x)

instantiate decoder model

In [None]:
# The decoder maps a latent vector back to a reconstructed image.
decoder = Model(inputs=latent_inputs,
                outputs=outputs,
                name='decoder')
decoder.summary()
# also write a diagram of the layer graph, annotated with tensor shapes
plot_model(decoder,
           show_shapes=True,
           to_file='decoder.png')

autoencoder = encoder + decoder<br>
instantiate autoencoder model

In [None]:
# Chain the two sub-models: image -> latent code -> reconstruction.
# The autoencoder calls the existing encoder and decoder models rather
# than building new layers.
latent_code = encoder(inputs)
reconstruction = decoder(latent_code)
autoencoder = Model(inputs=inputs,
                    outputs=reconstruction,
                    name='autoencoder')
autoencoder.summary()
plot_model(autoencoder, show_shapes=True, to_file='autoencoder.png')

Mean Squared Error (MSE) loss function, Adam optimizer

In [None]:
# Configure training: mean squared error between the input image and its
# reconstruction as the loss, optimized with Adam.
autoencoder.compile(loss='mse', optimizer='adam')

train the autoencoder

In [None]:
# The images serve as both input and target, for the training data and
# for the validation data alike.
autoencoder.fit(x=x_train,
                y=x_train,
                batch_size=batch_size,
                epochs=1,
                validation_data=(x_test, x_test))

predict the autoencoder output from test data

In [None]:
# Pass the test images through the full autoencoder (encoder, then
# decoder) to obtain their reconstructions.
x_decoded = autoencoder.predict(x_test)

display the first 8 test inputs and their decoded images

In [None]:
# Stack 8 inputs and their 8 reconstructions, then lay the 16 images out
# as a 4 x 4 grid.
imgs = np.concatenate([x_test[:8], x_decoded[:8]])
imgs = imgs.reshape((4, 4, image_size, image_size))
# (grid_row, grid_col, y, x) -> (grid_row, y, grid_col, x), then merge each
# pair of axes to obtain one tiled 2-D picture
imgs = imgs.transpose(0, 2, 1, 3).reshape(4 * image_size, 4 * image_size)

fig, ax = plt.subplots()
ax.axis('off')
ax.set_title('Input: 1st 2 rows, Decoded: last 2 rows')
ax.imshow(imgs, interpolation='none', cmap='gray')
fig.savefig('input_and_decoded.png')
plt.show()