# Autoencoder example

## Load MNIST data

In [1]:
#imports
import pandas as pd
from keras.models import Model
from matplotlib import pyplot as plt
from keras.layers import Input, Dense

#load MNIST data and show the shape for the training and test sets
from keras.datasets import mnist
import numpy as np
# Only the images are needed for an autoencoder, so the label arrays are discarded.
(X_train, _), (X_test, _) = mnist.load_data()

# Expected output:
#   (60000, 28, 28) -> 60000 training images of 28x28 pixels
#   (10000, 28, 28) -> 10000 test images of 28x28 pixels
print(X_train.shape, X_test.shape, sep='\n')

Using TensorFlow backend.


(60000, 28, 28)
(10000, 28, 28)


## Normalize the data

In [2]:
# Scale the pixel values by 1/255 and flatten every 28x28 image into a
# single 784-element vector (784 = 28*28), which is the input size the
# dense autoencoder expects.
#
# The scaling is guarded on the dtype: the raw arrays are integer typed, so
# once they have been converted to float the division is skipped. This keeps
# the cell idempotent -- re-running it does not divide by 255 a second time.
# float32 is used instead of the float64 that a plain `/ 255` would produce,
# halving the memory footprint.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype('float32') / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255

# Flatten all trailing dimensions; on an already flattened array this is a no-op.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


## Create the neural network

In [3]:
# Size of the flattened input image (28*28 pixels).
input_dim = 784
# Size of the compressed (encoded) representation.
enc_dim = 32

# Full autoencoder: input -> dense encoding -> dense reconstruction.
input_image = Input(shape=(input_dim,))
encoded_image = Dense(units=enc_dim, activation='relu')(input_image)
decoded_image = Dense(units=input_dim, activation='sigmoid')(encoded_image)
autoencoder = Model(inputs=input_image, outputs=decoded_image)

# Stand-alone encoder: maps an input image to its encoding.
encoder = Model(inputs=input_image, outputs=encoded_image)

# Stand-alone decoder: feeds a fresh encoding-sized input through the
# autoencoder's final layer object, so it reuses that same layer.
encoded_layer = Input(shape=(enc_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(inputs=encoded_layer, outputs=decoder_layer(encoded_layer))

# Only the autoencoder is compiled, since it is the model that gets trained.
autoencoder.compile(loss='binary_crossentropy', optimizer='adadelta')






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


### show the summary

In [6]:
# The summary shows 784 inputs for the input layer, 32 dimensions for the
# hidden (encoded) layer and 784 for the output layer.
# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_2 (Dense)              (None, 784)               25872     
Total params: 50,992
Trainable params: 50,992
Non-trainable params: 0
_________________________________________________________________
None


## Fit the model

In [5]:
# Train the autoencoder. This is unsupervised learning: there are no labels,
# so the images serve as both the input and the target (which is why the data
# was never split into X and Y groups as in supervised examples).
# The test set doubles as the validation set because it is not needed for
# anything else here.
history = autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=10,
    shuffle=True,
    validation_data=(X_test, X_test),
    verbose=1,
)




Train on 60000 samples, validate on 10000 samples
Epoch 1/10





Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Plot the loss

In [None]:
# Show which metrics were recorded during training.
print(history.history.keys())

# Draw the training and validation loss curves on a single set of axes.
fig, ax = plt.subplots()
for metric in ('loss', 'val_loss'):
    ax.plot(history.history[metric])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

## Show the reconstruction error

In [None]:
from sklearn import model_selection
import matplotlib.pyplot as plt
# Encode the test images and decode them again to obtain the reconstructions.
result = decoder.predict(encoder.predict(X_test))

# Reconstruction error: mean squared difference between each flattened test
# image and its reconstruction, followed by the average over the test set.
reconstruction_error = np.mean((X_test - result) ** 2, axis=1)
print('mean reconstruction error (MSE):', reconstruction_error.mean())
print('reconstruction error of first test image:', reconstruction_error[0])

# Show the first test image next to its reconstruction so they can be
# compared directly; each panel is titled so the figure stands alone.
fig, (ax_original, ax_reconstructed) = plt.subplots(1, 2)
ax_original.imshow(X_test[0].reshape(28, 28))
ax_original.set_title('original')
ax_reconstructed.imshow(result[0].reshape(28, 28))
ax_reconstructed.set_title('reconstruction')
plt.show()