In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist

In [2]:
# --- Input dimensionality -------------------------------------------------
# Every 28x28 image is flattened into a single vector of this length.
num_features = 28 * 28

# --- Optimisation schedule ------------------------------------------------
learning_rate = 0.01     # step size handed to the optimizer
training_steps = 20000   # the training loop consumes training_steps + 1 batches
batch_size = 256         # samples per batch produced by the tf.data pipelines
display_step = 1000      # the loss is printed whenever step % display_step == 0

# --- Autoencoder layer widths ---------------------------------------------
num_hidden_1 = 128  # width of the first encoder layer / first decoder layer
num_hidden_2 = 64   # width of the second encoder layer (the decoder's input)

In [3]:
def _flatten_and_scale(images):
    """Cast to float32, flatten each sample to `num_features` values, divide by 255.

    The steps run in the order cast -> reshape -> divide, so the result is a
    float32 array with `num_features` columns.
    """
    flat = images.astype(np.float32).reshape([-1, num_features])
    return flat / 255.


# Load the MNIST train/test splits; labels are kept as returned by the loader.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)

In [4]:
# Training stream.
# Order matters: shuffle BEFORE repeat. With repeat() first, the shuffle buffer
# is filled from an endless stream, so samples from neighbouring epochs get
# mixed and a sample may be seen again before others were seen once.
# Shuffling first keeps epoch boundaries intact, and the buffer is reshuffled
# on every repetition (tf.data's default reshuffle_each_iteration behaviour).
# repeat() still precedes batch(), so every batch holds exactly `batch_size`
# samples and the stream is infinite -- consumers must bound it with take().
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = (
    train_data
    .shuffle(10000)
    .repeat()
    .batch(batch_size)
    .prefetch(1)
)

# Evaluation stream: same batching, no shuffling. It is also infinite because
# of repeat(), so iterate it with take(n) rather than a bare for-loop.
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.repeat().batch(batch_size).prefetch(1)

In [6]:
# One initializer instance is used to draw the starting value of every variable.
random_normal = tf.initializers.RandomNormal()


def _new_variable(shape):
    """Create a tf.Variable whose initial value is drawn from `random_normal`."""
    return tf.Variable(random_normal(shape))


# Shapes are listed in the same order the variables are created; the dict
# order is also the order in which the training step collects the variables.
_weight_shapes = (
    ('encoder_h1', [num_features, num_hidden_1]),
    ('encoder_h2', [num_hidden_1, num_hidden_2]),
    ('decoder_h1', [num_hidden_2, num_hidden_1]),
    ('decoder_h2', [num_hidden_1, num_features]),
)
_bias_shapes = (
    ('encoder_b1', [num_hidden_1]),
    ('encoder_b2', [num_hidden_2]),
    ('decoder_b1', [num_hidden_1]),
    ('decoder_b2', [num_features]),
)

# Kernel matrices of the four dense layers (two encoder, two decoder).
weights = {key: _new_variable(shape) for key, shape in _weight_shapes}
# Matching bias vectors, one per layer.
biases = {key: _new_variable(shape) for key, shape in _bias_shapes}

In [11]:
def encoder(x):
    """Encode a batch through the two sigmoid-activated encoder layers.

    Args:
        x: 2-D float tensor whose last dimension matches the first dimension
            of weights['encoder_h1'] (i.e. `num_features`).

    Returns:
        Output of the second encoder layer; its last dimension equals the
        second dimension of weights['encoder_h2'] (`num_hidden_2`).
    """
    activation = x
    for weight_key, bias_key in (('encoder_h1', 'encoder_b1'),
                                 ('encoder_h2', 'encoder_b2')):
        # Affine transform followed by an element-wise sigmoid.
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]),
                                biases[bias_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

def decoder(x):
    """Decode a batch through the two sigmoid-activated decoder layers.

    Args:
        x: 2-D float tensor whose last dimension matches the first dimension
            of weights['decoder_h1'] (i.e. `num_hidden_2`).

    Returns:
        Output of the second decoder layer; its last dimension equals the
        second dimension of weights['decoder_h2'] (`num_features`). Because
        the final activation is a sigmoid, values lie strictly between 0 and 1.
    """
    activation = x
    for weight_key, bias_key in (('decoder_h1', 'decoder_b1'),
                                 ('decoder_h2', 'decoder_b2')):
        # Affine transform followed by an element-wise sigmoid.
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]),
                                biases[bias_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

In [8]:
def mean_square(reconstructed, original):
    """Mean squared error between a reconstruction and its target.

    Args:
        reconstructed: tensor produced by the model.
        original: tensor the reconstruction is compared against; must be
            broadcast-compatible with `reconstructed`.

    Returns:
        Scalar tensor: the squared element-wise difference averaged over
        every element (no axis is given, so all dimensions are reduced).
    """
    residual = original - reconstructed
    squared_error = tf.pow(residual, 2)
    return tf.reduce_mean(squared_error)

# Single Adam optimizer instance reused by every training step; its step size
# is the `learning_rate` constant defined with the other hyper-parameters.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

In [14]:
def run_optimization(x):
    """Perform one gradient-descent update of the autoencoder on batch `x`.

    The batch is encoded, decoded, and compared against itself with
    `mean_square`; the resulting gradients are applied to every entry of
    `weights` and `biases` through the module-level `optimizer`.

    Args:
        x: batch of inputs accepted by `encoder`.

    Returns:
        The scalar reconstruction loss computed *before* the update.
    """
    # Weights first, then biases -- the same order for gradients and updates.
    params = [*weights.values(), *biases.values()]

    # The tape records the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        reconstruction = decoder(encoder(x))
        batch_loss = mean_square(reconstruction, x)

    grads = tape.gradient(batch_loss, params)
    optimizer.apply_gradients(zip(grads, params))

    return batch_loss

In [15]:
# train_data is an endless stream, so it is truncated explicitly. One extra
# batch is taken so that the final step (step == training_steps) is reported.
for step, batch in enumerate(train_data.take(training_steps + 1)):
    # Labels are ignored: the autoencoder's target is its own input.
    batch_x, _ = batch
    loss = run_optimization(batch_x)

    # Only report on multiples of display_step (step 0 included).
    if step % display_step != 0:
        continue
    print("step: %i, loss: %f" % (step, loss))

W0717 15:31:24.331964 140735745508224 deprecation.py:323] From /anaconda3/envs/openai/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


step: 0, loss: 0.236988
step: 1000, loss: 0.016294
step: 2000, loss: 0.010612
step: 3000, loss: 0.007956
step: 4000, loss: 0.006190
step: 5000, loss: 0.006015
step: 6000, loss: 0.005884
step: 7000, loss: 0.004970
step: 8000, loss: 0.004805
step: 9000, loss: 0.004730
step: 10000, loss: 0.004485
step: 11000, loss: 0.004526
step: 12000, loss: 0.003858
step: 13000, loss: 0.003942
step: 14000, loss: 0.004158
step: 15000, loss: 0.003753
step: 16000, loss: 0.003838
step: 17000, loss: 0.004110
step: 18000, loss: 0.003785
step: 19000, loss: 0.003735
step: 20000, loss: 0.003636
