The objective of the exercise is to implement computational graphs in TensorFlow to train and use such an architecture. The constraint we impose on ourselves is to use only the **low-level** functions of TensorFlow, i.e. we will not use high-level functions to compose layers or to train the parameters.

In [None]:
#######################
# Dataset Preparation #
#######################
import numpy as np
import tensorflow as tf
# Load the MNIST train/test split shipped with Keras.
mnist = tf.keras.datasets.mnist
(x_train, y_train_vec), (x_test, y_test_vec) = mnist.load_data()
# Divide by 255 and subtract 0.5: pixel values in [0, 255] end up in [-0.5, 0.5].
x_train, x_test = (x_train / 255.0) - 0.5, (x_test / 255.0) - 0.5
# Flatten every image into a single row vector. The sizes are derived from the
# arrays themselves instead of being hard-coded; for the standard split this
# yields the same (60000, 784) and (10000, 784) matrices as before.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
# convert class vectors to binary class matrices
n_classes = 10
y_train = tf.keras.utils.to_categorical(y_train_vec, n_classes)
y_test = tf.keras.utils.to_categorical(y_test_vec, n_classes)

In [None]:
# Function to sample a random batch from dataset
def next_batch(num, data, labels):
    '''
    Draw a random mini-batch from a dataset.

    A random permutation of the sample indices is generated and its first
    `num` entries are used to index both `data` and `labels`, so the returned
    samples are distinct and stay aligned with their labels. When `num`
    exceeds `len(data)`, only `len(data)` samples are returned.

    Returns the tuple (selected data, selected labels).
    '''
    chosen = np.random.permutation(len(data))[:num]
    return data[chosen], labels[chosen]

In [None]:
##################
# Training phase #
##################
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as pl

E = 50               # number of epochs
B = 128               # batch size
N = x_train.shape[0]  # number of samples
D = x_train.shape[1]  # dimension of input sample
H = 300               # number of neurons
A = 0.01              # learning rate alpha

##############################################
#  COMPLETE CODE BELOW WHERE YOU SEE # ...   #
##############################################

# Build the computational graph

# define placeholders x, y and learning rate alpha
# x = ...
# y = ...
# alpha = ...
# define TensorFlow Variables for w1, b1, w2, b2 following the given examples
w1 = tf.Variable(tf.truncated_normal((D, H), stddev = 0.1))
b1 = tf.Variable(tf.constant(0.0, shape=[H]))
# w2 = ...
# b2 = ...

# define nodes for forward computation for hidden neurons h and output neurons y_pred
# h = ...
# y_pred = ...
# define nodes for difference between predicted and target values and for loss
# diff = ...
# loss = ...
# define the gradients
# grad_w1, grad_b1, grad_w2, grad_b2 = ...

# compute the new values of the gradients with the Variable assign method (see slides)
# new_w1 = ...
# new_b1 = ...
# new_w2 = ...
# new_b2 = ...
updates = tf.group(new_w1, new_b1, new_w2, new_b2)

# Run the computational graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    J = []
    for epoch in range(E):
        for _ in range(int(N/B)): # number of batches to visit for 1 epoch
            # get batches calling the next_batch method provided above
            # x_train_batch, y_train_batch = ...
            # define a dictionary of values that will be used to feed the placeholders of the graph
            # values = { ... }
            # ask TensorFlow to compute the graph on the batch and update the values
            # loss_val = sess.run(...)
        J.append(loss_val)
        print("epoch", epoch, loss_val[0])

    # now retrieve the weights and bias out of the computational graph
    w1_trained, b1_trained, w2_trained, b2_trained = sess.run([w1, b1, w2, b2])

In [None]:
# Plot the evolution of the loss; J receives one entry per training epoch,
# so the x axis counts epochs.
pl.plot(J)
pl.xlabel("epoch")
pl.ylabel("loss")

In [None]:
#################
# Testing phase #
#################

N = x_test.shape[0]  # number of samples
D = x_test.shape[1]  # dimension of input sample

##############################################
#  COMPLETE CODE BELOW WHERE YOU SEE # ...   #
##############################################
# Build the computational graph
# x = ...
# y = ...
# w1 = ...
# b1 = ...
# w2 = ...
# b2 = ...

# define nodes for forward computation for hidden neurons h and output neurons y_pred
# h = ...
# y_pred = ...

# Run the computational graph
with tf.Session() as sess:
    # define a dictionary of values that will be used to feed the placeholders of the graph
    # don't forget to pass in the trained weights and biases
    # values = ...
    # ask TensorFlow to compute the graph on the test set
    # values = { ... }
    # y_pred_test = sess.run(...)

# At this stage, y_pred_test should contain the matrix of outputs on the test set with shape (N_test, 10)

In [None]:
# Compute accuracy: the predicted class of each test sample is the index of
# its largest output, which is compared with the integer label in y_test_vec.
y_winner = np.argmax(y_pred_test, axis=1)
N_test = len(y_winner)
num_correct = np.sum(y_winner == y_test_vec)
num_missed = N_test - num_correct
# fractions of the test set, in [0, 1]; converted to percentages when printed
accuracy = num_correct / float(N_test)
error_rate = num_missed / float(N_test)
print('# correct  : ', num_correct)
print('# missed   : ', num_missed)
print('accuracy   :  %2.2f %%' % (100.0 * accuracy))
print('error rate :  %2.2f %%' % (100.0 * error_rate))