# Tensorflow Implementation of a simple Feedforward Deep Neural Network

In [1]:
import tensorflow as tf
import numpy as np

from sklearn.metrics import accuracy_score

  return f(*args, **kwds)


### Test out implementation on MNIST Dataset

In [2]:
# load mnist data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# reshape and normalize data
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0

# convert labels to integer
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# split dataset into train and validation set
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [80]:
# input and output shapes
n_inputs = 28*28 # shape of MNIST images
n_hidden1 = 300 # 1st layer
n_hidden2 = 100 # 2nd layer
n_outputs = 10 # output layer, 10 digits

# hyperparameters
n_epochs = 40
batch_size = 50
learning_rate = 0.01

model_name = "dnn-mnist"

In [81]:
tf.reset_default_graph()
# feature input
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")

# labels
y = tf.placeholder(dtype=tf.int32, shape=(None,), name="y")

In [82]:
def dense_layer(X, size, name, activation=None):
    """method that encapsulates a single layer of a feedforward neural network"""

    with tf.name_scope(name):
        input_size = int(X.get_shape()[1])
        # sample according to xavier weight initialization technique
        xavier_initialization = tf.truncated_normal((input_size, size), stddev=2/np.sqrt(input_size))
        W = tf.Variable(xavier_initialization, dtype=tf.float32, name="weight")
        b = tf.Variable(np.zeros([size]), dtype=tf.float32, name="bias")
        z = tf.matmul(X, W) + b
        if activation == "relu":
            return tf.nn.relu(z)
        else:
            return z

In [83]:
# two layer neural network
with tf.name_scope("dnn"):
    hidden1 = dense_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = dense_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    logits = dense_layer(hidden2, n_outputs, "outputs")

In [84]:
# cross-entropy loss
with tf.name_scope("loss"):
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(cross_entropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

In [85]:
# gradient descent optimizer
with tf.name_scope("optimizer"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [86]:
# accuracy metric
with tf.name_scope("evaluate"):
    # tp: images that we predicted correctly
    tp = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(tp, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [87]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [88]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()
summary_op = tf.summary.merge_all()

In [89]:
with tf.Session() as sess:
    init.run()
    train_writer = tf.summary.FileWriter("tf_logs/{model}/train_summary".format(model=model_name), sess.graph)
    valid_writer = tf.summary.FileWriter("tf_logs/{model}/valid_summary".format(model=model_name), sess.graph)
    for epoch in range(n_epochs):
        for i, (X_batch, y_batch) in enumerate(shuffle_batch(X_train, y_train, batch_size)):
            _, train_summary = sess.run([training_op, summary_op], feed_dict={X: X_batch, y: y_batch})
#             if i % 10 == 0:
        train_writer.add_summary(train_summary, epoch)
        accuracy_val, loss_val, valid_summary = sess.run([accuracy, loss, summary_op], feed_dict={X: X_valid, y: y_valid})
        valid_writer.add_summary(valid_summary, epoch)
#         summary_writer.add_summary(loss_summary_str, epoch)
        print(epoch, "Validation accuracy:", accuracy_val, ", Validaton Loss:", loss_val)

    save_path = saver.save(sess, "tf_logs/{model}/model.ckpt".format(model=model_name))

0 Validation accuracy: 0.9194 , Validaton Loss: 0.30003527
1 Validation accuracy: 0.9326 , Validaton Loss: 0.23755257
2 Validation accuracy: 0.9408 , Validaton Loss: 0.20630664
3 Validation accuracy: 0.949 , Validaton Loss: 0.18060751
4 Validation accuracy: 0.9546 , Validaton Loss: 0.16556223
5 Validation accuracy: 0.9578 , Validaton Loss: 0.15209463
6 Validation accuracy: 0.9622 , Validaton Loss: 0.14075862
7 Validation accuracy: 0.9616 , Validaton Loss: 0.13801004
8 Validation accuracy: 0.9666 , Validaton Loss: 0.12686086
9 Validation accuracy: 0.9656 , Validaton Loss: 0.120537706
10 Validation accuracy: 0.968 , Validaton Loss: 0.11677975
11 Validation accuracy: 0.971 , Validaton Loss: 0.11036893
12 Validation accuracy: 0.973 , Validaton Loss: 0.10595713
13 Validation accuracy: 0.9732 , Validaton Loss: 0.1030479
14 Validation accuracy: 0.9736 , Validaton Loss: 0.09880702
15 Validation accuracy: 0.9742 , Validaton Loss: 0.09673351
16 Validation accuracy: 0.974 , Validaton Loss: 0.0944

In [90]:
# restore the model and get predictions for test set
with tf.Session() as sess:
    saver.restore(sess, "tf_logs/{model}/model.ckpt".format(model=model_name)) # or better, use save_path
    logits_val = logits.eval(feed_dict={X: X_test})
    y_pred = np.argmax(logits_val, axis=1)

INFO:tensorflow:Restoring parameters from tf_logs/dnn-mnist-8/model.ckpt


In [91]:
# get accuracy of test set
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("test set accuracy: ", accuracy)

test set accuracy:  0.9762
