In [2]:
import numpy as np
import tensorflow as tf

def reset_graph(seed=42):
    """Start over with a fresh default graph and re-seed TensorFlow and NumPy.

    Args:
        seed: Value handed to both ``np.random.seed`` and
            ``tf.set_random_seed``.
    """
    # Reset first, then seed: the TF seed is set after the new graph exists.
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)

Creating DNN for MNIST

In [5]:
def selu(z, scale=1.0507009873554804934193349852946, alpha=1.6732632423543772848170429916717):
    """Scaled ELU activation.

    Returns ``scale * z`` where ``z >= 0`` and ``scale * alpha * elu(z)``
    elsewhere.

    Args:
        z: Input tensor.
        scale: Multiplier applied to the whole output.
        alpha: Extra multiplier applied to the ELU of negative inputs.
    """
    is_non_negative = z >= 0.0
    negative_branch = alpha * tf.nn.elu(z)
    return scale * tf.where(is_non_negative, z, negative_branch)

In [11]:
reset_graph()

# Layer sizes: flattened 28x28 input, two hidden layers, 10 outputs.
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# shape=None leaves the label shape unspecified (`(None)` is the same value,
# not a 1-tuple).
y = tf.placeholder(tf.int64, shape=None, name="y")

with tf.name_scope("DNN"):
    hidden1 = tf.layers.dense(inputs=X, units=n_hidden1,
                              activation=selu, name="hidden1")
    hidden2 = tf.layers.dense(inputs=hidden1, units=n_hidden2,
                              activation=selu, name="hidden2")
    # No activation on the last layer: it outputs raw logits.
    logits = tf.layers.dense(inputs=hidden2, units=n_outputs, name="outputs")


In [12]:
with tf.name_scope("loss"):
    # Cross-entropy computed from integer labels and raw logits, then
    # averaged into a single loss value.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(input_tensor=xentropy, name="loss")

In [13]:
learning_rate = 0.01

with tf.name_scope("train"):
    # Plain gradient descent; running `train_op` applies one update step.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss=loss)

In [14]:
with tf.name_scope("eval"):
    # A prediction counts as correct when the label is the top-1 logit.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Mean of the 0/1 correctness values gives the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [15]:
# Checkpoint saver and the op that initializes all variables; the two are
# independent of each other.
saver = tf.train.Saver()
init = tf.global_variables_initializer()

Loading Data

In [21]:
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST archives stored under /tmp/data/; the returned object is
# accessed below through its train / validation / test splits.
mnist = input_data.read_data_sets(train_dir="/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [23]:
n_epochs = 40
batch_size = 50

# Number of mini-batches per epoch (any remainder is dropped).
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
        # Report progress every fifth epoch.
        if epoch % 5 == 0:
            # "Batch accuracy" is measured on the last mini-batch of the
            # epoch only, not on the whole training set.
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,
                                               y: mnist.validation.labels})
            print(epoch, "Batch accuracy:", acc_train, "Validation accuracy:", acc_val)
    # NOTE: later training cells save to this same path and overwrite it.
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.86 Validation accuracy: 0.9134
5 Batch accuracy: 0.92 Validation accuracy: 0.9396
10 Batch accuracy: 0.96 Validation accuracy: 0.955
15 Batch accuracy: 0.98 Validation accuracy: 0.9626
20 Batch accuracy: 1.0 Validation accuracy: 0.9676
25 Batch accuracy: 1.0 Validation accuracy: 0.9698
30 Batch accuracy: 1.0 Validation accuracy: 0.972
35 Batch accuracy: 0.98 Validation accuracy: 0.9738


Using SELU as activation

In [24]:
reset_graph()

# MNIST network: 28*28 inputs -> 300 -> 100 -> 10 outputs.
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int64, shape=None, name="y")  # unspecified shape

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(inputs=X, units=n_hidden1,
                              activation=selu, name="hidden1")
    hidden2 = tf.layers.dense(inputs=hidden1, units=n_hidden2,
                              activation=selu, name="hidden2")
    # Output layer has no activation: it yields raw logits.
    logits = tf.layers.dense(inputs=hidden2, units=n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(input_tensor=xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss=loss)

with tf.name_scope("eval"):
    # Correct when the label matches the top-1 logit.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Training hyperparameters used by the next cell.
n_epochs, batch_size = 40, 50

In [25]:
# Column-wise statistics of the training images, used to standardize every
# input fed to the network. The small epsilon keeps the division safe for
# features whose standard deviation is zero.
means = mnist.train.images.mean(axis=0, keepdims=True)
stds = mnist.train.images.std(axis=0, keepdims=True) + 1e-10

# The validation images never change, so scale them once up front instead of
# on every evaluation pass.
X_val_scaled = (mnist.validation.images - means) / stds

# Number of mini-batches per epoch (any remainder is dropped).
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch_scaled = (X_batch - means) / stds
            sess.run(training_op, feed_dict={X: X_batch_scaled, y: y_batch})
        # Report progress every fifth epoch.
        if epoch % 5 == 0:
            # "Batch accuracy" uses only the last mini-batch of the epoch.
            acc_train = accuracy.eval(feed_dict={X: X_batch_scaled, y: y_batch})
            acc_val = accuracy.eval(feed_dict={X: X_val_scaled, y: mnist.validation.labels})
            print(epoch, "Batch accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_model_final_selu.ckpt")

0 Batch accuracy: 0.98 Validation accuracy: 0.923
5 Batch accuracy: 1.0 Validation accuracy: 0.9574
10 Batch accuracy: 1.0 Validation accuracy: 0.9668
15 Batch accuracy: 1.0 Validation accuracy: 0.9684
20 Batch accuracy: 1.0 Validation accuracy: 0.97
25 Batch accuracy: 1.0 Validation accuracy: 0.9706
30 Batch accuracy: 1.0 Validation accuracy: 0.9706
35 Batch accuracy: 1.0 Validation accuracy: 0.9698


Applying batch normalization

In [28]:
reset_graph()

n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
# `training` is False unless a feed_dict overrides it.
training = tf.placeholder_with_default(input=False, shape=(), name="training")

# Hidden layer 1: dense (no activation) -> batch norm -> ReLU.
hidden1 = tf.layers.dense(inputs=X, units=n_hidden1, name="hidden1")
bn1 = tf.layers.batch_normalization(inputs=hidden1, training=training,
                                    momentum=0.9)
bn1_act = tf.nn.relu(bn1)

# Hidden layer 2: same pattern.
hidden2 = tf.layers.dense(inputs=bn1_act, units=n_hidden2, name="hidden2")
bn2 = tf.layers.batch_normalization(inputs=hidden2, training=training,
                                    momentum=0.9)
bn2_act = tf.nn.relu(bn2)

# Output layer: the logits are batch-normalized too, with no activation.
logits_before_bn = tf.layers.dense(inputs=bn2_act, units=n_outputs,
                                   name="outputs")
logits = tf.layers.batch_normalization(inputs=logits_before_bn,
                                       training=training, momentum=0.9)

Using Python's partial() function:

In [33]:
reset_graph()

# Inputs for the rebuilt graph; `n_inputs` is taken from an earlier cell.
X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
training = tf.placeholder_with_default(input=False, shape=(), name="training")

In [34]:
from functools import partial

# Bind the arguments shared by every batch-norm layer once.
my_batch_norm_layer = partial(
    tf.layers.batch_normalization, training=training, momentum=0.9
)

# Hidden layer 1: dense -> batch norm -> ELU.
hidden1 = tf.layers.dense(inputs=X, units=n_hidden1, name="hidden1")
bn1 = my_batch_norm_layer(hidden1)
bn1_act = tf.nn.elu(bn1)

# Hidden layer 2: dense -> batch norm -> ELU.
hidden2 = tf.layers.dense(inputs=bn1_act, units=n_hidden2, name="hidden2")
bn2 = my_batch_norm_layer(hidden2)
bn2_act = tf.nn.elu(bn2)

# Output layer: dense -> batch norm, no activation.
logits_before_bn = tf.layers.dense(inputs=bn2_act, units=n_outputs,
                                   name="outputs")
logits = my_batch_norm_layer(logits_before_bn)

In [54]:
reset_graph()

batch_norm_momentum = 0.9

X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(dtype=tf.int64, shape=None, name="y")  # unspecified shape
# `training` is False unless a feed_dict overrides it.
training = tf.placeholder_with_default(input=False, shape=(), name="training")

with tf.name_scope("dnn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()

    # Shared keyword arguments are bound once with partial().
    my_batch_norm_layer = partial(
        tf.layers.batch_normalization,
        training=training,
        momentum=batch_norm_momentum,
    )
    my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init)

    # NOTE: despite their names, bn1/bn2 hold the ELU-activated batch-norm
    # output of each hidden layer.
    hidden1 = my_dense_layer(inputs=X, units=n_hidden1, name="hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(inputs=bn1, units=n_hidden2, name="hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(inputs=bn2, units=n_outputs,
                                      name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    loss = tf.reduce_mean(input_tensor=xentropy)

with tf.name_scope("train"):
    # `learning_rate` is not assigned in this cell; it comes from an earlier
    # cell.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss=loss)

with tf.name_scope("eval"):
    # Correct when the label matches the top-1 logit.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
    


Training the previous model

In [55]:
# Training setup: variable initializer, checkpoint saver, hyperparameters.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs, batch_size = 20, 200

# Ops collected under UPDATE_OPS; the training loop fetches them together
# with the training op (presumably the batch-norm statistics updates).
extra_update_ops = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS)

In [60]:
# Number of mini-batches per epoch (any remainder is dropped).
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # `training` is fed as True only for training steps; the
            # evaluation below leaves it at its default of False.
            train_feed = {training: True, X: X_batch, y: y_batch}
            sess.run([training_op, extra_update_ops], feed_dict=train_feed)
        test_feed = {X: mnist.test.images, y: mnist.test.labels}
        accuracy_val = accuracy.eval(feed_dict=test_feed)
        print(epoch, "Test accuracy: ", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")
            
            
    

0 Test accuracy:  0.8724
1 Test accuracy:  0.8984
2 Test accuracy:  0.9122
3 Test accuracy:  0.9212
4 Test accuracy:  0.929
5 Test accuracy:  0.934
6 Test accuracy:  0.9384
7 Test accuracy:  0.9418
8 Test accuracy:  0.9453
9 Test accuracy:  0.9482
10 Test accuracy:  0.95
11 Test accuracy:  0.9525
12 Test accuracy:  0.9541
13 Test accuracy:  0.9559
14 Test accuracy:  0.9574
15 Test accuracy:  0.9588
16 Test accuracy:  0.9604
17 Test accuracy:  0.9606
18 Test accuracy:  0.9627
19 Test accuracy:  0.9633


# Gradient clipping


In [84]:
# MNIST network with three more hidden layers (five in total).
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = n_hidden3 = n_hidden4 = n_hidden5 = 50
n_outputs = 10

learning_rate = 0.01

# Bound used for gradient clipping.
threshold = 1.0

In [91]:
reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

# Five ReLU hidden layers followed by a linear output layer (raw logits).
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy)

# Kept outside any name scope: a later cell looks the training op up by the
# bare name "GradientDescent".
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
# Clip every gradient element-wise to [-threshold, threshold].
# compute_gradients() may pair a variable with a None gradient when the loss
# does not depend on it; tf.clip_by_value cannot take None, so skip those.
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars
              if grad is not None]
training_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    # The explicit name makes this tensor retrievable as "eval/accuracy:0".
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
 

In [92]:
# Training setup: variable initializer, checkpoint saver, hyperparameters.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs, batch_size = 20, 200

In [94]:
# Train the gradient-clipped network, printing test-set accuracy each epoch.
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        test_feed = {X: mnist.test.images, y: mnist.test.labels}
        accuracy_val = accuracy.eval(feed_dict=test_feed)
        print(epoch, "Accuracy: ", accuracy_val)
    # The model-reuse cells load this checkpoint and its .meta file.
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Accuracy:  0.3056
1 Accuracy:  0.7972
2 Accuracy:  0.8829
3 Accuracy:  0.9018
4 Accuracy:  0.9134
5 Accuracy:  0.9198
6 Accuracy:  0.9235
7 Accuracy:  0.9287
8 Accuracy:  0.9354
9 Accuracy:  0.9394
10 Accuracy:  0.9405
11 Accuracy:  0.9441
12 Accuracy:  0.9482
13 Accuracy:  0.9489
14 Accuracy:  0.9508
15 Accuracy:  0.9524
16 Accuracy:  0.9541
17 Accuracy:  0.956
18 Accuracy:  0.9589
19 Accuracy:  0.9593


# Reusing a TensorFlow model

In [103]:
reset_graph()

# Load the saved graph definition from the checkpoint's .meta file; the
# returned Saver is used afterwards to restore the variable values.
saver = tf.train.import_meta_graph(
    meta_graph_or_file="./my_model_final.ckpt.meta")

In [104]:
# Fetch the nodes needed for further training by their names in the
# imported graph.
graph = tf.get_default_graph()

X = graph.get_tensor_by_name("X:0")
y = graph.get_tensor_by_name("y:0")

accuracy = graph.get_tensor_by_name("eval/accuracy:0")
training_op = graph.get_operation_by_name("GradientDescent")

In [105]:
with tf.Session() as sess:
    # Load the saved variable values into this session.
    saver.restore(sess, save_path="./my_model_final.ckpt")
    # continue training the model...

In [106]:
# Number of mini-batches per epoch (any remainder is dropped).
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    # Start from the saved variable values instead of running an initializer.
    saver.restore(sess, "./my_model_final.ckpt")

    for epoch in range(n_epochs):
        for _ in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        test_feed = {X: mnist.test.images, y: mnist.test.labels}
        accuracy_val = accuracy.eval(feed_dict=test_feed)
        print(epoch, "Test accuracy:", accuracy_val)

    # Saved under a new name, so the original checkpoint is left untouched.
    save_path = saver.save(sess, "./my_new_model_final.ckpt")

0 Test accuracy: 0.9587
1 Test accuracy: 0.9603
2 Test accuracy: 0.9621
3 Test accuracy: 0.9625
4 Test accuracy: 0.9629
5 Test accuracy: 0.9649
6 Test accuracy: 0.9617
7 Test accuracy: 0.9635
8 Test accuracy: 0.9669
9 Test accuracy: 0.9666
10 Test accuracy: 0.9665
11 Test accuracy: 0.9666
12 Test accuracy: 0.9683
13 Test accuracy: 0.9684
14 Test accuracy: 0.9693
15 Test accuracy: 0.9683
16 Test accuracy: 0.9695
17 Test accuracy: 0.9701
18 Test accuracy: 0.97
19 Test accuracy: 0.9696
