# Classify digits of MNIST database
These experiments were inspired by *Hands-On Machine Learning with Scikit-Learn and TensorFlow* by Aurélien Géron.

### Approach 1: Using the Estimator API (formerly tf.contrib.learn)
Here, I use TensorFlow's Estimator API to do a quick check of how well the data and an off-the-shelf classifier work together.

In [2]:
import tensorflow as tf
import numpy as np
# Load the MNIST train and test splits, each unpacked as an (inputs, labels) pair.
(
    (X_train, y_train),
    (X_test, y_test),
) = tf.keras.datasets.mnist.load_data()

  from ._conv import register_converters as _register_converters


255.0


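Flatten each image into a 784-element vector, scale the pixel values to the range [0, 1], convert inputs and labels to the appropriate dtypes, and hold out the first 5,000 training examples as a validation set.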

In [3]:
# NOTE: this cell rebinds X_train / y_train, so it is not idempotent --
# reload the raw data before running it a second time.

# Flatten every image to a 28*28 = 784 long float32 row and divide by 255.0.
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0

# Labels are stored as int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# The first 5000 training rows become the validation set; the rest stay
# in the training set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

Here, I use TensorFlow's `tf.estimator.DNNClassifier` class.

In [None]:
# A single numeric feature named "X" holding the 28*28 flattened pixel values.
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]

# Classifier with two hidden layers (300 and 100 units) and 10 output classes.
dnn_clf = tf.estimator.DNNClassifier(
    hidden_units=[300, 100],
    n_classes=10,
    feature_columns=feature_cols,
)

# Training input: shuffled mini-batches of 50 examples, 40 passes over the data.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    num_epochs=40,
    batch_size=50,
    shuffle=True,
)
dnn_clf.train(input_fn=input_fn)

Evaluate the classifier and generate predictions on the validation data.

In [None]:
# Unshuffled input function over the held-out validation arrays.
input_fun_test = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_valid},
    y=y_valid,
    shuffle=False,
)

# Keep whatever evaluate() returns in `evalu` for later inspection.
evalu = dnn_clf.evaluate(input_fn=input_fun_test)

In [None]:
# Materialise the predictions into a list so individual entries can be indexed.
y_pred_iter = dnn_clf.predict(input_fn=input_fun_test)
y_pred = [prediction for prediction in y_pred_iter]

# Show the prediction for the first validation example.
y_pred[0]

### Approach 2: Using plain TensorFlow

In [6]:
import tensorflow as tf
import numpy as np
# Layer sizes: 28*28 MNIST inputs -> 300 -> 100 -> 10 outputs.
n_inputs, n_hidden1, n_hidden2, n_outputs = 28 * 28, 300, 100, 10

# Discard whatever the default graph currently contains.
tf.reset_default_graph()

# Reload the raw data, then flatten, rescale and retype it.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# First 5000 training rows form the validation set, the remainder is trained on.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [None]:
# Graph inputs: a batch of flattened 28*28 inputs and their integer labels.
# The leading dimension is None, so any batch size can be fed.
X = tf.placeholder(tf.float32, shape=[None, 28*28])
y = tf.placeholder(tf.int32, shape=[None])

# Trainable parameters of the three layers. Each weight matrix is drawn from
# a normal distribution with stddev 2 / sqrt(fan_in + fan_out); every bias
# vector starts at zero.
with tf.name_scope('my_variables'):
    w1 = tf.Variable(initial_value=tf.random_normal([n_inputs, n_hidden1], stddev=2/np.sqrt(n_inputs + n_hidden1)), dtype=tf.float32)
    w2 = tf.Variable(initial_value=tf.random_normal([n_hidden1, n_hidden2], stddev=2/np.sqrt(n_hidden1 + n_hidden2)), dtype=tf.float32)
    w3 = tf.Variable(initial_value=tf.random_normal([n_hidden2, n_outputs], stddev=2/np.sqrt(n_outputs + n_hidden2)), dtype=tf.float32)
    b1= tf.Variable(np.zeros(shape=[n_hidden1]), dtype=tf.float32)
    b2 = tf.Variable(np.zeros([n_hidden2]), dtype=tf.float32)
    b3 = tf.Variable(np.zeros([n_outputs]), dtype=tf.float32)

# Forward pass: two ReLU hidden layers, then a linear output layer whose
# raw scores (logits) are passed to the loss without a softmax.
with tf.name_scope("H_values"):
    h1 = tf.nn.relu(tf.matmul(X, w1) + b1)
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
    logits = tf.matmul(h2, w3) + b3
    
# Loss: per-example sparse softmax cross-entropy, averaged over the batch.
with tf.name_scope("Loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy)
    
# Training op: plain gradient descent with learning rate 0.01.
with tf.name_scope('GDescent'):
    gD = tf.train.GradientDescentOptimizer(.01)
    oper = gD.minimize(loss)
    init = tf.global_variables_initializer()
    # NOTE(review): `saver` is created here but not used in the rest of this cell.
    saver = tf.train.Saver()
    
n_epochs = 40
batch_size = 50


def shuffle_batch(X, y):
    """Yield randomly shuffled mini-batches that cover ``X`` and ``y`` once.

    The row indices are permuted with ``np.random.permutation`` and split
    into ``len(X) // batch_size`` nearly equal chunks. The module-level
    ``batch_size`` is read at call time. Because the split is by number of
    chunks, a batch can be slightly larger than ``batch_size`` when
    ``len(X)`` is not an exact multiple of it.

    Inputs shorter than ``batch_size`` are yielded as one single batch;
    asking ``np.array_split`` for 0 sections would raise ``ValueError``.
    Empty inputs yield nothing.

    Args:
        X: array of examples, indexable by an integer index array on axis 0.
        y: array of labels with the same length as ``X``.

    Yields:
        ``(X_batch, y_batch)`` tuples holding the same rows of ``X`` and ``y``.
    """
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_index = np.random.permutation(n_samples)
    # Always request at least one section so short inputs still form a batch.
    n_batches = max(1, n_samples // batch_size)
    for index in np.array_split(rnd_index, n_batches):
        yield X[index], y[index]
    
# Accuracy: fraction of examples whose true label is the top-1 prediction.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    sess.run(init)
    # Use the n_epochs setting rather than repeating the literal 40 here,
    # so changing the setting actually changes the training length.
    for i in range(n_epochs):
        # One pass over the training set in shuffled mini-batches.
        for X_batch, y_batch in shuffle_batch(X_train, y_train):
            sess.run(oper, feed_dict={X: X_batch, y: y_batch})
        # Report the loss over the full training set every 10th epoch.
        if i % 10 == 0:
            print("loss ", loss.eval(feed_dict={X: X_train, y: y_train}))
        # acc_batch is measured on the last mini-batch of the epoch only,
        # acc_val on the whole validation set.
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(i, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

In [7]:
from datetime import datetime
import os
def log_dir(prefix=""):
    """Return a timestamped log directory path below ``tf_logs``.

    The result has the form ``tf_logs/<prefix>-run-<YYYYmmddHHMMSS>/``;
    when ``prefix`` is empty the leading ``<prefix>-`` part is omitted.
    The timestamp is the current UTC time.
    """
    timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    separator = "-" if prefix else ""
    run_name = prefix + separator + "run-" + timestamp
    # The trailing empty element produces the final "/".
    return "/".join(["tf_logs", run_name, ""])


# Log directory used by the TensorBoard summaries of the MNIST DNN run.
logdir = log_dir("mnist_dnn")


In [11]:
# Start from an empty default graph before rebuilding the model.
tf.reset_default_graph()
# Graph inputs: a batch of flattened 28*28 inputs and their integer labels.
X = tf.placeholder(tf.float32, shape=[None, 28*28])
y = tf.placeholder(tf.int32, shape=[None])

# Same 2-hidden-layer network as before, now built with tf.layers.dense:
# two ReLU layers followed by an output layer with no activation (logits).
with tf.name_scope('dnn'):
    h1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    h2 = tf.layers.dense(h1, n_hidden2, name="hidden2",activation=tf.nn.relu )
    logits = tf.layers.dense(h2, n_outputs, name="outputs")
    
# Loss: mean sparse softmax cross-entropy, also exported as the scalar
# summary 'log_loss'.
with tf.name_scope("Loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy)
    loss_summary = tf.summary.scalar('log_loss', loss)
    
# Training op: plain gradient descent with learning rate 0.01, plus the
# variable initializer and the Saver used for checkpoints.
with tf.name_scope('GDescent'):
    gD = tf.train.GradientDescentOptimizer(.01)
    oper = gD.minimize(loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    
n_epochs = 40
batch_size = 50


def shuffle_batch(X, y):
    """Yield randomly shuffled mini-batches that cover ``X`` and ``y`` once.

    The row indices are permuted with ``np.random.permutation`` and split
    into ``len(X) // batch_size`` nearly equal chunks. The module-level
    ``batch_size`` is read at call time. Because the split is by number of
    chunks, a batch can be slightly larger than ``batch_size`` when
    ``len(X)`` is not an exact multiple of it.

    Inputs shorter than ``batch_size`` are yielded as one single batch;
    asking ``np.array_split`` for 0 sections would raise ``ValueError``.
    Empty inputs yield nothing.

    Args:
        X: array of examples, indexable by an integer index array on axis 0.
        y: array of labels with the same length as ``X``.

    Yields:
        ``(X_batch, y_batch)`` tuples holding the same rows of ``X`` and ``y``.
    """
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_index = np.random.permutation(n_samples)
    # Always request at least one section so short inputs still form a batch.
    n_batches = max(1, n_samples // batch_size)
    for index in np.array_split(rnd_index, n_batches):
        yield X[index], y[index]
    
# Accuracy: fraction of examples whose true label is the top-1 prediction,
# also exported as the scalar summary 'accuracy'.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

# Writes the graph definition and the scalar summaries under `logdir`.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

# Checkpoint written periodically during training, plus a small side file
# that records the epoch to resume from.
checkpoint_path = "/tmp/my_deep_mnist_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
# Location of the model saved once training ends.
final_model_path = "./my_deep_mnist_model"

# Early-stopping bookkeeping. np.inf is used instead of the np.infty alias,
# which no longer exists in NumPy 2.0.
best_loss = np.inf
epochs_without_progress = 0
# NOTE(review): with n_epochs = 40 and a progress check every 5 epochs the
# counter appears to top out at 35, so this threshold is never exceeded --
# confirm whether a smaller value was intended.
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # A marker file from an earlier run exists: restore the checkpoint
        # and continue from the epoch number stored in the marker.
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Training was interrupted. Continuing at epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for i in range(start_epoch, n_epochs):
        # One pass over the training set in shuffled mini-batches.
        for X_batch, y_batch in shuffle_batch(X_train, y_train):
            sess.run(oper, feed_dict={X: X_batch, y: y_batch})

        # Record loss and accuracy over the full training set for TensorBoard.
        loss_summary_str, accuracy_summary_str = sess.run(
            [loss_summary, accuracy_summary],
            feed_dict={X: X_train, y: y_train})
        file_writer.add_summary(accuracy_summary_str, i)
        file_writer.add_summary(loss_summary_str, i)

        # acc_batch is measured on the last mini-batch of the epoch only,
        # acc_val on the whole validation set.
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(i, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

        if i % 5 == 0:
            loss_f = loss.eval(feed_dict={X: X_train, y: y_train})
            print("loss ", loss_f)
            # Checkpoint the weights and note the epoch to resume from.
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, 'wb') as f:
                f.write(b"%d" % (i + 1))
            if loss_f < best_loss:
                best_loss = loss_f
                # Progress was made: restart the patience counter so that
                # it only counts consecutive checks without improvement.
                epochs_without_progress = 0
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    break

    saver.save(sess, final_model_path)
    # Training ran to completion (or stopped early): drop the resume marker
    # so the next run starts fresh instead of "resuming" a finished run.
    if os.path.isfile(checkpoint_epoch_path):
        os.remove(checkpoint_epoch_path)

# Flush pending summaries to disk and release the event file.
file_writer.close()

Training was interrupted. Continuing at epoch 6
INFO:tensorflow:Restoring parameters from /tmp/my_deep_mnist_model.ckpt
6 Batch accuracy: 0.96 Val accuracy: 0.9534
7 Batch accuracy: 0.96 Val accuracy: 0.9588
8 Batch accuracy: 0.94 Val accuracy: 0.9592
9 Batch accuracy: 0.98 Val accuracy: 0.9622
10 Batch accuracy: 0.96 Val accuracy: 0.963
loss  0.12588255
11 Batch accuracy: 0.96 Val accuracy: 0.9634
12 Batch accuracy: 0.98 Val accuracy: 0.9662
13 Batch accuracy: 1.0 Val accuracy: 0.9674
14 Batch accuracy: 0.94 Val accuracy: 0.9676
15 Batch accuracy: 0.94 Val accuracy: 0.97
loss  0.0943256
16 Batch accuracy: 0.98 Val accuracy: 0.9698
17 Batch accuracy: 1.0 Val accuracy: 0.9694
18 Batch accuracy: 0.98 Val accuracy: 0.9722
19 Batch accuracy: 1.0 Val accuracy: 0.9726
20 Batch accuracy: 1.0 Val accuracy: 0.9732
loss  0.07208039
21 Batch accuracy: 0.98 Val accuracy: 0.9728
22 Batch accuracy: 1.0 Val accuracy: 0.9746
23 Batch accuracy: 0.98 Val accuracy: 0.9744
24 Batch accuracy: 1.0 Val accur