# Hands On ML Chapter 10 - Introduction to Artificial Neural Networks

In [25]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score
from datetime import datetime

In [2]:
mnist = fetch_mldata('MNIST original')

In [3]:
X, y = mnist['data'], mnist['target']
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]


In [4]:
y_train, X_train

(array([ 0.,  0.,  0., ...,  9.,  9.,  9.]), array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8))

In [5]:
scaler = StandardScaler()
scaler.fit_transform(X_train)
scaler.fit_transform(X_test)
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(int)
y_test = y_test.astype(int)



## Multi-Layer Perceptron DNN with High-Level API

DNN with:
* two hidden layers
* first layer with 300 neurons
* second layer with 100 neurons
* ouput with 10 neurons

In [6]:
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units = [300, 100], n_classes = 10, feature_columns = feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)
#dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_master': '', '_save_summary_steps': 100, '_model_dir': 'C:\\Users\\Kuba\\AppData\\Local\\Temp\\tmprx7fg34l', '_task_type': None, '_num_worker_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002B4249C9128>, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_num_ps_replicas': 0, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_environment': 'local', '_task_id': 0, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_is_chief': True, '_evaluation_master': '', '_save_checkpoints_steps': None}


In [7]:
dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\Kuba\AppData\Local\Temp\tmprx7fg34l\model.ckpt.
INFO:tensorflow:loss = 138.974, step = 1
INFO:tensorflow:global_step/sec: 77.0022
INFO:tensorflow:loss = 1.02055, step = 101 (1.321 sec)
INFO:tensorflow:global_step/sec: 82.939
INFO:tensorflow:loss = 0.95958, step = 201 (1.188 sec)
INFO:tensorflow:global_step/sec: 80.0837
INFO:tensorflow:loss = 0.896987, step = 301 (1.244 sec)
INFO:tensorflow:global_step/sec: 83.4631
INFO:tensorflow:loss = 0.770624, step = 401 (1.199 sec)
INFO:tensorflow:global_step/sec: 86.9834
INFO:tensorflow:loss = 0.7128, step = 501 (1.151 sec)
INFO:tensorflow:global_step/sec: 85.5323
INFO:tensorflow:loss = 0.760754, step = 601 (1.168 sec)
INFO:tensorflow:global_step/sec: 79.7031
INFO:tensorflow:loss = 0.409079, step = 701 (1.261 sec)
INFO:tensorflow:global_step/sec: 77.0902
INFO:tensorflow:loss = 0.768

SKCompat()

In [8]:
y_pred = dnn_clf.predict(X_test)
accuracy_score(y_test, y_pred['classes'])

INFO:tensorflow:Restoring parameters from C:\Users\Kuba\AppData\Local\Temp\tmprx7fg34l\model.ckpt-40000


0.94720000000000004

## Training a DNN Using Plain TensorFlow

Lets create Mini-batch Gradient Descent model to train it on MNIST dataset.
* First step - construction phase - building graph.
* Second step - execution phase - run the graph to train the model.

### Construction Phase

Two hidden layers with 300 and 100 neurons, output with 10 neurons. 

In [9]:
n_inputs = 28*28 # MNIST size of picture
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X's shape is matrix with instances along first dimension and features along second dimension, so we know second parameter of the shape - 28x28 per instance, but we dont know how many instances we will take to single batch, so the first is None. Same for y, but in 1-dimnesional case - only instances with 1 feature per instance.

In [10]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

Now let's create the actual neural network.
* X will be the input layer, it will take one training batch at a time
* the two hidden layers differ only by the inputs they are connected to and number of neurons they contain
* the output layer is also very similiar but it uses softmax instead of ReLU activation function

neuron_layer() will create one layer at the time

In [11]:
def neuron_layer(X, n_neurons, name, activation = None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev = stddev)
        W = tf.Variable(init, name = "kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name = "bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [12]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

Actually there is no need to create your own neuron_layer() method, because tensorflow provides one:

In [13]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

So far we have neural network. Now we have to define cost function that we will use to train it. (Cost function - you want it to minimize, it can be sum of squared errors over your training set). We will use cross entropy. TensorFlow provides several functions to compute cross entropy. We will use sparse_soft_max_cross_entropy_with_logits()

In [14]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

We have neural network, we have cost function, so now we need GradientDescentOptimizer to choose model parameters that minimize the cos function.

In [15]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss) #training operation

Last step in construction phase is to evaluate the model. We will use accuracy metric.

In [31]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [32]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution Phase

In [33]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [34]:
n_epochs = 80
batch_size = 50

The code below:
* firstly runs init.run() to initalize all variables
* at each epoch iterates through number of minibatches, then it runs training operation
* at the end of each epoch the code evaluates modelon the last mini-batch and on the full test set
* finally model parameters are saved to disk

In [36]:
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = 'tf_logs'
logdir = "{}/run-{}/".format(root_logdir, now)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_summary_train = accuracy_summary.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        file_writer.add_summary(acc_summary_train, epoch)
        #file_writer.add_summary(summary_str, epoch)
        print(epoch, "Train accuracy: ", acc_train, "Test accuracy:", acc_test)
    save_path = saver.save(sess, "./my_model_final.ckpt")
    
file_writer.close()

0 Train accuracy:  0.92 Test accuracy: 0.9016
1 Train accuracy:  0.9 Test accuracy: 0.9202
2 Train accuracy:  0.98 Test accuracy: 0.9297
3 Train accuracy:  0.96 Test accuracy: 0.9385
4 Train accuracy:  0.96 Test accuracy: 0.9423
5 Train accuracy:  0.98 Test accuracy: 0.9471
6 Train accuracy:  0.96 Test accuracy: 0.9497
7 Train accuracy:  0.94 Test accuracy: 0.9524
8 Train accuracy:  0.96 Test accuracy: 0.9562
9 Train accuracy:  0.92 Test accuracy: 0.957
10 Train accuracy:  1.0 Test accuracy: 0.9591
11 Train accuracy:  0.98 Test accuracy: 0.9616
12 Train accuracy:  1.0 Test accuracy: 0.9647
13 Train accuracy:  0.98 Test accuracy: 0.9655
14 Train accuracy:  1.0 Test accuracy: 0.9665
15 Train accuracy:  0.98 Test accuracy: 0.9678
16 Train accuracy:  0.96 Test accuracy: 0.9676
17 Train accuracy:  1.0 Test accuracy: 0.9706
18 Train accuracy:  0.98 Test accuracy: 0.9703
19 Train accuracy:  0.96 Test accuracy: 0.9723
20 Train accuracy:  1.0 Test accuracy: 0.9707
21 Train accuracy:  0.96 Test 

### Using the Neural Network to predict new images

The code below:
* restore checkpoint with model parameters
* provide new images (they should be prescaled before)
* then the code evaluates the logits node
* choose predictions

In [21]:
#with tf.Session as sess:
    #saver.restore(sess, "./my_model_final_ckpt")
    #X_new_scaled = [...]
    #Z = logits.eval(feed_dict={X: X_new_scaled})
    #y_pred = np.argmax(Z, axis=1)
