# Hands On ML Chapter 10 - Introduction to Artificial Neural Networks

In [1]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score

In [2]:
mnist = fetch_mldata('MNIST original')

In [3]:
X, y = mnist['data'], mnist['target']
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]


In [4]:
y_train, X_train

(array([ 0.,  0.,  0., ...,  9.,  9.,  9.]), array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8))

In [5]:
scaler = StandardScaler()
scaler.fit_transform(X_train)
scaler.fit_transform(X_test)
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(int)
y_test = y_test.astype(int)



## Multi-Layer Perceptron DNN with High-Level API

DNN with:
* two hidden layers
* first layer with 300 neurons
* second layer with 100 neurons
* ouput with 10 neurons

In [6]:
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units = [300, 100], n_classes = 10, feature_columns = feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)
#dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002093ACB7080>, '_environment': 'local', '_tf_random_seed': None, '_task_id': 0, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 0, '_save_checkpoints_secs': 600, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_task_type': None, '_master': '', '_model_dir': 'C:\\Users\\Kuba\\AppData\\Local\\Temp\\tmpdmu6rguv', '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_is_chief': True, '_session_config': None}


In [7]:
dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\Kuba\AppData\Local\Temp\tmpdmu6rguv\model.ckpt.
INFO:tensorflow:step = 1, loss = 152.252
INFO:tensorflow:global_step/sec: 101.426
INFO:tensorflow:step = 101, loss = 1.60483 (1.061 sec)
INFO:tensorflow:global_step/sec: 90.2048
INFO:tensorflow:step = 201, loss = 1.60542 (1.055 sec)
INFO:tensorflow:global_step/sec: 99.8255
INFO:tensorflow:step = 301, loss = 1.05549 (1.005 sec)
INFO:tensorflow:global_step/sec: 104.815
INFO:tensorflow:step = 401, loss = 1.4741 (0.953 sec)
INFO:tensorflow:global_step/sec: 103.95
INFO:tensorflow:step = 501, loss = 1.04918 (0.952 sec)
INFO:tensorflow:global_step/sec: 105.266
INFO:tensorflow:step = 601, loss = 0.930143 (0.958 sec)
INFO:tensorflow:global_step/sec: 95.5942
INFO:tensorflow:step = 701, loss = 0.950582 (1.043 sec)
INFO:tensorflow:global_step/sec: 96.2194
INFO:tensorflow:step = 801, lo

SKCompat()

In [8]:
y_pred = dnn_clf.predict(X_test)
accuracy_score(y_test, y_pred['classes'])

INFO:tensorflow:Restoring parameters from C:\Users\Kuba\AppData\Local\Temp\tmpdmu6rguv\model.ckpt-40000


0.94089999999999996

## Training a DNN Using Plain TensorFlow

Lets create Mini-batch Gradient Descent model to train it on MNIST dataset.
* First step - construction phase - building graph.
* Second step - execution phase - run the graph to train the model.

### Construction Phase

Two hidden layers with 300 and 100 neurons, output with 10 neurons. 

In [9]:
n_inputs = 28*28 # MNIST size of picture
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X's shape is matrix with instances along first dimension and features along second dimension, so we know second parameter of the shape - 28x28 per instance, but we dont know how many instances we will take to single batch, so the first is None. Same for y, but in 1-dimnesional case - only instances with 1 feature per instance.

In [10]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

Now let's create the actual neural network.
* X will be the input layer, it will take one training batch at a time
* the two hidden layers differ only by the inputs they are connected to and number of neurons they contain
* the output layer is also very similiar but it uses softmax instead of ReLU activation function

neuron_layer() will create one layer at the time

In [11]:
def neuron_layer(X, n_neurons, name, activation = None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev = stddev)
        W = tf.Variable(init, name = "kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name = "bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [12]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

Actually there is no need to create your own neuron_layer() method, because tensorflow provides one:

In [13]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

So far we have neural network. Now we have to define cost function that we will use to train it. (Cost function - you want it to minimize, it can be sum of squared errors over your training set). We will use cross entropy. TensorFlow provides several functions to compute cross entropy. We will use sparse_soft_max_cross_entropy_with_logits()

In [14]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

We have neural network, we have cost function, so now we need GradientDescentOptimizer to choose model parameters that minimize the cos function.

In [24]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss) #training operation

Last step in construction phase is to evaluate the model. We will use accuracy metric.

In [18]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [19]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution Phase

In [20]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [21]:
n_epochs = 40
batch_size = 50

The code below:
* firstly runs init.run() to initalize all variables
* at each epoch iterates through number of minibatches, then it runs training operation
* at the end of each epoch the code evaluates modelon the last mini-batch and on the full test set
* finally model parameters are saved to disk

In [23]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train accuracy: ", acc_train, "Test accuracy:", acc_test)
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Train accuracy:  0.96 Test accuracy: 0.9035
1 Train accuracy:  0.96 Test accuracy: 0.9213
2 Train accuracy:  0.92 Test accuracy: 0.9308
3 Train accuracy:  0.92 Test accuracy: 0.9391
4 Train accuracy:  0.98 Test accuracy: 0.9418
5 Train accuracy:  0.98 Test accuracy: 0.9469
6 Train accuracy:  0.98 Test accuracy: 0.9495
7 Train accuracy:  0.98 Test accuracy: 0.9534
8 Train accuracy:  0.96 Test accuracy: 0.955
9 Train accuracy:  0.98 Test accuracy: 0.9577
10 Train accuracy:  0.96 Test accuracy: 0.959
11 Train accuracy:  1.0 Test accuracy: 0.9617
12 Train accuracy:  0.96 Test accuracy: 0.964
13 Train accuracy:  0.96 Test accuracy: 0.9654
14 Train accuracy:  1.0 Test accuracy: 0.9662
15 Train accuracy:  0.98 Test accuracy: 0.9672
16 Train accuracy:  1.0 Test accuracy: 0.9695
17 Train accuracy:  1.0 Test accuracy: 0.9699
18 Train accuracy:  0.98 Test accuracy: 0.9709
19 Train accuracy:  1.0 Test accuracy: 0.9705
20 Train accuracy:  0.96 Test accuracy: 0.9719
21 Train accuracy:  0.96 Test a

### Using the Neural Network to predict new images

The code below:
* restore checkpoint with model parameters
* provide new images (they should be prescaled before)
* then the code evaluates the logits node
* choose predictions

In [25]:
#with tf.Session as sess:
    #saver.restore(sess, "./my_model_final_ckpt")
    #X_new_scaled = [...]
    #Z = logits.eval(feed_dict={X: X_new_scaled})
    #y_pred = np.argmax(Z, axis=1)
