# Chapter 10 Introduction to Artificial Neural Networks

## From Biological to Artificial Neurons

### Biological Neurons

### Logical Computations with Neurons

### The Perceptron

### Multi-Layer Perceptron and Backpropagation

## Training an MLP with TensorFlow's High-Level API

In [2]:
import tensorflow as tf

# Load MNIST through Keras (the recorded run downloaded mnist.npz) and unpack
# the (images, labels) pairs of the training and test splits.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [6]:
# Show the raw training-image shape and the test-label shape, one per line.
print(X_train.shape, y_test.shape, sep="\n")

(60000, 28, 28)
(10000,)


In [8]:
import numpy as np
# Cast the images to float32, flatten each one to a 28*28-long row and divide
# the pixel values by 255; both splits go through the exact same steps.
X_train, X_test = (
    images.astype(np.float32).reshape(-1, 28*28) / 255.0
    for images in (X_train, X_test)
)

# Labels become int32.
y_train, y_test = (labels.astype(np.int32) for labels in (y_train, y_test))

# Hold out the first 5000 training examples as a validation set; the rest
# remain the training set.
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [9]:
# Sanity check: (number of training rows, flattened feature width).
X_train.shape

(55000, 784)

In [19]:
# A single numeric feature named "X" that carries all 28*28 pixel values.
feature_cols = [tf.feature_column.numeric_column("X", shape=[28*28])]

# Canned classifier: hidden layers of 300 and 100 units, 10 output classes.
dnn_clf = tf.estimator.DNNClassifier(
    hidden_units=[300, 100],
    n_classes=10,
    feature_columns=feature_cols,
)

# Feed the in-memory arrays in shuffled mini-batches of 25 for 10 epochs.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    num_epochs=10,
    batch_size=25,
    shuffle=True,
)
dnn_clf.train(input_fn=input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_task_type': 'worker', '_model_dir': 'C:\\Users\\rumusan\\AppData\\Local\\Temp\\tmppwe22uk4', '_service': None, '_master': '', '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_is_chief': True, '_save_summary_steps': 100, '_task_id': 0, '_global_id_in_cluster': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000026AEECC39E8>, '_save_checkpoints_steps': None, '_device_fn': None, '_train_distribute': None, '_tf_random_seed': None, '_evaluation_master': '', '_num_worker_replicas': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 i

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x26aeecc37f0>

In [20]:
# Input function over the test split, kept in its stored order (no shuffling).
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-21-07:59:23
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\rumusan\AppData\Local\Temp\tmppwe22uk4\model.ckpt-22000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-21-07:59:24
INFO:tensorflow:Saving dict for global step 22000: accuracy = 0.9796, average_loss = 0.07330247, global_step = 22000, loss = 9.278794
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 22000: C:\Users\rumusan\AppData\Local\Temp\tmppwe22uk4\model.ckpt-22000


In [21]:
# Display what evaluate() returned; the recorded run shows a dict with
# accuracy, average_loss, global_step and loss.
eval_results

{'accuracy': 0.9796,
 'average_loss': 0.07330247,
 'global_step': 22000,
 'loss': 9.278794}

In [22]:
# Collect everything predict() produces into a list so that individual
# predictions can be indexed.
y_pred_iter = dnn_clf.predict(input_fn=test_input_fn)
y_pred = list(y_pred_iter)
# Prediction for the first test example; in the recorded output it is a dict
# with class_ids, classes, logits and probabilities.
y_pred[0]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\rumusan\AppData\Local\Temp\tmppwe22uk4\model.ckpt-22000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'class_ids': array([7], dtype=int64),
 'classes': array([b'7'], dtype=object),
 'logits': array([ -5.696324  ,  -0.65812564,  -0.5697815 ,   4.636589  ,
         -9.971786  ,  -0.6450563 , -24.364986  ,  18.083654  ,
         -2.2112381 ,   2.4903064 ], dtype=float32),
 'probabilities': array([4.7042044e-11, 7.2535062e-09, 7.9234574e-09, 1.4454831e-06,
        6.5414942e-13, 7.3489255e-09, 3.6710446e-19, 9.9999845e-01,
        1.5347554e-09, 1.6900304e-07], dtype=float32)}

## Training a DNN Using Plain TensorFlow

### Construction Phase

In [23]:
# Start the plain-TensorFlow model from a fresh default graph.
tf.reset_default_graph()

In [26]:
# Layer widths: 28*28 input pixels -> 300 -> 100 hidden units -> 10 outputs.
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

# Graph inputs, supplied at run time through feed_dict. The leading None in
# X's shape leaves the batch size open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# NOTE(review): `(None)` is plain `None`, not the 1-tuple `(None,)`, so this
# placeholder's shape is left unspecified rather than declared as a vector of
# any length -- confirm that is intended.
y = tf.placeholder(tf.int64, shape=(None), name="y")

def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer, ``activation(X @ W + b)``, in its own name scope.

    Args:
        X: 2-D input tensor. Its second dimension must be statically known
            because it is read to size the weight matrix.
        n_neurons: Number of units in the layer (width of the output).
        name: Name scope under which the layer's ops are created.
        activation: ``None`` for a linear layer, the string ``"relu"``, or any
            callable mapping a tensor to a tensor (for example ``tf.nn.relu``).

    Returns:
        The layer's output tensor.

    Raises:
        ValueError: If ``activation`` is not ``None``, ``"relu"`` or a callable.
            Such values used to be ignored silently, producing a linear layer.
    """
    # Validate first so a bad argument does not leave half-built ops in the graph.
    if activation is not None and activation != "relu" and not callable(activation):
        raise ValueError(
            "activation must be None, 'relu' or a callable, got {!r}".format(activation)
        )

    with tf.name_scope(name):
        # Named fan_in (not n_inputs) to avoid shadowing the notebook-level n_inputs.
        fan_in = int(X.get_shape()[1])
        # Random (non-constant) initial weights; the spread shrinks as fan-in grows.
        stddev = 2 / np.sqrt(fan_in)
        init = tf.truncated_normal((fan_in, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        z = tf.matmul(X, W) + b

        if activation is None:
            return z
        if callable(activation):
            return activation(z)
        # Only the string "relu" can reach this point.
        return tf.nn.relu(z)

# Stack the network: two ReLU hidden layers, then a linear layer that emits
# one logit per class.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    logits = neuron_layer(hidden2, n_outputs, "outputs")
    
# Cross-entropy computed from the raw logits and the labels in y, then
# averaged into a single scalar loss.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

# Plain gradient descent; running training_op performs one update step.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Accuracy: fraction of examples whose label is the top-1 logit.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
# Created after every variable above exists so that both cover all of them.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution Phase

In [32]:
# Number of passes over the training set, and examples per mini-batch.
n_epochs = 10
batch_size = 25

def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every example exactly once.

    The examples are permuted with NumPy's global random state and split into
    ``len(X) // batch_size`` nearly equal parts, so a batch may hold slightly
    more than ``batch_size`` examples when the sizes do not divide evenly.

    Args:
        X: Array of examples, indexable by an integer index array.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of examples per batch; must be positive.

    Yields:
        ``(X_batch, y_batch)`` pairs. Nothing is yielded for empty input.

    Raises:
        ValueError: If ``batch_size`` is not positive (raised on first iteration).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {!r}".format(batch_size))

    n_samples = len(X)
    if n_samples == 0:
        return

    rnd_idx = np.random.permutation(n_samples)
    # With fewer examples than batch_size the floor division is 0, which
    # np.array_split rejects; fall back to a single batch holding everything.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [34]:
# Train for n_epochs passes. After each pass, report accuracy on the last
# mini-batch seen and on the validation set, then checkpoint the final weights.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # X_batch / y_batch still hold the final mini-batch of this epoch.
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    save_path = saver.save(sess, "./mymodel/my_model_final.ckpt")

0 Batch accuracy: 1.0 Val accuracy: 0.9362
1 Batch accuracy: 1.0 Val accuracy: 0.9532
2 Batch accuracy: 1.0 Val accuracy: 0.9572
3 Batch accuracy: 1.0 Val accuracy: 0.9634
4 Batch accuracy: 1.0 Val accuracy: 0.9654
5 Batch accuracy: 1.0 Val accuracy: 0.9684
6 Batch accuracy: 0.96 Val accuracy: 0.969
7 Batch accuracy: 1.0 Val accuracy: 0.9724
8 Batch accuracy: 1.0 Val accuracy: 0.9716
9 Batch accuracy: 1.0 Val accuracy: 0.9724


### Using the Neural Network

In [35]:
# Reload the saved weights and classify the first 20 test images by taking
# the index of the largest logit in each row.
with tf.Session() as sess:
    saver.restore(sess, "./mymodel/my_model_final.ckpt")
    X_new_scaled = X_test[:20]
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    y_pred = Z.argmax(axis=1)

INFO:tensorflow:Restoring parameters from ./mymodel/my_model_final.ckpt


In [39]:
# Predicted classes on the first line, true labels of the same 20 images below.
print(y_pred, y_test[:20], sep="\n")

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Fine-Tuning Neural Network Hyperparameters

In [45]:
# Start from a fresh default graph before rebuilding the model.
tf.reset_default_graph()

In [46]:
# Layer widths: 28*28 input pixels -> 300 -> 100 hidden units -> 10 outputs.
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100 
n_outputs = 10

# Graph inputs, supplied at run time through feed_dict. The leading None in
# X's shape leaves the batch size open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# NOTE(review): `(None)` is plain `None`, not the 1-tuple `(None,)`, so this
# placeholder's shape is left unspecified -- confirm that is intended.
y = tf.placeholder(tf.int32, shape=(None), name="y")

# Same architecture as before, built with tf.layers.dense instead of a
# hand-written layer function.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation = tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation = tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    # Class probabilities for inspection; the loss below uses the raw logits.
    y_proba = tf.nn.softmax(logits)

# Cross-entropy computed from the raw logits and the labels in y, then
# averaged into a single scalar loss.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

# Plain gradient descent; running training_op performs one update step.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Accuracy: fraction of examples whose label is the top-1 logit.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits,y,1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
# Created after every variable above exists so that both cover all of them.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Number of passes over the training set.
n_epochs = 10
# Examples per mini-batch, read by the training loop as `batch_size`. It is
# defined here so the cell runs on a fresh kernel instead of relying on a
# `batch_size` left over from an earlier cell.
batch_size = 25

def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every example exactly once.

    The examples are permuted with NumPy's global random state and split into
    ``len(X) // batch_size`` nearly equal parts, so a batch may hold slightly
    more than ``batch_size`` examples when the sizes do not divide evenly.

    Args:
        X: Array of examples, indexable by an integer index array.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of examples per batch; must be positive.

    Yields:
        ``(X_batch, y_batch)`` pairs. Nothing is yielded for empty input.

    Raises:
        ValueError: If ``batch_size`` is not positive (raised on first iteration).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {!r}".format(batch_size))

    n_samples = len(X)
    if n_samples == 0:
        return

    rnd_idx = np.random.permutation(n_samples)
    # With fewer examples than batch_size the floor division is 0, which
    # np.array_split rejects; fall back to a single batch holding everything.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

# Train for n_epochs passes. After each pass, report accuracy on the last
# mini-batch seen and on the validation set, then checkpoint the final weights.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # X_batch / y_batch still hold the final mini-batch of this epoch.
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_valid = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Valid accuracy:", acc_valid)

    save_path = saver.save(sess, "./mymodel/my_model_final2.ckpt")

0 Batch accuracy: 0.92 Valid accuracy: 0.9234
1 Batch accuracy: 1.0 Valid accuracy: 0.9408
2 Batch accuracy: 0.92 Valid accuracy: 0.95
3 Batch accuracy: 1.0 Valid accuracy: 0.959
4 Batch accuracy: 0.96 Valid accuracy: 0.9606
5 Batch accuracy: 1.0 Valid accuracy: 0.9668
6 Batch accuracy: 0.96 Valid accuracy: 0.9684
7 Batch accuracy: 0.96 Valid accuracy: 0.9688
8 Batch accuracy: 1.0 Valid accuracy: 0.9712
9 Batch accuracy: 1.0 Valid accuracy: 0.973
