# Chapter 10. Introduction to Artificial Neural Networks

## Setup

In [1]:
# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from an empty default TensorFlow graph with fixed random seeds.

    Seeds NumPy's global random generator, replaces TensorFlow's default
    graph with a fresh one, and sets that graph's random seed.

    Note: this cell does not import ``tf``; the name is bound by the
    ``import tensorflow as tf`` in a later cell, which must have run before
    this function is first called.

    Args:
        seed: Integer seed passed to both NumPy and TensorFlow (default 42).
    """
    np.random.seed(seed)
    # The graph seed must be set after the reset so it applies to the new graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# to plot figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

## From Biological to Artificial Neurons

### The Perceptron

In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

# Load the iris dataset and keep two features per sample.
iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length, petal width
# Binary target: 1 where the class label is 0, otherwise 0.
# The builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
y = (iris.target == 0).astype(int)

# Fixed random_state keeps the fitted model the same across runs.
per_clf = Perceptron(max_iter=100, random_state=42)
per_clf.fit(X, y)

# Predict the class of one new sample: petal length 2, petal width 0.5.
y_pred = per_clf.predict([[2, 0.5]])

In [3]:
# Display the Perceptron's predicted class for the sample [2, 0.5].
y_pred

array([1])

## Using the Estimator API (formerly tf.contrib.learn)

Warning: tf.examples.tutorials.mnist is deprecated. We will use tf.keras.datasets.mnist instead. Moreover, the tf.contrib.learn API was promoted to tf.estimator and tf.feature_column, and it has changed considerably. In particular, there is no infer_real_valued_columns_from_input() function or SKCompat class.

In [4]:
import tensorflow as tf

# Download (or load from cache) MNIST as NumPy arrays.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Labels: cast to int32.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# Images: flatten each 28x28 image to a 784-vector, convert to float32 and
# scale the values by 1/255.
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0

# Hold out the first 5000 training samples as a validation set; the rest
# remain the training set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

  from ._conv import register_converters as _register_converters


In [5]:
# Declare a single numeric feature column named "X" of shape [28 * 28],
# i.e. one flattened image per example.
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
# Canned DNN classifier: hidden layers of 300 and 100 units, 10 classes.
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

# Input function over the NumPy training arrays: shuffled batches of 50 for
# 40 epochs. The dict key "X" is the same name as the feature column above.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)
# train() is the cell's last expression, so its return value is displayed
# after the training log.
dnn_clf.train(input_fn=input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/w0/h81z_nvs6tqdsv95jpw4dhc80000gn/T/tmprzdfti27', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a209b6fd0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/w0

INFO:tensorflow:global_step/sec: 349.181
INFO:tensorflow:loss = 1.2447526, step = 7201 (0.287 sec)
INFO:tensorflow:global_step/sec: 338.639
INFO:tensorflow:loss = 0.8249962, step = 7301 (0.295 sec)
INFO:tensorflow:global_step/sec: 338.594
INFO:tensorflow:loss = 2.9231095, step = 7401 (0.295 sec)
INFO:tensorflow:global_step/sec: 341.78
INFO:tensorflow:loss = 0.48147577, step = 7501 (0.293 sec)
INFO:tensorflow:global_step/sec: 278.652
INFO:tensorflow:loss = 0.5042276, step = 7601 (0.360 sec)
INFO:tensorflow:global_step/sec: 305.85
INFO:tensorflow:loss = 0.39853835, step = 7701 (0.326 sec)
INFO:tensorflow:global_step/sec: 340.288
INFO:tensorflow:loss = 1.5872667, step = 7801 (0.294 sec)
INFO:tensorflow:global_step/sec: 278.351
INFO:tensorflow:loss = 4.322463, step = 7901 (0.360 sec)
INFO:tensorflow:global_step/sec: 325.614
INFO:tensorflow:loss = 0.5724811, step = 8001 (0.307 sec)
INFO:tensorflow:global_step/sec: 317.969
INFO:tensorflow:loss = 0.40450358, step = 8101 (0.315 sec)
INFO:tenso

INFO:tensorflow:global_step/sec: 308.021
INFO:tensorflow:loss = 0.47210142, step = 15401 (0.324 sec)
INFO:tensorflow:global_step/sec: 317.397
INFO:tensorflow:loss = 0.10956919, step = 15501 (0.315 sec)
INFO:tensorflow:global_step/sec: 282.993
INFO:tensorflow:loss = 0.9390899, step = 15601 (0.353 sec)
INFO:tensorflow:global_step/sec: 299.894
INFO:tensorflow:loss = 0.21843158, step = 15701 (0.334 sec)
INFO:tensorflow:global_step/sec: 247.995
INFO:tensorflow:loss = 0.15770791, step = 15801 (0.403 sec)
INFO:tensorflow:global_step/sec: 268.259
INFO:tensorflow:loss = 0.080808364, step = 15901 (0.373 sec)
INFO:tensorflow:global_step/sec: 290.713
INFO:tensorflow:loss = 0.08851427, step = 16001 (0.344 sec)
INFO:tensorflow:global_step/sec: 296.857
INFO:tensorflow:loss = 0.38659468, step = 16101 (0.337 sec)
INFO:tensorflow:global_step/sec: 301.524
INFO:tensorflow:loss = 0.13353604, step = 16201 (0.333 sec)
INFO:tensorflow:global_step/sec: 213.164
INFO:tensorflow:loss = 0.014825351, step = 16301 (

INFO:tensorflow:global_step/sec: 187.984
INFO:tensorflow:loss = 0.037446026, step = 23501 (0.530 sec)
INFO:tensorflow:global_step/sec: 192.87
INFO:tensorflow:loss = 0.123848855, step = 23601 (0.519 sec)
INFO:tensorflow:global_step/sec: 164.672
INFO:tensorflow:loss = 0.032126218, step = 23701 (0.608 sec)
INFO:tensorflow:global_step/sec: 161.152
INFO:tensorflow:loss = 0.061387815, step = 23801 (0.621 sec)
INFO:tensorflow:global_step/sec: 122.43
INFO:tensorflow:loss = 0.04207813, step = 23901 (0.817 sec)
INFO:tensorflow:global_step/sec: 161.052
INFO:tensorflow:loss = 0.0045017772, step = 24001 (0.620 sec)
INFO:tensorflow:global_step/sec: 190.077
INFO:tensorflow:loss = 0.0147614265, step = 24101 (0.525 sec)
INFO:tensorflow:global_step/sec: 184.695
INFO:tensorflow:loss = 0.042210646, step = 24201 (0.542 sec)
INFO:tensorflow:global_step/sec: 221.451
INFO:tensorflow:loss = 0.015348793, step = 24301 (0.451 sec)
INFO:tensorflow:global_step/sec: 220.394
INFO:tensorflow:loss = 0.094902635, step =

INFO:tensorflow:global_step/sec: 308.631
INFO:tensorflow:loss = 0.031496968, step = 31601 (0.324 sec)
INFO:tensorflow:global_step/sec: 309.73
INFO:tensorflow:loss = 0.17971013, step = 31701 (0.323 sec)
INFO:tensorflow:global_step/sec: 303.631
INFO:tensorflow:loss = 0.02218496, step = 31801 (0.330 sec)
INFO:tensorflow:global_step/sec: 293.866
INFO:tensorflow:loss = 0.01874355, step = 31901 (0.340 sec)
INFO:tensorflow:global_step/sec: 310.512
INFO:tensorflow:loss = 0.013548233, step = 32001 (0.322 sec)
INFO:tensorflow:global_step/sec: 296.441
INFO:tensorflow:loss = 0.04419624, step = 32101 (0.337 sec)
INFO:tensorflow:global_step/sec: 297.935
INFO:tensorflow:loss = 0.0051828227, step = 32201 (0.335 sec)
INFO:tensorflow:global_step/sec: 282.231
INFO:tensorflow:loss = 0.026615385, step = 32301 (0.354 sec)
INFO:tensorflow:global_step/sec: 257.245
INFO:tensorflow:loss = 0.05961953, step = 32401 (0.389 sec)
INFO:tensorflow:global_step/sec: 297.438
INFO:tensorflow:loss = 0.041802913, step = 325

INFO:tensorflow:global_step/sec: 267.483
INFO:tensorflow:loss = 0.015009033, step = 39701 (0.374 sec)
INFO:tensorflow:global_step/sec: 263.425
INFO:tensorflow:loss = 0.0028287475, step = 39801 (0.379 sec)
INFO:tensorflow:global_step/sec: 270.753
INFO:tensorflow:loss = 0.023954205, step = 39901 (0.370 sec)
INFO:tensorflow:global_step/sec: 291.786
INFO:tensorflow:loss = 0.028064365, step = 40001 (0.342 sec)
INFO:tensorflow:global_step/sec: 301.851
INFO:tensorflow:loss = 0.04389785, step = 40101 (0.331 sec)
INFO:tensorflow:global_step/sec: 291.808
INFO:tensorflow:loss = 0.004361023, step = 40201 (0.345 sec)
INFO:tensorflow:global_step/sec: 257.07
INFO:tensorflow:loss = 0.013731959, step = 40301 (0.387 sec)
INFO:tensorflow:global_step/sec: 293.023
INFO:tensorflow:loss = 0.008216769, step = 40401 (0.341 sec)
INFO:tensorflow:global_step/sec: 281.835
INFO:tensorflow:loss = 0.016262759, step = 40501 (0.355 sec)
INFO:tensorflow:global_step/sec: 229.815
INFO:tensorflow:loss = 0.00025950506, step

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x106d5b128>

In [6]:
# Input function over the test arrays; shuffle=False keeps the examples in
# their original order.
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test}, y=y_test, shuffle=False)
# Evaluate the trained classifier on the test set and keep the metrics.
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-19-22:51:10
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/w0/h81z_nvs6tqdsv95jpw4dhc80000gn/T/tmprzdfti27/model.ckpt-44000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-19-22:51:11
INFO:tensorflow:Saving dict for global step 44000: accuracy = 0.9773, average_loss = 0.11295462, global_step = 44000, loss = 14.298053


In [7]:
# Display the metrics returned by dnn_clf.evaluate().
eval_results

{'accuracy': 0.9773,
 'average_loss': 0.11295462,
 'loss': 14.298053,
 'global_step': 44000}

In [8]:
# predict() is consumed through list(), giving one prediction per test example.
# The same in-order test input function used for evaluation is reused here.
y_pred_iter = dnn_clf.predict(input_fn=test_input_fn)
# NOTE: this rebinds `y_pred`, which earlier held the Perceptron's prediction.
y_pred = list(y_pred_iter)
# Display the prediction for the first test example.
y_pred[0]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/w0/h81z_nvs6tqdsv95jpw4dhc80000gn/T/tmprzdfti27/model.ckpt-44000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'logits': array([ -8.955193  ,  -1.2841066 ,  -4.030523  ,   3.9894106 ,
        -16.479994  , -11.545952  , -16.83486   ,  20.025167  ,
         -7.452689  ,   0.08374912], dtype=float32),
 'probabilities': array([2.5941166e-13, 5.5654409e-10, 3.5706410e-11, 1.0858233e-07,
        1.3996191e-16, 1.9446303e-14, 9.8150717e-17, 9.9999988e-01,
        1.1655177e-12, 2.1855076e-09], dtype=float32),
 'class_ids': array([7]),
 'classes': array([b'7'], dtype=object)}

## Using Plain TensorFlow

In [9]:
import tensorflow as tf

# Network dimensions: input size, two hidden layer widths, output size.
n_inputs = 28 * 28  # MNIST
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

In [10]:
# Start from an empty graph with fixed seeds before defining any nodes.
reset_graph()

# Input features: a batch of unknown size, each row holding n_inputs floats.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels, one per example. `(None)` without a trailing comma is
# just `None` (no shape information at all), so use a real one-element tuple
# to declare a 1-D vector whose length is the batch size.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

In [11]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Create one fully connected layer and return its output tensor.

    All nodes are created inside a name scope called ``name``. The layer
    owns a weight variable "kernel" of shape (n_inputs, n_neurons),
    initialised from a truncated normal distribution with standard deviation
    2 / sqrt(n_inputs), and a zero-initialised variable "bias" of length
    ``n_neurons``.

    Args:
        X: Input tensor; the size of its second dimension is read as the
            number of inputs, so that dimension must be statically known.
        n_neurons: Number of units in the layer.
        name: Name of the enclosing name scope.
        activation: Optional callable applied to the linear output.

    Returns:
        ``activation(X @ kernel + bias)``, or the linear output itself when
        ``activation`` is None.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        kernel_init = tf.truncated_normal((fan_in, n_neurons),
                                          stddev=2 / np.sqrt(fan_in))
        kernel = tf.Variable(kernel_init, name="kernel")
        bias = tf.Variable(tf.zeros([n_neurons]), name="bias")
        linear_output = tf.matmul(X, kernel) + bias
        if activation is None:
            return linear_output
        return activation(linear_output)

In [12]:
# Build the network inside a "dnn" name scope: two ReLU hidden layers
# followed by an output layer created without an activation function, so
# `logits` holds the raw scores that the softmax cross-entropy loss consumes.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

In [13]:
# Loss: per-example softmax cross-entropy computed from the integer labels
# `y` and the raw `logits`, then averaged into a single scalar.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [14]:
# Step size used by the gradient descent optimizer below.
learning_rate = 0.01

# Training op: one gradient descent step that minimizes `loss`.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [15]:
# Accuracy: `correct` flags, per example, whether the label `y` is the top-1
# entry of `logits`; the flags are cast to float32 and averaged.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [16]:
# Op that initializes all global variables; run once at the start of training.
init = tf.global_variables_initializer()
# Saver used below to write the trained model to a checkpoint and restore it.
saver = tf.train.Saver()

In [17]:
n_epochs = 40
batch_size = 50

In [18]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every sample once.

    The sample indices are permuted with NumPy's global random generator and
    split into ``len(X) // batch_size`` nearly equal parts, so a batch may
    hold slightly more than ``batch_size`` samples when ``len(X)`` is not a
    multiple of ``batch_size``.

    Edge cases:
        * An empty dataset yields nothing.
        * A dataset smaller than ``batch_size`` is yielded as a single batch
          (previously ``np.array_split`` was asked for 0 sections and raised
          ``ValueError``).

    Args:
        X: Features; must support ``len()`` and indexing with an integer
            index array (e.g. a NumPy array).
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Target number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` tuples selected with the same indices.
    """
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    if len(X) == 0:
        # Nothing to iterate over.
        return
    if n_batches == 0:
        # Fewer samples than one full batch: emit them all in one batch.
        n_batches = 1
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [19]:
# Train for n_epochs passes over the training set, reporting accuracy after
# each epoch, then save the trained variables to a checkpoint.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the last mini-batch of the epoch and on the validation set.
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.9 Val accuracy: 0.9146
1 Batch accuracy: 0.92 Val accuracy: 0.936
2 Batch accuracy: 0.96 Val accuracy: 0.945
3 Batch accuracy: 0.92 Val accuracy: 0.9508
4 Batch accuracy: 0.98 Val accuracy: 0.956
5 Batch accuracy: 0.96 Val accuracy: 0.9566
6 Batch accuracy: 1.0 Val accuracy: 0.9614
7 Batch accuracy: 0.96 Val accuracy: 0.9632
8 Batch accuracy: 0.98 Val accuracy: 0.9652
9 Batch accuracy: 0.96 Val accuracy: 0.9658
10 Batch accuracy: 0.92 Val accuracy: 0.9688
11 Batch accuracy: 0.98 Val accuracy: 0.9684
12 Batch accuracy: 0.98 Val accuracy: 0.9676
13 Batch accuracy: 0.98 Val accuracy: 0.9712
14 Batch accuracy: 1.0 Val accuracy: 0.9714
15 Batch accuracy: 0.94 Val accuracy: 0.9732
16 Batch accuracy: 1.0 Val accuracy: 0.9736
17 Batch accuracy: 1.0 Val accuracy: 0.9742
18 Batch accuracy: 1.0 Val accuracy: 0.9746
19 Batch accuracy: 0.98 Val accuracy: 0.9746
20 Batch accuracy: 1.0 Val accuracy: 0.976
21 Batch accuracy: 1.0 Val accuracy: 0.9756
22 Batch accuracy: 0.98 Val accu

In [20]:
# Restore the saved model and classify the first 20 test images.
with tf.Session() as sess:
    # The checkpoint path is also available as `save_path` from the training cell.
    saver.restore(sess, "./my_model_final.ckpt")
    X_new_scaled = X_test[:20]
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    # The predicted class is the index of the largest logit in each row.
    y_pred = Z.argmax(axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [21]:
# Print the predictions next to the true labels of the same 20 test images.
print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

Predicted classes: [7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [22]:
from tensorflow_graph_in_jupyter import show_graph

In [23]:
# Pass the current default graph to show_graph(), the helper imported from
# the tensorflow_graph_in_jupyter module.
show_graph(tf.get_default_graph())

## Using dense() instead of neuron_layer()

In [24]:
# Network dimensions: input size, two hidden layer widths, output size.
n_inputs = 28 * 28  # MNIST
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

In [25]:
# Discard the previous graph and start again with fixed seeds.
reset_graph()

# Input features: a batch of unknown size, each row holding n_inputs floats.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels, one per example. `(None)` without a trailing comma is
# just `None` (no shape information at all), so use a real one-element tuple
# to declare a 1-D vector whose length is the batch size.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

In [26]:
# Same architecture as before, built with tf.layers.dense() instead of the
# hand-written neuron_layer(): two ReLU hidden layers and an output layer
# created without an activation function, so `logits` holds raw scores.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    # Softmax of the logits; the loss below is computed from `logits`
    # directly and does not use this tensor.
    y_proba = tf.nn.softmax(logits)

In [27]:
# Loss: per-example softmax cross-entropy computed from the integer labels
# `y` and the raw `logits`, then averaged into a single scalar.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [28]:
# Step size used by the gradient descent optimizer below.
learning_rate = 0.01

# Training op: one gradient descent step that minimizes `loss`.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [29]:
# Accuracy: `correct` flags, per example, whether the label `y` is the top-1
# entry of `logits`; the flags are cast to float32 and averaged.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [30]:
# Op that initializes all global variables of the rebuilt graph.
init = tf.global_variables_initializer()
# Saver used below to write the trained model to a checkpoint.
saver = tf.train.Saver()

In [31]:
n_epochs = 20
# This was `n_batches = 50`, a name the loop never read: the loop uses
# `batch_size`, which only existed because an earlier cell had defined it.
# Defining `batch_size` here makes the cell self-contained with the same value.
batch_size = 50

# Train for n_epochs passes over the training set, reporting accuracy after
# each epoch, then save the trained variables to a checkpoint.
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the last mini-batch of the epoch and on the validation set.
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.9 Validation accuracy: 0.9024
1 Batch accuracy: 0.92 Validation accuracy: 0.9254
2 Batch accuracy: 0.94 Validation accuracy: 0.9372
3 Batch accuracy: 0.9 Validation accuracy: 0.9416
4 Batch accuracy: 0.94 Validation accuracy: 0.9472
5 Batch accuracy: 0.94 Validation accuracy: 0.9512
6 Batch accuracy: 1.0 Validation accuracy: 0.9548
7 Batch accuracy: 0.94 Validation accuracy: 0.961
8 Batch accuracy: 0.96 Validation accuracy: 0.9622
9 Batch accuracy: 0.94 Validation accuracy: 0.9648
10 Batch accuracy: 0.92 Validation accuracy: 0.9656
11 Batch accuracy: 0.98 Validation accuracy: 0.9666
12 Batch accuracy: 0.98 Validation accuracy: 0.9684
13 Batch accuracy: 0.98 Validation accuracy: 0.9704
14 Batch accuracy: 1.0 Validation accuracy: 0.9694
15 Batch accuracy: 0.94 Validation accuracy: 0.9718
16 Batch accuracy: 0.98 Validation accuracy: 0.9726
17 Batch accuracy: 1.0 Validation accuracy: 0.9728
18 Batch accuracy: 0.98 Validation accuracy: 0.9744
19 Batch accuracy: 0.98 Vali

In [32]:
# Pass the current default graph (the tf.layers.dense() version) to the
# show_graph() helper.
show_graph(tf.get_default_graph())