# Hands On ML Chapter 10 - Introduction to Artificial Neural Networks

In [43]:
import tensorflow as tf
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score

In [6]:
mnist = fetch_mldata('MNIST original')

In [29]:
X, y = mnist['data'], mnist['target']
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]


In [28]:
y_train, X_train

(array([ 0.,  0.,  0., ...,  9.,  9.,  9.]), array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8))

In [40]:
scaler = StandardScaler()
scaler.fit_transform(X_train)
scaler.fit_transform(X_test)
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(int)
y_test = y_test.astype(int)

## Multi-Layer Perceptron DNN with High-Level API

DNN with:
* two hidden layers
* first layer with 300 neurons
* second layer with 100 neurons
* ouput with 10 neurons

In [41]:
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units = [300, 100], n_classes = 10, feature_columns = feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)
#dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': None, '_log_step_count_steps': 100, '_session_config': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_task_type': None, '_keep_checkpoint_every_n_hours': 10000, '_evaluation_master': '', '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_model_dir': 'C:\\Users\\Kuba\\AppData\\Local\\Temp\\tmptwsalte5', '_environment': 'local', '_keep_checkpoint_max': 5, '_task_id': 0, '_is_chief': True, '_num_worker_replicas': 0, '_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001BE237B3DD8>, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600}


In [42]:
dnn_clf.fit(X_train, y_train, batch_size = 50, steps = 40000)

Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\Kuba\AppData\Local\Temp\tmptwsalte5\model.ckpt.
INFO:tensorflow:step = 1, loss = 153.716
INFO:tensorflow:global_step/sec: 93.0819
INFO:tensorflow:step = 101, loss = 1.50392 (1.124 sec)
INFO:tensorflow:global_step/sec: 90.3629
INFO:tensorflow:step = 201, loss = 0.994454 (1.062 sec)
INFO:tensorflow:global_step/sec: 100.159
INFO:tensorflow:step = 301, loss = 0.758746 (1.001 sec)
INFO:tensorflow:global_step/sec: 93.3936
INFO:tensorflow:step = 401, loss = 0.862277 (1.078 sec)
INFO:tensorflow:global_step/sec: 99.9079
INFO:tensorflow:step = 501, loss = 0.855615 (0.996 sec)
INFO:tensorflow:global_step/sec: 98.7454
INFO:tensorflow:step = 601, loss = 0.720426 (1.006 sec)
INFO:tensorflow:global_step/sec: 82.9806
INFO:tensorflow:step = 701, loss = 0.441494 (1.220 sec)
INFO:tensorflow:global_step/sec: 101.372
INFO:tensorflow:step = 8

SKCompat()

In [44]:
y_pred = dnn_clf.predict(X_test)
accuracy_score(y_test, y_pred['classes'])

INFO:tensorflow:Restoring parameters from C:\Users\Kuba\AppData\Local\Temp\tmptwsalte5\model.ckpt-40000


0.94530000000000003

## Training a DNN Using Plain TensorFlow

Lets create Mini-batch Gradient Descent model to train it on MNIST dataset.
* First step - construction phase - building graph.
* Second step - execution phase - run the graph to train the model.

### Construction Phase

Two hidden layers with 300 and 100 neurons, output with 10 neurons. 

In [45]:
n_inputs = 28*28 # MNIST size of picture
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X's shape is matrix with instances along first dimension and features along second dimension, so we know second parameter of the shape - 28x28 per instance, but we dont know how many instances we will take to single batch, so the first is None. Same for y, but in 1-dimnesional case - only instances with 1 feature per instance.

In [46]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

Now let's create the actual neural network.
* X will be the input layer, it will take one training batch at a time
* the two hidden layers differ only by the inputs they are connected to and number of neurons they contain
* the output layer is also very similiar but it uses softmax instead of ReLU activation function

neuron_layer() will create one layer at the time

In [47]:
def neuron_layer(X, n_neurons, name, activation = None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev = stddev)
        W = tf.Variable(init, name = "kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name = "bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z