<a href="https://colab.research.google.com/github/rhilderbrand/MSDS-422-MachineLearning/blob/main/Hilderbrand_Assignment_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 6: Neural Networks

This assignment asks you to fit a number of neural networks, comparing processing time and performance across experimental treatments. Processing time will be recorded for the fitting on the train.csv. Kaggle.com accuracy scores will be reported for all benchmarks.

## System Setup & Data Preparation

In [1]:
# prepare for Python version 3 features and functions
from __future__ import division, print_function, unicode_literals

# import common packages
import pandas as pd
import numpy as np
import os

# import TensorFlow
import tensorflow as tf

# Disable the 'tensorflow' Python logger so that its log records (at every
# level, including warnings and errors) are dropped instead of being written
# to the cell output.
import logging
logging.getLogger('tensorflow').disabled = True

# for visualization
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load MNIST, which Keras returns already divided into train and test splits.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-long float32 row and divide by 255.0.
X_train, X_test = (
    images.astype(np.float32).reshape(-1, 28 * 28) / 255.0
    for images in (X_train, X_test)
)

# Store the digit labels as int32 class ids.
y_train, y_test = (labels.astype(np.int32) for labels in (y_train, y_test))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Sanity-check the shape of each split: features first, then labels.
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)), sep="\n")

(60000, 784)
(10000, 784)
(60000,)
(10000,)


## Model 1 - 2 layers, 10 nodes per layer

**Processing Time:** 128 s (total CPU time reported by `%%time`; wall time was 87 s)

**Training Set Accuracy:** 0.8613

**Test Set Accuracy:** 0.8637

In [4]:
%%time
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf_1 = tf.estimator.DNNClassifier(hidden_units=[10,10], n_classes=10,
                                     feature_columns=feature_cols)

input_fn_1 = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=50, batch_size=50, shuffle=True)
dnn_clf_1.train(input_fn=input_fn_1)

CPU times: user 2min 2s, sys: 6.56 s, total: 2min 8s
Wall time: 1min 27s


In [5]:
# Score Model 1 on the data it was trained on, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
train_1_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    shuffle=False,
)
eval_results_train_1 = dnn_clf_1.evaluate(input_fn=train_1_input_fn)
eval_results_train_1

{'accuracy': 0.8613333,
 'average_loss': 0.5005337,
 'global_step': 60000,
 'loss': 0.5005781}

In [6]:
# Score Model 1 on the held-out test split, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
test_1_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results_test_1 = dnn_clf_1.evaluate(input_fn=test_1_input_fn)
eval_results_test_1

{'accuracy': 0.8637,
 'average_loss': 0.48754096,
 'global_step': 60000,
 'loss': 0.48577008}

In [7]:
# Confusion matrix for Model 1 on the test split.
# predict() yields one dict per example; keep only the predicted class id.
raw_predictions_1 = dnn_clf_1.predict(input_fn=test_1_input_fn)
predictions_1 = [
    prediction["class_ids"][0] for prediction in raw_predictions_1
]
# Rows are the true digits, columns are the predicted digits.
confusion_matrix_1 = tf.math.confusion_matrix(
    labels=y_test, predictions=predictions_1
)
confusion_matrix_1

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 903,    0,   19,    8,    1,   34,    2,    1,    7,    5],
       [   1, 1085,    2,    5,    1,    0,   11,    1,   28,    1],
       [  14,   14,  837,   44,   18,    0,   35,   14,   48,    8],
       [  21,    1,   25,  855,    2,   37,    5,   23,   37,    4],
       [   0,    5,   11,    0,  887,    0,   15,    0,    4,   60],
       [  31,    9,    5,   57,   12,  681,   16,   11,   63,    7],
       [   8,   13,   54,    3,    8,   27,  839,    0,    5,    1],
       [   2,   13,   32,    6,    7,    2,    3,  905,    5,   53],
       [   8,   19,   13,   28,   24,   26,    9,    7,  814,   26],
       [   5,    3,    7,   10,   77,   15,    0,   46,   15,  831]],
      dtype=int32)>

## Model 2 - 2 layers, 20 nodes per layer

**Processing Time:** 140 s (total CPU time reported by `%%time`; wall time was 88 s)

**Training Set Accuracy:** 0.90785

**Test Set Accuracy:** 0.9142

In [8]:
%%time
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf_2 = tf.estimator.DNNClassifier(hidden_units=[20,20], n_classes=10,
                                     feature_columns=feature_cols)

input_fn_2 = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=50, batch_size=50, shuffle=True)
dnn_clf_2.train(input_fn=input_fn_2)

CPU times: user 2min 12s, sys: 7.34 s, total: 2min 20s
Wall time: 1min 28s


In [9]:
# Score Model 2 on the data it was trained on, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
train_2_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    shuffle=False,
)
eval_results_train_2 = dnn_clf_2.evaluate(input_fn=train_2_input_fn)
eval_results_train_2

{'accuracy': 0.90785,
 'average_loss': 0.33671892,
 'global_step': 60000,
 'loss': 0.33669648}

In [10]:
# Score Model 2 on the held-out test split, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
test_2_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results_test_2 = dnn_clf_2.evaluate(input_fn=test_2_input_fn)
eval_results_test_2

{'accuracy': 0.9142,
 'average_loss': 0.32454455,
 'global_step': 60000,
 'loss': 0.32295388}

In [11]:
# Confusion matrix for Model 2 on the test split.
# predict() yields one dict per example; keep only the predicted class id.
raw_predictions_2 = dnn_clf_2.predict(input_fn=test_2_input_fn)
predictions_2 = [
    prediction["class_ids"][0] for prediction in raw_predictions_2
]
# Rows are the true digits, columns are the predicted digits.
confusion_matrix_2 = tf.math.confusion_matrix(
    labels=y_test, predictions=predictions_2
)
confusion_matrix_2

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 956,    0,    2,    2,    0,    4,   11,    1,    4,    0],
       [   0, 1111,    2,    4,    0,    1,    4,    1,   12,    0],
       [  13,    1,  908,   15,   20,    3,   14,   18,   37,    3],
       [   3,    1,   14,  913,    0,   31,    4,   15,   22,    7],
       [   2,    6,    4,    2,  907,    1,   15,    2,    6,   37],
       [  11,    2,    2,   43,    8,  751,   17,    5,   47,    6],
       [  12,    3,    7,    1,   13,   17,  897,    1,    7,    0],
       [   3,   13,   33,    7,    8,    0,    0,  934,    6,   24],
       [  12,    9,    7,   20,    7,   18,   10,   21,  860,   10],
       [  11,    6,    3,   11,   38,   13,    0,   17,    5,  905]],
      dtype=int32)>

## Model 3 - 5 layers, 10 nodes per layer

**Processing Time:** 138 s (total CPU time reported by `%%time`; wall time was 88 s)

**Training Set Accuracy:** 0.7168

**Test Set Accuracy:** 0.718

In [12]:
%%time
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf_3 = tf.estimator.DNNClassifier(hidden_units=[10,10,10,10,10], n_classes=10,
                                     feature_columns=feature_cols)

input_fn_3 = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=50, batch_size=50, shuffle=True)
dnn_clf_3.train(input_fn=input_fn_3)

CPU times: user 2min 11s, sys: 6.48 s, total: 2min 18s
Wall time: 1min 28s


In [13]:
# Score Model 3 on the data it was trained on, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
train_3_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    shuffle=False,
)
eval_results_train_3 = dnn_clf_3.evaluate(input_fn=train_3_input_fn)
eval_results_train_3

{'accuracy': 0.71683335,
 'average_loss': 0.8182206,
 'global_step': 60000,
 'loss': 0.81823534}

In [14]:
# Score Model 3 on the held-out test split, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
test_3_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results_test_3 = dnn_clf_3.evaluate(input_fn=test_3_input_fn)
eval_results_test_3

{'accuracy': 0.718,
 'average_loss': 0.8159881,
 'global_step': 60000,
 'loss': 0.81525016}

In [15]:
# Confusion matrix for Model 3 on the test split.
# predict() yields one dict per example; keep only the predicted class id.
raw_predictions_3 = dnn_clf_3.predict(input_fn=test_3_input_fn)
predictions_3 = [
    prediction["class_ids"][0] for prediction in raw_predictions_3
]
# Rows are the true digits, columns are the predicted digits.
confusion_matrix_3 = tf.math.confusion_matrix(
    labels=y_test, predictions=predictions_3
)
confusion_matrix_3

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 873,    0,   48,   45,    0,    2,    6,    1,    5,    0],
       [   1, 1100,    2,    1,    5,    2,    9,    2,   13,    0],
       [  51,   15,  772,   28,   16,   14,   41,    7,   87,    1],
       [  82,    1,   47,  619,    2,  107,    1,   11,  133,    7],
       [   0,   67,    5,    0,  333,    0,   16,    7,   35,  519],
       [  22,    4,   61,  385,    1,  187,    5,   17,  208,    2],
       [   8,   28,   69,    4,    2,    0,  840,    0,    6,    1],
       [   2,   10,    7,    6,   17,    1,    1,  875,   45,   64],
       [   3,   21,   36,   31,    8,   41,    3,   14,  812,    5],
       [   2,   12,    3,    9,   88,    6,    4,   89,   27,  769]],
      dtype=int32)>

## Model 4 - 5 layers, 20 nodes per layer

**Processing Time:** 156 s (total CPU time reported by `%%time`; wall time was 107 s)

**Training Set Accuracy:** 0.8982

**Test Set Accuracy:** 0.8992

In [16]:
%%time
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf_4 = tf.estimator.DNNClassifier(hidden_units=[20,20,20,20,20], n_classes=10,
                                     feature_columns=feature_cols)

input_fn_4 = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=50, batch_size=50, shuffle=True)
dnn_clf_4.train(input_fn=input_fn_4)

CPU times: user 2min 30s, sys: 6.56 s, total: 2min 36s
Wall time: 1min 47s


In [17]:
# Score Model 4 on the data it was trained on, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
train_4_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_train},
    y=y_train,
    shuffle=False,
)
eval_results_train_4 = dnn_clf_4.evaluate(input_fn=train_4_input_fn)
eval_results_train_4

{'accuracy': 0.89821666,
 'average_loss': 0.35280833,
 'global_step': 60000,
 'loss': 0.3528102}

In [18]:
# Score Model 4 on the held-out test split, read in order (no shuffling).
# Batch size and epoch count are left at numpy_input_fn's defaults.
test_4_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={"X": X_test},
    y=y_test,
    shuffle=False,
)
eval_results_test_4 = dnn_clf_4.evaluate(input_fn=test_4_input_fn)
eval_results_test_4

{'accuracy': 0.8992,
 'average_loss': 0.34777656,
 'global_step': 60000,
 'loss': 0.34536946}

In [19]:
# Confusion matrix for Model 4 on the test split.
# predict() yields one dict per example; keep only the predicted class id.
raw_predictions_4 = dnn_clf_4.predict(input_fn=test_4_input_fn)
predictions_4 = [
    prediction["class_ids"][0] for prediction in raw_predictions_4
]
# Rows are the true digits, columns are the predicted digits.
confusion_matrix_4 = tf.math.confusion_matrix(
    labels=y_test, predictions=predictions_4
)
confusion_matrix_4

<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 946,    0,    1,    1,    0,   11,   16,    2,    3,    0],
       [   0, 1095,   15,   12,    0,    1,    3,    0,    7,    2],
       [  12,    8,  928,   12,   12,    3,   15,   12,   27,    3],
       [   4,    5,   23,  891,    0,   40,    1,   15,   28,    3],
       [   0,    0,    6,    0,  904,    0,   21,    7,    3,   41],
       [  22,    2,   10,   78,    6,  723,   10,    2,   32,    7],
       [  22,    1,   15,    0,   10,   11,  896,    0,    3,    0],
       [   5,   33,   14,    1,    5,    0,    0,  915,    3,   52],
       [   6,    4,   17,   31,   15,   54,    4,    4,  812,   27],
       [   5,    1,    0,   14,   52,    6,    4,   32,   13,  882]],
      dtype=int32)>

## Summary


Model 1 took the least amount of time to process (128 s), but Model 2 had the highest accuracy score on the test data set (0.9142) while taking only an additional 12 seconds, for a processing time of 140 s.