<a href="https://colab.research.google.com/github/nah90/CS767_NeuralNetwork/blob/main/CS767_hw2_Horak.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at

#https://www.apache.org/licenses/LICENSE-2.0

#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

#Copyright 2019 The TensorFlow Authors. Initial code edited by Eric Braude

#Assignment 2- initial data with reduced sample size


In [None]:
#INTENT: Load a reduced MNIST sample, scale it to the 0-1 range and print the first training image

#Import libraries 
import tensorflow as tf
import pandas as pd
import numpy as np
import math

#Constants
SEED=42 #Fixed RNG seed so that a fresh "Restart & Run All" repeats the same random draws
TRAIN_SIZE=7500 #Set constant for training size to lower starting accuracy - 1/8 of original (in half 3 times)
TEST_SIZE=1250 #Set appropriate TEST_SIZE- 1/6 of TRAIN_SIZE

#Seed the global NumPy and TensorFlow generators before any model is built.
#Without this every run starts from different random weights, so the small accuracy
#differences between the model variants below cannot be reproduced.
#(Hardware-level nondeterminism, e.g. on GPU, may still cause tiny variations.)
np.random.seed(SEED)
tf.random.set_seed(SEED)

mnist=tf.keras.datasets.mnist #One of a handful of data sets known to Keras/TensorFlow
#mnist.load_data() produces a pair of input/output arrays for training
#and one for testing
(X_train,y_train), (X_test,y_test) = mnist.load_data()

#Keep only the first TRAIN_SIZE training examples and the first TEST_SIZE test examples
X_train = X_train[:TRAIN_SIZE,:,:] #Reduce size of X_train to TRAIN_SIZE 7500
X_test = X_test[:TEST_SIZE,:,:] #Reduce size of X_test to TEST_SIZE 1250
y_train = y_train[:TRAIN_SIZE] #Reduce size of y_train to TRAIN_SIZE 7500
y_test  = y_test[:TEST_SIZE] #Reduce size of y_test to TEST_SIZE 1250

#Fail early if the subsets are not the size the rest of the notebook assumes
assert len(X_train) == len(y_train) == TRAIN_SIZE, "unexpected training subset size"
assert len(X_test) == len(y_test) == TEST_SIZE, "unexpected test subset size"

X_train,X_test=X_train/255, X_test/255  #Scale down input from 0..255 gray values to 0..1

#Show the first (scaled) training image: 28 rows of 28 gray values
print(X_train[0])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0

In [None]:
#Baseline network, assembled layer by layer:
#784 flattened pixels -> 128 relu units -> dropout -> 10 output scores
model = tf.keras.models.Sequential()
#Unroll every 28x28 image so each pixel (grayscale value) feeds one of 784 input nodes
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
#Fully connected hidden layer with relu activation
model.add(tf.keras.layers.Dense(128, activation='relu'))
#While training, Dropout zeroes 20% of its inputs at random on every step and
#scales the survivors up by 1/0.8 so the sum over all inputs is unchanged
#Illustrative figure: http://laid.delanover.com/wp-content/uploads/2018/02/dropout.png
#(see https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout)
model.add(tf.keras.layers.Dropout(0.2))
#One output per digit, e.g. output #7 expresses the degree to which the input is a 7
model.add(tf.keras.layers.Dense(10))


In [None]:
#Show the first training image as a batch of one; each image has 28 rows of 28 values
print("===========first element of X_train (28 rows)===========")
print(X_train[:1])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictions = model(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
predictions #Last expression of the cell, so the notebook displays it

[[[0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
  

array([[-0.06525381,  0.32089993,  0.62820756,  0.14460514, -0.5281236 ,
         0.01137353,  1.0299832 , -0.03285824,  0.17087965, -0.02277037]],
      dtype=float32)

In [None]:
#Softmax over the last axis turns the logits into "probabilities" for each class
tf.nn.softmax(predictions, axis=-1).numpy()

array([[0.07274252, 0.10702688, 0.14553078, 0.08972821, 0.04578957,
        0.07853572, 0.21749224, 0.07513765, 0.09211701, 0.07589947]],
      dtype=float32)

In [None]:
#Sparse categorical cross-entropy: the negative log probability given to the true class,
#i.e. '0' when the model is sure of the correct class.
#from_logits=True because the loss is fed raw scores, not softmax probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [None]:
#An untrained model should score close to -tf.math.log(1/10) ~~ 2.3 on a single example
loss_fn(y_true=y_train[:1], y_pred=predictions).numpy()

2.5442019

In [None]:
#Attach the training procedure (adam), the loss to minimise and the metric to report
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
#Fit the parameters to the training subset, minimising the loss over 5 epochs
model.fit(x=X_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fd7a1ef09d0>

In [None]:
#Score the trained model on the held-out test subset
#Returns [loss, accuracy]; accuracy = fraction of correct test pairs
model.evaluate(x=X_test, y=y_test, verbose=2)

40/40 - 0s - loss: 0.2504 - accuracy: 0.9152 - 248ms/epoch - 6ms/step


[0.250422865152359, 0.9151999950408936]

# First Code Modification

In [None]:
#First modification: two extra relu hidden layers (64 and 28 units) behind the 128-unit layer
model1 = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=128, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.2), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  tf.keras.layers.Dense(units=64, activation='relu'), #Add hidden layer with size=64 'relu'
  tf.keras.layers.Dense(units=28, activation='relu'), #Add hidden layer with size=28 'relu'
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictions1 = model1(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictions1) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictions1).numpy()) #Print softmax predictions

loss_fn1 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fn1(y_train[:1], predictions1).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
model1.compile(optimizer='adam',
              loss=loss_fn1,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
model1.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
model1.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs


[[-0.23467462  0.00186955 -0.00747118  0.01996927 -0.26848364  0.16531032
   0.37239632  0.05639692 -0.38697582  0.22759798]]

[[0.07758057 0.09828399 0.09737022 0.1000791  0.07500149 0.11573488
  0.14236411 0.10379197 0.06662073 0.12317297]]

2.1564534

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2434 - accuracy: 0.9264 - 185ms/epoch - 5ms/step


[0.2433576136827469, 0.9264000058174133]

# Second Code Modification

In [None]:
#Second modification: wider hidden layers (256/96/42) plus a LeakyReLU layer
model2 = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=256, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  #NOTE(review): the Dense layer above already applies relu and Dropout only zeroes or
  #rescales values by a positive factor, so every input to this layer is >= 0 and
  #LeakyReLU passes it through unchanged. For a leaky activation the Dense layer would
  #need activation=None with LeakyReLU directly after it. Left as-is so the recorded
  #experiment is not altered.
  tf.keras.layers.LeakyReLU(alpha=0.3), #LeakyReLU layer - negative slope coefficient at 0.3
  tf.keras.layers.Dense(units=96, activation='relu'), #Add hidden layer with size=96 'relu'
  tf.keras.layers.Dense(units=42, activation='relu'), #Add hidden layer with size=42 'relu'
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictions2 = model2(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictions2) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictions2).numpy()) #Print softmax predictions

loss_fn2 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fn2(y_train[:1], predictions2).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
model2.compile(optimizer='adam',
              loss=loss_fn2,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
model2.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
model2.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[-0.00459114  0.17415036  0.37202567  0.35450232  0.18983492 -0.08045578
  -0.05044585 -0.25217775  0.0272236  -0.20357607]]

[[0.09246933 0.11056666 0.1347598  0.13241893 0.11231452 0.08571368
  0.08832493 0.0721892  0.09545852 0.07578438]]

2.4567428

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2064 - accuracy: 0.9368 - 262ms/epoch - 7ms/step


[0.20638659596443176, 0.9368000030517578]

# Third Code Modification

In [None]:
#Third modification: same layer sizes as the second, with a 20% Dropout after every hidden layer
model3 = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=256, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  #NOTE(review): the Dense layer above already applies relu and Dropout only zeroes or
  #rescales values by a positive factor, so every input to this layer is >= 0 and
  #LeakyReLU passes it through unchanged. The Dropout layers before and after it
  #therefore act back to back on the same activations. Left as-is so the recorded
  #experiment is not altered.
  tf.keras.layers.LeakyReLU(alpha=0.3), #LeakyReLU layer - negative slope coefficient at 0.3
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets units to 0 at 20% rate at each training step
  tf.keras.layers.Dense(units=96, activation='relu'), #Add hidden layer with size=96 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets units to 0 at 20% rate at each training step
  tf.keras.layers.Dense(units=42, activation='relu'), #Add hidden layer with size=42 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets units to 0 at 20% rate at each training step
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictions3 = model3(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictions3) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictions3).numpy()) #Print softmax predictions

loss_fn3 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fn3(y_train[:1], predictions3).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
model3.compile(optimizer='adam',
              loss=loss_fn3,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
model3.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
model3.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[-0.12781101  0.07565821  0.29559293  0.04302369  0.465162   -0.30641836
  -0.00267497  0.20424484  0.00270183  0.16061102]]

[[0.07945491 0.09738371 0.12133966 0.09425694 0.14376251 0.06645881
  0.09004647 0.1107467  0.09053192 0.1060183 ]]

2.7111728

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2306 - accuracy: 0.9296 - 275ms/epoch - 7ms/step


[0.23061266541481018, 0.9296000003814697]

# Fourth Code Modification

In [None]:
#Fourth modification: later Dropout rates raised to 30% and training extended to 40 epochs
model4 = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=256, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  #NOTE(review): the Dense layer above already applies relu and Dropout only zeroes or
  #rescales values by a positive factor, so every input to this layer is >= 0 and
  #LeakyReLU passes it through unchanged. The Dropout layers before and after it
  #therefore act back to back on the same activations. Left as-is so the recorded
  #experiment is not altered.
  tf.keras.layers.LeakyReLU(alpha=0.3), #LeakyReLU layer - negative slope coefficient at 0.3
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=96, activation='relu'), #Add hidden layer with size=96 'relu'
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=42, activation='relu'), #Add hidden layer with size=42 'relu'
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictions4 = model4(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictions4) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictions4).numpy()) #Print softmax predictions

loss_fn4 = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fn4(y_train[:1], predictions4).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
model4.compile(optimizer='adam',
              loss=loss_fn4,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
model4.fit(X_train, y_train, epochs=40) #Train it with 40 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
model4.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[-0.20015302  0.27866676 -0.29472956  0.31885427  0.19967213  0.03304237
   0.09823121  0.46119103 -0.08594295  0.20798685]]

[[0.07210556 0.1163905  0.06559861 0.1211632  0.10755004 0.09104251
  0.09717517 0.13969691 0.08082943 0.10844802]]

2.3964288

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40

40/40 - 0s - loss: 0.2640 - accuracy: 0.9528 - 185ms/epoch - 5ms/step


[0.26401129364967346, 0.9527999758720398]

# Appendix A Code Modification
---



In [None]:
#Appendix A: the first modification's layout with sigmoid instead of relu in the two added hidden layers
modelA = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=128, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.2), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  tf.keras.layers.Dense(units=64, activation='sigmoid'), #Add hidden layer with size=64 'sigmoid'
  tf.keras.layers.Dense(units=28, activation='sigmoid'), #Add hidden layer with size=28 'sigmoid'
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictionsA = modelA(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictionsA) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictionsA).numpy()) #Print softmax predictions

loss_fnA = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fnA(y_train[:1], predictionsA).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
modelA.compile(optimizer='adam',
              loss=loss_fnA,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
modelA.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
modelA.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[ 1.7327286   0.21384455  0.6639745  -0.7029829  -0.73584586  0.80450904
  -0.5775426   0.07245612  0.2888424   0.5507639 ]]

[[0.33762157 0.0739243  0.11595146 0.02955386 0.02859841 0.13344723
  0.03350365 0.06417754 0.07968166 0.10354031]]

2.014049

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2984 - accuracy: 0.9176 - 218ms/epoch - 5ms/step


[0.29839134216308594, 0.9175999760627747]

# Appendix B Code Modification

In [None]:
#Appendix B: the first modification's layout with the Dropout rate raised from 20% to 30%
modelB = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=128, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets its input units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=64, activation='relu'), #Add hidden layer with size=64 'relu'
  tf.keras.layers.Dense(units=28, activation='relu'), #Add hidden layer with size=28 'relu'
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictionsB = modelB(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictionsB) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictionsB).numpy()) #Print softmax predictions

loss_fnB = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fnB(y_train[:1], predictionsB).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
modelB.compile(optimizer='adam',
              loss=loss_fnB,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
modelB.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
modelB.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[-0.44597062  0.13044488  0.06301535 -0.18137233  0.38030982  0.4295041
   0.11816728  0.1465842   0.00912652  0.06478774]]

[[0.05800332 0.10322548 0.09649452 0.07557295 0.13252623 0.1392088
  0.10196587 0.10490499 0.09143217 0.0966657 ]]

1.9717804

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2416 - accuracy: 0.9224 - 159ms/epoch - 4ms/step


[0.24158601462841034, 0.9223999977111816]

# Appendix C Code Modification

---



In [None]:
#Appendix C: the second modification's layout with an extra 128-unit relu hidden layer
modelC = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=256, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  #NOTE(review): the Dense layer above already applies relu and Dropout only zeroes or
  #rescales values by a positive factor, so every input to this layer is >= 0 and
  #LeakyReLU passes it through unchanged. For a leaky activation the Dense layer would
  #need activation=None with LeakyReLU directly after it. Left as-is so the recorded
  #experiment is not altered.
  tf.keras.layers.LeakyReLU(alpha=0.3), #LeakyReLU layer - negative slope coefficient at 0.3
  tf.keras.layers.Dense(units=128, activation='relu'), #Add hidden layer with size=128 'relu'
  tf.keras.layers.Dense(units=96, activation='relu'), #Add hidden layer with size=96 'relu'
  tf.keras.layers.Dense(units=42, activation='relu'), #Add hidden layer with size=42 'relu'
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictionsC = modelC(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictionsC) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictionsC).numpy()) #Print softmax predictions

loss_fnC = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fnC(y_train[:1], predictionsC).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
modelC.compile(optimizer='adam',
              loss=loss_fnC,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
modelC.fit(X_train, y_train, epochs=5) #Train it with 5 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
modelC.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[-0.6519744   0.0958652   0.09562487  0.08635166  0.3799377  -0.19937691
  -0.39145115  0.00081263  0.27409098  0.02948064]]

[[0.05150548 0.1088018  0.10877565 0.10777162 0.14454637 0.08098677
  0.06683397 0.09893621 0.1300285  0.10181356]]

2.5134695

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

40/40 - 0s - loss: 0.2324 - accuracy: 0.9288 - 247ms/epoch - 6ms/step


[0.2324375957250595, 0.9287999868392944]

# Appendix D Code Modification

In [None]:
#Appendix D: the fourth modification's network trained on the complete MNIST data set
mnist=tf.keras.datasets.mnist #One of a handful of data sets known to Keras/TensorFlow
#mnist.load_data() produces a pair of input/output arrays for training
#and one for testing

#Reload the complete splits - no TRAIN_SIZE/TEST_SIZE slicing is applied here.
#NOTE(review): this rebinds X_train, y_train, X_test and y_test, so any earlier cell
#re-run after this one will silently use the full data set instead of the reduced
#sample. Re-run the data loading cell at the top before repeating earlier experiments.
(X_train,y_train), (X_test,y_test) = mnist.load_data()

X_train,X_test=X_train/255, X_test/255  #Scale down input

modelD = tf.keras.models.Sequential([ #Layer format for the neural net
  tf.keras.layers.Flatten(input_shape=(28, 28)), #Each pixel (grayscale value) mapped to one of 784 nodes
  tf.keras.layers.Dense(units=256, activation='relu'), #Fully connected to hidden layer with 'relu'
  tf.keras.layers.Dropout(0.20), #Dropout layer randomly sets its input units to 0 at 20% rate at each training step
  #NOTE(review): the Dense layer above already applies relu and Dropout only zeroes or
  #rescales values by a positive factor, so every input to this layer is >= 0 and
  #LeakyReLU passes it through unchanged. The Dropout layers before and after it
  #therefore act back to back on the same activations. Left as-is so the recorded
  #experiment is not altered.
  tf.keras.layers.LeakyReLU(alpha=0.3), #LeakyReLU layer - negative slope coefficient at 0.3
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=96, activation='relu'), #Add hidden layer with size=96 'relu'
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=42, activation='relu'), #Add hidden layer with size=42 'relu'
  tf.keras.layers.Dropout(0.30), #Dropout layer randomly sets units to 0 at 30% rate at each training step
  tf.keras.layers.Dense(units=10) #Final output layer
])

#For each example, model returns a vector of 'logit' scores, one for each class
#A tensor where highest value indicates most likely output
predictionsD = modelD(X_train[:1]).numpy() #numpy() converts the tensor output
print("===========untrained output of first training set input===========")
print(predictionsD) #Print predictions

#tf.nn.softmax function converts logits to "probabilities" for each class
#(computed once; the earlier unprinted, discarded softmax call was removed)
print('')
print(tf.nn.softmax(predictionsD).numpy()) #Print softmax predictions

loss_fnD = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) #Loss equal to negative log probability of the true class
#'0' if sure of correct class

print('')
print(loss_fnD(y_train[:1], predictionsD).numpy()) #Initial untrained model should give loss as -tf.math.log(1/10)~~ 2.3
print('')

#Put together the NN with training process, loss, and means of evaluation
modelD.compile(optimizer='adam',
              loss=loss_fnD,
              metrics=['accuracy'])

#Model.fit method adjusts the model parameters to minimize the loss
modelD.fit(X_train, y_train, epochs=40) #Train it with 40 epochs
print('')

#Model.evaluate method checks the models performance on a 'Validation-set' or 'Test-set'
modelD.evaluate(X_test,  y_test, verbose=2) #Accuracy = fraction of correct test pairs

[[ 0.39843574  0.05194907 -0.1503809   0.17238015 -0.1955871  -0.23457094
  -0.10793986  0.01675963 -0.19781418 -0.4744837 ]]

[[0.15577377 0.11015826 0.08998007 0.12425664 0.08600298 0.08271476
  0.09388111 0.10634927 0.08581166 0.06507142]]

2.4923573

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40

313/313 - 1s - loss: 0.0836 - accuracy: 0.9819 - 603ms/epoch - 2ms/step


[0.0836314857006073, 0.9818999767303467]