In [43]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from IPython.display import clear_output

# Load the data

In [2]:
# load images
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# vectorize all of the images
x_train = x_train.reshape(-1, 784)
y_train = y_train.reshape(-1,1)
x_test = x_test.reshape(-1, 784)
y_test = y_test.reshape(-1,1)

# Onehot encode the labels for neural net
one_hot = OneHotEncoder()
y_train_onehot = one_hot.fit_transform(y_train.reshape(-1,1)).toarray()
y_test_onehot = one_hot.fit_transform(y_test.reshape(-1,1)).toarray()

# Build the three models

In [45]:
# model 1: 10-node single softmax layer
model1 = Sequential()
model1.add(Input(shape=(784,)))
model1.add(Dense(10, activation='softmax'))
model1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model1.summary())


# model 2: 784-node relu layer followed by 10-node softmax layer
model2 = Sequential()
model2.add(Input(shape=(784,)))
model2.add(Dense(784, activation='relu'))
model2.add(Dense(10, activation='softmax'))
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model2.summary())


# model 3: 784-node sigmoid layer followed by 10-node softmax layer
model3 = Sequential()
model3.add(Input(shape=(784,)))
model3.add(Dense(784, activation='sigmoid'))
model3.add(Dense(10, activation='softmax'))
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model3.summary())

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________
None
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 784)               615440    
_________________________________________________________________
dense_13 (Dense)             (None, 10)                7850      
Total params: 623,290
Trainable params: 623,290
Non-trainable params: 0
_________________________________________________________________
None
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Pa

# Cross-validate to find best number of epochs

In [20]:
num_subsets = 5
num_epochs = 10
test_epochs = np.arange(1,num_epochs+1)
accuracies = -1*np.ones([3, num_epochs, num_subsets])
model_names = ['10-node softmax', '784-node ReLU to 10-node softmax', '784-node sigmoid to 10-node softmax'];

# use each of the 10 subsets for testing
for subset in range(num_subsets):
    
    # hold-out one subset for testing
    te_s = int(60000/num_subsets*subset)
    te_e = int(60000/num_subsets*(subset+1))
    test_idxs = np.r_[te_s:te_e]
    # other subsets are for training
    train_idxs = np.r_[0:te_s, te_e:60000]
    
    # building training and testing subsets
    x_tr = x_train[train_idxs]
    y_tr = one_hot.fit_transform(y_train[train_idxs]).toarray()
    x_te = x_train[test_idxs]
    y_te = one_hot.fit_transform(y_train[test_idxs]).toarray()
    

    
    # try keeping different numbers of singular values
    for epochs in test_epochs:
        
        for i, model in enumerate([model1, model2, model3]):
            
            print('Subset ' + str(subset) + ' for model \"' + model_names[i] + '\" with ' + str(epochs) + ' epochs.')
            
            model.fit(x_tr, y_tr, epochs=epochs)
            accuracy = model.evaluate(x_te, y_te)[1]
            accuracies[i,epochs-1,subset] = error
            clear_output()

In [25]:
df = pd.DataFrame({'Epochs': test_epochs,
                   'Model 1 Average Accuracy': np.mean(accuracies[0], axis=1),
                   'Model 2 Average Accuracy': np.mean(accuracies[1], axis=1),
                   'Model 3 Average Accuracy': np.mean(accuracies[2], axis=1)})
df

Unnamed: 0,Epochs,Model 1 Average Error,Model 2 Average Error,Model 3 Average Error
0,1,0.895317,0.97375,0.959583
1,2,0.894817,0.975917,0.96005
2,3,0.8928,0.976083,0.961833
3,4,0.888,0.978267,0.96225
4,5,0.880533,0.980367,0.962333
5,6,0.891183,0.979967,0.963317
6,7,0.885517,0.97985,0.962583
7,8,0.887017,0.980333,0.9609
8,9,0.884267,0.979183,0.9623
9,10,0.884,0.980117,0.962917


Training with more than one epoch appears to make no significant change in classification performance.

# Get train and test accuracy with ReLU activation layer (model2) and 1 epoch

In [46]:
model2.fit(x_train, y_train_onehot, epochs=5)

y_pred = model2.predict(x_train)
print('Training accuracy\n:' +
      classification_report(y_train, [np.argmax(y_pred[i]) for i in range(y_pred.shape[0])]))

y_pred = model2.predict(x_test)
print('Testing accuracy\n:' +
      classification_report(y_test, [np.argmax(y_pred[i]) for i in range(y_pred.shape[0])]))

Training accuracy
:              precision    recall  f1-score   support

           0       0.98      0.96      0.97      5923
           1       0.99      0.96      0.98      6742
           2       0.94      0.95      0.94      5958
           3       0.96      0.89      0.93      6131
           4       0.98      0.92      0.95      5842
           5       0.93      0.93      0.93      5421
           6       0.93      0.99      0.96      5918
           7       0.97      0.94      0.95      6265
           8       0.86      0.93      0.89      5851
           9       0.87      0.94      0.90      5949

    accuracy                           0.94     60000
   macro avg       0.94      0.94      0.94     60000
weighted avg       0.94      0.94      0.94     60000

Testing accuracy
:              precision    recall  f1-score   support

           0       0.96      0.96      0.96       980
           1       0.99      0.96      0.98      1135
           2       0.92      0.94      0.