In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 1. Import the MNIST Dataset

In [None]:
from keras.datasets import mnist
# mnist.load_data() hands back two (images, labels) pairs: the training split
# first, then the test split. Unpack them in two steps for readability.
train_split, test_split = mnist.load_data()
Xtrain, Ytrain = train_split
Xtest, Ytest = test_split

In [None]:
# Show the raw array dimensions of the training images, then a readable summary
# built from the first three axes (image count, then the two pixel dimensions).
train_dims = Xtrain.shape
print(train_dims)
print("Training Set: %s images of %s x %s pixels each" % train_dims[:3])

In [None]:
# Show the raw array dimensions of the test images, then a readable summary.
# The summary is labelled "Test Set" because it describes Xtest, not the training split.
print(Xtest.shape)
print("Test Set: %s images of %s x %s pixels each" %(Xtest.shape[0],Xtest.shape[1],Xtest.shape[2]))

#### Attempt to display some images in the dataset

#### Training Set

In [None]:
# Preview the first four training digits in a 2 x 2 grid, each titled with its label.
for position, (image, label) in enumerate(zip(Xtrain[:4], Ytrain[:4]), start=1):
    plt.subplot(2, 2, position)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.title("Ground Truth: " + str(label))
plt.show()

#### Test Set

In [None]:
# Preview the first nine test digits in a 3 x 3 grid, each titled with its label.
for position, (image, label) in enumerate(zip(Xtest[:9], Ytest[:9]), start=1):
    plt.subplot(3, 3, position)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.title("Ground Truth: " + str(label))
plt.show()

In [None]:
# Labels of the first nine test images (the same indices shown in the 3 x 3 grid).
Ytest[:9]

In [None]:
# Logistic-regression predictions for the first nine test images, for comparison
# with the ground-truth labels displayed in the previous cell.
# NOTE(review): `LR` and `Xtest_reshape` are first assigned in later cells (the
# "Preprocessing" and "Logistic Regression" sections), so on a fresh kernel this
# cell raises NameError unless those cells have already been run.
LR.predict(Xtest_reshape[:9,:])

#### Preprocessing

In [None]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score, KFold

In [None]:
# Flatten every image into a single row vector so each sample becomes one row of
# a 2-D matrix; -1 lets NumPy infer the row length from the remaining dimensions.
Xtrain_reshape = Xtrain.reshape(len(Xtrain), -1)
Xtest_reshape = Xtest.reshape(len(Xtest), -1)



### A. Building different models for the same problem

#### 1. Logistic Regression (Classifier)

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier on the first LR_TRAIN_SIZE flattened
# training images only, then report accuracy on that same subset and on the
# complete test set.
LR_TRAIN_SIZE = 10000
LR = LogisticRegression(verbose=2)  # a positive verbose value asks the solver to log its progress
LR.fit(Xtrain_reshape[:LR_TRAIN_SIZE,:],Ytrain[:LR_TRAIN_SIZE])
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth
# first, predictions second. The value is the same either way, but the
# documented order keeps the call readable and safe to swap for other metrics.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:LR_TRAIN_SIZE], LR.predict(Xtrain_reshape[:LR_TRAIN_SIZE,:]))))
print("Test Accuracy: " + str(accuracy_score(Ytest, LR.predict(Xtest_reshape))))

#### 2. Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
###
# NOTE(review): `RandForest` is not assigned anywhere in the visible notebook; the
# placeholder above is presumably where the RandomForestClassifier should be
# constructed — confirm and define it before running this cell.
# Fits on the first RANDFOREST_TRAIN_SIZE flattened training images only, then
# reports accuracy on that same subset and on the complete test set.
RANDFOREST_TRAIN_SIZE = 1000
RandForest.fit(Xtrain_reshape[:RANDFOREST_TRAIN_SIZE,:],Ytrain[:RANDFOREST_TRAIN_SIZE])
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:RANDFOREST_TRAIN_SIZE], RandForest.predict(Xtrain_reshape[:RANDFOREST_TRAIN_SIZE,:]))))
print("Test Accuracy: " + str(accuracy_score(Ytest, RandForest.predict(Xtest_reshape))))

In [None]:
from sklearn.ensemble import RandomForestClassifier
###
# NOTE(review): `RF` is not assigned anywhere in the visible notebook; the
# placeholder above is presumably where the RandomForestClassifier should be
# constructed — confirm and define it before running this cell.
# Fits on the first RF_TRAIN_SIZE flattened training images only, then reports
# accuracy on that same subset and on the complete test set.
RF_TRAIN_SIZE = 10000
RF.fit(Xtrain_reshape[:RF_TRAIN_SIZE,:],Ytrain[:RF_TRAIN_SIZE])
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:RF_TRAIN_SIZE], RF.predict(Xtrain_reshape[:RF_TRAIN_SIZE,:]))))
print("Test Accuracy: " + str(accuracy_score(Ytest, RF.predict(Xtest_reshape))))

#### 3. Multilayer Perceptron Classifier

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout
###
# NOTE(review): `NN` is not assigned anywhere in the visible notebook; the
# placeholder above is presumably where the Keras model should be built and
# compiled — confirm and define it before running this cell.
# Trains on the first 10000 flattened training images only, in batches of 200,
# for 100 epochs.
NN.fit(Xtrain_reshape[:10000,:],Ytrain[:10000],batch_size=200,epochs=100)

In [None]:
print("NN")
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
nn_train_pred = np.argmax(NN.predict(Xtrain_reshape[:10000,:], verbose=0), axis=-1)
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:10000], nn_train_pred)))
# Test accuracy has to be measured on the held-out test set; the earlier version
# scored a slice of the training data here, which overstates generalisation.
nn_test_pred = np.argmax(NN.predict(Xtest_reshape, verbose=0), axis=-1)
print("Test Accuracy: " + str(accuracy_score(Ytest, nn_test_pred)))

In [None]:
# NOTE(review): `NN_deep` is not assigned or trained anywhere in the visible
# notebook — confirm where it is built before running this cell.
# The label names the model actually being scored (the other evaluation cells
# print their own model's name), so the output is not confused with the NN cell.
print("NN_deep")
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
nn_deep_train_pred = np.argmax(NN_deep.predict(Xtrain_reshape[:10000,:], verbose=0), axis=-1)
nn_deep_test_pred = np.argmax(NN_deep.predict(Xtest_reshape, verbose=0), axis=-1)
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:10000], nn_deep_train_pred)))
print("Test Accuracy: " + str(accuracy_score(Ytest, nn_deep_test_pred)))

In [None]:
####
# NOTE(review): `MLP` is not assigned anywhere in the visible notebook; the
# placeholder above is presumably where the model should be built — confirm and
# define it before running this cell.
# Compile with a sparse categorical cross-entropy loss (the labels are passed as
# plain integers, not one-hot vectors) and an Adam optimizer constructed with
# 0.0002, tracking accuracy during training.
MLP.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(0.0002),
    metrics=['accuracy'],
)
# Unlike the earlier models, this one is trained on the complete training set.
MLP.fit(
    Xtrain_reshape,
    Ytrain,
    batch_size=200,
    epochs=100,
)

In [None]:
print("MLP")
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
# Training accuracy is reported on the first 10000 training images only.
mlp_train_pred = np.argmax(MLP.predict(Xtrain_reshape[:10000,:], verbose=0), axis=-1)
mlp_test_pred = np.argmax(MLP.predict(Xtest_reshape, verbose=0), axis=-1)
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain[:10000], mlp_train_pred)))
print("Test Accuracy: " + str(accuracy_score(Ytest, mlp_test_pred)))

In [None]:
# Print the MLP's layer-by-layer architecture summary.
MLP.summary()

#### 4. Convolutional Neural Network Classifier

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
###
# NOTE(review): `CNN` is not assigned anywhere in the visible notebook; the
# placeholder above is presumably where the convolutional model should be built
# and compiled — confirm and define it before running this cell.
# Append a trailing axis of length 1 to the image array so each sample has shape
# (height, width, 1). Building the target shape from Xtrain.shape itself keeps
# height and width independent; the earlier version used shape[1] for both,
# which is only correct for square images.
cnn_train_images = Xtrain.reshape(Xtrain.shape + (1,))
# Trains on the complete training set in batches of 100 for 30 epochs.
CNN.fit(cnn_train_images,Ytrain,batch_size=100,epochs=30)

In [None]:
print("CNN")
# Append a trailing axis of length 1 so each image has shape (height, width, 1).
# Deriving the shape from the arrays themselves avoids reusing shape[1] as the
# width, which is only correct for square images.
cnn_train_images = Xtrain.reshape(Xtrain.shape + (1,))
cnn_test_images = Xtest.reshape(Xtest.shape + (1,))
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
cnn_train_pred = np.argmax(CNN.predict(cnn_train_images, verbose=0), axis=-1)
cnn_test_pred = np.argmax(CNN.predict(cnn_test_images, verbose=0), axis=-1)
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
print("Training Accuracy: " + str(accuracy_score(Ytrain, cnn_train_pred)))
print("Test Accuracy: " + str(accuracy_score(Ytest, cnn_test_pred)))

In [None]:
# Print the CNN's layer-by-layer architecture summary.
CNN.summary()

In [None]:
# Write the trained CNN to 'CNN.h5' (a relative path, so it lands in the
# current working directory).
CNN.save('CNN.h5')
# The commented-out lines below reload the saved model and repeat the train/test
# accuracy check on the reloaded copy.
#CNN_load = tf.keras.models.load_model('CNN.h5')
#print("Training Accuracy: " + str(accuracy_score(CNN_load.predict_classes( Xtrain_reshape.reshape(Xtrain.shape[0], Xtrain.shape[1], Xtrain.shape[1], 1)  ),Ytrain)))
#print("Test Accuracy: " + str(accuracy_score(CNN_load.predict_classes( Xtest_reshape.reshape(Xtest.shape[0], Xtest.shape[1], Xtest.shape[1], 1)  ),Ytest)))

### Machine Learning Concepts

In [None]:
# Positions of the test images whose MLP prediction differs from the true label.
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
# np.where with a single argument returns a tuple of index arrays, so the index
# array itself is wrong_indices[0].
wrong_indices = np.where(np.not_equal(np.argmax(MLP.predict(Xtest_reshape, verbose=0), axis=-1), Ytest))

In [None]:
# True labels of the test images the MLP got wrong.
Ytest[wrong_indices]

In [None]:
# The MLP's (incorrect) predictions for the misclassified test images.
# Sequential.predict_classes() was removed from tf.keras in TensorFlow 2.6. For a
# multi-class model, the argmax over the per-class outputs of predict() gives the
# same labels. verbose=0 keeps predict() silent, as predict_classes() was.
np.argmax(MLP.predict(Xtest_reshape, verbose=0), axis=-1)[wrong_indices]

In [None]:
# Show four of the MLP's misclassified test digits (entries 9 through 12 of the
# wrong-index list) in a 2 x 2 grid, titled with the true and predicted labels.
# Predictions for the whole test set are computed once up front instead of
# re-running the model for every subplot title. predict_classes() was removed in
# TensorFlow 2.6; argmax over predict() gives the same multi-class labels.
mlp_test_pred = np.argmax(MLP.predict(Xtest_reshape, verbose=0), axis=-1)
for i in range(4):
    wrong_idx = wrong_indices[0][i+9]
    plt.subplot(221 + i)
    plt.imshow(Xtest[wrong_idx], cmap=plt.get_cmap('gray'))
    plt.axis('off')
    plt.title("Truth: " + str(Ytest[wrong_idx]) + ", Predicted: " + str(mlp_test_pred[wrong_idx]))
plt.show()

In [None]:
# Show four more of the MLP's misclassified test digits (entries 4 through 7 of
# the wrong-index list) in a 2 x 2 grid, titled with the true and predicted labels.
# Predictions for the whole test set are computed once up front instead of
# re-running the model for every subplot title. predict_classes() was removed in
# TensorFlow 2.6; argmax over predict() gives the same multi-class labels.
mlp_test_pred = np.argmax(MLP.predict(Xtest_reshape, verbose=0), axis=-1)
for i in range(4):
    wrong_idx = wrong_indices[0][4+i]
    plt.subplot(221 + i)
    plt.imshow(Xtest[wrong_idx], cmap=plt.get_cmap('gray'))
    plt.axis('off')
    plt.title("Truth: " + str(Ytest[wrong_idx]) + ", Predicted: " + str(mlp_test_pred[wrong_idx]))
plt.show()

#### For further reference: Stanford CS231n (Convolutional Neural Networks for Visual Recognition), 3Blue1Brown's Neural Networks series, and the TensorFlow documentation