# **Data Importation**

**Import the Libraries**


In [None]:
import keras
import tensorflow
import numpy as np
from keras.layers import Dense
from keras.layers import Conv2D
from keras.datasets import mnist
from keras.layers import Dropout
from keras.layers import Flatten
from sklearn import preprocessing
from keras.layers import MaxPool2D
from keras.models import Sequential
from sklearn.svm import SVC,LinearSVC
from skimage.util.shape import view_as_blocks
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score,classification_report

**Load the Dataset**


In [None]:
# Load MNIST through keras.datasets.mnist. The call returns two
# (images, labels) pairs: the training split first, then the test split.
(train_x, train_y), (test_x, test_y) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


**Transform 3D Data to 2D**

In [None]:
# Unpack each split's shape into (number of samples, image rows, image columns).
nsamples, nx, ny = train_x.shape
tsamples, tx, ty = test_x.shape

# Keep the sample axis and collapse the two image axes into a single one,
# so every 28x28 image becomes one flat row of 784 pixels.
d2_train = np.reshape(train_x, (nsamples, nx * ny))
d2_test = np.reshape(test_x, (tsamples, tx * ty))

print('Before: ', train_x.shape, '\nAfter: ', d2_train.shape)

Before:  (60000, 28, 28) 
After:  (60000, 784)


# **MLP**

**Calculate the Centroid**

In [None]:
def centroid(block):
    """Return the intensity-weighted centroid (row, column) of a 2D block.

    Every pixel value is used as a weight for its own (row, column) index,
    and the weighted mean index along each axis is returned.

    Parameters
    ----------
    block : 2D array-like
        Pixel intensities. Any 2D shape is accepted; the 7x7 blocks used by
        this notebook are one case.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is the weighted mean row index and ``y`` is the
        weighted mean column index. ``(0, 0)`` is returned when the weights do
        not sum to a positive value (for example an all-black block).
    """
    # Promote to float64 before any arithmetic. MNIST pixels are uint8, and
    # under NumPy 2 promotion rules products such as 6 * uint8(255) and the
    # running sums would stay uint8 and wrap around.
    weights = np.asarray(block, dtype=np.float64)
    total = weights.sum()
    if not total > 0:
        return 0, 0

    n_rows, n_cols = weights.shape
    # Row sums weight the row indices; column sums weight the column indices.
    x = np.dot(np.arange(n_rows), weights.sum(axis=1)) / total
    y = np.dot(np.arange(n_cols), weights.sum(axis=0)) / total
    return x, y

**Extract the Feature Vector**

In [None]:
def extractfv(x, index=None):
    """Build the block-centroid feature vector of one image.

    The image is cut into non-overlapping 7x7 blocks (a 4x4 grid for a 28x28
    image). The (row, column) centroid of each block is appended in row-major
    block order, which gives 32 values for a 28x28 image.

    Parameters
    ----------
    x : array-like
        Either a single 2D image, or a stack of images indexed along axis 0.
    index : int, optional
        Position of the image inside ``x`` when ``x`` is a stack. When it is
        omitted for a stack, the function falls back to the notebook-level
        loop variable ``i``. That is the historical call style
        ``extractfv(train_x)`` inside ``for i in ...``, and it raises
        ``NameError`` if no such variable exists. New code should pass a
        single image or an explicit ``index``.

    Returns
    -------
    numpy.ndarray
        1D array ``[x_00, y_00, x_01, y_01, ...]`` of block centroids.
    """
    data = np.asarray(x)
    if data.ndim == 2:
        image = data
    else:
        if index is None:
            # Legacy path: the caller's global loop counter selects the image.
            index = i
        image = data[index]

    x_blocks = view_as_blocks(image, block_shape=(7, 7))
    fv_image = []
    for j in range(x_blocks.shape[0]):
        for k in range(x_blocks.shape[1]):
            # Distinct names so the centroid no longer overwrites parameter x.
            cx, cy = centroid(x_blocks[j][k])
            fv_image.append(cx)
            fv_image.append(cy)
    return np.array(fv_image)

**Calculate the Feature Vector for the Training and Testing Sets**

In [None]:
# Build one centroid feature vector per training image.
# NOTE: the loop variable must stay named `i`. extractfv(train_x) is handed the
# whole image stack and selects the current image through the notebook-level `i`.
train_fv = []
for i in range(len(train_x)):
    train_fv.append(extractfv(train_x))
train_fv=np.array(train_fv)

# Same procedure for the test images (again relying on the global `i`).
test_fv = []
for i in range(len(test_x)):
    test_fv.append(extractfv(test_x))
test_fv=np.array(test_fv)

**Standardize the Features: Fit a Scaler (Per-Feature Mean and Variance), Then Transform the Features by Centering Them and Scaling Them to Unit Variance**

In [None]:
# Learn the standardization statistics (per-feature mean and variance) from
# the training features only, then standardize the training features.
normTrain = preprocessing.StandardScaler().fit(train_fv)
train_fv = normTrain.transform(train_fv)

# Standardize the test features with the *training* statistics. Fitting a
# second scaler on the test set would map train and test features with
# different means/variances and would use test-set statistics in evaluation.
# `normTest` is kept as an alias so code that refers to it keeps working.
normTest = normTrain
test_fv = normTest.transform(test_fv)

**Implement the MLP Neural Network**

In [None]:
# Small MLP (hidden layers of 5 and 2 units) trained with L-BFGS on the
# standardized centroid features. With the default iteration budget the
# optimizer stopped on the iteration limit before converging (see the
# "TOTAL NO. of ITERATIONS REACHED LIMIT" warning), so the cap is raised.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    max_iter=1000,
    random_state=1,
)
# fit() returns the fitted estimator itself, so MLP_train refers to clf.
MLP_train = clf.fit(train_fv, train_y)
MLP_predict = clf.predict(test_fv)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


**Classification Report (Classes Indicating Digits 0-9)**

In [None]:
# Per-digit precision, recall and F1 of the MLP predictions on the test labels.
print(classification_report(y_true=test_y, y_pred=MLP_predict))

              precision    recall  f1-score   support

           0       0.83      0.90      0.86       980
           1       0.87      0.91      0.89      1135
           2       0.71      0.72      0.72      1032
           3       0.37      0.49      0.42      1010
           4       0.60      0.66      0.63       982
           5       0.41      0.28      0.33       892
           6       0.82      0.86      0.84       958
           7       0.80      0.81      0.81      1028
           8       0.53      0.39      0.45       974
           9       0.62      0.58      0.60      1009

    accuracy                           0.67     10000
   macro avg       0.66      0.66      0.65     10000
weighted avg       0.66      0.67      0.66     10000



# **SVM**

**Create and Fit our LinearSVC Object**

In [None]:
# Linear SVM on the centroid features; dual=False is passed to the solver.
# The fit call stays the last expression so the cell still echoes the estimator.
clf = LinearSVC(dual=False)
clf.fit(X=train_fv, y=train_y)

LinearSVC(dual=False)

**Use our LinearSVC Object to Predict the Testing Samples**

In [None]:
# Predict a digit label for every test feature vector with the fitted LinearSVC.
pred = clf.predict(X=test_fv)

**Classification Report (Classes Indicating Digits 0-9)**

In [None]:
# Per-digit precision, recall and F1 of the predictions stored in `pred`.
print(classification_report(y_true=test_y, y_pred=pred))

              precision    recall  f1-score   support

           0       0.88      0.94      0.91       980
           1       0.91      0.97      0.94      1135
           2       0.84      0.83      0.84      1032
           3       0.69      0.80      0.75      1010
           4       0.84      0.77      0.81       982
           5       0.74      0.59      0.66       892
           6       0.84      0.90      0.87       958
           7       0.82      0.85      0.83      1028
           8       0.77      0.64      0.70       974
           9       0.74      0.76      0.75      1009

    accuracy                           0.81     10000
   macro avg       0.81      0.81      0.80     10000
weighted avg       0.81      0.81      0.81     10000



# **KNN**

**Take 10% of the Training Data and Use That for Validation**

In [None]:
# Hold out 10% of the flattened training images (and their labels) for
# validation; random_state is fixed so the split is repeatable.
trainData, valData, trainLabels, valLabels = train_test_split(
    d2_train,
    train_y,
    test_size=0.1,
    random_state=84,
)

**Loop Over the Candidate Values of K (Odd Numbers from 1 to 29) for KNN and Record the Validation Accuracy of Each to Find the Best One**

In [None]:
# Validation accuracy for each candidate k, in the order the values are tried.
accuracies = []

for k in range(1, 30, 2):
    # Fit a k-nearest-neighbours classifier on the 90% training part.
    model = KNeighborsClassifier(n_neighbors=k).fit(trainData, trainLabels)

    # Score it on the held-out validation part and record the result.
    score = model.score(X=valData, y=valLabels)
    accuracies.append(score)
    print("k= " , k , "accuracy= ", (score * 100))

k=  1 accuracy=  97.48333333333333
k=  3 accuracy=  97.61666666666666
k=  5 accuracy=  97.28333333333333
k=  7 accuracy=  97.1
k=  9 accuracy=  97.03333333333333
k=  11 accuracy=  97.03333333333333
k=  13 accuracy=  96.76666666666667
k=  15 accuracy=  96.6
k=  17 accuracy=  96.43333333333334
k=  19 accuracy=  96.2
k=  21 accuracy=  96.08333333333333
k=  23 accuracy=  95.89999999999999
k=  25 accuracy=  95.91666666666666
k=  27 accuracy=  95.8
k=  29 accuracy=  95.75


**Retrain Our Classifier Using the Value of K with the Highest Accuracy**

In [None]:
# Candidate k values, listed in the same order as the entries of `accuracies`.
kVal = range(1, 30, 2)
# Position of the highest validation accuracy; note `k` is an index here,
# the neighbour count itself is kVal[k].
k = np.asarray(accuracies).argmax()

# Refit with the selected neighbour count and label the flattened test images.
model = KNeighborsClassifier(n_neighbors=kVal[k]).fit(trainData, trainLabels)
pred = model.predict(X=d2_test)

**Classification Report (Classes Indicating Digits 0-9)**

In [None]:
# Per-digit precision, recall and F1 of the predictions stored in `pred`.
print(classification_report(y_true=test_y, y_pred=pred))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.96      1.00      0.98      1135
           2       0.98      0.97      0.97      1032
           3       0.96      0.97      0.96      1010
           4       0.97      0.96      0.97       982
           5       0.97      0.96      0.96       892
           6       0.99      0.99      0.99       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.94      0.96       974
           9       0.96      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



# **CNN**

**Reshape the Inputs, Adding a Trailing Channel Dimension of Size 1 to Indicate the Images Are Grayscale**

In [None]:
# Append a trailing axis of length 1 (the single grayscale channel) so each
# image has the (rows, columns, channels) layout given to Conv2D's input_shape.
X_train = np.expand_dims(train_x, axis=-1)
X_test = np.expand_dims(test_x, axis=-1)

# Check the shape
print('Before Reshaping: ', train_x.shape, test_x.shape)
print('After Reshaping: ' , X_train.shape, X_test.shape)

Before Reshaping:  (60000, 28, 28) (10000, 28, 28)
After Reshaping:  (60000, 28, 28, 1) (10000, 28, 28, 1)


**Normalize the Pixel Values**

In [None]:
# Scale pixel values by 1/255. The scaled arrays are derived from the raw
# images (with the trailing channel axis added) rather than from X_train and
# X_test themselves, so re-running this cell cannot divide by 255 twice.
X_train = np.expand_dims(train_x, axis=-1) / 255
X_test = np.expand_dims(test_x, axis=-1) / 255

**Create a Sequential Model (Layer by Layer), and Add Model Layers**

In [None]:
# Layers are listed in the order they are applied.
model = Sequential([
    # Convolution Layer: 32 filters of size 3x3 over 28x28x1 inputs
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),

    # Pooling Layer: pool size 2, stride 2
    MaxPool2D(pool_size=2, strides=2),

    # Fully Connected Layer
    Flatten(),
    Dense(units=100, activation='relu'),

    # Output Layer: one softmax unit per digit class
    Dense(units=10, activation='softmax'),
])

**Compile the Model**


In [None]:
# Integer digit labels are passed directly, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 5408)              0         
                                                                 
 dense (Dense)               (None, 100)               540900    
                                                                 
 dense_1 (Dense)             (None, 10)                1010      
                                                                 
Total params: 542,230
Trainable params: 542,230
Non-trainable params: 0
__________________________________________________

**Train the Model, then Use It to Make Predictions**

In [None]:
# Train for 10 epochs on the scaled training images.
model.fit(x=X_train, y=train_y, epochs=10)
# One row of 10 softmax outputs per test image.
pred = model.predict(x=X_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Evaluate the Model, and Show the Loss and Accuracy Percentages**

In [None]:
# evaluate() returns the loss followed by the compiled metrics, so
# score[0] is the loss and score[1] is the accuracy.
score = model.evaluate(x=X_test, y=test_y, verbose=1)

print('Loss = ', score[0], ', Accuracy = ', score[1])

Loss =  0.05244740471243858 , Accuracy =  0.9872999787330627


**Classification Report (Classes Indicating Digits 0-9)**

In [None]:
#Convert labels to single-digits
pred_y = np.argmax(pred, axis=1)
print(classification_report(test_y, pred_y))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.99      0.99      1032
           3       0.99      0.99      0.99      1010
           4       0.99      0.99      0.99       982
           5       0.98      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.97      0.99      0.98      1028
           8       0.99      0.98      0.99       974
           9       0.98      0.98      0.98      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

