In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

`(a)`

In [2]:
train_data = pd.read_csv("mnist_train.csv")
test_data = pd.read_csv("mnist_test.csv")

In [3]:
print("Train data Shape:",train_data.shape)
print("Test data Shape:",test_data.shape)

Train data Shape: (60000, 785)
Test data Shape: (10000, 785)


In [4]:
train_data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
test_data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
train_data.isnull().sum()

label    0
1x1      0
1x2      0
1x3      0
1x4      0
        ..
28x24    0
28x25    0
28x26    0
28x27    0
28x28    0
Length: 785, dtype: int64

In [7]:
test_data.isnull().sum()

label    0
1x1      0
1x2      0
1x3      0
1x4      0
        ..
28x24    0
28x25    0
28x26    0
28x27    0
28x28    0
Length: 785, dtype: int64

In [8]:
y = train_data['label'] 
x = train_data.drop(columns = 'label')
x = x/255

In [9]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2 ,random_state = 10)

## Linear SVM

In [10]:
model_linear = SVC(kernel='linear')
model_linear.fit(X_train, y_train)

In [11]:
y_pred = model_linear.predict(X_test)
y_pred[:20]

array([3, 5, 0, 6, 6, 9, 1, 1, 5, 6, 2, 0, 1, 4, 7, 6, 2, 5, 5, 0])

In [13]:
C_Matrix = confusion_matrix(y_test, y_pred)
C_Matrix

array([[1095,    0,    5,    2,    3,   13,    4,    1,    4,    0],
       [   0, 1328,    6,    3,    0,    0,    0,    2,    5,    1],
       [   8,   12, 1161,   15,    8,    5,   12,   11,    8,    1],
       [   6,    5,   30, 1077,    1,   37,    2,    3,   16,    5],
       [   2,    5,   12,    0, 1132,    0,    4,    6,    2,   37],
       [  12,    7,    9,   39,    8,  993,    9,    2,   20,    5],
       [  12,    4,   12,    0,    6,   20, 1146,    0,    1,    0],
       [   4,    5,   17,    4,   17,    4,    0, 1243,    4,   38],
       [   8,   21,   20,   27,    4,   24,   10,    5, 1032,    3],
       [   4,    4,    5,    9,   37,    7,    0,   22,   14, 1008]])

## F1-Score from Scratch

In [34]:
def f1_scratch(ground_truth, prediction, average = True):
    if average == True:
        TP = C_Matrix.diagonal()
        FP = C_Matrix.sum(axis=1)-TP
        FN = C_Matrix.sum(axis=0)-TP
        TN = C_Matrix.sum().sum()-TP-FP-FN
        Precision = TP/(TP+FP)
        Recall = TP/(TP+FN)
        f1score = 2 * Precision * Recall / (Precision + Recall)
        f1 = f1score.mean()
        return f1
    else:
        TP = C_Matrix.diagonal()
        FP = C_Matrix.sum(axis=1)-TP
        FN = C_Matrix.sum(axis=0)-TP
        TN = C_Matrix.sum().sum()-TP-FP-FN
        Precision = TP/(TP+FP)
        Recall = TP/(TP+FN)
        f1score = 2 * Precision * Recall / (Precision + Recall)
        return f1score

In [36]:
f1_scratch(y_test, y_pred, average=False)

array([0.96136962, 0.97076023, 0.92216044, 0.91348601, 0.93708609,
       0.89986407, 0.95979899, 0.94488788, 0.91327434, 0.91304348])

In [37]:
f1_scratch(y_test, y_pred, average=True)

0.9335731152719008

## F1-Score from Sklearn

In [46]:
f1_score(y_test, y_pred, average ='macro')

0.9335731152719008

In [38]:
f1_score(y_test, y_pred, average =None)

array([0.96136962, 0.97076023, 0.92216044, 0.91348601, 0.93708609,
       0.89986407, 0.95979899, 0.94488788, 0.91327434, 0.91304348])

## Accuracy

In [39]:
accuracy_score(y_test, y_pred)

0.9345833333333333

`(b)`

## Non-Linear SVM

### RBF

In [40]:
non_linear_model = SVC(kernel='rbf')
non_linear_model.fit(X_train, y_train)

In [41]:
y_pred_rbf = non_linear_model.predict(X_test)

In [42]:
accuracy_score(y_test, y_pred_rbf)

0.97825

### Polynomial

In [43]:
poly_model = SVC(kernel='poly')
poly_model.fit(X_train, y_train)

In [44]:
y_pred_poly = poly_model.predict(X_test)

In [45]:
accuracy_score(y_test, y_pred_poly)

0.9768333333333333

`(c)`

## Grid Search CV

In [46]:
folds = KFold(n_splits = 5, shuffle = True, random_state = 10)
model = SVC(kernel="rbf")
Grid_Search = GridSearchCV(estimator = model,param_grid = [ {'gamma': [1e-2, 1e-3, 1e-4],'C': [5,10]}],scoring= 'accuracy',cv = folds,return_train_score=True)      
Grid_Search.fit(X_train, y_train)

In [47]:
df_results = pd.DataFrame(Grid_Search.cv_results_)
df_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,82.022877,1.620856,55.674841,4.62525,5,0.01,"{'C': 5, 'gamma': 0.01}",0.979479,0.977604,0.981563,...,0.979521,0.001314,2,0.997969,0.998073,0.997839,0.997865,0.997943,0.997938,8.3e-05
1,99.724374,3.441869,65.657,3.952859,5,0.001,"{'C': 5, 'gamma': 0.001}",0.946042,0.946458,0.947292,...,0.945646,0.001414,4,0.95401,0.954427,0.95362,0.95474,0.953438,0.954047,0.000486
2,212.991456,2.620406,105.200622,0.77844,5,0.0001,"{'C': 5, 'gamma': 0.0001}",0.92,0.921771,0.923438,...,0.920208,0.00218,6,0.922708,0.922865,0.921823,0.923099,0.923385,0.922776,0.000528
3,73.194609,1.812835,49.453996,0.707506,10,0.01,"{'C': 10, 'gamma': 0.01}",0.98,0.977604,0.982708,...,0.979875,0.001655,1,0.999583,0.999479,0.999427,0.999557,0.999505,0.99951,5.6e-05
4,77.207629,0.334548,53.353911,0.286822,10,0.001,"{'C': 10, 'gamma': 0.001}",0.950833,0.952396,0.952396,...,0.950958,0.001523,3,0.962995,0.962135,0.961615,0.962708,0.961641,0.962219,0.000556
5,149.641071,1.984764,90.649604,2.657392,10,0.0001,"{'C': 10, 'gamma': 0.0001}",0.928229,0.927292,0.930104,...,0.927479,0.001594,5,0.930677,0.931719,0.930078,0.931536,0.930781,0.930958,0.0006


`(d)`

## Best Hyperparameters

In [48]:
best_model = SVC(C= 10,kernel='rbf', gamma= 0.01)
best_model.fit(X_train, y_train)

In [49]:
y_pred_best = best_model.predict(X_test)

In [50]:
accuracy_score(y_test, y_pred_best)

0.9824166666666667

In [51]:
confusion_matrix(y_test, y_pred_best)

array([[1119,    0,    1,    0,    0,    0,    2,    1,    2,    2],
       [   0, 1338,    5,    0,    0,    0,    0,    2,    0,    0],
       [   1,    1, 1225,    3,    2,    1,    0,    7,    1,    0],
       [   2,    0,   12, 1148,    0,   10,    0,    3,    6,    1],
       [   0,    3,    2,    0, 1180,    0,    0,    2,    1,   12],
       [   3,    1,    2,    6,    1, 1076,    5,    1,    6,    3],
       [   6,    3,    2,    0,    2,    2, 1186,    0,    0,    0],
       [   2,    2,    7,    0,    8,    1,    0, 1310,    2,    4],
       [   4,    3,    3,    5,    2,    5,    6,    2, 1120,    4],
       [   2,    0,    0,    3,    6,    5,    0,    3,    4, 1087]])

`(e)`

## Support Vectors

In [52]:
Support_Vectors = best_model.support_vectors_
Support_Vectors

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [53]:
# indices of support vectors
list_svI = list(best_model.support_)
list_svI

[26,
 185,
 226,
 462,
 524,
 563,
 594,
 647,
 664,
 720,
 733,
 935,
 1003,
 1062,
 1088,
 1254,
 1285,
 1324,
 1330,
 1555,
 1580,
 1596,
 1640,
 1724,
 1905,
 2081,
 2095,
 2184,
 2191,
 2269,
 2344,
 2358,
 2360,
 2514,
 2714,
 2716,
 2725,
 2821,
 2954,
 3064,
 3073,
 3255,
 3317,
 3465,
 3595,
 3729,
 3787,
 3790,
 3909,
 3983,
 4061,
 4186,
 4233,
 4391,
 4493,
 4561,
 4616,
 4784,
 4860,
 4935,
 4948,
 5119,
 5177,
 5231,
 5282,
 5325,
 5487,
 5539,
 5626,
 5673,
 5703,
 5731,
 5732,
 5765,
 5861,
 5931,
 5943,
 6262,
 6290,
 6377,
 6520,
 6605,
 6691,
 6738,
 6829,
 6845,
 6872,
 6891,
 7012,
 7041,
 7064,
 7071,
 7123,
 7137,
 7186,
 7387,
 7543,
 7632,
 7700,
 7710,
 7770,
 7866,
 7875,
 7911,
 7942,
 7978,
 7990,
 8015,
 8027,
 8199,
 8258,
 8451,
 8508,
 8516,
 8551,
 8589,
 8598,
 8623,
 8643,
 8725,
 8814,
 8971,
 9136,
 9151,
 9399,
 9405,
 9483,
 9488,
 9585,
 9809,
 9881,
 9951,
 9990,
 10026,
 10673,
 10864,
 10937,
 10976,
 10986,
 10988,
 11020,
 11071,
 11073,
 1

In [54]:
# number of support vectors for each class
best_model.n_support_ 

array([ 625,  450, 1060, 1095,  980, 1139,  743,  859, 1309, 1258],
      dtype=int32)

In [55]:
new_training_Set = train_data.iloc[train_data.index.isin(list_svI)]

In [56]:
new_training_Set

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
23,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47967,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
47979,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
47985,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
47993,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
y_new = new_training_Set['label'] 
x_new = new_training_Set.drop(columns = 'label')

In [58]:
x_new = x_new/255

In [59]:
newtrain_model = SVC(C= 10,kernel='rbf', gamma= 0.01)
newtrain_model.fit(x_new, y_new)

In [60]:
y_pred_new = newtrain_model.predict(X_train)

`Train Accuracy`

In [61]:
accuracy_score(y_train, y_pred_new)

0.9704583333333333

`Test Accuracy`

In [62]:
y_pred_newtest = newtrain_model.predict(X_test)

In [63]:
accuracy_score(y_test, y_pred_newtest)

0.9725833333333334

In [64]:
from tabulate import tabulate
mydata = [
    ["Linear SVM", "0.9345833333333333"],
    ["RBF Non-Linear SVM", "0.97825"],
    ["Polynomial Non-Linear SVM", "0.976833333333333"],
    ["Best Model(RBF) with C=10 & gamma=0.01", "0.982416666667"],
    ["Support Vector Extraction Model---Training","0.9704583333333333"],
    ["Support Vector Extraction Model---Test","0.9725833333333334"]
]
head = ["Model", "Accuracy"]
print(tabulate(mydata, headers=head, tablefmt="grid"))

+--------------------------------------------+------------+
| Model                                      |   Accuracy |
| Linear SVM                                 |   0.934583 |
+--------------------------------------------+------------+
| RBF Non-Linear SVM                         |   0.97825  |
+--------------------------------------------+------------+
| Polynomial Non-Linear SVM                  |   0.976833 |
+--------------------------------------------+------------+
| Best Model(RBF) with C=10 & gamma=0.01     |   0.982417 |
+--------------------------------------------+------------+
| Support Vector Extraction Model---Training |   0.970458 |
+--------------------------------------------+------------+
| Support Vector Extraction Model---Test     |   0.972583 |
+--------------------------------------------+------------+


Observations:
<br> (1) All the models showed good accuracies.</br>
<br> (2) After doing cross validation we found the best accuracy of the model that is at `C=10` & `gamma = 0.01` we got `98% accuracy`. </br>
<br> (3) After Support vector extraction the accuracy somewhat remained the same.</br>
