In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import f1_score

import joblib

In [13]:
dataset = pd.read_csv('hand_dataset.csv')

dataset.head()

Unnamed: 0,class,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,x17,y17,x18,y18,x19,y19,x20,y20,x21,y21
0,A,0.530803,0.581163,0.41399,0.501149,0.355407,0.369223,0.345517,0.263534,0.35425,...,0.534037,0.476981,0.665,0.338577,0.605741,0.320499,0.588447,0.400885,0.594126,0.455363
1,A,0.503664,0.616673,0.377512,0.536617,0.317852,0.408111,0.30645,0.296595,0.31667,...,0.494619,0.50849,0.635405,0.349954,0.561715,0.359349,0.546541,0.440563,0.551058,0.498263
2,A,0.267808,0.670679,0.18736,0.610215,0.14114,0.503103,0.133638,0.422246,0.138989,...,0.29892,0.586336,0.368706,0.504098,0.369881,0.461587,0.362212,0.517269,0.349507,0.565708
3,A,0.289091,0.751779,0.167962,0.654705,0.100007,0.499485,0.095107,0.371244,0.122449,...,0.294725,0.624627,0.457846,0.480413,0.393687,0.455553,0.365366,0.547497,0.356335,0.612805
4,A,0.28516,0.762803,0.161197,0.667723,0.094453,0.5121,0.090644,0.383932,0.113567,...,0.292644,0.635424,0.450271,0.489606,0.388296,0.466932,0.361084,0.560643,0.352804,0.623975


# Splitting Train and Test

In [5]:
X = dataset.iloc[:, 1:].values
Y = dataset.iloc[:, 0].values

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)

In [9]:
X_train, X_test, y_train, y_test

(array([[0.53298569, 0.97056466, 0.44585094, ..., 0.65895563, 0.47625431,
         0.64618796],
        [0.17588018, 0.66803652, 0.18196076, ..., 0.71304005, 0.53357399,
         0.76438797],
        [0.5143553 , 0.8316958 , 0.3873868 , ..., 0.37709838, 0.67780656,
         0.33010072],
        ...,
        [0.43364981, 0.59830093, 0.29287198, ..., 0.81762534, 0.38286048,
         0.87926078],
        [0.30209509, 0.90348935, 0.21469623, ..., 0.55794823, 0.19139737,
         0.57637203],
        [0.33902836, 0.38122535, 0.38866568, ..., 0.30882075, 0.57372296,
         0.33335236]]),
 array([[0.73519158, 0.77187133, 0.65495861, ..., 0.40015435, 0.8737371 ,
         0.37768549],
        [0.28868023, 0.74296284, 0.30255535, ..., 0.82134694, 0.62409627,
         0.85880375],
        [0.4029195 , 0.92983544, 0.32097748, ..., 0.58305848, 0.529921  ,
         0.53123093],
        ...,
        [0.36992538, 0.78542095, 0.30022016, ..., 0.46378404, 0.72673428,
         0.42095381],
        [0.3

## Normalizing to reduce computational power

In [11]:
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
X_train,X_test

(array([[ 0.36163004,  1.40860444,  0.34581944, ...,  0.79952873,
         -0.36412498,  0.54985578],
        [-1.61354215, -0.27004561, -1.28840865, ...,  1.10016883,
         -0.06638977,  1.14757741],
        [ 0.25858425,  0.63805717, -0.01623928, ..., -0.76723659,
          0.68279656, -1.0485552 ],
        ...,
        [-0.18780272, -0.65699026, -0.60155386, ...,  1.68152909,
         -0.84923997,  1.72847388],
        [-0.91543986,  1.03642096, -1.08568333, ...,  0.23805694,
         -1.8437556 ,  0.19680593],
        [-0.71115969, -1.86148628, -0.0083194 , ..., -1.14677275,
          0.14215579, -1.03211208]]),
 array([[ 1.4800431 ,  0.30610675,  1.64078854, ..., -0.63907492,
          1.70051732, -0.80792536],
        [-0.98963826,  0.14570105, -0.54158635, ...,  1.70221639,
          0.40380958,  1.62502542],
        [-0.35777391,  1.18260855, -0.42750118, ...,  0.37763774,
         -0.08536447, -0.03146659],
        ...,
        [-0.54026639,  0.38129011, -0.5560478 , ..., -

# Making and Testing Accuracy of Different Models

# 1.1 K-Nearest Neighbours Model

In [15]:
classifier = KNeighborsClassifier(n_neighbors=3)

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

In [16]:
y_pred

array(['M', 'P', 'I', ..., 'J', 'L', 'D'], dtype=object)

In [39]:
print(classification_report(y_test, y_pred))
print("Accuracy :",accuracy_score(y_test, y_pred))
print("f-1 score :",f1_score(y_test, y_pred, average='macro'))

              precision    recall  f1-score   support

           A       0.96      0.98      0.97       307
           B       0.98      1.00      0.99       334
           C       0.98      1.00      0.99       295
           D       1.00      0.98      0.99       325
           E       0.99      0.97      0.98       303
           F       1.00      1.00      1.00       326
           G       0.99      1.00      1.00       325
           H       1.00      0.99      1.00       345
           I       1.00      0.99      1.00       345
           J       1.00      0.99      0.99       331
           K       0.98      0.98      0.98       334
           L       1.00      1.00      1.00       332
           M       0.93      0.90      0.91       341
           N       0.91      0.92      0.91       312
           O       0.99      1.00      0.99       318
           P       0.99      0.99      0.99       349
           Q       0.99      0.99      0.99       323
           R       0.98    

# 1.2 Logistic Regression Model


In [18]:
classifier_2 = LogisticRegression(solver='liblinear', C=10.0, random_state=0)
classifier_2.fit(X_train, y_train)

y_pred_2 = classifier_2.predict(X_test)

In [19]:
y_pred_2

array(['J', 'P', 'I', ..., 'J', 'L', 'D'], dtype=object)

In [40]:
print(classification_report(y_test, y_pred_2))
print("Accuracy :",accuracy_score(y_test, y_pred_2))
print("f-1 score :",f1_score(y_test, y_pred_2, average='macro'))

              precision    recall  f1-score   support

           A       0.97      1.00      0.99       307
           B       0.99      1.00      0.99       334
           C       1.00      1.00      1.00       295
           D       0.99      0.99      0.99       325
           E       1.00      0.99      1.00       303
           F       1.00      0.99      1.00       326
           G       0.99      1.00      1.00       325
           H       1.00      0.99      1.00       345
           I       0.99      0.98      0.98       345
           J       0.99      0.98      0.99       331
           K       1.00      1.00      1.00       334
           L       1.00      1.00      1.00       332
           M       0.93      0.94      0.93       341
           N       0.95      0.91      0.93       312
           O       0.98      1.00      0.99       318
           P       1.00      1.00      1.00       349
           Q       1.00      0.99      1.00       323
           R       0.97    

# 1.3 Gaussian Naive Bayes Model

In [21]:
classifier_3 = GaussianNB(priors = None, var_smoothing = 1e-09)
classifier_3.fit(X_train, y_train)

y_pred_3 = classifier_3.predict(X_test)

In [22]:
y_pred_3

array(['K', 'P', 'T', ..., 'J', 'T', 'D'], dtype='<U1')

In [41]:
print(classification_report(y_test, y_pred_3))
print("Accuracy :",accuracy_score(y_test, y_pred_3))
print("f-1 score :",f1_score(y_test, y_pred_3, average='macro'))

              precision    recall  f1-score   support

           A       0.35      0.55      0.43       307
           B       0.64      0.59      0.62       334
           C       0.22      0.68      0.33       295
           D       0.61      0.46      0.53       325
           E       0.28      0.37      0.32       303
           F       0.98      0.63      0.77       326
           G       0.94      0.38      0.55       325
           H       0.70      0.75      0.72       345
           I       0.52      0.43      0.47       345
           J       0.80      0.64      0.71       331
           K       0.28      0.69      0.40       334
           L       0.51      0.45      0.48       332
           M       0.79      0.24      0.37       341
           N       0.23      0.61      0.34       312
           O       0.45      0.16      0.23       318
           P       0.52      0.64      0.57       349
           Q       0.83      0.41      0.55       323
           R       0.23    

# 1.4 Support Vector Machines Model

In [26]:
classifier_4 = SVC(kernel='rbf', random_state = 1)
classifier_4.fit(X_train,y_train)

y_pred_4 = classifier_4.predict(X_test)

In [27]:
y_pred_4

array(['B', 'P', 'I', ..., 'J', 'L', 'D'], dtype=object)

In [42]:
print(classification_report(y_test, y_pred_4))
print("Accuracy :",accuracy_score(y_test, y_pred_4))
print("f-1 score :",f1_score(y_test, y_pred_4, average='macro'))

              precision    recall  f1-score   support

           A       0.98      1.00      0.99       307
           B       0.96      1.00      0.98       334
           C       0.99      1.00      1.00       295
           D       0.99      0.95      0.97       325
           E       1.00      0.99      0.99       303
           F       1.00      1.00      1.00       326
           G       1.00      1.00      1.00       325
           H       1.00      1.00      1.00       345
           I       0.99      0.99      0.99       345
           J       1.00      0.98      0.99       331
           K       0.99      0.97      0.98       334
           L       1.00      1.00      1.00       332
           M       0.93      0.89      0.91       341
           N       0.90      0.90      0.90       312
           O       0.95      1.00      0.98       318
           P       1.00      0.99      0.99       349
           Q       0.99      0.99      0.99       323
           R       0.99    

# 1.5 Decision Tree Model

In [29]:
classifier_5 = DecisionTreeClassifier()
classifier_5.fit(X_train,y_train)

y_pred_5 = classifier_5.predict(X_test)

In [30]:
y_pred_5

array(['O', 'P', 'A', ..., 'J', 'L', 'D'], dtype=object)

In [43]:
print(classification_report(y_test, y_pred_5))
print("Accuracy :",accuracy_score(y_test, y_pred_5))
print("f-1 score :",f1_score(y_test, y_pred_5, average='macro'))

              precision    recall  f1-score   support

           A       0.90      0.98      0.94       307
           B       0.98      0.94      0.96       334
           C       0.94      0.96      0.95       295
           D       0.91      0.91      0.91       325
           E       0.94      0.91      0.92       303
           F       0.97      0.96      0.96       326
           G       0.96      0.97      0.97       325
           H       0.97      0.97      0.97       345
           I       0.96      0.94      0.95       345
           J       0.94      0.92      0.93       331
           K       0.92      0.95      0.93       334
           L       0.96      0.95      0.95       332
           M       0.89      0.83      0.86       341
           N       0.84      0.85      0.84       312
           O       0.87      0.92      0.89       318
           P       0.93      0.96      0.95       349
           Q       0.94      0.95      0.95       323
           R       0.84    

# 1.6 Random Forest Classifier Model

In [32]:
classifier_6 = RandomForestClassifier(n_estimators=100)
classifier_6.fit(X_train,y_train)

y_pred_6 = classifier_6.predict(X_test)

In [33]:
y_pred_6

array(['O', 'P', 'I', ..., 'J', 'L', 'D'], dtype=object)

In [44]:
print(classification_report(y_test, y_pred_6))
print("Accuracy :",accuracy_score(y_test, y_pred_6))
print("f-1 score :",f1_score(y_test, y_pred_6, average='macro'))

              precision    recall  f1-score   support

           A       0.98      1.00      0.99       307
           B       0.97      1.00      0.98       334
           C       1.00      1.00      1.00       295
           D       1.00      0.98      0.99       325
           E       0.99      0.99      0.99       303
           F       1.00      1.00      1.00       326
           G       0.98      1.00      0.99       325
           H       1.00      0.99      0.99       345
           I       0.99      0.99      0.99       345
           J       0.99      0.99      0.99       331
           K       1.00      0.99      1.00       334
           L       0.99      1.00      1.00       332
           M       0.95      0.94      0.94       341
           N       0.95      0.92      0.93       312
           O       0.98      1.00      0.99       318
           P       1.00      0.99      0.99       349
           Q       0.99      0.99      0.99       323
           R       0.98    

# Saving the Models

In [45]:
joblib.dump(classifier, 'Trained_models/knn.pkl')

joblib.dump(classifier_2, 'Trained_models/logisticreg.pkl')

joblib.dump(classifier_3, 'Trained_models/gnb.pkl')

joblib.dump(classifier_4, 'Trained_models/svm.pkl')

joblib.dump(classifier_5, 'Trained_models/decisiontree.pkl')

joblib.dump(classifier_6, 'Trained_models/randomforest.pkl')

['Trained_models/randomforest.pkl']