In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [12]:
os.chdir("C:/Users/Kiat Kai/Desktop/NOTES/Self Projects/data/classify hand gestures/")
data0 = pd.read_csv("0.csv", header=None) # For class 0: rock
data1 = pd.read_csv("1.csv", header=None) # For class 1: scissors
data2 = pd.read_csv("2.csv", header=None) # For class 2: paper
data3 = pd.read_csv("3.csv", header=None) # For class 3: ok

# 8 consecutive readings of all 8 sensors which is why 64 columns plus last column is the class = 65 columns

# Now, we will combine all the dataset into 1 big dataset
data = pd.concat([data0,data1,data2,data3], axis=0)
data.shape

(11678, 65)

In [13]:
# Split into X and Y
Y = data.iloc[:,-1]
X = data.drop(data.columns[-1], axis=1)

# Now, train test split
X_train, Xtest, Y_train, Ytest = train_test_split(X, Y, train_size=0.8, random_state=10)

# Using different models for classification

## 1) LDA

In [14]:
lda = LinearDiscriminantAnalysis()
y_pred = lda.fit(X_train, Y_train).predict(Xtest)
f1_lda = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Linear Discriminant Analysis Classifier is", f1_lda)

F1 Score for Linear Discriminant Analysis Classifier is 0.3326198630136986


## 2) QDA

In [15]:
qda = QuadraticDiscriminantAnalysis()
y_pred = qda.fit(X_train, Y_train).predict(Xtest)
f1_qda = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Quadratic Discriminant Analysis Classifier is", f1_qda)

F1 Score for Quadratic Discriminant Analysis Classifier is 0.9353595890410958


## 3) Naive Bayes

In [16]:
gnb = GaussianNB()
y_pred = gnb.fit(X_train, Y_train).predict(Xtest)
f1_nb = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Naive Bayes Classifier is", f1_nb)

F1 Score for Naive Bayes Classifier is 0.8874143835616438


## 4) Random Forest

In [59]:
rfc=RandomForestClassifier(random_state=100)

param_grid = { 
    'n_estimators': [200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8,9,10],
    'criterion' :['gini', 'entropy']
}

CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, Y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=100),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8, 9, 10],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [200, 500]})

In [60]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 10,
 'max_features': 'log2',
 'n_estimators': 500}

In [17]:
# Best parameters after tuning:
rf = RandomForestClassifier(random_state=100, n_estimators=500, criterion='gini', max_depth=10, max_features='log2')
y_pred = rf.fit(X_train, Y_train).predict(Xtest)
f1_rf = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Random Forest Classifier is", f1_rf)

F1 Score for Random Forest Classifier is 0.9075342465753424


## 5) Support Vector Classifier

In [4]:
svc=SVC(random_state=100)

param_grid = { 
    'C': [0.01, 0.1, 1],
    'kernel': ['linear','rbf'],
}

CV_svc = GridSearchCV(estimator=svc, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train, Y_train)

GridSearchCV(cv=5, estimator=SVC(random_state=100),
             param_grid={'C': [0.01, 0.1, 1], 'kernel': ['linear', 'rbf']})

In [7]:
CV_svc.best_params_

{'C': 1, 'kernel': 'rbf'}

In [9]:
# Best parameters after tuning:
rf = SVC(random_state=100, C=1, kernel="rbf")
y_pred = rf.fit(X_train, Y_train).predict(Xtest)
f1_svc = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Support Vector Classifier is", f1_svc)

F1 Score for Support Vector Classifier is 0.879708904109589


## Evaluating all models

In [23]:
# Dataframe to contain model results
model_results = pd.DataFrame(columns=["Models","F1 Score"])

# LDA
model_results = model_results.append(pd.DataFrame({"Models":"Linear Discriminant Analysis", 
                                  "F1 Score":f1_lda}, index = [0]), ignore_index = False)
# QDA
model_results = model_results.append(pd.DataFrame({"Models":"Quadratic Discriminant Analysis", 
                                  "F1 Score":f1_qda}, index = [1]), ignore_index = False)
# Naive Bayes
model_results = model_results.append(pd.DataFrame({"Models":"Naive Bayes", 
                                  "F1 Score":f1_nb}, index = [2]), ignore_index = False)
# Random Forest
model_results = model_results.append(pd.DataFrame({"Models":"Random Forest", 
                                  "F1 Score":f1_rf}, index = [3]), ignore_index = False)
# Support Vector Classifier
model_results = model_results.append(pd.DataFrame({"Models":"Support Vector Classifier", 
                                  "F1 Score":f1_svc}, index = [4]), ignore_index = False)

model_results.sort_values(by="F1 Score", ascending = False)

Unnamed: 0,Models,F1 Score
1,Quadratic Discriminant Analysis,0.93536
3,Random Forest,0.907534
2,Naive Bayes,0.887414
4,Support Vector Classifier,0.879709
0,Linear Discriminant Analysis,0.33262
