In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt 
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
## Walk the Kaggle input directory tree and print the full path of every file in it
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        file_path = os.path.join(folder, name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Location of the EMG CSV file; the next cell loads it with pd.read_csv
Input_path='/kaggle/input/emg-signal-for-gesture-recognition/EMG-data.csv'

In [None]:
## Read the EMG recordings into a DataFrame and report its (rows, columns) shape
dataset = pd.read_csv(Input_path)
print(dataset.shape)
##check for any null/empty values
## (the printed number is the count of columns that contain at least one null)
columns_with_nulls = dataset.isnull().any()
print("\nempty values:", columns_with_nulls.sum())

In [None]:
## Discard the "time" column; every remaining column is kept
dataset = dataset.drop("time", axis=1)
## Show the distinct values of the "class" column and how many rows carry each
Class = dataset["class"]
print(Class.unique())
print("Value Count :\n", Class.value_counts())

In [None]:
##drop gesture 0 because it offers no information due to its unmarked nature
##drop gesture 7 because it offers no information due to it being performed
##by just two out of 36 patients
## A single boolean filter builds a new frame instead of mutating `dataset`
## in place twice, so the rows of both classes are removed in one pass.
excluded_classes = [0, 7]
dataset = dataset[~dataset["class"].isin(excluded_classes)]

In [None]:
## Group the rows by their (label, class) pair. From here until the
## aggregation cell, `dataset` is a GroupBy object rather than a DataFrame.
dataset = dataset.groupby(by=['label', 'class'])

In [None]:
##functions for extracting sEMG features
def rms(data): ##root mean square
    """Return the root mean square of ``data`` along axis 0.

    Each element is squared, the squares are averaged over the first axis
    and the square root of that average is returned.

    ``data`` must support elementwise arithmetic (e.g. a NumPy array or a
    pandas Series); the result is a scalar for 1-D input and one value per
    column for 2-D input.
    """
    squared = data * data
    mean_square = np.mean(squared, axis=0)
    return np.sqrt(mean_square)

def SSI(data): ##Simple Square Integral
    """Return the sum of the squared elements of ``data`` along axis 0.

    ``data`` must support elementwise arithmetic (e.g. a NumPy array or a
    pandas Series); the result is a scalar for 1-D input and one value per
    column for 2-D input.
    """
    squared = data * data
    return np.sum(squared, axis=0)

def abs_diffs_signal(data): ##absolute differential signal
    """Return the summed absolute difference between consecutive entries.

    Differences are taken between neighbouring entries along axis 0, their
    absolute values are then added up along the same axis. The result is a
    scalar for 1-D input and one value per column for 2-D input.
    """
    consecutive_steps = np.diff(data, axis=0)
    step_sizes = np.abs(consecutive_steps)
    return np.sum(step_sizes, axis=0)

##function for returning an estimator class name 
def print_estimator_name(estimator):
    """Return the class name of ``estimator`` as a string.

    Despite the function's name nothing is printed; the caller receives
    the name and decides what to do with it.
    """
    estimator_class = estimator.__class__
    return estimator_class.__name__

In [None]:
##tabulating the aggregated sEMG features
## Every statistic in the list is evaluated per group, giving one row per group
feature_functions = ['min', 'max', rms, SSI, abs_diffs_signal, np.ptp]
dataset = dataset.agg(feature_functions)

In [None]:
## Move the group keys out of the index and back into ordinary columns,
## then preview the first rows of the feature table
dataset = dataset.reset_index(drop=False)
dataset.head(n=5)

In [None]:
## Second name for the feature table; the classifier cells below rebind
## `dataset` from it. This is an alias of the same object, not a copy.
temp2 = dataset

In [None]:
## Training SVM Classifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    ## plot_confusion_matrix was removed in scikit-learn 1.2. Keep the same
    ## name available (this cell and the later classifier cells call it) by
    ## delegating to its replacement, ConfusionMatrixDisplay.from_estimator.
    from sklearn.metrics import ConfusionMatrixDisplay

    def plot_confusion_matrix(estimator, X, y_true, **kwargs):
        """Draw the confusion matrix of a fitted ``estimator`` on ``(X, y_true)``.

        Extra keyword arguments are forwarded unchanged to
        ``ConfusionMatrixDisplay.from_estimator``; its display object is returned.
        """
        return ConfusionMatrixDisplay.from_estimator(estimator, X, y_true, **kwargs)

## Result lists shared by all classifier cells: every cell appends one entry
## to each, and the comparison chart at the end plots f1_macro and accuracy.
estimators=[]
accuracy=[]
f1_macro=[]
svc=SVC(C=100,kernel='poly',gamma=0.01,decision_function_shape='ovo')
## Inputs are all aggregated columns; the target is the "class" column
features=dataset.drop(columns=["label","class"])
labels=dataset["class"]
##Training and testing set splitting
X_train, X_test,y_train,y_test= train_test_split(features,labels,test_size=0.30, random_state=1)
## Data normalization
## Mean and std come from the training split only and are applied to both
## splits, so no statistic of the test rows enters the scaling.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
svc.fit(X_train,y_train)
y_test_pred=svc.predict(X_test)
## y: macro-averaged F1, x: confusion matrix, a: accuracy (all on the test split)
y=f1_score(y_test,y_test_pred,average='macro')
x=confusion_matrix(y_test,y_test_pred)
a=accuracy_score(y_test,y_test_pred)
print(y)
print(x)
print(a)
estimators.append(print_estimator_name(svc))
f1_macro.append(y)
accuracy.append(a)
plot_confusion_matrix(svc,X_test,y_test)
plt.show()


In [None]:
## Display the estimator names recorded so far
estimators

In [None]:
## Training KNN Classifier
dataset = temp2
knn = KNeighborsClassifier(
    n_neighbors=10,
    weights='distance',
    algorithm='auto',
    leaf_size=5,
    metric='minkowski',
    p=2,
)
## Target is the "class" column; inputs are all remaining aggregated columns
labels = dataset["class"]
features = dataset.drop(columns=["label", "class"])
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.30, random_state=1
)
## Standardize both splits with the mean and std of the training split
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
knn.fit(X_train, y_train)
y_test_pred = knn.predict(X_test)
## y: macro-averaged F1, x: confusion matrix, a: accuracy (all on the test split)
y = f1_score(y_test, y_test_pred, average='macro')
x = confusion_matrix(y_test, y_test_pred)
a = accuracy_score(y_test, y_test_pred)
for metric_value in (y, x, a):
    print(metric_value)
## Record this classifier's name and scores in the shared result lists
estimators.append(print_estimator_name(knn))
f1_macro.append(y)
accuracy.append(a)
plot_confusion_matrix(knn, X_test, y_test)
plt.show()

In [None]:
## Training Decision Tree Classifier
dataset=temp2
## With max_features=5 only a random subset of the features is examined at
## each split, so the fitted tree changes from run to run unless the
## estimator is seeded. random_state=1 (the seed already used for the
## train/test split) makes the reported scores reproducible.
dt=DecisionTreeClassifier(criterion= 'entropy', max_depth= 50, max_features= 5, max_leaf_nodes= 50,min_samples_split=2,random_state=1)
## Inputs are all aggregated columns; the target is the "class" column
features=dataset.drop(columns=["label","class"])
labels=dataset["class"]
X_train, X_test,y_train,y_test= train_test_split(features,labels,test_size=0.30, random_state=1)
## Standardize both splits with the mean and std of the training split
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
dt.fit(X_train,y_train)
y_test_pred=dt.predict(X_test)
## y: macro-averaged F1, x: confusion matrix, a: accuracy (all on the test split)
y=f1_score(y_test,y_test_pred,average='macro')
x=confusion_matrix(y_test,y_test_pred)
a=accuracy_score(y_test,y_test_pred)
print(y)
print(x)
print(a)
## Record this classifier's name and scores in the shared result lists
estimators.append(print_estimator_name(dt))
f1_macro.append(y)
accuracy.append(a)
plot_confusion_matrix(dt,X_test,y_test)
plt.show()

In [None]:
## Training Random Forest classifier
dataset=temp2
## The forest draws bootstrap samples and random feature subsets, so without
## a seed the test and OOB scores differ on every run. random_state=1 (the
## seed already used for the train/test split) makes them reproducible.
rfc=RandomForestClassifier(criterion= 'entropy', max_depth= 50, max_features= 5, max_leaf_nodes= 50,min_samples_split=2,n_estimators=2000,oob_score=True,random_state=1)
## Inputs are all aggregated columns; the target is the "class" column
features=dataset.drop(columns=["label","class"])
labels=dataset["class"]
X_train, X_test,y_train,y_test= train_test_split(features,labels,test_size=0.30, random_state=1)
## Standardize both splits with the mean and std of the training split
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
rfc.fit(X_train,y_train)
y_test_pred=rfc.predict(X_test)
## y: macro-averaged F1, x: confusion matrix, a: accuracy (all on the test split)
y=f1_score(y_test,y_test_pred,average='macro')
x=confusion_matrix(y_test,y_test_pred)
a=accuracy_score(y_test,y_test_pred)
print('F1_macro=',y)
print(x)
print('Accuracy=',a)
## oob_score_ is available because the forest was built with oob_score=True
print('OOB validation score=',rfc.oob_score_)
## Record this classifier's name and scores in the shared result lists
estimators.append(print_estimator_name(rfc))
f1_macro.append(y)
accuracy.append(a)
plot_confusion_matrix(rfc,X_test,y_test)
plt.show()

In [None]:
## Display the names of all estimators recorded by the classifier cells
estimators

In [None]:
## Round each recorded score to three decimals, updating the lists in place;
## these are the numbers later written onto the bars of the comparison chart
for position in range(len(estimators)):
    f1_macro[position] = round(f1_macro[position], 3)
    accuracy[position] = round(accuracy[position], 3)

In [None]:
## plotting results of comparative study
## Short display names replace the class names collected by the classifier cells
estimators = ['SVC', 'KNN', 'DecisionTree', 'RandomForest']
fig, ax = plt.subplots()
width = 0.2
## One pair of bars per classifier: F1 at the integer position, accuracy
## shifted right by one bar width
positions = np.arange(len(estimators))
bar_plot1 = ax.bar(positions, f1_macro, width, tick_label=estimators)
bar_plot2 = ax.bar(positions + width, accuracy, width, tick_label=estimators)

ax.set_xlabel('Classifiers')
ax.set_ylabel('Metric Performance')
ax.legend(('F1_macro', 'Accuracy'), loc='upper right')

def autolabel(rects):
    """Write each bar's height as rotated text inside the bar.

    For every rectangle in ``rects`` the height is drawn on the module-level
    axes ``ax``, horizontally centred on the bar and anchored at half of the
    bar's height.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width()/2.
        ax.text(x_center, 0.5*bar_height, bar_height, ha='center', va='bottom', rotation=90)
## Annotate both bar groups with their values, then render the figure
for bar_group in (bar_plot1, bar_plot2):
    autolabel(bar_group)
plt.show()