In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from time import time


In [None]:
# Read the raw EMG recordings and discard the 'time' column before any grouping.
df = pd.read_csv('../input/emg-signal-for-gesture-recognition/EMG-data.csv').drop(columns='time')
df.head()

In [None]:
# Number of rows recorded for each value of the 'class' column.
class_counts = df['class'].value_counts()
class_counts

In [None]:
# How many distinct values the 'label' column takes.
label_counts = df['label'].value_counts()
len(label_counts)

In [None]:
# 
# plt.plot(df[df['label']==1].iloc[0:500,0:-2])

In [None]:
# One group per (label, class) pair; features are later aggregated per group.
subjects_data = df.groupby(by=['label', 'class'])

In [None]:
import numpy as np

def rms(data):
    """Return the root mean square of ``data`` taken along axis 0.

    ``data`` must support ``** 2``; the mean and square root are delegated
    to NumPy, so array-like inputs are reduced over their first axis.
    """
    squared = data ** 2
    mean_of_squares = np.mean(squared, axis=0)
    return np.sqrt(mean_of_squares)

def abs_diffs_signal(data):
    """Return the summed absolute first differences of ``data`` along axis 0.

    Consecutive samples are differenced along the first axis, the magnitudes
    of those steps are taken, and the magnitudes are added up along that axis.
    """
    steps = np.diff(data, axis=0)
    step_sizes = np.abs(steps)
    return step_sizes.sum(axis=0)


def zero_crossing(data):
    """Count how many times ``data`` changes sign along axis 0.

    The previous implementation returned ``np.argmax`` of the sign
    difference, i.e. the *position* of the largest sign jump, which is not a
    zero-crossing count. This version counts every transition between a
    negative sample and a non-negative one.

    Parameters
    ----------
    data : array-like
        Samples ordered along the first axis (1-D signal or 2-D with one
        column per channel).

    Returns
    -------
    Integer count for 1-D input, or an array of counts (one per column) for
    2-D input. Inputs with fewer than two samples yield 0.
    """
    samples = np.asarray(data)
    # Exact zeros are grouped with the positive samples so that passing
    # through 0 on the way from + to - is counted once, not twice.
    is_negative = samples < 0
    sign_changes = is_negative[1:] != is_negative[:-1]
    return np.sum(sign_changes, axis=0)



In [None]:
#features=subjects_data.agg(['mean','std','var', 'min','max',ptp,rms,zero_crossing,abs_diffs_signal])
# features=subjects_data.agg([ 'min','max',np.ptp,rms,zero_crossing,abs_diffs_signal])


In [None]:
# Aggregate every (label, class) group into one row of features and time the whole pass.
start=time()
features=subjects_data.agg([ 'min','max',np.ptp,rms,zero_crossing,abs_diffs_signal])
end=time()

# Average over the number of groups that were actually aggregated instead of a
# hard-coded 36*7, so the figure stays correct whatever the data contains.
n_groups = subjects_data.ngroups
print('feature calculation time of one subject one class')
print((end-start)/n_groups)

In [None]:
# Turn the ('label', 'class') group keys from the index into ordinary columns.
# Guarded so re-running this cell is a no-op: a second reset_index() would insert
# an extra 'index' column in front and shift the positional feature slice
# (iloc[:, 2:]) used by the evaluation loop, leaking 'class' into the features.
if isinstance(features.index, pd.MultiIndex):
    features=features.reset_index()
features.head()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,accuracy_score,recall_score,roc_auc_score, precision_score,f1_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold,StratifiedKFold


In [None]:
# Seed shared by the randomised estimators so repeated runs give the same scores.
RANDOM_STATE = 20

# Candidate models, in the same order as the short names used in the result tables
# (KNN, SVM, DT, RF, GB, NB, LDA, LR).
classifiers = [#manhattan,4,distance
    KNeighborsClassifier(metric='manhattan',weights='distance',n_neighbors=4),
    SVC(C=3, gamma=0.04), #2,0.01
    # max_features='auto' is deprecated/removed in recent scikit-learn; for a
    # decision-tree classifier it meant sqrt(n_features), so 'sqrt' is the same setting.
    DecisionTreeClassifier(criterion='entropy',max_depth=7,max_features='sqrt',
                           random_state=RANDOM_STATE),
    RandomForestClassifier(bootstrap=False,max_depth=8,max_features='sqrt',min_samples_leaf=1,
                           min_samples_split=2,n_estimators=100,random_state=RANDOM_STATE),
    GradientBoostingClassifier(random_state=RANDOM_STATE),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    LogisticRegression(C=0.05, penalty='l2')]

In [None]:
import warnings
warnings.filterwarnings('always')

# Leave-one-subject-out evaluation: for every classifier, each subject ('label')
# is held out once as the test set while all other subjects form the training set.
# Per classifier, 'accuracy' and 'f1score' store the list of per-subject scores and
# the two timing entries store the mean fit / predict wall-clock time.
clf_performace={'accuracy':[],'f1score':[],'training_time':[],'testing_time':[]}

# Take the subject ids from the data rather than assuming they are exactly 1..36;
# a missing id would otherwise produce an empty test fold.
subject_ids = np.sort(features['label'].unique())

for clfs in classifiers:
    name = clfs.__class__.__name__
    accuracy=[]
    f1score=[]
    recall=[]
    training_time=[]
    testing_time=[]
    for subject_id in subject_ids:
        held_out = features['label']==subject_id
        train=features[~held_out]
        test=features[held_out]
        # The first two columns are 'label' and 'class'; everything after is a feature.
        X_train=train.iloc[:,2::].values
        X_test=test.iloc[:,2::].values
        y_train=train['class']
        y_test=test['class']

        training_start=time()
        # Scaling is fitted inside the pipeline, i.e. on the training subjects only.
        clf = make_pipeline(StandardScaler(), clfs)
        clf.fit(X_train, y_train)
        training_end=time()

        testing_start=time()
        y_pred=clf.predict(X_test)
        testing_end=time()

        accuracy.append(accuracy_score(y_test,y_pred))
        recall.append(recall_score(y_test,y_pred,average='macro'))
        f1score.append(f1_score(y_test,y_pred,average='macro'))
        training_time.append(training_end-training_start)
        testing_time.append(testing_end-testing_start)
    print(name,np.mean(accuracy))
    clf_performace['accuracy'].append(accuracy)
    clf_performace['f1score'].append(f1score)
    clf_performace['training_time'].append(np.mean(training_time))
    clf_performace['testing_time'].append(np.mean(testing_time))
    

In [None]:
# Each stored entry is one classifier's list of per-subject scores, so averaging
# along axis 1 gives a single mean accuracy / F1 value per classifier.
accuracy_matrix = np.array(clf_performace['accuracy'])
f1_matrix = np.array(clf_performace['f1score'])
a = accuracy_matrix.mean(axis=1)
f = f1_matrix.mean(axis=1)


In [None]:
# Two-row summary table (mean accuracy and mean F1) with one column per classifier.
# The short names must stay in the same order as the `classifiers` list.
# NOTE: this rebinds `df`, which until now held the raw data loaded from the CSV.
classifier_labels = ['KNN','SVM','DT','RF','GB','NB','LDA','LR']
df = pd.DataFrame(np.array([a, f]),
                  index=['Accuracy','F1-score'],
                  columns=classifier_labels)
df

In [None]:
# Grouped bar chart: for every classifier, an accuracy bar and an F1-score bar side by side.
classifier=df.loc['F1-score'].index
y_pos = np.arange(len(classifier))
w=0.4  # bar width; the F1 bar is shifted right by this amount

ya=df.loc['Accuracy'].values
yf=df.loc['F1-score'].values

plt.figure(figsize=(10,5))
plt.bar(y_pos,ya,align='center',width=w,label='Accuracy',color='black')
plt.bar(y_pos+w,yf,align='center',width=w,label='F1-score',color='blue')

# Write each score (rounded to 2 decimals) vertically inside its bar.
for offset, scores in ((0, ya), (w, yf)):
    for index, value in enumerate(scores):
        plt.text(index+offset-0.1,value-0.2, str(np.round(value,2)),rotation=90,color='white',fontsize=12)

plt.xticks(y_pos, classifier,fontsize=11)
plt.yticks(fontsize=11)
plt.ylabel('Score',fontsize=12)
plt.xlabel('Classifiers',fontsize=12)
plt.title('Classifiers Performance',fontsize=12)
plt.legend()
# The keyword is `dpi`; the original `dip=300` was a typo that is either ignored
# or rejected as an unexpected keyword, depending on the matplotlib version.
plt.savefig('result.eps',dpi=300)

In [None]:
# Mean fit / predict time per classifier, one row per classifier.
# NOTE(review): the training time is multiplied by 65 while the testing time is
# not; the reason for this factor is not visible here — confirm it is intended.
scaled_training_time = np.array(clf_performace['training_time'])*65
mean_testing_time = np.array(clf_performace['testing_time'])
pd.DataFrame({'training_time': scaled_training_time,
              'testing_time': mean_testing_time},
             index=['KNN','SVM','DT','RF','GB','NB','LDA','LR'])