In [21]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn

In [22]:
#Classifying EMG signals into various categories

In [23]:
import os
import pandas as pd

actions = {}

data_dirs = ["./EMG Physical Action Data Set/sub1/Aggressive/txt",
             "./EMG Physical Action Data Set/sub1/Normal/txt"]

ind = 0
data = pd.DataFrame()

for dirs in data_dirs:
    for files in os.listdir(dirs):
        with open(os.path.join(dirs, files), "r") as f:
            temp = pd.read_csv(f.name,
                               sep="\t",
                               header=None,
                               names=["ch" + str(i) for i in range(1, 9)])  # 8 input characters

            # Chunking using MAX of every 10 sequential values
            temp_chunked = pd.DataFrame()

            for i in range(0, len(temp), 10):
                temp_chunked = pd.concat([temp_chunked, temp.iloc[i:i+10].max().to_frame().T], ignore_index=True)

            labels = [files[:-4] for i in range(len(temp_chunked))]
            actions[files[:-4]] = ind

            temp_chunked["Action"] = labels

            data = pd.concat([data, temp_chunked])

    ind += 1

print(actions)


{'Elbowing': 0, 'Frontkicking': 0, 'Hamering': 0, 'Headering': 0, 'Kneeing': 0, 'Pulling': 0, 'Punching': 0, 'Pushing': 0, 'Sidekicking': 0, 'Slapping': 0, 'Bowing': 1, 'Clapping': 1, 'Handshaking': 1, 'Hugging': 1, 'Jumping': 1, 'Running': 1, 'Seating': 1, 'Standing': 1, 'Walking': 1, 'Waving': 1}


In [24]:
data.head()

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,Action
0,717,391,2615,-29,4000,205,1084,4000,Elbowing
1,1036,251,2989,162,4000,2971,3062,4000,Elbowing
2,3705,30,4000,549,4000,2940,-1767,-205,Elbowing
3,2679,347,1566,167,-4000,2758,-3965,785,Elbowing
4,1689,77,4000,-246,4000,2422,-1767,360,Elbowing


In [26]:
Y =data["Action"]
X = data.drop(columns = ["Action"])

In [27]:
print(Y.unique())

['Elbowing' 'Frontkicking' 'Hamering' 'Headering' 'Kneeing' 'Pulling'
 'Punching' 'Pushing' 'Sidekicking' 'Slapping' 'Bowing' 'Clapping'
 'Handshaking' 'Hugging' 'Jumping' 'Running' 'Seating' 'Standing'
 'Walking' 'Waving']


In [28]:
Y = Y.map(actions)
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: Action, dtype: int64

In [29]:
X = abs(X)
X.head()

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8
0,717,391,2615,29,4000,205,1084,4000
1,1036,251,2989,162,4000,2971,3062,4000
2,3705,30,4000,549,4000,2940,1767,205
3,2679,347,1566,167,4000,2758,3965,785
4,1689,77,4000,246,4000,2422,1767,360


In [30]:
X - X.ewm(10).mean()
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19711 entries, 0 to 999
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   ch1     19711 non-null  int64
 1   ch2     19711 non-null  int64
 2   ch3     19711 non-null  int64
 3   ch4     19711 non-null  int64
 4   ch5     19711 non-null  int64
 5   ch6     19711 non-null  int64
 6   ch7     19711 non-null  int64
 7   ch8     19711 non-null  int64
dtypes: int64(8)
memory usage: 1.4 MB


In [31]:
from sklearn.model_selection import train_test_split

X = np.array(X.values)
Y = np.array(Y.values)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, shuffle = True)

print(f"Sizes of the sets created are: \nTraining set:{X_train.shape[0]}\nTest set:{X_test.shape[0]}")

Sizes of the sets created are: 
Training set:15768
Test set:3943


In [43]:
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn import tree
from sklearn.model_selection import GridSearchCV

params = {
    "max_depth":[3,5,7,10,15],
    "max_leaf_nodes" : [20,40,60]
}

model1 = DTC()
clf = GridSearchCV(model1, params, scoring = "accuracy", cv=5)

clf.fit(X_train, Y_train)

In [44]:
res = clf.cv_results_

for i in range(len(res["params"])):
    print(f"Parameters:{res['params'][i]} Mean_score: {res['mean_test_score'][i]} Rank: {res['rank_test_score'][i]}")

Parameters:{'max_depth': 3, 'max_leaf_nodes': 20} Mean_score: 0.8689747220641794 Rank: 13
Parameters:{'max_depth': 3, 'max_leaf_nodes': 40} Mean_score: 0.8689747220641794 Rank: 13
Parameters:{'max_depth': 3, 'max_leaf_nodes': 60} Mean_score: 0.8689747220641794 Rank: 13
Parameters:{'max_depth': 5, 'max_leaf_nodes': 20} Mean_score: 0.8728429668395652 Rank: 12
Parameters:{'max_depth': 5, 'max_leaf_nodes': 40} Mean_score: 0.8736673973172474 Rank: 10
Parameters:{'max_depth': 5, 'max_leaf_nodes': 60} Mean_score: 0.8736673973172474 Rank: 10
Parameters:{'max_depth': 7, 'max_leaf_nodes': 20} Mean_score: 0.8753161778266352 Rank: 7
Parameters:{'max_depth': 7, 'max_leaf_nodes': 40} Mean_score: 0.8846390620320935 Rank: 3
Parameters:{'max_depth': 7, 'max_leaf_nodes': 60} Mean_score: 0.8840683380524954 Rank: 4
Parameters:{'max_depth': 10, 'max_leaf_nodes': 20} Mean_score: 0.8753161778266352 Rank: 7
Parameters:{'max_depth': 10, 'max_leaf_nodes': 40} Mean_score: 0.8840048661771126 Rank: 5
Parameters:{'

In [45]:
clf = DTC(**clf.best_params_)
clf.fit(X_train, Y_train)

In [46]:
clf.score(X_train, Y_train)

0.9023972602739726

In [47]:
clf.score(X_test, Y_test)

0.8858737002282526

In [7]:
from sklearn.ensemble import GradientBoostingClassifier

In [9]:
gbc = GradientBoostingClassifier(n_estimators = 150)
gbc.fit(X_train, y_train)

In [10]:
gbc.score(X_train, y_train)

0.9773139745916516

In [11]:
gbc.score(X_test, y_test)

0.875

In [51]:
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn import tree
from sklearn.model_selection import RandomizedSearchCV
import datetime as dt

params = {
    "n_estimators": [100,150,200],
    "max_depth" : [3,5,10,15,20],
    #"max_leaf_nodes" : [20,40,80]
}

rfc = RFC(n_jobs = -1)
clf = RandomizedSearchCV(rfc, params, scoring = "accuracy", cv=2, n_jobs = -1, verbose = 1)

clf.fit(X_train, Y_train)

Fitting 2 folds for each of 10 candidates, totalling 20 fits


In [52]:
res = clf.cv_results_

for i in range(len(res["params"])):
    print(f"Parameters:{res['params'][i]} Mean_score: {res['mean_test_score'][i]} Rank: {res['rank_test_score'][i]}")

Parameters:{'n_estimators': 200, 'max_depth': 20} Mean_score: 0.901002029426687 Rank: 3
Parameters:{'n_estimators': 150, 'max_depth': 15} Mean_score: 0.9016362252663623 Rank: 1
Parameters:{'n_estimators': 200, 'max_depth': 5} Mean_score: 0.8815322171486555 Rank: 8
Parameters:{'n_estimators': 200, 'max_depth': 10} Mean_score: 0.898972602739726 Rank: 5
Parameters:{'n_estimators': 150, 'max_depth': 3} Mean_score: 0.8706240487062404 Rank: 9
Parameters:{'n_estimators': 100, 'max_depth': 10} Mean_score: 0.8989091831557585 Rank: 6
Parameters:{'n_estimators': 150, 'max_depth': 5} Mean_score: 0.8816590563165906 Rank: 7
Parameters:{'n_estimators': 100, 'max_depth': 3} Mean_score: 0.8706240487062404 Rank: 9
Parameters:{'n_estimators': 150, 'max_depth': 20} Mean_score: 0.9013825469304921 Rank: 2
Parameters:{'n_estimators': 100, 'max_depth': 15} Mean_score: 0.9003044140030441 Rank: 4


In [55]:
rf = RFC(**clf.best_params_)
rf.fit(X_train, Y_train)
print("Model acc", rf.score(X_test, Y_test))

Model acc 0.9033730661932539


## GBDT
#### This will take 30 minutes to run

In [56]:
params = {
    "n_estimators": [50,100,150,200],
    "max_depth":[3,4,5,7],
    "max_leaf_nodes": [20,40,80],
    "learning_rate":[0.1,0.2,0.3]
}

In [58]:
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.model_selection import RandomizedSearchCV
import datetime as dt


gbc = GBC()
clf = RandomizedSearchCV(gbc, params, scoring = "accuracy", cv=3, n_jobs = -1, verbose = 1)

start = dt.datetime.now()

clf.fit(X_train, Y_train)

end = dt.datetime.now()



Fitting 3 folds for each of 10 candidates, totalling 30 fits


In [59]:
res = clf.cv_results_

for i in range(len(res["params"])):
  print(f"Parameters:{res['params'][i]} Mean_score: {res['mean_test_score'][i]} Rank: {res['rank_test_score'][i]}")


Parameters:{'n_estimators': 200, 'max_leaf_nodes': 80, 'max_depth': 7, 'learning_rate': 0.1} Mean_score: 0.9018899036022324 Rank: 1
Parameters:{'n_estimators': 200, 'max_leaf_nodes': 40, 'max_depth': 4, 'learning_rate': 0.3} Mean_score: 0.8949771689497718 Rank: 8
Parameters:{'n_estimators': 200, 'max_leaf_nodes': 20, 'max_depth': 4, 'learning_rate': 0.2} Mean_score: 0.8970065956367326 Rank: 5
Parameters:{'n_estimators': 200, 'max_leaf_nodes': 40, 'max_depth': 3, 'learning_rate': 0.3} Mean_score: 0.8952308472856418 Rank: 7
Parameters:{'n_estimators': 100, 'max_leaf_nodes': 40, 'max_depth': 3, 'learning_rate': 0.2} Mean_score: 0.8949137493658043 Rank: 9
Parameters:{'n_estimators': 150, 'max_leaf_nodes': 80, 'max_depth': 5, 'learning_rate': 0.3} Mean_score: 0.8961821410451547 Rank: 6
Parameters:{'n_estimators': 50, 'max_leaf_nodes': 20, 'max_depth': 5, 'learning_rate': 0.2} Mean_score: 0.8986555048198884 Rank: 4
Parameters:{'n_estimators': 200, 'max_leaf_nodes': 40, 'max_depth': 7, 'learn

In [60]:
print(f"Time taken for fits : {end - start}")

Time taken for fits : 0:02:02.660804


In [61]:
print(clf.best_estimator_)

GradientBoostingClassifier(max_depth=7, max_leaf_nodes=80, n_estimators=200)


In [64]:
gbc = clf.best_estimator_

gbc.fit(X_train, Y_train)

print("Model acc",gbc.score(X_test, Y_test))



Model acc 0.9026122241947756
