In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn

In [2]:
#Classifying EMG signals into various categories

In [3]:
import os
import pandas as pd

actions = {}

data_dirs = ["./EMG Physical Action Data Set/sub1/Aggressive/txt",
             "./EMG Physical Action Data Set/sub1/Normal/txt"]

ind = 0
data = pd.DataFrame()

for dirs in data_dirs:
    for files in os.listdir(dirs):
        with open(os.path.join(dirs, files), "r") as f:
            temp = pd.read_csv(f.name,
                               sep="\t",
                               header=None,
                               names=["ch" + str(i) for i in range(1, 9)])  # 8 input characters

            # Chunking using MAX of every 10 sequential values
            temp_chunked = pd.DataFrame()

            for i in range(0, len(temp), 10):
                temp_chunked = pd.concat([temp_chunked, temp.iloc[i:i+10].max().to_frame().T], ignore_index=True)

            labels = [files[:-4] for i in range(len(temp_chunked))]
            actions[files[:-4]] = ind

            temp_chunked["Action"] = labels

            data = pd.concat([data, temp_chunked])

    ind += 1

print(actions)


{'Elbowing': 0, 'Frontkicking': 0, 'Hamering': 0, 'Headering': 0, 'Kneeing': 0, 'Pulling': 0, 'Punching': 0, 'Pushing': 0, 'Sidekicking': 0, 'Slapping': 0, 'Bowing': 1, 'Clapping': 1, 'Handshaking': 1, 'Hugging': 1, 'Jumping': 1, 'Running': 1, 'Seating': 1, 'Standing': 1, 'Walking': 1, 'Waving': 1}


In [4]:
data.head()

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,Action
0,717,391,2615,-29,4000,205,1084,4000,Elbowing
1,1036,251,2989,162,4000,2971,3062,4000,Elbowing
2,3705,30,4000,549,4000,2940,-1767,-205,Elbowing
3,2679,347,1566,167,-4000,2758,-3965,785,Elbowing
4,1689,77,4000,-246,4000,2422,-1767,360,Elbowing


In [5]:
Y =data["Action"]
X = data.drop(columns = ["Action"])

In [6]:
print(Y.unique())

['Elbowing' 'Frontkicking' 'Hamering' 'Headering' 'Kneeing' 'Pulling'
 'Punching' 'Pushing' 'Sidekicking' 'Slapping' 'Bowing' 'Clapping'
 'Handshaking' 'Hugging' 'Jumping' 'Running' 'Seating' 'Standing'
 'Walking' 'Waving']


In [7]:
Y = Y.map(actions)
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: Action, dtype: int64

In [8]:
X = abs(X)
X.head()

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8
0,717,391,2615,29,4000,205,1084,4000
1,1036,251,2989,162,4000,2971,3062,4000
2,3705,30,4000,549,4000,2940,1767,205
3,2679,347,1566,167,4000,2758,3965,785
4,1689,77,4000,246,4000,2422,1767,360


In [9]:
X - X.ewm(10).mean()
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19711 entries, 0 to 999
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   ch1     19711 non-null  int64
 1   ch2     19711 non-null  int64
 2   ch3     19711 non-null  int64
 3   ch4     19711 non-null  int64
 4   ch5     19711 non-null  int64
 5   ch6     19711 non-null  int64
 6   ch7     19711 non-null  int64
 7   ch8     19711 non-null  int64
dtypes: int64(8)
memory usage: 1.4 MB


In [10]:
from sklearn.model_selection import train_test_split

X = np.array(X.values)
Y = np.array(Y.values)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, shuffle = True)

print(f"Sizes of the sets created are: \nTraining set:{X_train.shape[0]}\nTest set:{X_test.shape[0]}")

Sizes of the sets created are: 
Training set:15768
Test set:3943


## XGBoost

In [13]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold

import datetime as dt

params = {
        "n_estimators": [50,100,150,200],
        "max_depth" : [3, 4, 5, 7],
        "learning_rate": [0.1, 0.2, 0.3],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        }

xgb = XGBClassifier(objective='multi:softmax', num_class=20, silent=True)

In [17]:
random_search = RandomizedSearchCV(xgb, param_distributions = params,
                                  n_iter=10,
                                  scoring='accuracy',
                                  n_jobs = -1,
                                  cv = 3,
                                verbose=0)

start = dt.datetime.now()
random_search.fit(X_train, Y_train)
end = dt.datetime.now()

Parameters: { "silent" } are not used.



In [16]:
res = random_search.cv_results_

for i in range(len(res["params"])):
    print(f"Parameters:{res['params'][i]} Mean_score: {res['mean_test_score'][i]} Rank: {res['rank_test_score'][i]}")

Parameters:{'subsample': 0.8, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.8} Mean_score: 0.9027777777777778 Rank: 1
Parameters:{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.3, 'colsample_bytree': 0.6} Mean_score: 0.8968797564687976 Rank: 10
Parameters:{'subsample': 0.8, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.2, 'colsample_bytree': 0.8} Mean_score: 0.9014459665144597 Rank: 4
Parameters:{'subsample': 0.6, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.3, 'colsample_bytree': 0.8} Mean_score: 0.9006849315068494 Rank: 6
Parameters:{'subsample': 0.8, 'n_estimators': 50, 'max_depth': 4, 'learning_rate': 0.3, 'colsample_bytree': 0.6} Mean_score: 0.8988457635717909 Rank: 8
Parameters:{'subsample': 0.6, 'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.8} Mean_score: 0.9026509386098427 Rank: 2
Parameters:{'subsample': 0.6, 'n_estimators': 50, 'max_depth': 5, 'learning_rate':

In [19]:
print(f"Time taken for fits: {end - start}")

Time taken for fits: 0:01:19.515068


In [20]:
print(random_search.best_estimator_)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=1.0, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.2, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=7, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=150, n_jobs=None, num_class=20,
              num_parallel_tree=None, ...)


In [21]:
xgb = random_search.best_estimator_
xgb.fit(X_train, Y_train)

print("Model acc", xgb.score(X_test, Y_test))

Parameters: { "silent" } are not used.



Model acc 0.898808014202384


In [22]:
xgb.score(X_train, Y_train) 

0.9982876712328768

In [27]:
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

import datetime as dt
gridParams = {

    'learning_rate':[0.1,0.3,0.5],
    'boosting_type': ['gbdt'],
    'objective' : ['multiclass'],
    'max_depth': [5,6,7,8],
    'colsample_bytree': [0.5,0.7],
    'subsample': [0.5,0.7],
}

clf = lgb.LGBMClassifier(num_classes=20)
random_cv = RandomizedSearchCV(clf, gridParams, verbose=0, cv = 3, n_jobs = -1, n_iter=10)

In [28]:
start = dt.datetime.now()
random_cv.fit(X_train, Y_train)
end = dt.datetime.now()



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2038
[LightGBM] [Info] Number of data points in the train set: 15768, number of used features: 8
[LightGBM] [Info] Start training from score -0.695179
[LightGBM] [Info] Start training from score -0.691120
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776

In [26]:
res = random_cv.cv_results_

for i in range(len(res["params"])):
  print(f"Parameters:{res['params'][i]} Mean_score: {res['mean_test_score'][i]} Rank: {res['rank_test_score'][i]}")


Parameters:{'subsample': 0.5, 'objective': 'multiclass', 'max_depth': 8, 'learning_rate': 0.5, 'colsample_bytree': 0.5, 'boosting_type': 'gbdt'} Mean_score: 0.8900304414003045 Rank: 9
Parameters:{'subsample': 0.5, 'objective': 'multiclass', 'max_depth': 7, 'learning_rate': 0.5, 'colsample_bytree': 0.7, 'boosting_type': 'gbdt'} Mean_score: 0.894279553526129 Rank: 4
Parameters:{'subsample': 0.7, 'objective': 'multiclass', 'max_depth': 5, 'learning_rate': 0.5, 'colsample_bytree': 0.5, 'boosting_type': 'gbdt'} Mean_score: 0.8893328259766616 Rank: 10
Parameters:{'subsample': 0.7, 'objective': 'multiclass', 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.7, 'boosting_type': 'gbdt'} Mean_score: 0.9040461694571283 Rank: 1
Parameters:{'subsample': 0.7, 'objective': 'multiclass', 'max_depth': 6, 'learning_rate': 0.5, 'colsample_bytree': 0.5, 'boosting_type': 'gbdt'} Mean_score: 0.890347539320142 Rank: 8
Parameters:{'subsample': 0.7, 'objective': 'multiclass', 'max_depth': 6, 'learnin

In [30]:
print(f"Time taken for fits : {end - start}")

Time taken for fits : 0:00:27.013563


In [31]:
lgb = random_cv.best_estimator_
lgb.fit(X_train, Y_train)

print("Model acc", lgb.score(X_test, Y_test))



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2038
[LightGBM] [Info] Number of data points in the train set: 15768, number of used features: 8
[LightGBM] [Info] Start training from score -0.695179
[LightGBM] [Info] Start training from score -0.691120
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -34.538776

