In [38]:
import numpy as np
import pandas as pd
import math
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import model_selection
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

# MUST ADJUST OFFSET!!! http://www.sfu.ca/~dmackey/Chap%203-4%20EMG.pdf

# features from https://link.springer.com/article/10.1007/s11517-019-02073-z

def detrend(signal):
    detrended = []
    signal = np.array(signal)
    for k in range(7, len(signal)):
        detrended.append((signal[k] - (sum(signal[k-7:k])/8)))
    return detrended


data = pd.read_csv("EMG_Training_E.csv")
data2 = pd.read_csv("EMG_Training_H.csv")
data3 = pd.read_csv("EMG_Training_S.csv")

training_data_pure1 = np.array(data["input"][0:99])
training_data_overlapped1 = np.array(data["input"][99:])

training_labels_pure1 = np.array(data["label"][0:99])
training_labels_overlapped1 = np.array(data["label"][99:])

training_data_pure2 = np.array(data2["input"][0:99])
training_data_overlapped2 = np.array(data2["input"][99:])

training_labels_pure2 = np.array(data2["label"][0:99])
training_labels_overlapped2 = np.array(data2["label"][99:])

training_data_pure3 = np.array(data3["input"][0:49])
training_data_overlapped3 = np.array(data3["input"][49:])

training_labels_pure3 = np.array(data3["label"][0:49])
training_labels_overlapped3 = np.array(data3["label"][49:])

training_data_pure = np.concatenate((training_data_pure1, training_data_pure2, training_data_pure3))
training_data_overlapped = np.concatenate((training_data_overlapped1, training_data_overlapped2, training_data_overlapped3))

training_labels_pure = np.concatenate((training_labels_pure1, training_labels_pure2, training_labels_pure3))
training_labels_overlapped = np.concatenate((training_labels_overlapped1, training_labels_overlapped2, training_labels_overlapped3))

training_labels = np.concatenate((training_labels_pure, training_labels_overlapped))

# remove first 200 samples because of gesture delay
processed_training_data_pure = []
processed_training_data_overlapped = []

for val in training_data_pure:
    # process string and read it as integers
    val = val[1:len(val)-1]
    processed_training_data_pure.append(np.array([float(x.strip()) for x in val.split(",")[200:]]))

for val in training_data_overlapped:
    # process string and read it as integers
    val = val[1:len(val) - 1]
    processed_training_data_overlapped.append(np.array([float(x.strip()) for x in val.split(",")[200:]]))

processed_training_data = []
processed_training_data.extend(processed_training_data_pure)
processed_training_data.extend(processed_training_data_overlapped)

# detrend and recenter around 0
training_data_recentered = []

for arr in processed_training_data:
    offset = np.median(arr)
    recentered = [(x - offset) for x in arr]
    detrended = detrend(recentered)
    training_data_recentered.append(detrended)

# apply highpass filter to avoid aliasing effects
filtered_data = []

for arr in training_data_recentered:
    fft = np.fft.rfft(arr)
    sr = (len(arr) + 200)/3
    freqs = np.fft.rfftfreq(len(fft), 1/sr)
    index = np.searchsorted(freqs, sr/2)

    lowpass = fft[:]
    lowpass[index:] = 0
    filtered_data.append(np.fft.irfft(lowpass))


rectified_training = []

for rest in filtered_data:
    rectified_training.append(np.abs(rest))


# since we have a low sample rate, take these for the entire window
iav_training = []

for rest in rectified_training:
    iav_training.append(np.sum(rest))

mav_training = []

for rest in rectified_training:
    mav_training.append(np.sum(rest)/len(rest))


rms_training = []

for rest in rectified_training:
    rms_training.append(math.sqrt(np.sum(rest ** 2)/len(rest)))


std_training = []

for rest in filtered_data:
    std_training.append(np.std(rest))

var_training = []

for rest in filtered_data:
    var_training.append(np.var(rest))

wl_training = []

for rest in filtered_data:
    count = 0
    for i in range(1, len(rest)):
        count = count + abs(rest[i] - rest[i-1])
    wl_training.append(count)


all_features_training = np.array([iav_training, mav_training, rms_training, std_training, var_training, wl_training])
all_features_training = np.transpose(all_features_training)


# training_labels1 = np.array(data["label"])
# training_labels2 = np.array(data2["label"])
# training_labels3 = np.array(data3["label"])
#
# training_labels = np.concatenate((training_labels1, training_labels2, training_labels3))


####### TRAINING ########
X_train = all_features_training
y_train = training_labels

# scale data
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

models = [
          ('RF', RandomForestClassifier()),
          ('KNN', KNeighborsClassifier()),
          ('SVM', SVC(C=100, kernel="rbf")),
        ('GB', GradientBoostingClassifier())
]
results = []
names = []
scoring = ['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted', 'roc_auc']
target_names = ['rest', 'right']
scoring = ['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted', 'roc_auc']
# kfold = model_selection.KFold(n_splits=5, shuffle=True, random_state=90210)
for name, model in models:
    print(name)
    display(pd.DataFrame(model_selection.cross_validate(model, X_train, y_train, cv=5, return_train_score=True, scoring=scoring)).describe().loc[['mean','std']])


RF


Unnamed: 0,fit_time,score_time,test_accuracy,train_accuracy,test_precision_weighted,train_precision_weighted,test_recall_weighted,train_recall_weighted,test_f1_weighted,train_f1_weighted,test_roc_auc,train_roc_auc
mean,0.318861,0.05293,0.937374,1.0,0.950787,1.0,0.937374,1.0,0.935464,1.0,0.984122,1.0
std,0.008631,0.003632,0.084933,0.0,0.057457,0.0,0.084933,0.0,0.089099,0.0,0.031557,0.0


KNN


Unnamed: 0,fit_time,score_time,test_accuracy,train_accuracy,test_precision_weighted,train_precision_weighted,test_recall_weighted,train_recall_weighted,test_f1_weighted,train_f1_weighted,test_roc_auc,train_roc_auc
mean,0.002901,0.022891,0.943434,0.966728,0.954658,0.966955,0.943434,0.966728,0.942039,0.966726,0.965714,0.996627
std,0.000741,0.00348,0.077126,0.012291,0.054021,0.012196,0.077126,0.012291,0.080162,0.012291,0.041283,0.001792


SVM


Unnamed: 0,fit_time,score_time,test_accuracy,train_accuracy,test_precision_weighted,train_precision_weighted,test_recall_weighted,train_recall_weighted,test_f1_weighted,train_f1_weighted,test_roc_auc,train_roc_auc
mean,0.004392,0.011486,0.949495,0.971267,0.959459,0.971469,0.949495,0.971267,0.948322,0.971266,0.978776,0.994139
std,0.000884,0.000517,0.074911,0.009541,0.053786,0.009455,0.074911,0.009541,0.077475,0.009541,0.040154,0.001238


GB


Unnamed: 0,fit_time,score_time,test_accuracy,train_accuracy,test_precision_weighted,train_precision_weighted,test_recall_weighted,train_recall_weighted,test_f1_weighted,train_f1_weighted,test_roc_auc,train_roc_auc
mean,0.225561,0.01161,0.933333,1.0,0.947258,1.0,0.933333,1.0,0.931405,1.0,0.988776,1.0
std,0.011843,0.000559,0.083172,0.0,0.05606,0.0,0.083172,0.0,0.087307,0.0,0.011905,7.850462000000001e-17
