In [1]:
import numpy as np
import random
import os
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from joblib import dump

import itertools
from sklearn.utils import shuffle
from scipy import signal
%matplotlib inline


from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

from sklearn.feature_selection import SelectFdr, chi2

from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score

from utils.svm import preProcess, evaluate
from utils.visualize import showMe
from utils.augment import augment
%load_ext autoreload
%autoreload 2


In [2]:
# --- Configuration -----------------------------------------------------------
input_length = 100                  # used later as the per-channel length when flattening trials
root_path = 'C:/resources/EMG/'     # dataset root, laid out as <subject>/<session>/
post_fix = '_1s_augmented'          # file-name suffix of the per-class .npy files
classes = ['Rest', 'Eyebrow','Smile', 'Chew']

# --- Discover sessions -------------------------------------------------------
# Listings are sorted because os.listdir returns entries in arbitrary order;
# without sorting, "the last session" could differ between machines/runs.
train_sessions = []
for subject in sorted(os.listdir(root_path)):
    print(f"Loading dataset from subject: {subject}")
    subject_dir = os.path.join(root_path, subject)
    for session in sorted(os.listdir(subject_dir)):
        train_sessions.append(os.path.join(subject_dir, session))

# Take an independent COPY for evaluation. A plain `test_sessions = train_sessions`
# would alias the same list, so the pop() below would also remove the held-out
# session from test_sessions and evaluation would only ever see training data.
test_sessions = list(train_sessions)

# Hold the last session out of training; it is the only entry of test_sessions
# that the model has not been trained on.
train_sessions.pop()

# --- Load training data ------------------------------------------------------
# records maps class name -> all trials of that class, concatenated over the
# training sessions.
# NOTE: allow_pickle=True lets np.load execute pickled objects; only point
# root_path at trusted data.
records = {}
for class_name in classes:
    class_data = [
        np.load(os.path.join(session, class_name + post_fix + '.npy'), allow_pickle=True)
        for session in train_sessions
    ]
    records[class_name] = np.concatenate(class_data)


Loading dataset from subject: S001
Loading dataset from subject: S003
Loading dataset from subject: S004
Loading dataset from subject: S005
Loading dataset from subject: S006
Loading dataset from subject: S007
Loading dataset from subject: S008
Loading dataset from subject: S009


In [7]:
def showFirstN(data, limit = 5):
    """Display the first `limit` trials of `data` with `showMe`.

    Args:
        data: Iterable of trials; each item is passed unchanged to `showMe`.
        limit: Maximum number of trials to display (default 5). Zero or a
            negative value displays nothing.

    If `data` holds fewer than `limit` trials, all of them are displayed.
    """
    # islice honours `limit` (previously ignored in favour of a hard-coded 5)
    # and stops after exactly `limit` items rather than limit + 1.
    for trial in itertools.islice(data, max(limit, 0)):
        showMe(trial)


In [None]:
# Visual sanity check: pass the leading 'Eyebrow' trials to showFirstN.
eyebrow_trials = records['Eyebrow']
showFirstN(eyebrow_trials)

In [3]:
# Stack the per-class trials in the order of `classes`, so label i always
# refers to classes[i] regardless of how `records` was populated.
class_trials = [records[name] for name in classes]
X = np.concatenate(class_trials, axis=0)

# Build the labels from each class's ACTUAL trial count. Sizing every class
# from records['Rest'] silently misaligns labels as soon as one class has a
# different number of trials, and hard-codes the number of classes.
y = np.concatenate([
    np.full(len(trials), label, dtype=float)
    for label, trials in enumerate(class_trials)
])
assert len(X) == len(y), "every trial must have exactly one label"

# Shuffle trials and labels together through one seeded index permutation.
# random.shuffle's permutation depends only on the seed and the list length,
# so this yields the same ordering as shuffling the zipped (trial, label) pairs.
order = list(range(len(X)))
random.seed(42)
random.shuffle(order)

# Flatten each trial into one feature vector of 6 * input_length values.
# NOTE(review): 6 is presumably the number of EMG channels — confirm.
X = np.array([X[i] for i in order]).reshape(-1,6*input_length)
y = y[order]
print(X.shape)
print(y.shape)

# NOTE(review): augment() runs before the cross-validation split performed
# later; if it derives new samples from existing ones, derived copies can end
# up in both the train and the test fold — confirm this is intended.
X, y = augment(X, y)
print(X.shape)
print(y.shape)

(7800, 600)
(7800,)
(15600, 600)
(15600,)


In [4]:
# Search space handed to GridSearchCV for the SVC.
#   Larger C     -> weaker regularisation: fits the training data more tightly
#                   (higher risk of overfitting).
#   Larger gamma -> narrower RBF kernel: fits the training data more tightly
#                   (higher risk of overfitting).
# Full grid, kept for reference because it is slow:
#   {'C': [1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}
param_grid = dict(
    C=[10, 100],
    gamma=[0.01, 0.001],
)

In [5]:
# Results collected by grid(): entry i of `accs` and entry i of `models`
# come from the same call.
accs = []
models = []


def grid(X_train,y_train, X_test, y_test):
    """Grid-search an SVC on the training split and score it on the test split.

    Runs GridSearchCV over the module-level `param_grid`, refits the best
    configuration on the full training split, then appends the test accuracy
    to `accs` and the refitted best estimator to `models`.

    Args:
        X_train, y_train: Samples and labels used for the grid search.
        X_test, y_test: Samples and labels used to score the best estimator.

    Returns:
        True if the test accuracy is strictly greater than 0.9, else False.
    """
    search = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
    search.fit(X_train, y_train)

    fold_accuracy = accuracy_score(y_test, search.predict(X_test))
    accs.append(fold_accuracy)
    models.append(search.best_estimator_)

    return bool(fold_accuracy > 0.9)


# Number of folds that are actually trained. Every fold runs a complete grid
# search, so only the first fold is trained by default; this makes explicit
# what a trailing unconditional `break` used to do implicitly (it also made
# the early-stop check below dead code). Raise the value, up to n_splits, to
# train more folds.
MAX_FOLDS = 1

skf = StratifiedKFold(n_splits=10,random_state= 42, shuffle = True)
for train, test in itertools.islice(skf.split(X, y), MAX_FOLDS):
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]

    # grid() records this fold's accuracy and model and returns True once the
    # accuracy exceeds its threshold; stop early in that case.
    if grid(X_train, y_train, X_test, y_test):
        break
    

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END ...................................C=10, gamma=0.01; total time= 5.6min
[CV] END ...................................C=10, gamma=0.01; total time= 7.6min
[CV] END ...................................C=10, gamma=0.01; total time= 3.9min
[CV] END ...................................C=10, gamma=0.01; total time= 3.1min
[CV] END ...................................C=10, gamma=0.01; total time= 3.7min
[CV] END ..................................C=10, gamma=0.001; total time= 2.5min
[CV] END ..................................C=10, gamma=0.001; total time= 2.5min
[CV] END ..................................C=10, gamma=0.001; total time= 2.7min
[CV] END ..................................C=10, gamma=0.001; total time= 2.5min
[CV] END ..................................C=10, gamma=0.001; total time= 2.7min
[CV] END ..................................C=100, gamma=0.01; total time= 6.9min
[CV] END ..................................C=100,

In [6]:
# Keep the estimator with the highest recorded accuracy; on ties the earliest
# entry wins because max() returns the first maximal item.
_, model = max(zip(accs, models), key=lambda scored: scored[0])

# Print every recorded accuracy, one per line.
for fold_accuracy in accs:
    print(fold_accuracy)

0.7423076923076923


In [7]:
# Run the selected model through evaluate() once per entry of test_sessions,
# printing the session path first.
for session_path in test_sessions:
    print(f"Evaluating session: {session_path}")
    evaluate(model, session_path, classes, post_fix)

Evaluating session: C:/resources/EMG/S001\session_0
Accuracy : 87.0%
[[64  0  0 11]
 [ 0 68  1  6]
 [ 8  0 59  8]
 [ 4  0  1 70]]

Evaluating session: C:/resources/EMG/S001\session_1
Accuracy : 79.0%
[[53  0  7 15]
 [ 7 67  1  0]
 [14  0 52  9]
 [ 7  0  4 64]]

Evaluating session: C:/resources/EMG/S003\session_0
Accuracy : 100.0%
[[75  0  0  0]
 [ 0 74  1  0]
 [ 0  0 75  0]
 [ 0  0  0 75]]

Evaluating session: C:/resources/EMG/S004\session_1
Accuracy : 89.0%
[[69  1  2  3]
 [ 0 75  0  0]
 [11  0 56  8]
 [ 6  0  1 68]]

Evaluating session: C:/resources/EMG/S004\session_2
Accuracy : 85.0%
[[63  0 11  1]
 [ 0 74  1  0]
 [13  0 62  0]
 [ 9  0 10 56]]

Evaluating session: C:/resources/EMG/S004\session_3
Accuracy : 73.0%
[[53  1 15  6]
 [ 0 69  6  0]
 [20  0 53  2]
 [17  1 12 45]]

Evaluating session: C:/resources/EMG/S004\session_4
Accuracy : 75.0%
[[68  0  0  7]
 [ 3 71  1  0]
 [26  0 34 15]
 [13  0 10 52]]

Evaluating session: C:/resources/EMG/S005\session_1
Accuracy : 95.0%
[[74  0  0  1

In [8]:
# Persist the selected model. The target directory is created first because
# dump() does not create missing parent directories and would otherwise fail
# on a fresh checkout.
model_path = 'saved_models/svm_03_04.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
dump(model, model_path)

['saved_models/svm_03_04.joblib']