In [83]:
import numpy as np
import pandas as pd
import random
import os
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from joblib import dump
from tqdm.notebook import tqdm

import itertools
from sklearn.utils import shuffle
from scipy import signal
%matplotlib inline


from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

from sklearn.feature_selection import SelectFdr, chi2

from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score

from utils.svm import preProcess, evaluate_set
from utils.visualize import showMe
from utils.augment import augment
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [71]:
# --- Configuration -----------------------------------------------------------
input_length = 100                        # used later as reshape(-1, 6*input_length)
root_path = 'C:/resources/EMG/'           # dataset root: <root>/<subject>/<session>/
post_fix = '_1s_augmented'                # suffix of the per-class .npy files
exclude = ['S011']                        # subjects skipped entirely
exclude_last = ['S001', 'S006', 'S007']   # subjects whose session_4 is held out for testing

# --- Session discovery -------------------------------------------------------
# Walk the tree once and route every session to exactly one of the two lists.
# Directory listings are sorted so the session order (and therefore the seeded
# shuffle applied to the loaded data later) does not depend on the file system.
train_sessions = []
test_sessions = []
for subject in sorted(os.listdir(root_path)):
    subject_dir = os.path.join(root_path, subject)
    # Skip excluded subjects and stray files that are not subject folders.
    if subject in exclude or not os.path.isdir(subject_dir):
        continue
    for session in sorted(os.listdir(subject_dir)):
        session_path = os.path.join(subject_dir, session)
        if not os.path.isdir(session_path):
            continue
        if session == 'session_4' and subject in exclude_last:
            test_sessions.append(session_path)
        else:
            train_sessions.append(session_path)

print(f"{len(train_sessions)} sessions loaded for training")
print(f"{len(test_sessions)} session loaded for testing")



31 sessions loaded for training
### TESTING ###
3 session loaded for testing


In [72]:

classes = ['Rest', 'Eyebrow','Smile', 'Chew']

# For every class, load its '<class><post_fix>.npy' file from each training
# session and stack the per-session arrays along the first axis.
# NOTE(review): allow_pickle=True will unpickle objects stored in the files;
# only load recordings from a trusted source.
records = {
    name: np.concatenate([
        np.load(os.path.join(session_dir, name + post_fix + '.npy'), allow_pickle=True)
        for session_dir in train_sessions
    ])
    for name in classes
}


In [3]:
def showFirstN(data, limit = 5):
    """Plot the first `limit` trials of `data` using showMe.

    Parameters
    ----------
    data : iterable
        Trials to visualise; each element is passed to showMe unchanged.
    limit : int or None, default 5
        Maximum number of trials to plot. None plots every trial.
        A negative value raises ValueError (from itertools.islice).

    Fewer than `limit` trials are plotted when `data` is shorter.
    """
    # islice stops after `limit` items, so the parameter is actually honoured
    # (the previous loop compared against a hard-coded 5 after plotting and
    # therefore always drew six trials).
    for trial in itertools.islice(data, limit):
        showMe(trial)


In [None]:
# Visual sanity check: plot the first few 'Eyebrow' training trials.
showFirstN(records['Eyebrow'])

In [73]:
# Stack all classes into one sample array. The order follows `classes`, so the
# numeric label of a class is its index in that list.
X = np.concatenate([records[name] for name in classes], axis=0)

# Build the labels from each class's own sample count instead of assuming that
# every class has as many samples as 'Rest'. Labels stay floats (0.0, 1.0, ...)
# as they were with the previous np.zeros / np.ones construction.
y = np.concatenate([
    np.full(records[name].shape[0], label, dtype=float)
    for label, name in enumerate(classes)
])
assert X.shape[0] == y.shape[0], "samples and labels are misaligned"

# Shuffle samples and labels together through one seeded index permutation.
# This avoids turning the arrays into a list of tuples and back.
order = list(range(X.shape[0]))
random.seed(42)
random.shuffle(order)

# Flatten every trial into one feature vector.
# presumably 6 channels x input_length samples per trial - TODO confirm
X = X[order].reshape(-1, 6 * input_length)
y = y[order]
print(X.shape)
print(y.shape)

# NOTE(review): augmentation runs before the cross-validation split. If augment
# returns derived copies of the input samples, variants of one trial can end up
# in both the training and the validation fold - confirm and, if so, augment
# inside each fold instead.
X, y = augment(X, y)
print(X.shape)
print(y.shape)

(9300, 600)
(9300,)
(18600, 600)
(18600,)


In [103]:
# Hyper-parameter grid handed to GridSearchCV for SVC() (default kernel).
# LARGER C -> weaker regularisation -> tighter fit to the training data
# HIGHER gamma -> narrower kernel influence -> tighter fit to the training data
#param_grid = {'C': [1, 10, 100,1000], 'gamma': [1,0.1,0.01,0.001,0.0001]} #acc 88 test acc 45
param_grid = {'C': [100,1000], 'gamma': [0.01,0.001,0.0001]} #slow

#param_grid = {'C': [0.1, 1], 'gamma': [1, 0.1]} 

In [104]:
# Per-fold results collected by grid(): held-out accuracy and the refit model.
accs = []
models = []
def grid(X_train,y_train, X_test, y_test):
    """Grid-search an SVC on one fold and record its held-out accuracy.

    Fits GridSearchCV(SVC(), param_grid) on the training split, scores the
    refit best estimator on the test split, and appends the accuracy to
    `accs` and the estimator to `models`.

    Returns True when the held-out accuracy is above 0.9, otherwise False.
    """
    search = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
    search.fit(X_train, y_train)

    fold_acc = accuracy_score(y_test, search.predict(X_test))
    accs.append(fold_acc)
    models.append(search.best_estimator_)
    return bool(fold_acc > 0.9)


# Every fold runs a complete grid search (the logged run reports 30 fits at
# roughly 1.5 minutes each), so only the first MAX_FOLDS folds are evaluated.
# The previous unconditional `break` hard-coded this to one fold and made the
# accuracy-based early stop unreachable; raise MAX_FOLDS to evaluate more.
MAX_FOLDS = 1

skf = StratifiedKFold(n_splits=10,random_state= 42, shuffle = True)
for train, test in itertools.islice(skf.split(X, y), MAX_FOLDS):
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]

    # grid() returns True once a fold beats its accuracy target; stop early then.
    if grid(X_train, y_train, X_test, y_test):
        break
    

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ..................................C=100, gamma=0.01; total time= 1.9min
[CV] END ..................................C=100, gamma=0.01; total time= 1.8min
[CV] END ..................................C=100, gamma=0.01; total time= 1.6min
[CV] END ..................................C=100, gamma=0.01; total time= 1.3min
[CV] END ..................................C=100, gamma=0.01; total time= 1.4min
[CV] END .................................C=100, gamma=0.001; total time= 1.5min
[CV] END .................................C=100, gamma=0.001; total time= 1.6min
[CV] END .................................C=100, gamma=0.001; total time= 1.6min
[CV] END .................................C=100, gamma=0.001; total time= 1.4min
[CV] END .................................C=100, gamma=0.001; total time= 1.5min
[CV] END ................................C=100, gamma=0.0001; total time= 1.5min
[CV] END ................................C=100, g

In [96]:
# Keep the estimator from the best-scoring fold (the earliest fold wins ties),
# then list every fold's accuracy, one per line.
best_fold = max(range(len(accs)), key=accs.__getitem__)
model = models[best_fold]
print(*accs, sep="\n")

0.739247311827957


In [97]:
# Score the selected model on the sessions that were used for training.
evaluate_set(model, train_sessions, classes, post_fix)

  0%|          | 0/31 [00:00<?, ?it/s]

Global accuracy: 88.0%
          Accuracy
Subject           
S001     89.666667
S002     92.000000
S003     99.000000
S004     79.750000
S005     87.500000
S006     92.666667
S007     88.666667
S008     89.250000
S009     79.250000
S010     95.250000


In [99]:
# Score the same model on the held-out session_4 recordings in test_sessions.
evaluate_set(model, test_sessions, classes, post_fix)

  0%|          | 0/3 [00:00<?, ?it/s]

Global accuracy: 45.33%
         Accuracy
Subject          
S001           62
S006           27
S007           47


In [10]:
# Persist the selected model. The target folder is created first so dump()
# does not fail with FileNotFoundError when 'saved_models/' is missing.
model_path = 'saved_models/svm_03_14.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
dump(model, model_path)

['saved_models/svm_03_14.joblib']