In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, KFold
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import pickle
from matplotlib.lines import Line2D

from itertools import cycle

<font size="5">     <b>I. Classification</b>

<font size="4">    <b>Loading the data</b>

In [64]:
# EEG data with the shape (samples * channels) fs=250Hz
pID = '4'

path = "/Users/willy/Documents/GitHub/2021-fastReach/data/study/eeglab2python/"

# The backslash is escaped explicitly: '\d' is an invalid escape sequence that
# newer Python versions warn about. The resulting string is unchanged.
# NOTE(review): `path` uses forward slashes while the file name starts with a
# backslash - confirm the intended on-disk layout before making this portable.
data_fname = '\\data.csv'
data_eeg = pd.read_csv(path+pID+data_fname)

# EMG data with the shape (samples * channels) fs=1000Hz DUMMY
# The dummy EMG is derived from the EEG: each epoch gets 1000 time stamps and
# two EEG channels are repeated 4 times per sample to fill them.
# The number of epochs is read from the EEG data instead of being hard-coded.
n_epochs = int(data_eeg['epoch_ix'].max())
sample = np.round(np.arange(-1+(1/1000),0+(1/1000),1/1000), decimals=3).tolist()*n_epochs
epoch_ix = np.arange(1,n_epochs+1,1).repeat(1000)
# Integer division: np.repeat needs an integer repeat count.
rp_class = np.arange(1,3).repeat(len(sample)//2)
chan_1 = data_eeg['Cz'].repeat(4)
chan_2 = data_eeg['T7'].repeat(4)
# Build the frame column by column so every column keeps a numeric dtype.
# The repeated Series carry a duplicated index, so their raw values are used
# to avoid index alignment. Columns of unequal length raise a ValueError.
data_emg = pd.DataFrame({
    'sample': sample,
    'epoch_ix': epoch_ix,
    'rp_class': rp_class,
    'chan_1': chan_1.to_numpy(),
    'chan_2': chan_2.to_numpy(),
})

<font size="4">    <b>Features definition</b>

In [66]:
def rms(values):
    """Return the root mean square of ``values``.

    Parameters
    ----------
    values : array-like
        Numeric samples (NumPy array, pandas Series, list, ...). For input
        with more than one dimension the reduction runs along the first axis.

    Returns
    -------
    float
        ``sqrt(sum(values ** 2) / len(values))``, or NaN when ``values`` is
        empty.
    """
    # Work on a float ndarray so the sum is vectorised and NaN values
    # propagate regardless of the container type.
    samples = np.asarray(values, dtype=float)
    if samples.size == 0:
        # No samples: return NaN instead of dividing by zero.
        return np.nan
    return np.sqrt(np.sum(samples ** 2, axis=0) / len(samples))

def mini(values):
    """Return the smallest value of ``values`` as computed by ``np.min``."""
    smallest = np.min(values)
    return smallest

def maxi(values):
    """Return the largest value of ``values`` as computed by ``np.max``."""
    largest = np.max(values)
    return largest

def mav(values):
    """Return the mean absolute value of ``values``.

    ``values`` must support the builtin ``abs`` (e.g. a NumPy array or a
    pandas Series).
    """
    magnitudes = abs(values)
    return np.mean(magnitudes)

def mean(values):
    """Return the arithmetic mean of ``values`` as computed by ``np.mean``."""
    average = np.mean(values)
    return average

def var(values):
    """Return the variance of ``values`` as computed by ``np.var``.

    ``np.var`` is called with its defaults, i.e. the population variance
    (``ddof=0``).
    """
    variance = np.var(values)
    return variance

<font size="4">    <b>Feature extraction</b>
    
EEG: We want to extract 1 feature (mean) at each window having a size of 25 samples, so 10 times per epoch or per second.
    
    -> 10 features per epoch per channel.

EMG: We want to extract X features (6? To be decided after feature selection) in each window of 100 samples, so 10 times per epoch, i.e. 10 times per second.
    
    -> 10 features per epoch per channel (would a window size of 25 samples be better? That would give 40 features per epoch per channel).

In [111]:
# Feature functions applied to every EMG window; the EEG only requires the mean.
feat_list = [rms, mini, maxi, mav, mean, var]
nfeat_list = ['rms', 'mini', 'maxi', 'mav', 'mean', 'var']

# Define the window size on which we will extract the features.
# Integer-dividing the row position by the window size gives every row the
# number of the window it belongs to; that array is used as the groupby key.
# EEG Window
win_eeg = 25
win_eeg_scaled = np.arange(len(data_eeg))//win_eeg

# EMG Window
win_emg = 25
win_emg_scaled = np.arange(len(data_emg))//win_emg

# Extract the features at each window size for each channel for each epoch and put them in a DataFrame

# EEG 1st DataFrame: mean of every column within each window
win_eeg_mean = data_eeg.groupby(win_eeg_scaled).mean()

# EMG 2nd DataFrame: one Series per feature and per channel, indexed by window
emg_windows = data_emg.groupby(win_emg_scaled)
muscle_1 = [emg_windows['chan_1'].apply(feat) for feat in feat_list]
muscle_2 = [emg_windows['chan_2'].apply(feat) for feat in feat_list]

# Rows are windows and columns are feature names after the transpose.
m1 = pd.DataFrame(muscle_1, index=nfeat_list).transpose()
m2 = pd.DataFrame(muscle_2, index=nfeat_list).transpose()
mk = {'Biceps':m1, 'Deltoïd':m2}
muscles = pd.concat([m1,m2], axis=1, keys=mk.keys())

# Labels for the EMG DataFrame: per-window mean of the first three columns
# (sample, epoch_ix, rp_class).
# The column-wise groupby mean is used instead of `.apply(mean)`: np.mean on a
# DataFrame forwards axis=None, which recent pandas versions reduce over both
# axes, collapsing the three label columns into a single scalar.
# The cast to float keeps this working when the columns have object dtype.
c_labels = data_emg.iloc[:,:3].astype(float).groupby(win_emg_scaled).mean()
win_emg_mean = pd.concat([c_labels, muscles], axis=1)

<font size="4">    <b>Data frames</b>
    
From the imported data structure to the final structure adapted for the following classification.

In [112]:
# EEG: one row per (epoch_ix, rp_class) pair, one column per (value column, window) pair.
# The value columns are every column of win_eeg_mean from position 4 onward.
eeg_value_cols = win_eeg_mean.columns[4:].tolist()
df_eeg = win_eeg_mean.pivot(
    index=['epoch_ix', 'rp_class'],
    columns='sample',
    values=eeg_value_cols,
).reset_index()

# EMG: same layout, using every column of win_emg_mean from position 3 onward.
emg_value_cols = win_emg_mean.columns[3:].tolist()
df_emg = win_emg_mean.pivot(
    index=['epoch_ix', 'rp_class'],
    columns='sample',
    values=emg_value_cols,
).reset_index()

# EEG - EMG: append the EMG feature columns (everything after the two label
# columns) to the EEG frame; rows are matched on the index.
emg_features_only = df_emg.iloc[:,2:]
df_eeg_emg = pd.concat([df_eeg, emg_features_only], axis=1)

<font size="4">    <b>Linear Discriminant Analysis</b>

<font size="3">    <b>Motion</b>

In [100]:
# Single-feature LDA on one column of the raw EEG table.
motion_dim = 3
X_motion = data_eeg.iloc[:,motion_dim].values.reshape(-1,1)  # 2-D: one column
y_motion = data_eeg.rp_class

clf = LDA()
clf.fit(X_motion, y_motion)

# splits data randomly in n=folds
# NOTE(review): the rows are individual samples and the shuffled KFold ignores
# epoch_ix, so samples of one epoch can end up in both the training and the
# test fold - confirm whether a split grouped by epoch is intended.
kfolds = KFold(n_splits=10, shuffle=True, random_state=1)

cv_results = cross_val_score(clf, X_motion, y_motion, cv=kfolds)
cv_average = cv_results.mean()
st_deviation = cv_results.std()
print(cv_average)
print(st_deviation)

0.9060816326530612
0.004405327886312489


<font size="3">    <b>EEG</b>

In [113]:
# Features: every column after the two label columns; target: rp_class.
yeeg = np.asarray(df_eeg['rp_class'])
Xeeg = df_eeg.iloc[:,2:].values

# LDA with the least-squares solver and automatic shrinkage.
clf_eeg = LDA(shrinkage='auto', solver='lsqr')
clf_eeg.fit(Xeeg, yeeg)

# splits data randomly in n=folds
kfolds = KFold(n_splits=10, shuffle=True, random_state=1)

# Mean and standard deviation of the per-fold scores.
cv_results = cross_val_score(clf_eeg, Xeeg, yeeg, cv=kfolds)
cv_average = cv_results.mean()
st_deviation = cv_results.std()
print(cv_average)
print(st_deviation)

0.6344444444444445
0.14635066968934077


<font size="3">    <b>EMG</b>

In [114]:
# Features: every column after the two label columns; target: rp_class.
yemg = np.asarray(df_emg['rp_class'])
Xemg = df_emg.iloc[:,2:].values

# LDA with the least-squares solver and automatic shrinkage.
clf_emg = LDA(shrinkage='auto', solver='lsqr')
clf_emg.fit(Xemg, yemg)

# splits data randomly in n=folds
kfolds = KFold(n_splits=10, shuffle=True, random_state=1)

# Mean and standard deviation of the per-fold scores.
cv_results = cross_val_score(clf_emg, Xemg, yemg, cv=kfolds)
cv_average = cv_results.mean()
st_deviation = cv_results.std()
print(cv_average)
print(st_deviation)

0.48999999999999994
0.15335346083677825


<font size="3">    <b>EEG - EMG</b>

In [115]:
# Features: every column of the combined frame after the two label columns.
# NOTE(review): the target is read from df_emg rather than from df_eeg_emg -
# confirm both frames carry the same rp_class per row.
y_eeg_emg = np.asarray(df_emg['rp_class'])
X_eeg_emg = df_eeg_emg.iloc[:,2:].values

# LDA with the least-squares solver and automatic shrinkage.
clf_eeg_emg = LDA(shrinkage='auto', solver='lsqr')
clf_eeg_emg.fit(X_eeg_emg, y_eeg_emg)

# splits data randomly in n=folds
kfolds = KFold(n_splits=10, shuffle=True, random_state=1)

# Mean and standard deviation of the per-fold scores.
cv_results = cross_val_score(clf_eeg_emg, X_eeg_emg, y_eeg_emg, cv=kfolds)
cv_average = cv_results.mean()
st_deviation = cv_results.std()
print(cv_average)
print(st_deviation)

0.6066666666666667
0.193550325225926


<font size="3">    <b>Feature representation</b>

<font size="5">     <b>II. Time series representation</b>