# Project 4

### Dependencies and Constants

In [205]:
import time
import numpy as np
from numpy.fft import fft  # to get amplitudes
import pandas as pd
import scipy.signal as ss  # for psd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from biosppy.signals import eeg  # signal processing
from biosppy.signals import emg  # signal processing
from spectrum import arburg
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization


PROTOTYPING = False

### Read data

In [5]:
start = time.time()


def _load_signals(csv_path):
    """Read a raw-signal CSV and return its values without the 'Id' column."""
    return pd.read_csv(csv_path).drop('Id', axis=1).values


# raw training signals (one epoch per row)
train_eeg1_raw = _load_signals('files/train_eeg1.csv')
train_eeg2_raw = _load_signals('files/train_eeg2.csv')
train_emg_raw = _load_signals('files/train_emg.csv')

# raw test signals
test_eeg1_raw = _load_signals('files/test_eeg1.csv')
test_eeg2_raw = _load_signals('files/test_eeg2.csv')
test_emg_raw = _load_signals('files/test_emg.csv')

# pre-computed EEG features
# NOTE(review): these files are written by the "save features" cell further
# down, so on a fresh checkout that cell has to run before this one.
eeg_train = pd.read_csv('files/eeg_feats_train.csv').values
eeg_test = pd.read_csv('files/eeg_feats_test.csv').values

# EEG features reduced by PCA (to 45 components - already scaled)
eeg_train_red = pd.read_csv('files/eeg_train_pca45.csv').values
eeg_test_red = pd.read_csv('files/eeg_test_pca45.csv').values

# training labels
train_labels_raw = _load_signals('files/train_labels.csv')

# sanity-check the loaded shapes
print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)
print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)
print(train_labels_raw.shape)
print(eeg_train.shape, eeg_test.shape)

print("Time: ", time.time() - start)

(64800, 512) (64800, 512) (64800, 512)
(43200, 512) (43200, 512) (43200, 512)
(64800, 1)
(64800, 55) (43200, 55)
Time:  30.928338766098022


### Feature extraction for EEG signals

In [3]:
start = time.time()

def calculate_statistics(list_values):
    """Return NaN-aware summary statistics of a signal.

    Parameters
    ----------
    list_values : array-like
        Signal samples; all elements are pooled regardless of shape.

    Returns
    -------
    list
        ``[n5, n25, n75, n95, median, mean, std, var, rms]`` where ``nXX``
        is the XX-th percentile.
    """
    values = np.asarray(list_values, dtype=float)
    n5, n25, n75, n95, median = np.nanpercentile(values, [5, 25, 75, 95, 50])
    mean = np.nanmean(values)
    std = np.nanstd(values)
    var = np.nanvar(values)
    # Root mean square is sqrt(mean(x^2)). The previous mean(sqrt(x^2)) was
    # the mean absolute value, not the RMS.
    rms = np.sqrt(np.nanmean(np.square(values)))
    return [n5, n25, n75, n95, median, mean, std, var, rms]
 
def calculate_crossings(list_values):
    """Count how often a signal crosses zero and its own (NaN-aware) mean.

    A crossing is counted wherever two consecutive samples lie on different
    sides of the level (strictly above vs. not above).

    Returns
    -------
    list
        ``[number_of_zero_crossings, number_of_mean_crossings]``.
    """
    samples = np.array(list_values)
    mean_level = np.nanmean(list_values)

    def _count_crossings(level):
        # diff of a boolean mask is True exactly where the side changes
        side_changes = np.diff(samples > level)
        return np.count_nonzero(side_changes)

    return [_count_crossings(0), _count_crossings(mean_level)]
 
def get_features(list_values):
    """Return the crossing counts followed by the summary statistics.

    The result is the list from ``calculate_crossings`` extended by the
    list from ``calculate_statistics`` for the same input.
    """
    features = list(calculate_crossings(list_values))
    features.extend(calculate_statistics(list_values))
    return features

def extract_features(eeg1, eeg2, emg):
    """Extract per-epoch EEG band features from two EEG channels.

    For every row, both channels are passed to ``biosppy``'s ``eeg.eeg``
    (sampling rate 128) and ``get_features`` is applied to each returned
    band series.

    Parameters
    ----------
    eeg1, eeg2 : ndarray, shape (n_epochs, n_samples)
        Raw signals of the two EEG channels; rows are paired by index.
    emg : ndarray
        Unused. Kept so existing callers keep working. Note that the name
        shadows the imported ``emg`` module inside this function.

    Returns
    -------
    ndarray of shape (n_epochs, n_features), or None if there are no epochs.
    """
    n_epochs = eeg1.shape[0]
    # Bands whose features are computed over both channels pooled together.
    pooled_bands = ("theta", "alpha_low", "alpha_high", "beta")

    # Rows are collected in a list and stacked once at the end. Growing the
    # matrix with np.concatenate on every iteration copies it each time.
    rows = []
    for i in range(n_epochs):
        if i % 1000 == 0:
            print(i, "/", n_epochs)

        signal = np.array([eeg1[i], eeg2[i]]).T
        analysis = eeg.eeg(signal=signal, sampling_rate=128, show=False)

        row = []
        for band in pooled_bands:
            row.extend(get_features(analysis[band]))

        # gamma: only the first channel is used
        # NOTE(review): the other bands use both channels - confirm this
        # asymmetry is intended.
        row.extend(get_features(analysis["gamma"][:, 0]))

        rows.append(row)

    if not rows:
        return None
    return np.array(rows, dtype=float)

# training features are always needed
X_train = extract_features(train_eeg1_raw, train_eeg2_raw, train_emg_raw)

# test features are only extracted for a real submission run
if not PROTOTYPING:
    X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
    print("X_test", X_test.shape)

print("X_train", X_train.shape)

elapsed = time.time() - start
print("Time: ", elapsed)

0 / 64800
1000 / 64800
2000 / 64800
3000 / 64800
4000 / 64800
5000 / 64800
6000 / 64800
7000 / 64800
8000 / 64800
9000 / 64800
10000 / 64800
11000 / 64800
12000 / 64800
13000 / 64800
14000 / 64800
15000 / 64800
16000 / 64800
17000 / 64800
18000 / 64800
19000 / 64800
20000 / 64800
21000 / 64800
22000 / 64800
23000 / 64800
24000 / 64800
25000 / 64800
26000 / 64800
27000 / 64800
28000 / 64800
29000 / 64800
30000 / 64800
31000 / 64800
32000 / 64800
33000 / 64800
34000 / 64800
35000 / 64800
36000 / 64800
37000 / 64800
38000 / 64800
39000 / 64800
40000 / 64800
41000 / 64800
42000 / 64800
43000 / 64800
44000 / 64800
45000 / 64800
46000 / 64800
47000 / 64800
48000 / 64800
49000 / 64800
50000 / 64800
51000 / 64800
52000 / 64800
53000 / 64800
54000 / 64800
55000 / 64800
56000 / 64800
57000 / 64800
58000 / 64800
59000 / 64800
60000 / 64800
61000 / 64800
62000 / 64800
63000 / 64800
64000 / 64800
0 / 43200
1000 / 43200
2000 / 43200
3000 / 43200
4000 / 43200
5000 / 43200
6000 / 43200
7000 / 43200
80

AttributeError: 'numpy.ndarray' object has no attribute 'to_frame'

In [6]:
# persist the extracted EEG features so later sessions can load them from disk
pd.DataFrame(X_train).to_csv('files/eeg_feats_train.csv', index=False)
pd.DataFrame(X_test).to_csv('files/eeg_feats_test.csv', index=False)

In [217]:
# Alternative feature set: the real part of each epoch's FFT, per EEG channel.
# Probably more suitable for a neural network approach.
# NOTE(review): np.real keeps only the cosine component; np.abs would give the
# amplitude spectrum - confirm which one is intended.
n_train_epochs = train_eeg1_raw.shape[0]
n_test_epochs = test_eeg1_raw.shape[0]

eeg1_freqs_train = [np.real(fft(train_eeg1_raw[i])) for i in range(n_train_epochs)]
eeg2_freqs_train = [np.real(fft(train_eeg2_raw[i])) for i in range(n_train_epochs)]

eeg1_freqs_test = [np.real(fft(test_eeg1_raw[i])) for i in range(n_test_epochs)]
eeg2_freqs_test = [np.real(fft(test_eeg2_raw[i])) for i in range(n_test_epochs)]

In [221]:
# concatenate the frequency features of both EEG channels side by side
start = time.time()
eeg_freqs_train = np.column_stack((eeg1_freqs_train, eeg2_freqs_train))
eeg_freqs_test = np.column_stack((eeg1_freqs_test, eeg2_freqs_test))
print("Time: ", time.time() - start)

# persist them for future imports
pd.DataFrame(eeg_freqs_train).to_csv('files/eeg_freqs_train.csv', index=False)
pd.DataFrame(eeg_freqs_test).to_csv('files/eeg_freqs_test.csv', index=False)

Time:  4.171844482421875


### Homemade feature extraction for EMG signals

In [6]:
# even more features: https://ieeexplore.ieee.org/document/7748960
# find out if we were provided with some threshold or so (some feats need one)

# functions are implemented from this paper:
# https://www.researchgate.net/publication/323587464_A_Comprehensive_Study_on_EMG_Feature_Extraction_and_Classifiers
# https://www.researchgate.net/publication/224148281_Evaluation_of_EMG_Feature_Extraction_for_Hand_Movement_Recognition_Based_on_Euclidean_Distance_and_Standard_Deviation

# Functions for the TIME Domain

def IEMG(signal):
    """Integrated EMG: the sum of the absolute sample values."""
    rectified = np.abs(signal)
    return rectified.sum()

def MAV(signal, N):
    """Mean absolute value: sum of ``|signal|`` divided by ``N``.

    The signal is rectified here, so the caller does not have to do it.
    """
    rectified = np.abs(signal)
    return rectified.sum() / N


def MAVS(signal, N):
    """Sum of absolute differences between consecutive samples, over ``N``.

    Uses one vectorised ``np.diff`` instead of a Python loop over every
    sample.

    Parameters
    ----------
    signal : array-like
        1-D signal.
    N : number
        Normalisation constant (the caller passes the signal length).
    """
    step_sizes = np.abs(np.diff(np.asarray(signal)))
    return np.sum(step_sizes) / N


def MAV1(signal, N):
    """Modified mean absolute value, type 1.

    Samples whose index lies in ``[0.25*N, 0.75*N]`` get weight 1, all
    others weight 0.5. The weighted sum of ``|signal|`` is divided by ``N``.
    Computed with a weight vector instead of a per-sample Python loop.

    Parameters
    ----------
    signal : array-like
        1-D signal.
    N : number
        Window length and normalisation constant; expected to be the
        signal length.
    """
    magnitudes = np.abs(np.asarray(signal))
    positions = np.arange(magnitudes.shape[0])
    in_centre = (positions >= 0.25 * N) & (positions <= 0.75 * N)
    weights = np.where(in_centre, 1.0, 0.5)
    return np.sum(weights * magnitudes) / N


def MAV2(signal, N):
    """Modified mean absolute value, type 2.

    Weights by sample index ``i``:

    * ``0.25*N <= i <= 0.75*N``: 1
    * ``i < 0.25*N``: ``4*i/N``
    * ``i > 0.75*N``: ``4*(i-N)/N``

    The weighted sum of ``|signal|`` is divided by ``N``. Computed with a
    weight vector instead of a per-sample Python loop.

    NOTE(review): the tail weight ``4*(i-N)/N`` is negative, so the tail is
    subtracted. A continuous ramp would be ``4*(N-i)/N``. The original
    formula is kept here; confirm against the referenced paper before
    changing it.

    Parameters
    ----------
    signal : array-like
        1-D signal.
    N : number
        Window length and normalisation constant; expected to be the
        signal length.
    """
    magnitudes = np.abs(np.asarray(signal))
    positions = np.arange(magnitudes.shape[0])
    lower = 0.25 * N
    upper = 0.75 * N

    weights = np.ones(magnitudes.shape[0], dtype=float)
    head = positions < lower
    tail = positions > upper
    weights[head] = 4 * positions[head] / N
    weights[tail] = 4 * (positions[tail] - N) / N

    return np.sum(weights * magnitudes) / N


def SSI(signal, N):
    """Simple square integral: sum of squared absolute samples over ``N``.

    Expresses the energy of the EMG signal; squaring is element-wise.
    """
    squared = np.abs(signal) ** 2
    return squared.sum() / N

def VAREMG(signal, N):
    """Variance of the EMG signal: sum of squared samples over ``N - 1``.

    The signal is not mean-centred; squaring is element-wise.
    """
    squared = signal ** 2
    return squared.sum() / (N - 1)

def RMS(signal, N):
    """Root mean square: square root of the summed squared samples over ``N``."""
    mean_square = np.sum(np.abs(signal) ** 2) / N
    return np.sqrt(mean_square)

def TM3(signal, N):
    """Third temporal moment: sum of ``|signal**3|`` divided by ``N``."""
    cubed = signal ** 3
    return np.abs(cubed).sum() / N

def TM4(signal, N):
    """Fourth temporal moment: sum of ``|signal**4|`` divided by ``N``."""
    fourth_power = signal ** 4
    return np.abs(fourth_power).sum() / N

def TM5(signal, N):
    """Fifth temporal moment: sum of ``|signal**5|`` divided by ``N``."""
    fifth_power = signal ** 5
    return np.abs(fifth_power).sum() / N

def WL(signal, N):
    """Waveform length: sum of absolute differences of consecutive samples.

    Uses one vectorised ``np.diff`` instead of a Python loop over every
    sample.

    Parameters
    ----------
    signal : array-like
        1-D signal.
    N : number
        Unused. Kept so the signature matches the other time-domain
        features.
    """
    return np.sum(np.abs(np.diff(np.asarray(signal))))


# TODO : find a suitable threshold (maybe 0) => visual inspection required
def AAC(signal, N):
    """Placeholder, not implemented yet; always returns None."""
    return None

def DASDV(signal, N):
    """Placeholder, not implemented yet; always returns None."""
    return None

def ZC(signal, N):
    """Placeholder, not implemented yet; always returns None."""
    return None

def SSC(signal, N):
    """Placeholder, not implemented yet; always returns None."""
    return None


In [9]:
# IMPROVEMENT : Wavelet-Analysis for FExtr.:
# https://www.researchgate.net/publication/51997893_Techniques_for_Feature_Extraction_from_EMG_Signal
# Functions for the FREQUENCY Domain

def FMD(psd):
    """Frequency-median feature: half the sum of the power spectral density."""
    total_power = np.sum(psd)
    return 0.5 * total_power

def FMN(psd, freqs, fmd):
    """Frequency mean: power-weighted average of ``freqs``.

    ``fmd`` is the value returned by ``FMD`` for the same PSD (half the
    total power); it is doubled here to recover the total power without
    summing again.
    """
    total_power = fmd * 2
    weighted_freqs = np.multiply(psd, freqs)
    return np.sum(weighted_freqs) / total_power

def MMFD(amplitudes):
    """Same as ``FMD`` but on the amplitude spectrum: half the summed amplitudes."""
    total_amplitude = np.sum(amplitudes)
    return 0.5 * total_amplitude

def MMNF(signal, amplitudes, mmfd):
    """Mean frequency of the amplitude spectrum (amplitude analogue of FMN).

    Parameters
    ----------
    signal : array-like
        Unused. Kept for backward compatibility with existing callers.
    amplitudes : ndarray
        Two-sided amplitude spectrum in ``np.fft.fft`` bin order.
    mmfd : number
        Value returned by ``MMFD(amplitudes)``, i.e. half the summed
        amplitudes.

    Returns
    -------
    Amplitude-weighted mean of the absolute bin frequencies, in cycles per
    sample (``np.fft.fftfreq`` with its default spacing).
    """
    # fftfreq returns signed frequencies. For a real signal the spectrum is
    # symmetric, so weighting by the signed values makes the positive and
    # negative halves cancel. The magnitude of each bin's frequency is used
    # instead.
    freqs = np.abs(np.fft.fftfreq(amplitudes.size))
    # mmfd is half the total amplitude, so it is doubled to normalise by the
    # full sum, as FMN does with fmd.
    total_amplitude = 2 * mmfd
    return np.sum(np.multiply(amplitudes, freqs)) / total_amplitude
    

def AR(signal, order=6):
    """Estimate the autoregressive coefficients of ``signal`` with ``arburg``.

    ``order`` defaults to 6 (chosen from literature research). Only the
    first of the three values returned by ``arburg`` is kept.
    """
    coefficients, _unused_second, _unused_third = arburg(signal, order)
    return coefficients

# Wavelets analysis
# import pywt


In [176]:
def extract_features_emg(data):
    """Extract time- and frequency-domain features from raw EMG epochs.

    Parameters
    ----------
    data : ndarray, shape (n_epochs, n_samples)
        Raw EMG signal, one epoch per row.

    Returns
    -------
    list of 17 lists
        One list per feature, each with one entry per epoch, in this order:
        MAV, SSI, VAREMG, RMS, WL, IEMG, MAVS, MAV1, MAV2, TM3, TM4, TM5,
        FMD, FMN, MMFD, MMNF, AR. The AR entries are coefficient arrays
        (order 6); every other entry is a scalar.
    """
    n_feature_lists = 17
    feature_lists = [[] for _ in range(n_feature_lists)]

    start = time.time()
    for i in range(data.shape[0]):
        epoch = data[i]

        # only the filtered signal of the biosppy EMG pipeline is used
        # NOTE(review): sampling_rate/fs are 512 here while the EEG pipeline
        # uses 128 - confirm the true EMG sampling rate.
        _, filt_emg, _ = emg.emg(signal=epoch.T, sampling_rate=512, show=False)
        freqs, psd = ss.welch(epoch, fs=512)  # PSD of the raw epoch
        amplitudes = np.abs(fft(epoch))       # amplitude spectrum of the raw epoch

        # N is the number of samples in this signal. It was previously
        # data.shape[0] (the number of epochs), which mis-scaled every
        # normalised feature and pushed the MAV1/MAV2 window borders far
        # beyond the signal.
        N = len(filt_emg)

        fmd_res = FMD(psd)
        mmfd_res = MMFD(amplitudes)

        epoch_features = (
            MAV(filt_emg, N),
            SSI(filt_emg, N),
            VAREMG(filt_emg, N),
            RMS(filt_emg, N),
            WL(filt_emg, N),
            IEMG(filt_emg),
            MAVS(filt_emg, N),
            MAV1(filt_emg, N),
            MAV2(filt_emg, N),
            TM3(filt_emg, N),
            TM4(filt_emg, N),
            TM5(filt_emg, N),
            fmd_res,
            FMN(psd, freqs, fmd_res),
            mmfd_res,
            MMNF(epoch, amplitudes, mmfd_res),
            AR(filt_emg),
        )
        for feature_list, value in zip(feature_lists, epoch_features):
            feature_list.append(value)

    print("Time: ", time.time() - start)

    return feature_lists

In [177]:
# get emg features for X_train and X_test
# Each call returns the list-of-feature-lists produced by extract_features_emg.
# Slow: the recorded run below took roughly 23 and 14 minutes respectively.
emg_feats_train = extract_features_emg(train_emg_raw)
emg_feats_test  = extract_features_emg(test_emg_raw)

Time:  1361.9812316894531
Time:  859.1871929168701


In [185]:
def extract_ar_coefs(features):
    """Split the per-epoch AR coefficient arrays into one list per coefficient.

    ``features[16]`` is expected to hold one array of at least six
    coefficients per epoch. Only the real part is kept (the imaginary part
    is 0j anyway).

    Returns
    -------
    tuple of six lists
        The k-th list contains the real part of coefficient k for every
        epoch.
    """
    ar_per_epoch = features[16]  # 17th entry holds the AR coefficient arrays
    columns = [[] for _ in range(6)]
    for coefs in ar_per_epoch:
        for k, column in enumerate(columns):
            column.append(np.real(coefs[k]))
    return tuple(columns)


In [186]:
# Replace the list of AR coefficient arrays by six scalar columns; otherwise
# scaling will not work properly. The first 16 feature lists are transposed
# into an (n_epochs, 16) block and the AR columns are appended on the right.
start = time.time()

ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_train)
emg_feats_train_mod = np.column_stack((
    np.transpose(emg_feats_train[:16]),
    ar_feats_0, ar_feats_1, ar_feats_2,
    ar_feats_3, ar_feats_4, ar_feats_5,
))

ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_test)
emg_feats_test_mod = np.column_stack((
    np.transpose(emg_feats_test[:16]),
    ar_feats_0, ar_feats_1, ar_feats_2,
    ar_feats_3, ar_feats_4, ar_feats_5,
))

print("Time: ", time.time() - start)

Time:  1.307163953781128


In [254]:
# Remove certain features (which are believed to be "bad"): keep columns 0-5
# and the AR columns from index 16 onwards, dropping columns 6-15.
emg_feats_train_mod2 = np.hstack((emg_feats_train_mod[:, :6], emg_feats_train_mod[:, 16:]))
emg_feats_test_mod2 = np.hstack((emg_feats_test_mod[:, :6], emg_feats_test_mod[:, 16:]))

In [189]:
# Save emg features for future imports.
# The flattened (n_epochs, n_features) matrices are written, not the raw
# list-of-lists from extract_features_emg. The raw structure would be saved
# as one row per feature, with the AR row holding whole arrays per cell,
# which cannot be read back as a numeric feature table.
pd.DataFrame(emg_feats_train_mod).to_csv('files/emg_feats_train.csv', index=False)
pd.DataFrame(emg_feats_test_mod).to_csv('files/emg_feats_test.csv', index=False)

In [256]:
# full train and test sets: EEG features followed by the selected EMG features
X_train_ = np.hstack((eeg_train, emg_feats_train_mod2))
X_test_ = np.hstack((eeg_test, emg_feats_test_mod2))

### Splitting

In [257]:
start = time.time()

def split(X_train, y_train):
    """Hold out the last 10% of the rows as a validation set.

    The data is not shuffled, so the order of the rows is preserved.
    Returns ``(X_train, X_val, y_train, y_val)`` as produced by
    ``train_test_split``.
    """
    return train_test_split(
            X_train,
            y_train,
            test_size=0.1,
            shuffle=False,
            random_state=0)

print(X_train_.shape, train_labels_raw.shape)
if PROTOTYPING:
    # Split the assembled feature matrix (X_train_) and write the result back
    # to X_train_/X_test_, the names the scaling and training cells read.
    # Splitting X_train instead left those cells with the full matrices and a
    # shortened y_train.
    # NOTE: re-running this cell in PROTOTYPING mode splits again; re-run the
    # cell that builds X_train_ first.
    X_train_, X_test_, y_train, y_test = split(X_train_, train_labels_raw)
    print(X_train_.shape, y_train.shape, X_test_.shape, y_test.shape)
else:
    y_train = train_labels_raw


print("Time: ", time.time() - start)

(64800, 67) (64800, 1)
Time:  0.0009963512420654297


### Feature scaling

In [258]:
# trick(?): scale eeg feats first, then add emg feats and scale again...
start = time.time()

def scale(X_train, X_test):
    """Standardise both sets with statistics fitted on the training set only.

    Returns ``(X_train_scaled, X_test_scaled)``.
    """
    scaler = StandardScaler()
    scaler.fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)

X_train_s, X_test_s = scale(X_train_, X_test_)

print("Time: ", time.time() - start)

Time:  0.20249724388122559


### Training

In [259]:
# Grid search over kernel and C for the SVC, scored by balanced accuracy on a
# single stratified 10% hold-out split.
start = time.time()

kernels = ("rbf", "sigmoid")
C_values = np.logspace(0, 1.5, num=2)
param_grid = dict(kernel=kernels, C=C_values)
scoring_method = "balanced_accuracy"
cv = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)

classifier = SVC(kernel="rbf", class_weight="balanced", gamma="auto", decision_function_shape="ovo")
grid = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring=scoring_method,
    cv=cv,
    verbose=11,
)
grid.fit(X_train_s, np.ravel(y_train))

best = grid.best_params_
print("Grid with best params: %s and score %f" % (best, grid.best_score_))
print("Time: ", time.time() - start)


Fitting 1 folds for each of 4 candidates, totalling 4 fits
[CV] C=1.0, kernel=rbf ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1.0, kernel=rbf, score=0.926, total= 1.9min
[CV] C=1.0, kernel=sigmoid ...........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.9min remaining:    0.0s


[CV] ............... C=1.0, kernel=sigmoid, score=0.753, total= 2.3min
[CV] C=31.622776601683793, kernel=rbf ................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  4.1min remaining:    0.0s


KeyboardInterrupt: 

In [168]:
# testing logspace for more refined C estimates
# Scratch cell: only displays 10 log-spaced candidates between 10**0.32 and
# 10**0.45; the result is not assigned to anything.
np.logspace(0.32,0.45,num=10)

array([2.08929613, 2.15995371, 2.23300085, 2.30851836, 2.38658979,
       2.4673015 , 2.55074278, 2.63700596, 2.72618645, 2.81838293])

In [260]:
# SVM approach: fit a class-balanced one-vs-one SVC on the scaled training
# features and predict the scaled test features
start = time.time()

classifier = SVC(C=1, class_weight="balanced", gamma="auto", decision_function_shape="ovo")
y_train_flat = np.ravel(y_train)
classifier.fit(X_train_s, y_train_flat)
y_predict = classifier.predict(X_test_s)

# a held-out score is only available in prototyping mode
if PROTOTYPING:
    print(balanced_accuracy_score(y_test, y_predict))

print("Time: ", time.time() - start)

  y = column_or_1d(y, warn=True)


Time:  165.16580438613892


In [None]:
# test best params for classifier
#classifier.fit(X_train_s, y_train)  # determine classifier based on best params
#y_predict = classifier.predict(X_test_s)

In [None]:
# Bagging: ensemble of 100 class-balanced one-vs-one SVCs.
# Outside PROTOTYPING mode the full label set is used. In PROTOTYPING mode the
# y_train from the splitting cell is kept; resetting it unconditionally would
# overwrite the held-out split.
if not PROTOTYPING:
    y_train = train_labels_raw
start = time.time()

classifier = BaggingClassifier(SVC(C=1, class_weight="balanced", gamma="auto", decision_function_shape="ovo"),
                               n_estimators=100, random_state=0)
classifier.fit(X_train_s, np.ravel(y_train))
y_predict = classifier.predict(X_test_s)

if PROTOTYPING:
    print(balanced_accuracy_score(y_test, y_predict))

print("Time: ", time.time() - start)

In [222]:
# scale freqs from fft alone
# NOTE(review): the scaled FFT features are not referenced by any later cell
# in this notebook (the network below is fed X_train_s) - confirm whether
# they were meant to be the network input.
eeg_freqs_train_s, eeg_freqs_test_s = scale(eeg_freqs_train, eeg_freqs_test)

In [227]:
# neural network approach
# define the model architecture: three ReLU hidden layers and a softmax output
N_OUTPUT_UNITS = 4

ann = Sequential()
ann.add(Dense(512, input_dim = np.shape(X_train_s)[1], activation = 'relu'))
#ann.add(Dropout(0.3))
#ann.add(BatchNormalization())
#ann.add(Dense(512, activation = 'relu'))
#ann.add(Dropout(0.1))
#ann.add(BatchNormalization())
ann.add(Dense(256, activation = 'relu'))
#ann.add(Dropout(0.25))
#ann.add(BatchNormalization())
ann.add(Dense(128, activation = 'relu'))
#ann.add(Dropout(0.2))
#ann.add(BatchNormalization())
# final output layer
ann.add(Dense(N_OUTPUT_UNITS, activation = 'softmax'))

opt = keras.optimizers.Adam(lr = 1e-3, decay = 1e-5)

ann.compile(loss = 'sparse_categorical_crossentropy',
            optimizer = opt,
            metrics = ['accuracy'])

# Keras expects class_weight to be a dict mapping class index -> weight; the
# string 'balanced' is a scikit-learn convention and is not a valid Keras
# value. The same "balanced" weights are computed here:
#     n_samples / (n_present_classes * count_of_class)
# Every output index gets an entry (1.0 for classes absent from y_train) so
# the keys are consecutive from 0.
labels_flat = np.ravel(y_train).astype(int)
present_classes, class_counts = np.unique(labels_flat, return_counts=True)
class_weight = {cls: 1.0 for cls in range(N_OUTPUT_UNITS)}
for cls, count in zip(present_classes, class_counts):
    class_weight[int(cls)] = labels_flat.size / (present_classes.size * count)

# fit and predict
ann.fit(X_train_s, y_train, epochs = 100, batch_size = 5000, class_weight = class_weight)
y_predict = ann.predict_classes(X_test_s)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


### Write result

In [261]:
start = time.time()

# Fill the second column of the sample submission with the predictions.
output = pd.read_csv('files/sample.csv')
predictions = np.ravel(y_predict)
# A length mismatch (e.g. predictions for a held-out split in PROTOTYPING
# mode) would produce a wrong submission, so fail loudly instead.
if len(predictions) != output.shape[0]:
    raise ValueError(
        "y_predict has %d entries but the sample file has %d rows"
        % (len(predictions), output.shape[0]))
# one column assignment instead of a per-row Python loop
output.iloc[:, 1] = predictions
output.to_csv("files/SVM_OvO_3_otherfeats_bagged.csv", index=False)

print("Time: ", time.time() - start)

Time:  0.5989737510681152


### Experimental Testing

In [117]:
# NOTE(review): `onsets_list` is not defined by any cell visible in this
# notebook (its only other mention is commented out inside
# extract_features_emg), so this cell depends on leftover kernel state.
print(len(onsets_list))  # perhaps use #onsets also as a feature?
#for item in onsets_list:
#    print(len(item))

64800


In [38]:
# add EMG features to X_train
# if fourier freqs are used as feats, concatenation takes really long...
# NOTE(review): mav_list, ssi_list, ... only exist as locals of
# extract_features_emg in the visible code; this cell presumably relies on an
# earlier version that defined them globally. It also overwrites X_train_.
X_train_ = np.column_stack((eeg1_freqs_train,eeg2_freqs_train,mav_list,ssi_list,vemg_list,rms_list,wl_list,iemg_list,mavs_list,mav1_list,
                            mav2_list,fmd_list,fmn_list))

In [42]:
# add EMG features to X_test
# NOTE(review): uses the same *_list names as the X_train cell above, so those
# lists must have been recomputed for the test set in between; not visible
# here. Overwrites X_test_.
X_test_ = np.column_stack((eeg1_freqs_test,eeg2_freqs_test,mav_list,ssi_list,vemg_list,rms_list,wl_list,iemg_list,mavs_list,mav1_list,
                           mav2_list,fmd_list,fmn_list))

In [45]:
# Scratch: display the shape of X_test_ (np.real only turns the shape tuple
# into an array here).
np.real(X_test_.shape)

array([43200,  1035])

In [37]:
# Scratch: AR coefficients (order 5) of the first raw training EMG epoch.
xx = AR(train_emg_raw[0], order=5)
print(xx)

[-0.3826554 +0.j -0.02521546+0.j -0.13306799+0.j -0.14358056+0.j
 -0.16622746+0.j]


In [46]:
# Scratch: inspect one entry of feature list 11.
# NOTE(review): the recorded output is a 6-element complex array, which looks
# like AR coefficients; in the current extract_features_emg the AR list is at
# index 16, so this output presumably comes from an older feature layout.
emg_feats_train[11][50000]

array([1.84194114+0.j, 2.54266141+0.j, 2.69612235+0.j, 2.17536706-0.j,
       1.34618836-0.j, 0.51530807-0.j])

In [101]:
# Scratch: shape of the first 11 feature lists after transposing.
np.shape(np.transpose(emg_feats_train[0:11]))

(64800, 11)

In [148]:
# Scratch: show the best parameters stored by the grid-search cell.
best

{'C': 2.154434690031882, 'kernel': 'rbf'}

In [152]:
# Scratch: show the refitted best estimator of the grid search.
grid.best_estimator_

SVC(C=2.154434690031882, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [253]:
# Scratch: shape of columns 6-15, the ones dropped when building
# emg_feats_train_mod2.
np.shape(emg_feats_train_mod[:,6:16])

(64800, 10)

In [255]:
# Scratch: shape of the reduced EMG test feature matrix.
np.shape(emg_feats_test_mod2)

(43200, 12)