In [2]:
import numpy as np
from scipy import signal
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.model_selection import ShuffleSplit, cross_val_score
from sklearn.preprocessing import LabelBinarizer
import csv
import pickle
import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [300]:
# set path to datasets
datapath = '/home/jingyan/Documents/ME499-WinterProject/mindwave/src/motor_cortex_ml/data/'
def load_record(exp,sec,run):
    for secnum in sec:
        for runnum in run: #6runs for each session
            savetag='_exp'+str(exp)+'_sec'+str(secnum)+'_run'+str(runnum)
            csvname =datapath + 'record'+savetag+'.csv'
            data = np.loadtxt(csvname,delimiter = ',')
            if runnum==run[0] and secnum==sec[0]:
                x = data[:,:-1]
                y = data[:,-1]
            else:
                x=np.append(x,data[:,:-1],axis=0)
                y=np.append(y,data[:,-1],axis=0)
    return x, y

def fft(data, fs):
    L = len(data)
    freq = np.linspace(0.0, 1.0 / (2.0 * fs **-1), int(L / 2))
    yi = np.fft.fft(data)#[1:]
    y = yi[range(int(L / 2))]
    # ysample= (2.0*abs(yi/L))[range(int(L/2))]
    return freq, abs(y)
def drop_class(data,labels,remove_list=[0]):
    keep_list=np.isin(labels,remove_list,invert=True)
    label_keep=labels[keep_list]
    data_keep=data[keep_list]
    return data_keep,label_keep
def set_baseline_ref(data,labels,base_class=[0],use_baseline=False,ref_base=None):
    base_list=np.isin(labels,base_class,invert=False)
    if use_baseline==False:
        baseline_ffts=data[base_list]
        baseline_fft_ref=np.mean(baseline_ffts,axis=0)
    else:
        baseline_fft_ref=ref_base
    data_keep,label_keep=drop_class(data,labels)
    if data_keep.shape[1]!=baseline_fft_ref.shape[0]: ValueError('size mismatch')
    data_ref=(data_keep-baseline_fft_ref)/baseline_fft_ref
    return data_ref,label_keep,baseline_fft_ref    

channel_num=8
fs=250
train_data,train_label=load_record(16,[1],[1,2,3])
# test_data,test_label=load_record(16,[1],[])

(13500, 24)

In [302]:
train_data,train_label=shuffle(train_data,train_label)
# train_data1,test_data,train_label1,test_label=train_test_split(train_data,train_label,test_size=0.50)
train_data1,test_data=train_data[:4500],train_data[4500:]
train_label1,test_label=train_label[:4500],train_label[4500:]

In [303]:
test_label.shape

(9000,)

In [304]:

train_keep,label_keep,baseline_fft_ref=set_baseline_ref(train_data1,train_label1)
test_keep,test_label_keep,baseline_fft_ref_test=set_baseline_ref(test_data,test_label)#,use_baseline=True,ref_base=baseline_fft_ref)
# scaler=MinMaxScaler()
# scaled_train_data=scaler.fit_transform(train_keep)
# scaled_test_data=scaler.fit_transform(test_keep)


np.savetxt("baseline_ref.csv", baseline_fft_ref, delimiter=",")
x_train,y_train,x_test,y_test=train_keep,label_keep,test_keep,test_label_keep
# x_train,x_test,y_train,y_test=train_test_split(train_keep,label_keep,test_size=0.25)
print(x_train.shape,y_test.shape)

((3009, 24), (5991,))


In [305]:
#Linear SVM classifier
clf1 = LinearSVC(C = 0.1, intercept_scaling=1, loss='hinge', max_iter=1000,multi_class='ovr', penalty='l2', random_state=1, tol=0.00001)
clf1.fit(x_train,y_train)
score_train1=clf1.score(x_train,y_train)
score_test1=clf1.score(x_test,y_test)
print(score_train1,score_test1)

(0.5882352941176471, 0.5738607911867801)


In [320]:
model=KNeighborsClassifier(n_neighbors=4)
model.fit(x_train,y_train)
score_train=model.score(x_train,y_train)
score_test=model.score(x_test,y_test)
print(score_train,score_test)
model_filename='mindwave_model.pkl'
with open(model_filename,'wb') as file:
    pickle.dump(model,file)

(0.8624127617148555, 0.7489567684860624)


In [307]:
model=GradientBoostingClassifier()
model.fit(x_train,y_train)
score_train=model.score(x_train,y_train)
score_test=model.score(x_test,y_test)
print(score_train,score_test)
model_filename='mindwave_model.pkl'
with open(model_filename,'wb') as file:
    pickle.dump(model,file)

(0.807909604519774, 0.6633283258220665)


In [284]:
model=AdaBoostClassifier(n_estimators=100)#OneVsRestClassifier(SVC(kernel='linear'))
model.fit(x_train,y_train)
score_train=model.score(x_train,y_train)
score_test=model.score(x_test,y_test)
print(score_train,score_test)
# model_filename='mindwave_model.pkl'
# with open(model_filename,'wb') as file:
#     pickle.dump(model,file)

(0.7417417417417418, 0.6553446553446554)


In [273]:
model=DecisionTreeClassifier()
model.fit(x_train,y_train)
score_train=model.score(x_train,y_train)
score_test=model.score(x_test,y_test)
print(score_train,score_test)
model_filename='mindwave_model.pkl'
with open(model_filename,'wb') as file:
    pickle.dump(model,file,protocol=2)

(1.0, 0.6664440734557596)


In [274]:
model=LinearDiscriminantAnalysis()
model.fit(x_train,y_train)
score_train=model.score(x_train,y_train)
score_test=model.score(x_test,y_test)
print(score_train,score_test)
# model_filename='mindwave_model.pkl'
# with open(model_filename,'wb') as file:
#     pickle.dump(model,file)

(0.5973377703826955, 0.593322203672788)


In [275]:
model2 = SVC(20,'rbf', degree=10, gamma='auto', coef0=0.0, tol=0.001, cache_size=10000, max_iter=-1, decision_function_shape='ovr')
model2.fit(x_train,y_train)
score_train2=model2.score(x_train,y_train)
score_test2=model2.score(x_test,y_test)
print(score_train2,score_test2)

(0.7078202995008319, 0.6767946577629382)
