In [1]:
import random
import itertools
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [2]:
eeg1 = pd.read_csv("eeg1.csv", delimiter="\t")
new_columns = eeg1.columns.values 
new_columns[0] = 'time'     
new_columns[33] = 'sample' 
eeg1.columns = new_columns

events1 = pd.read_csv("events1.csv") #, delimiter="\t"

In [5]:

# takes in eeg dataframe and event dataframe, cleans them, 1hot encodes the events
def clean_eeg(eeg, events, event_interval_length, eeg_slice_length):
    #event_list = []
    array_list = [] 
    index_list = []
    eeg = standardize_eeg(eeg) # function for standardizing the eeg readings
    #events_new = build_zero_events(events)
    # iterate over the rows of the events and slice out the corresponding eeg data
    for index, row in itertools.islice(events.iterrows(), event_interval_length): # loop through events data
        #build_event_list(row, event_list) #
        tmin, tmax = build_event_intervals(row, events)
        eeg_slice = cut_event_intervals(eeg, tmin, tmax)
        array_list, index_list = build_array(eeg_slice, eeg_slice_length, 
                                             index, index_list, array_list)
    y_int = events.iloc[index_list] # take the event types for the correct index
    y_int = y_int['type'].values    # take just the event types as an array
    #y_int = y_int.as_matrix()            # save the event types as a matrix
    #y, lb = one_hot_events(y_int)        # one-hot the event types and save the binarizer
    X = np.stack(array_list, axis = 0)   # stack the arrays so the whole thing is 3D
    return X, y_int                     # return the data, outputs, and the binarizer
    
        
def build_event_list(row, event_list):
    # helper function to pull event types out of event data in the right order
    event_type = getattr(row, "type")
    event_list.append(event_type)
        
def build_event_intervals(row, events):
    # helper function to get the time intervals associated with each event
    tmin = getattr(row, "latency")
    tmin_in = getattr(row, "number")
    tmax_in = tmin_in + 1
    tmax = events1.loc[tmax_in, "latency"]
    return tmin, tmax

def cut_event_intervals(eeg, tmin, tmax):
    # helper function to slice up the eeg data so each slice is associated with one event
    eeg_slice = eeg.loc[(eeg["time"] > tmin) & (eeg["time"] < tmax)]
    eeg_slice.drop(["time", "sample"], axis = 1, inplace = True)
    return eeg_slice
    
def build_array(eeg_slice, eeg_slice_length, index, index_list, array_list):
    # helper function to build an array out of the eeg slices and pad them out to a standard length
    if len(eeg_slice) < eeg_slice_length:
        index_list.append(index)
        eeg_matrix = eeg_slice.as_matrix()
        padded_matrix = np.pad(eeg_matrix, ((0, eeg_slice_length - len(eeg_matrix)), (0,0)),
                                   'constant', constant_values=0)
        array_list.append(padded_matrix)
    return array_list, index_list

def one_hot_events(events):
    # helper function for one-hot encoding the events
    events_list = list(events)
    lb = preprocessing.LabelBinarizer()
    lb.fit(events_list)
    events_1hot = lb.transform(events_list)
    return events_1hot, lb

def invert_one_hot(events, lb):
    # function for decoding one-hot, binarizer made in one_hot_events
    inv_events = lb.inverse_transform(events)
    return inv_events
def standardize_eeg(eeg_data):
    # breaks apart an eeg dataframe, scales the eeg readings, and reassmbles it into a dataframe
    column_list = eeg_data.columns[1:33]
    time = eeg_data['time']
    sample = eeg_data['sample']
    eeg_array = eeg_data[column_list]
    eeg_stnd = scale_data(eeg_array)
    eeg_stnd_df = pd.DataFrame(eeg_stnd, index=eeg_data.index, columns=column_list)
    eeg_stnd = pd.concat([time, eeg_stnd_df, sample], axis =1)
    return eeg_stnd

def scale_data(unscaled_data):
    # helper function for standardize_eeg, fits a scaler and transforms the data 
    scaler = StandardScaler()
    scaler.fit(unscaled_data)
    scaled_data = scaler.transform(unscaled_data)
    return scaled_data

In [7]:

# full dataset parameters

# define model parameters
samples = 3625  # how many trials of eeg data
n_features = 32  # how many channels of eeg in each sample
time_steps = 1300 # how many ms was each sample run for
event_types = 2 #len(set(y))  # how many different event types (light, sound, etc) are there # 6 large, 4 smol

In [8]:
X, y = clean_eeg(eeg1, events1, samples, time_steps)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [14]:
# couple thousand examples
remove_list = [0,2,4,5,6]              # designate unwanted event types
drop_list = np.isin(y, remove_list)    # create a list of indices associated with unwanted events                  
drop_array = np.array(drop_list) 

# make X, y's with the unwanted events removed
y_short_int = y[np.isin(y,remove_list, invert=True)]
X_short = X[np.isin(y, remove_list, invert=True)]
y_short, lb = one_hot_events(y_short_int)

In [25]:
from sklearn.model_selection import StratifiedShuffleSplit
seed=42
random.seed(seed)
# use strat. shuffle split to get indices for test and training data 
sss = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=seed)
sss.get_n_splits(X_short, y_short)
for train_index, test_index in sss.split(X_short, y_short):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X_short[train_index], X_short[test_index]
    y_train, y_test = y_short[train_index], y_short[test_index]

In [26]:
eeg=standardize_eeg(eeg1)

In [27]:
eeg

Unnamed: 0,time,FP1,FPZ,FP2,F7,F3,FZ,F4,F8,FC5,...,P7,P3,PZ,P4,P8,POZ,O1,OZ,O2,sample
0,0.000000e+00,-0.712285,-0.396547,-0.035001,-0.803389,-0.922809,0.326563,1.041307,0.792884,0.405204,...,0.327269,0.976372,1.625318,1.163008,0.219832,0.848531,0.057826,0.464357,0.254708,
1,9.766000e-01,-0.930173,-0.493055,-0.126935,-0.818269,-0.665935,0.162705,0.806923,0.545167,0.064492,...,0.636017,1.153345,1.728048,1.196690,0.195446,0.974330,0.169278,0.637324,0.434820,
2,1.953100e+00,-1.128196,-0.581234,-0.224357,-0.786880,-0.399173,-0.022292,0.515395,0.244912,-0.251039,...,0.909051,1.319667,1.802920,1.197511,0.160652,1.084870,0.260180,0.796272,0.601812,
3,2.929700e+00,-1.292775,-0.654974,-0.321596,-0.703620,-0.130105,-0.219870,0.181324,-0.095989,-0.528091,...,1.141474,1.468626,1.843989,1.160158,0.111654,1.173610,0.333865,0.935515,0.742173,
4,3.906200e+00,-1.412387,-0.709165,-0.412864,-0.566734,0.132981,-0.420534,-0.176716,-0.462317,-0.757452,...,1.331871,1.594697,1.847963,1.082065,0.045711,1.235619,0.396148,1.051228,0.844360,
5,4.882800e+00,-1.478671,-0.740217,-0.492792,-0.378877,0.381479,-0.614624,-0.537678,-0.836662,-0.934991,...,1.481957,1.693891,1.814461,0.963918,-0.038203,1.268066,0.454662,1.141838,0.899990,
6,5.859400e+00,-1.487197,-0.746428,-0.556935,-0.147289,0.606813,-0.793077,-0.879794,-1.200956,-1.061940,...,1.595756,1.763982,1.746169,0.809844,-0.138912,1.270525,0.517864,1.208085,0.905044,
7,6.835900e+00,-1.437914,-0.728134,-0.602148,0.116671,0.801212,-0.948080,-1.182532,-1.537886,-1.144524,...,1.678695,1.804457,1.648583,0.627268,-0.252855,1.244937,0.593848,1.252864,0.860404,
8,7.812500e+00,-1.335311,-0.687737,-0.626924,0.398101,0.958075,-1.073747,-1.428324,-1.832348,-1.193272,...,1.736598,1.816454,1.529502,0.426435,-0.374116,1.195503,0.689243,1.280849,0.772030,
9,8.789100e+00,-1.188128,-0.629532,-0.631551,0.679595,1.072601,-1.166484,-1.604241,-2.072620,-1.221810,...,1.774667,1.802455,1.398262,0.219633,-0.494670,1.128190,0.808189,1.297916,0.650574,
