# About the Code
This code was run on EEG data collected with a [NeuroSky headset](https://store.neurosky.com/pages/mindwave).

This is not the exact code that was used in the experiments, but rather a guideline to our implementation for the Image Recall test, which a set of users performed on a series of images of flags.

In [None]:
import csv
import matplotlib.pyplot as plt
import pandas as pd
import glob
import re

In [None]:
### folder that holds the csv recordings of one user; replace
### user-folder-name with the name of that user's folder inside data/
path = 'data/user-folder-name/'

### collect every csv file found directly inside that folder
csv_pattern = path + "/*.csv"
allFiles = glob.glob(csv_pattern)


def atoi(text):
    '''
    Return text converted to an int when it is made up only of digits,
    otherwise return text unchanged.
    '''
    if text.isdigit():
        return int(text)
    return text

def natural_keys(text):
    '''
    Build a sort key that orders strings in human ("natural") order.

    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    text is split on runs of digits; every piece is passed through atoi,
    so digit runs become ints and compare numerically while the remaining
    pieces stay strings. Returns the resulting list.
    '''
    # Raw string: in a plain string literal '\d' is an invalid escape
    # sequence, which recent Python versions warn about.
    return [atoi(c) for c in re.split(r'(\d+)', text)]


### put the files in human (natural) order before loading them
allFiles.sort(key=natural_keys)
print(allFiles)

### stop early with a clear message when nothing was found; pd.concat would
### otherwise fail on the empty list with a less helpful error
if not allFiles:
    raise ValueError("no csv files found in " + path)

### read every file into its own dataframe, using the first row as header
list_=[]
for file_ in allFiles:
    df = pd.read_csv(file_,index_col=None, header=0)
    list_.append(df)

### concat all the data for all the tests in a single pandas dataframe
eeg = pd.concat(list_)
### user-file-name is a file name containing results for each user.
event = pd.read_csv('result/user-file-name.csv', sep=',', low_memory=False)

# CLEAN THE DATA

### keep only the questions that were answered: a result of -1 means the user
### could not submit an answer for that question; then drop incomplete rows
answered = event['result'] != -1
event = event.loc[answered].dropna()

### remove unnecessary columns from data, selected by position
unused_positions = [1, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16]
eeg = eeg.drop(eeg.columns[unused_positions], axis=1)

### drop incomplete rows, then keep only the last row seen for each timestamp
eeg = eeg.dropna().drop_duplicates(['timestampMs'], keep='last')

### remove the first remaining column (the timestamps) from the training
### data, reset the indices and convert to np array
eeg = eeg.drop(eeg.columns[[0]], axis=1)
eeg = eeg.reset_index(drop=True)
eeg = eeg.values

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
import random
import math

### not used by the cells shown here; kept so later code that expects it still works
samples = []

### the eeg rows are cut into one window per answered question, so at least
### one question is needed
if len(event) == 0:
    raise ValueError("event is empty: the eeg data cannot be split into windows")

### length is the average length of a question answering event.
length = math.ceil(len(eeg)/len(event))

### append constant (zero) rows at the end so that the rows divide evenly
### into len(event) windows of `length` rows each
eeg = np.pad(eeg, ((0, len(event)*length - len(eeg)),(0,0)), 'constant')

### one window per question; the number of features is read from the data
### instead of being hard-coded
eeg = np.reshape(eeg, (len(event), length, eeg.shape[1]))

### fixed seed so the splits are reproducible
seed = 42
random.seed(seed)
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=seed)

### labels used for stratifying and training
result = np.array(event['result'])

sss.get_n_splits(eeg, result)


In [None]:

### walk through every split produced by sss; the four arrays are overwritten
### on each pass, so after the loop they hold the data of the last split
for train_index, test_index in sss.split(eeg, result):
    X_train = eeg[train_index]
    X_test = eeg[test_index]
    y_train = result[train_index]
    y_test = result[test_index]



In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM


# code for building an LSTM with 100 neurons and dropout. Runs for 100 epochs

def build_model():
    '''
    Build and compile a new, untrained classifier: an LSTM layer with 100
    units, a dropout of 0.5 and a single sigmoid output, compiled with
    binary cross-entropy loss, the rmsprop optimizer and the accuracy metric.

    The input shape is taken from the windowed eeg array
    (window length x number of features).
    '''
    net = Sequential()
    net.add(LSTM(100, return_sequences=False, input_shape=(length, eeg.shape[2])))
    net.add(Dropout(0.5))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])
    return net


### one training history per split
his=[]

for train_index, test_index in sss.split(eeg, result):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = eeg[train_index], eeg[test_index]
    y_train, y_test = result[train_index], result[test_index]
    ### start every split from untrained weights: reusing one model would keep
    ### the weights learned on earlier splits, whose training data overlaps
    ### with this split's test data, and so inflate the test score
    model = build_model()
    history = model.fit(X_train, y_train, batch_size=length, epochs=100)
    score = model.evaluate(X_test, y_test, batch_size=length)
    his.append(history)
    ### the accuracy is recorded as 'acc' by older Keras releases and as
    ### 'accuracy' by newer ones
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    print(np.mean(history.history[acc_key]))
    print(score)


### save the model (the one trained on the last split)
model.save('your_model_name.h5')

In [None]:
### mean training accuracy over the epochs of the most recent fit; the metric
### is recorded as 'acc' by older Keras releases and as 'accuracy' by newer ones
np.mean(history.history['acc' if 'acc' in history.history else 'accuracy']) # numpy assumed imported as np

In [None]:
### show the names of the model's metrics
### NOTE(review): presumably used to label the values in `score` printed by the training cell - confirm
model.metrics_names

In [None]:
### average, over all splits, of each split's mean training accuracy.
### The accumulator is not called `sum`, so the builtin sum() stays usable.
total = 0
for h in his:
    ### the accuracy is recorded as 'acc' by older Keras releases and as
    ### 'accuracy' by newer ones
    acc_key = 'acc' if 'acc' in h.history else 'accuracy'
    total = total + np.mean(h.history[acc_key])
print(total/len(his))