## This is the same classifier, trained on Mika's data and tested on Daniel's data

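This is done to see how well the data carries over between people: the classifier is fit on one person's recordings and then evaluated on the other person's.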

In [51]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
import numpy as np
import csv

In [52]:
# Slice of data points kept from every training recording
# (begin is inclusive, end is exclusive).
begin = 3
end = 30

# How many recordings to load for each state; the loader below asks for
# files numbered 1..count for every key, in this order.
count_samples = dict(active=5, meditate=5, neutral=5)

Define `Sample` class to store data

In [53]:
class Sample:
    """Holds one recording as parallel lists of float readings, one list per channel.

    ``self.data`` maps each channel name ('RawEEG', 'Alpha', 'Low Beta',
    'High Beta', 'Gamma', 'Theta', 'Delta', 'Meditation', 'Attention') to the
    list of values recorded so far.
    """

    # Channels that scale() divides by the RawEEG standard deviation.
    # RawEEG, Attention and Meditation are left unscaled.
    _SCALED_KEYS = ('Alpha', 'Delta', 'Theta', 'Low Beta', 'High Beta', 'Gamma')

    def __init__(self):
        self.data = {
            'RawEEG': [],
            'Alpha': [],
            'Low Beta': [],
            'High Beta': [],
            'Gamma': [],
            'Theta': [],
            'Delta': [],
            'Meditation': [],
            'Attention': []
        }

    def recordDataPoint(self, RawEEG, Attention, Meditation, Alpha, Delta, Theta, LowBeta, HighBeta, Gamma):
        """Append one reading per channel; every argument is converted with float().

        Raises:
            ValueError: if any argument cannot be parsed as a float.
        """
        self.data['RawEEG'].append(float(RawEEG))
        self.data['Attention'].append(float(Attention))
        self.data['Meditation'].append(float(Meditation))
        self.data['Alpha'].append(float(Alpha))
        self.data['Delta'].append(float(Delta))
        self.data['Theta'].append(float(Theta))
        self.data['Low Beta'].append(float(LowBeta))
        self.data['High Beta'].append(float(HighBeta))
        self.data['Gamma'].append(float(Gamma))

    def recordDataLine(self, line):
        """Record a line of data from the CSV output.

        The line takes the form
        RawEEG, Alpha, Delta, Gamma, Low Beta, High Beta, Theta, Attention, Meditation
        (columns 0..8); the indices below reorder it into recordDataPoint's
        argument order.

        Raises:
            IndexError: if ``line`` has fewer than nine columns.
            ValueError: if a column cannot be parsed as a float.
        """
        self.recordDataPoint(line[0], line[7], line[8], line[1], line[2], line[6], line[4], line[5], line[3])

    def getEEG(self):
        """Return the list of RawEEG readings."""
        return self.data['RawEEG']

    def getAttention(self):
        """Return the list of Attention readings."""
        return self.data["Attention"]

    def getMeditation(self):
        """Return the list of Meditation readings."""
        return self.data["Meditation"]

    def getAlpha(self):
        """Return the list of Alpha readings."""
        return self.data["Alpha"]

    def getDelta(self):
        """Return the list of Delta readings."""
        return self.data["Delta"]

    def getTheta(self):
        """Return the list of Theta readings."""
        return self.data["Theta"]

    def getLowBeta(self):
        """Return the list of Low Beta readings."""
        return self.data["Low Beta"]

    def getHighBeta(self):
        """Return the list of High Beta readings."""
        return self.data["High Beta"]

    def getGamma(self):
        """Return the list of Gamma readings."""
        return self.data["Gamma"]

    def get(self, key):
        """Return the list stored under ``key``; raises KeyError for an unknown channel name."""
        return self.data[key]

    def scale(self):
        """Scale the band channels by the standard deviation of the RawEEG data.

        Alpha, Delta, Theta, Low Beta, High Beta and Gamma are each replaced by
        a new list of their values divided by ``np.std(RawEEG)``. Calling this
        twice divides twice, because RawEEG itself is never rescaled.

        Raises:
            ValueError: if the RawEEG standard deviation is zero or not finite
                (flat or empty RawEEG), since dividing by it would fill the
                channels with inf/NaN and only fail later, far from the cause.
        """
        eeg_std_dev = np.std(self.data['RawEEG'])
        if not np.isfinite(eeg_std_dev) or eeg_std_dev == 0:
            raise ValueError(
                "cannot scale sample: standard deviation of RawEEG is "
                + str(eeg_std_dev)
                + " (RawEEG is empty or constant)"
            )
        for key in self._SCALED_KEYS:
            self.data[key] = [x / eeg_std_dev for x in self.data[key]]

Transcribe from file to samples stored in the `data` and `dataLabels` lists

In [54]:
data = []
dataLabels = []

def transcribeFileToSample(sampleN: int, state: str):
    """Load one training recording and append it to `data` / `dataLabels`.

    Reads ``raw_cv_data/<state><sampleN>.csv``, skips its header row, records
    every remaining row into a fresh `Sample`, trims each channel to the
    ``[begin:end]`` window, scales the sample and appends it (with ``state`` as
    its label) to the module-level lists.

    Args:
        sampleN: number that follows the state name in the file name.
        state: label of the recording; also the file-name prefix.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        StopIteration: if the file is completely empty (no header row).
    """
    sample_data = Sample()

    # newline="" is what the csv module asks for when handing it a file object.
    with open("raw_cv_data/" + state + str(sampleN) + ".csv", newline="") as f:
        reader = csv.reader(f)

        next(reader)  # skip the header row; its contents are not used

        for row in reader:
            # csv.reader yields [] for blank lines, which recordDataLine
            # cannot index into; skip them instead of crashing.
            if row:
                sample_data.recordDataLine(row)

    # The file is fully read at this point, so the rest runs with it closed.
    for key in sample_data.data:
        sample_data.data[key] = sample_data.data[key][begin:end]

    sample_data.scale()

    data.append(sample_data)
    dataLabels.append(state)

# Files are numbered from 1, hence i + 1.
for state in count_samples:
    for i in range(count_samples[state]):
        transcribeFileToSample(i + 1, state)

Extract features from raw data and store in `dataExtracted`

In [55]:
# One six-value feature row per training sample, in this fixed column order:
# mean(Alpha), mean(Low Beta), std(Alpha), mean(Theta), std(Gamma), std(Delta).
dataExtracted = []

for point in data:
    extractedPoint = [
        np.mean(point.getAlpha()),
        np.mean(point.getLowBeta()),
        np.std(point.getAlpha()),
        np.mean(point.getTheta()),
        np.std(point.getGamma()),
        np.std(point.getDelta()),
    ]
    dataExtracted.append(extractedPoint)


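Train the classifier: fit a single pipeline (standardise the features, then LDA) on every extracted training sample. No train/test split or repeated trials happen in this cell; the model is evaluated on the other person's recordings further down. (The accuracy and confusion counts shown under the cell appear to be left over from an earlier version that averaged over many trials.)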

In [56]:
# Standardise every feature column, then classify with LDA.
# Per the scikit-learn docs, n_components only affects LDA's transform(),
# not the labels returned by predict().
scaler = StandardScaler()
lda = LinearDiscriminantAnalysis(n_components=1)
clf = make_pipeline(scaler, lda)

# Fit on all training samples; kept as the last expression so the cell
# still displays the fitted pipeline.
clf.fit(dataExtracted, dataLabels)

0.5829999999999996
{'active': {'neutral': 358, 'meditate': 110}, 'neutral': {'active': 578, 'meditate': 67}, 'meditate': {'active': 134, 'neutral': 4}}


In [57]:
# Window of data points kept from every held-out recording
# (cv_begin inclusive, cv_end exclusive).
cv_begin = 5
cv_end = 61

cv_data = []
cv_dataLabels = []

# Number of held-out recordings to load per state; files are numbered 1..count.
cv_samples = {
    "active": 15,
    "neutral": 15,
    "meditate": 15
}

def cv_transcribeFileToSample(sampleN: int, state: str):
    """Load one held-out recording and append it to `cv_data` / `cv_dataLabels`.

    Reads ``raw_data/<state><sampleN>.csv``, skips its header row, records
    every remaining row into a fresh `Sample`, trims each channel to the
    ``[cv_begin:cv_end]`` window, scales the sample and appends it (with
    ``state`` as its label) to the module-level lists.

    Args:
        sampleN: number that follows the state name in the file name.
        state: label of the recording; also the file-name prefix.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        StopIteration: if the file is completely empty (no header row).
    """
    sample_data = Sample()

    # NOTE(review): the cv_* (held-out) samples come from raw_data/ while the
    # training samples come from raw_cv_data/ -- confirm the directory names
    # are not swapped relative to the variable naming.
    # newline="" is what the csv module asks for when handing it a file object.
    with open("raw_data/" + state + str(sampleN) + ".csv", newline="") as f:
        reader = csv.reader(f)

        next(reader)  # skip the header row; its contents are not used

        for row in reader:
            # csv.reader yields [] for blank lines, which recordDataLine
            # cannot index into; skip them instead of crashing.
            if row:
                sample_data.recordDataLine(row)

    # The file is fully read at this point, so the rest runs with it closed.
    for key in sample_data.data:
        sample_data.data[key] = sample_data.data[key][cv_begin:cv_end]

    sample_data.scale()

    cv_data.append(sample_data)
    cv_dataLabels.append(state)

# Files are numbered from 1, hence i + 1.
for state in cv_samples:
    for i in range(cv_samples[state]):
        cv_transcribeFileToSample(i + 1, state)

In [58]:
# One six-value feature row per held-out sample, in this fixed column order:
# mean(Alpha), mean(Low Beta), std(Alpha), mean(Theta), std(Gamma), std(Delta).
cv_extracted = []

for point in cv_data:
    extractedPoint = [
        np.mean(point.getAlpha()),
        np.mean(point.getLowBeta()),
        np.std(point.getAlpha()),
        np.mean(point.getTheta()),
        np.std(point.getGamma()),
        np.std(point.getDelta()),
    ]
    cv_extracted.append(extractedPoint)


In [59]:
# Label every held-out sample with the fitted pipeline and measure how
# often the predicted label matches the real one.
predictions = clf.predict(cv_extracted)
accuracy = clf.score(cv_extracted, cv_dataLabels)

print("Predictions: ", predictions)
print("Real Labels: ", cv_dataLabels)
print("Correct " + str(100*accuracy) + "% of the time!")

Predictions:  ['neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral'
 'neutral' 'meditate' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral'
 'neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral'
 'neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'neutral' 'active'
 'neutral' 'neutral' 'meditate' 'meditate' 'neutral' 'neutral' 'meditate'
 'meditate' 'meditate' 'meditate' 'meditate' 'meditate' 'meditate'
 'meditate' 'meditate' 'meditate' 'meditate']
Real Labels:  ['active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'active', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate', 'meditate']
Corre

In [60]:
# Classify a single extra recording with the already-fitted pipeline.
sample_data = Sample()

# newline="" is what the csv module asks for when handing it a file object.
with open("active_lang_mcq.csv", newline="") as f:
    reader = csv.reader(f)

    header = next(reader)  # only read to skip the header row

    for row in reader:
        # csv.reader yields [] for blank lines, which recordDataLine
        # cannot index into; skip them instead of crashing.
        if row:
            sample_data.recordDataLine(row)

# The file is fully read at this point, so the rest runs with it closed.
# NOTE(review): this 5:58 window differs from the 5:61 window used for the
# held-out recordings above -- confirm that is intentional.
for key in sample_data.data:
    sample_data.data[key] = sample_data.data[key][5:58]

sample_data.scale()

# Same six features, in the same order, as the rows the classifier was fit on.
extractedPoint = [
    np.mean(sample_data.getAlpha()),
    np.mean(sample_data.getLowBeta()),
    np.std(sample_data.getAlpha()),
    np.mean(sample_data.getTheta()),
    np.std(sample_data.getGamma()),
    np.std(sample_data.getDelta()),
]

# predict() expects a 2-D array, so reshape the single row to (1, n_features).
pred = clf.predict(np.array(extractedPoint).reshape(1, -1))
pred

array(['active'], dtype='<U8')