In [38]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
import numpy as np
import csv
import math
import antropy as ant

In [39]:
begin, end = 1, 61 # (begin is inclusive, end is exclusive)
num_people = 14
count_samples = {
    "active": 8,
    "meditate": 8,
    "neutral": 8
}

class Sample:
    def __init__(self):
        self.data = {
            'RawEEG': [],
            'Alpha': [],
            'Low Beta': [],
            'High Beta': [],
            'Gamma': [],
            'Theta': [],
            'Delta': [],
            'Meditation': [],
            'Attention': []
        }

    def recordDataPoint(self, RawEEG, Attention, Meditation, Alpha, Delta, Theta, LowBeta, HighBeta, Gamma):
        self.data['RawEEG'].append(float(RawEEG))
        self.data['Attention'].append(float(Attention))
        self.data['Meditation'].append(float(Meditation))
        self.data['Alpha'].append(float(Alpha))
        self.data['Delta'].append(float(Delta))
        self.data['Theta'].append(float(Theta))
        self.data['Low Beta'].append(float(LowBeta))
        self.data['High Beta'].append(float(HighBeta))
        self.data['Gamma'].append(float(Gamma))

    '''
    Record a line of data from the CSV output, which takes form RawEEG, Alpha, Delta, Gamma, Low Beta, High Beta, Theta, Attention, Meditation

    '''
    def recordDataLine(self, line):
        self.recordDataPoint(line[0], line[7], line[8], line[1], line[2], line[6], line[4], line[5], line[3])
    
    def getEEG(self):
        return self.data['RawEEG']
    
    def getAttention(self):
        return self.data["Attention"]
    
    def getMeditation(self):
        return self.data["Meditation"]
    
    def getAlpha(self):
        return self.data["Alpha"]
    
    def getDelta(self):
        return self.data["Delta"]
    
    def getTheta(self):
        return self.data["Theta"]
    
    def getLowBeta(self):
        return self.data["Low Beta"]
    
    def getHighBeta(self):
        return self.data["High Beta"]
    
    def getGamma(self):
        return self.data["Gamma"]

    def get(self, key):
        return self.data[key]

    '''
    Filter out all outliers, as defined by being outside 3*std from the mean, and replace with mean of the samples around them
    '''
    def filter_outliers(self):
        sampleBad = False
        for key in ['RawEEG', 'Alpha', 'Theta', 'Low Beta', 'High Beta', "Gamma", 'Delta']:
            data = self.data[key]
            
            filtered = []

            iqr = np.subtract(*np.percentile(data, [75, 25]))
            med = np.median(data)

            for x in data:
                
                if (med - 1.5*iqr > x) or (med + 1.5*iqr < x) or abs(x - np.mean(data)) > 2 * np.std(data):
                    filtered.append(med)
                    # filtered.append(np.median(data[max(0, i-5):i] + data[i+1:min(len(data), i+5)]))
                else:
                    filtered.append(x)
                    
            self.data[key] = filtered
        return sampleBad

In [40]:
# {personNum : {state: [sampleNums]}}
# 0 = key for throwing away all samples of that state

badSamples = {
    1: {"active": [5], "neutral": [2], "meditate": []},
    2: {"active": [0], "neutral": [0], "meditate": [0]},
    3: {"active": [1, 4], "neutral": [1], "meditate": [5, 6, 7, 8]},
    4: {"active": [2], "neutral": [7], "meditate": [1, 8]}, # maybe n1
    5: {"active": [], "neutral": [], "meditate": []}, # i love you person 5 
    6: {"active": [], "neutral": [2, 6], "meditate": []},
    7: {"active": [5], "neutral": [4, 6, 7], "meditate": [1, 3, 4, 8]}, # think about killing some of this data
    8: {"active": [5], "neutral": [1], "meditate": []}, # maybe m5 and m8
    9: {"active": [], "neutral": [], "meditate": []}, 
    10: {"active": [6, 8], "neutral": [4, 5, 6], "meditate": []},
    11: {"active": [4], "neutral": [4, 8], "meditate": [1, 2, 3, 5, 7]},
    12: {"active": [2, 3, 8], "neutral": [0], "meditate": [6]}, # maybe n0
    13: {"active": [], "neutral": [8], "meditate": []},
    14: {"active": [4, 5, 8], "neutral": [0], "meditate": [1, 2, 8]}
}

In [41]:
data = []
dataLabels = []

def transcribeFileToSample(personN: int, sampleN: int, state: str, X, y, outlierFiltering = True):
    sample_data = Sample()

    with open("data/all_data/" + state + "_" + str(personN) + "_" + str(sampleN) + ".csv") as f:
        reader = csv.reader(f)

        header = next(reader)
        
        for row in reader:
            sample_data.recordDataLine(row)

        if (outlierFiltering):   
            if (0 not in badSamples[personN][state] and sampleN not in badSamples[personN][state]):

                for key in sample_data.data:
                    sample_data.data[key] = sample_data.data[key][begin:end]

                sample_data.filter_outliers()
                X.append(sample_data)
                y.append(state)
        else:
            X.append(sample_data)
            y.append(state)

for person in range(num_people - 1):
    for state in count_samples:
        for i in range(count_samples[state]):
            transcribeFileToSample(person + 1, i + 1, state, data, dataLabels)

In [42]:
dataExtracted = []

def safety_check(x):
    if math.isnan(x): return 0
    if math.isinf(x): return 99999999999
    return x

for point in data:
    extractedPoint = []

    extractedPoint.append(np.mean(point.getAlpha()))
    extractedPoint.append(np.mean(point.getLowBeta()))
    extractedPoint.append(np.mean(point.getHighBeta())) 
    extractedPoint.append(np.mean(point.getGamma())) 
    extractedPoint.append(np.mean(point.getTheta()))
    extractedPoint.append(np.std(point.getHighBeta())) 
    extractedPoint.append(np.std(point.getGamma()))
    extractedPoint.append(np.std(point.getDelta()))
    extractedPoint.append(safety_check(ant.sample_entropy(point.getDelta())))
    
    # extractedPoint.append(safety_check(ant.spectral_entropy(point.getEEG(), sf=1)))
    extractedPoint.append(np.mean(point.getDelta()))
    # extractedPoint.append(np.std(point.getAlpha())) 
    extractedPoint.append(np.std(point.getLowBeta())) 
    # extractedPoint.append(np.std(point.getTheta()))

    dataExtracted.append(extractedPoint)

In [54]:
from sklearn.model_selection import KFold

cvclf = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=20, n_estimators=2000))
cv = KFold(n_splits=10, shuffle=True, random_state=0)
scores = cross_val_score(cvclf, dataExtracted, dataLabels, cv=cv, n_jobs=-1)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

0.74 accuracy with a standard deviation of 0.09


In [44]:
# total = 0
# n = 10
# errorLabels = {"active": {"neutral": 0, "meditate": 0}, 
#             "neutral": {"active": 0, "meditate": 0}, 
#             "meditate": {"active": 0, "neutral": 0}}
#             # first key = actual, second key = prediction

# for _ in range(n):
#     train, test, trainLabels, testLabels = train_test_split(dataExtracted, dataLabels, test_size=0.20)
#     clf = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=70, n_estimators=1800))
#     clf.fit(train, trainLabels)

#     predictions = clf.predict(test)
#     for i in range(len(predictions)):
#         if predictions[i] != testLabels[i]:
#             errorLabels[testLabels[i]][predictions[i]] += 1 

#     total += clf.score(test, testLabels) 

# print(total/n)
# print(errorLabels)

In [45]:
print(len(dataExtracted))
print(len(dataLabels))

237
237


In [59]:
smallData = []
smallDataLabels = []

for state in count_samples:
    for i in range(count_samples[state]):
        transcribeFileToSample(14, i + 1, state, smallData, smallDataLabels, False)

sampleX = []

def safety_check(x):
    if math.isnan(x): return 0
    if math.isinf(x): return 99999999999
    return x

for point in smallData:
    extractedPoint = []

    extractedPoint.append(np.mean(point.getAlpha()))
    extractedPoint.append(np.mean(point.getLowBeta()))
    extractedPoint.append(np.mean(point.getHighBeta())) 
    extractedPoint.append(np.mean(point.getGamma())) 
    extractedPoint.append(np.mean(point.getTheta()))
    extractedPoint.append(np.std(point.getHighBeta())) 
    extractedPoint.append(np.std(point.getGamma()))
    extractedPoint.append(np.std(point.getDelta()))
    extractedPoint.append(safety_check(ant.sample_entropy(point.getDelta())))

    sampleX.append(extractedPoint)

train, test, trainLabels, testLabels = train_test_split(dataExtracted, dataLabels, test_size=0.20)
clf = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=70, n_estimators=1800))
clf.fit(train, trainLabels)
clf.predict(sampleX)
clf.score(sampleX, smallDataLabels)

0.5

In [52]:
i = 1
for state, predicted in zip(smallDataLabels, clf.predict(sampleX)):
    print("Sample " + state + " " + str(i) + " is predicted to be " + predicted)
    i += 1
    if i == 9:
        i = 1

#    14: {"active": [4, 5, 8], "neutral": [0], "meditate": [1, 2, 8]}

Sample active 1 is predicted to be active
Sample active 2 is predicted to be active
Sample active 3 is predicted to be active
Sample active 4 is predicted to be neutral
Sample active 5 is predicted to be active
Sample active 6 is predicted to be neutral
Sample active 7 is predicted to be neutral
Sample active 8 is predicted to be neutral
Sample meditate 1 is predicted to be active
Sample meditate 2 is predicted to be active
Sample meditate 3 is predicted to be meditate
Sample meditate 4 is predicted to be meditate
Sample meditate 5 is predicted to be meditate
Sample meditate 6 is predicted to be meditate
Sample meditate 7 is predicted to be meditate
Sample meditate 8 is predicted to be meditate
Sample neutral 1 is predicted to be neutral
Sample neutral 2 is predicted to be neutral
Sample neutral 3 is predicted to be active
Sample neutral 4 is predicted to be neutral
Sample neutral 5 is predicted to be neutral
Sample neutral 6 is predicted to be active
Sample neutral 7 is predicted to b