In [47]:
import csv
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


In [48]:
# Slice bounds applied to every channel of a recording as [begin:end]:
# `begin` is the first row kept, `end` is one past the last row kept.
begin = 1
end = 61

# Recordings exist for people numbered 1..num_people.
num_people = 2

# Number of recordings per person for each mental state (iteration order of
# the keys is the order in which states are loaded).
count_samples = dict.fromkeys(("active", "meditate", "neutral"), 8)

class Sample:
    """One EEG recording stored as parallel per-channel lists of floats.

    ``self.data`` maps a channel name ('RawEEG', 'Alpha', 'Low Beta',
    'High Beta', 'Gamma', 'Theta', 'Delta', 'Meditation', 'Attention') to a
    list of values; index ``i`` of every list belongs to the same data point.
    """

    def __init__(self):
        self.data = {
            'RawEEG': [],
            'Alpha': [],
            'Low Beta': [],
            'High Beta': [],
            'Gamma': [],
            'Theta': [],
            'Delta': [],
            'Meditation': [],
            'Attention': []
        }

    def recordDataPoint(self, RawEEG, Attention, Meditation, Alpha, Delta, Theta, LowBeta, HighBeta, Gamma):
        """Append one data point, converting every value with ``float()``.

        All values are converted before anything is stored, so a value that
        cannot be converted leaves the sample untouched and the channel lists
        stay the same length.

        Raises:
            ValueError / TypeError: propagated from ``float()`` when a value
                is not numeric.
        """
        converted = {
            'RawEEG': float(RawEEG),
            'Attention': float(Attention),
            'Meditation': float(Meditation),
            'Alpha': float(Alpha),
            'Delta': float(Delta),
            'Theta': float(Theta),
            'Low Beta': float(LowBeta),
            'High Beta': float(HighBeta),
            'Gamma': float(Gamma),
        }
        for channel, value in converted.items():
            self.data[channel].append(value)

    def recordDataLine(self, line):
        """Record one row of the CSV output.

        ``line`` is indexed positionally and is expected in the column order
        RawEEG, Alpha, Delta, Gamma, Low Beta, High Beta, Theta, Attention,
        Meditation (indices 0-8).

        Raises:
            IndexError: if ``line`` has fewer than nine entries.
        """
        # Re-order from CSV column order to recordDataPoint's argument order.
        self.recordDataPoint(line[0], line[7], line[8], line[1], line[2], line[6], line[4], line[5], line[3])

    def getEEG(self):
        """Return the list of raw EEG values."""
        return self.data['RawEEG']

    def getAttention(self):
        """Return the list of Attention values."""
        return self.data["Attention"]

    def getMeditation(self):
        """Return the list of Meditation values."""
        return self.data["Meditation"]

    def getAlpha(self):
        """Return the list of Alpha values."""
        return self.data["Alpha"]

    def getDelta(self):
        """Return the list of Delta values."""
        return self.data["Delta"]

    def getTheta(self):
        """Return the list of Theta values."""
        return self.data["Theta"]

    def getLowBeta(self):
        """Return the list of Low Beta values."""
        return self.data["Low Beta"]

    def getHighBeta(self):
        """Return the list of High Beta values."""
        return self.data["High Beta"]

    def getGamma(self):
        """Return the list of Gamma values."""
        return self.data["Gamma"]

    def get(self, key):
        """Return the list stored under channel name ``key`` (KeyError if unknown)."""
        return self.data[key]

    def scale(self):
        """Divide the six band channels by the standard deviation of RawEEG.

        Alpha, Delta, Theta, Low Beta, High Beta and Gamma are replaced by
        scaled copies; RawEEG, Attention and Meditation are left unchanged.
        The deviation is not checked: a zero or NaN value propagates into
        the scaled results.
        """
        eeg_std_dev = np.std(self.data['RawEEG'])
        for channel in ('Alpha', 'Delta', 'Theta', 'Low Beta', 'High Beta', 'Gamma'):
            self.data[channel] = [x / eeg_std_dev for x in self.data[channel]]

In [49]:
# Parallel accumulators: data[i] is a loaded recording and dataLabels[i] is
# the state name it was recorded under.
data, dataLabels = [], []

def transcribeFileToSample(personN: int, sampleN: int, state: str, data_dir: str = "data/all_data"):
    """Load one recording from CSV and append it to the global dataset.

    Reads ``<data_dir>/<state>_<personN>_<sampleN>.csv``, skips its header
    row, records every remaining row into a new ``Sample``, trims each
    channel to ``[begin:end]`` and appends the sample to ``data`` and its
    state name to ``dataLabels``.

    Args:
        personN: Person number used in the file name.
        sampleN: Recording number used in the file name.
        state: State name; used in the file name and stored as the label.
        data_dir: Directory holding the CSV files.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        ValueError: if the file is empty (no header row) or a value is not
            numeric.
    """
    sample_data = Sample()
    path = f"{data_dir}/{state}_{personN}_{sampleN}.csv"

    # newline="" lets the csv module do its own newline handling.
    with open(path, newline="") as f:
        reader = csv.reader(f)

        # The first row is a header; its contents are not used.
        if next(reader, None) is None:
            raise ValueError(f"{path} is empty: expected a header row")

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline).
                continue
            sample_data.recordDataLine(row)

    # Keep only the configured row window of every channel.
    for key in sample_data.data:
        sample_data.data[key] = sample_data.data[key][begin:end]

    # Scaling is currently disabled.
    # sample_data.scale()

    data.append(sample_data)
    dataLabels.append(state)

# Load every recording: for each person, for each state, for each recording
# index. File names are 1-based, hence the "+ 1" on both counters.
for person in range(num_people):
    for state, recordings_per_state in count_samples.items():
        for i in range(recordings_per_state):
            transcribeFileToSample(person + 1, i + 1, state)

In [50]:
# One feature row per recording: [mean, std] of each band, in this band order
# (12 values per row).
dataExtracted = []

for point in data:
    extractedPoint = []

    for band in ('Alpha', 'Low Beta', 'High Beta', 'Gamma', 'Theta', 'Delta'):
        band_values = point.get(band)
        extractedPoint.append(np.mean(band_values))
        extractedPoint.append(np.std(band_values))

    dataExtracted.append(extractedPoint)

In [51]:
def forest_test(X, y, test_size=0.25, n_estimators=700, random_state=42):
    """Train a random forest on a random split of (X, y) and print a report.

    The data is split with ``train_test_split``, a ``RandomForestClassifier``
    is fitted on the training part, and the ``classification_report`` for the
    held-out part is printed. Nothing is returned.

    Args:
        X: Feature rows, one per sample.
        y: Labels, one per row of ``X``.
        test_size: Passed to ``train_test_split``.
        n_estimators: Number of trees in the forest.
        random_state: Seed used for both the split and the forest so that
            repeated runs print the same report. Pass ``None`` for a
            different random split and forest on every call.
    """
    X_Train, X_Test, y_Train, y_Test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    trainedforest = RandomForestClassifier(
        n_estimators=n_estimators, random_state=random_state
    ).fit(X_Train, y_Train)
    predictionforest = trainedforest.predict(X_Test)
    print(classification_report(y_Test, predictionforest))

In [52]:
# Project the 12 per-recording band statistics onto 3 principal components,
# then evaluate a random forest on the reduced features.
# NOTE(review): PCA is fitted on every recording before forest_test makes its
# train/test split, so held-out rows influence the projection — confirm this
# is acceptable for the reported scores.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(dataExtracted)

forest_test(X_pca, dataLabels)
# Alternative: evaluate on the original (non-PCA) features instead.
# forest_test(dataExtracted, dataLabels)

              precision    recall  f1-score   support

      active       0.80      1.00      0.89         4
    meditate       1.00      1.00      1.00         4
     neutral       1.00      0.75      0.86         4

    accuracy                           0.92        12
   macro avg       0.93      0.92      0.92        12
weighted avg       0.93      0.92      0.92        12



In [53]:
# Combined feature matrix: the extracted band statistics side by side with
# their PCA projection (columns of dataExtracted first, then X_pca).
test = np.hstack((dataExtracted, X_pca))