In [11]:
import numpy as np
import csv

from sklearn.feature_selection import SelectFdr, chi2
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import VarianceThreshold
import antropy as ant
import math

In [2]:
# Window of data points kept from every recording, used as a slice:
# `begin` is inclusive, `end` is exclusive.
begin = 6
end = 61

# Number of recordings available per state (insertion order is the load order).
count_samples = dict(active=16, meditate=15, neutral=15)

In [3]:
class Sample:
    """Container for one recording, stored as parallel per-channel lists.

    ``data`` maps a channel name to the list of float readings recorded so
    far; each call to ``recordDataPoint`` appends one value to every channel.
    """

    def __init__(self):
        self.data = {
            'RawEEG': [],
            'Alpha': [],
            'Low Beta': [],
            'High Beta': [],
            'Gamma': [],
            'Theta': [],
            'Delta': [],
            'Meditation': [],
            'Attention': []
        }

    def recordDataPoint(self, RawEEG, Attention, Meditation, Alpha, Delta, Theta, LowBeta, HighBeta, Gamma):
        """Append one reading to every channel, converting each value with ``float``."""
        self.data['RawEEG'].append(float(RawEEG))
        self.data['Attention'].append(float(Attention))
        self.data['Meditation'].append(float(Meditation))
        self.data['Alpha'].append(float(Alpha))
        self.data['Delta'].append(float(Delta))
        self.data['Theta'].append(float(Theta))
        self.data['Low Beta'].append(float(LowBeta))
        self.data['High Beta'].append(float(HighBeta))
        self.data['Gamma'].append(float(Gamma))

    def recordDataLine(self, line):
        """Record a line of data from the CSV output.

        The line is read positionally and takes the form
        RawEEG, Alpha, Delta, Gamma, Low Beta, High Beta, Theta, Attention, Meditation.
        """
        # Reorder the CSV columns into recordDataPoint's parameter order.
        self.recordDataPoint(line[0], line[7], line[8], line[1], line[2], line[6], line[4], line[5], line[3])

    def getEEG(self):
        """Return the list of RawEEG readings."""
        return self.data['RawEEG']

    def getAttention(self):
        """Return the list of Attention readings."""
        return self.data["Attention"]

    def getMeditation(self):
        """Return the list of Meditation readings."""
        return self.data["Meditation"]

    def getAlpha(self):
        """Return the list of Alpha readings."""
        return self.data["Alpha"]

    def getDelta(self):
        """Return the list of Delta readings."""
        return self.data["Delta"]

    def getTheta(self):
        """Return the list of Theta readings."""
        return self.data["Theta"]

    def getLowBeta(self):
        """Return the list of Low Beta readings."""
        return self.data["Low Beta"]

    def getHighBeta(self):
        """Return the list of High Beta readings."""
        return self.data["High Beta"]

    def getGamma(self):
        """Return the list of Gamma readings."""
        return self.data["Gamma"]

    def get(self, key):
        """Return the list of readings stored under ``key`` (raises KeyError if unknown)."""
        return self.data[key]

    def scale(self):
        """Scale the band channels by the standard deviation of the EEG data.

        Alpha, Delta, Theta, Low Beta, High Beta and Gamma are each divided by
        ``np.std`` of RawEEG. RawEEG, Attention and Meditation are left as-is.
        If no RawEEG readings have been recorded the call is a no-op.

        Raises:
            ValueError: if the RawEEG standard deviation is zero or not
                finite, since dividing by it would fill the bands with
                inf/nan values.
        """
        if not self.data['RawEEG']:
            return

        eeg_std_dev = np.std(self.data['RawEEG'])
        if eeg_std_dev == 0 or not np.isfinite(eeg_std_dev):
            raise ValueError(
                "cannot scale: RawEEG standard deviation is " + str(eeg_std_dev)
            )

        for band in ('Alpha', 'Delta', 'Theta', 'Low Beta', 'High Beta', 'Gamma'):
            self.data[band] = [x / eeg_std_dev for x in self.data[band]]

In [6]:
# Reset on every run of this cell so re-executing it does not duplicate samples.
data = []
dataLabels = []

def transcribeFileToSample(sampleN: int, state: str):
    """Load one recording from disk and register it in ``data`` / ``dataLabels``.

    Reads ``data/raw_data/<state><sampleN>.csv``, skips its header row, records
    every remaining row into a new ``Sample``, trims each channel to the
    ``[begin:end)`` window, then appends the sample to ``data`` and its state
    to ``dataLabels`` (the two lists stay index-aligned).

    Args:
        sampleN: number of the recording, used in the file name.
        state: label of the recording, also the file-name prefix.
    """
    sample_data = Sample()

    # newline="" is the mode the csv module asks for, so that newlines inside
    # quoted fields and \r\n line endings are interpreted by the reader itself.
    with open("data/raw_data/" + state + str(sampleN) + ".csv", newline="") as f:
        reader = csv.reader(f)

        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)

        for row in reader:
            # csv.reader yields [] for blank lines; indexing those would fail.
            if not row:
                continue
            sample_data.recordDataLine(row)

    # Keep only the configured window of every channel.
    for key in sample_data.data:
        sample_data.data[key] = sample_data.data[key][begin:end]

    data.append(sample_data)
    dataLabels.append(state)

# Files are numbered from 1 up to the per-state count.
for state in count_samples:
    for i in range(count_samples[state]):
        transcribeFileToSample(i + 1, state)

In [7]:
# One row per sample: (mean, std) of each band, in the band order below.
dataExtracted = []

# This order fixes the column order of every feature row.
band_order = ('Alpha', 'Low Beta', 'High Beta', 'Gamma', 'Theta', 'Delta')

for point in data:
    extractedPoint = []

    for band_key in band_order:
        band_values = point.get(band_key)
        extractedPoint.extend((np.mean(band_values), np.std(band_values)))

    dataExtracted.append(extractedPoint)


In [8]:
# Work on shallow copies so the extracted features and labels stay untouched.
X = list(dataExtracted)
y = list(dataLabels)

# Column names, in the same order the feature rows were built:
# for every band, its mean followed by its std.
bands = ['Alpha', 'Low Beta', 'High Beta', 'Gamma', 'Theta', 'Delta']
feature_names = [
    stat + " " + band
    for band in bands
    for stat in ("mean", "std")
]

# SelectFdr with its default settings; show the names of the retained columns.
fdr = SelectFdr()
X_new = fdr.fit_transform(X, y)
fdr.get_feature_names_out(feature_names)

array(['mean Alpha', 'std Alpha', 'mean Low Beta', 'std Low Beta',
       'std High Beta', 'mean Gamma', 'std Gamma', 'mean Theta',
       'std Delta'], dtype=object)

Candidate features: mean Low Beta, mean Alpha, std Alpha, mean Theta, std Delta
<br>
plus one of std High Beta or Gamma


In [9]:
# Keep the 5 columns of X with the highest chi2 score against the labels.
# NOTE(review): chi2 expects non-negative inputs; the band mean/std features
# are presumably non-negative, confirm against the raw data.
kbest = SelectKBest(score_func=chi2, k=5)
X_k = kbest.fit(X, y).transform(X)
kbest.get_feature_names_out(feature_names)

array(['mean Alpha', 'mean Low Beta', 'std High Beta', 'std Gamma',
       'std Delta'], dtype=object)

In [20]:
# One feature row per sample is accumulated here.
dataExtracted_1 = list()

# Statistics applied to every signal, in feature-column order.
stat_funcs = [
    np.mean,
    np.std,
    ant.sample_entropy,
    ant.petrosian_fd,
]

def safety_check(x, nan_value=0, inf_value=99999999999):
    """Replace non-finite feature values with finite stand-ins.

    Args:
        x: the value to check.
        nan_value: returned when ``x`` is NaN.
        inf_value: magnitude returned when ``x`` is infinite; the sign of
            ``x`` is preserved, so negative infinity maps to ``-inf_value``.

    Returns:
        ``x`` unchanged when it is finite, otherwise the stand-in value.
    """
    if math.isnan(x):
        return nan_value
    if math.isinf(x):
        # Keep the sign: mapping -inf to a large positive number would flip it.
        return inf_value if x > 0 else -inf_value
    return x

# Signals every statistic is applied to; this order (inside each statistic)
# fixes the column order of the feature rows.
signal_keys = ('RawEEG', 'Alpha', 'Delta', 'Theta', 'Low Beta', 'High Beta', 'Gamma')

for point in data:
    # Statistic-major layout: all signals for the first statistic, then all
    # signals for the second, and so on.
    extractedPoint = [
        safety_check(func(point.get(signal_key)))
        for func in stat_funcs
        for signal_key in signal_keys
    ]

    # The last column is the spectral entropy of the raw EEG signal.
    raw_spectral_entropy = ant.spectral_entropy(point.get('RawEEG'), sf=1)
    extractedPoint.append(safety_check(raw_spectral_entropy))

    dataExtracted_1.append(extractedPoint)

In [26]:
# Work on shallow copies of the extended feature rows and the labels.
X = list(dataExtracted_1)
y = list(dataLabels)

# Column names in the order the rows were built: statistic-major, then one
# trailing column for the raw-EEG spectral entropy.
func_names = ['mean', 'std', 'sample_entropy', 'petrosian_fd']
bands = ['Raw EEG', 'Alpha', 'Delta', 'Theta', 'Low Beta', 'High Beta', 'Gamma']
feature_names = [
    func_name + ' ' + band
    for func_name in func_names
    for band in bands
]
feature_names += ['Raw EEG spectral_entropy']

# SelectFdr with its default settings; show the names of the retained columns.
fdr = SelectFdr()
X_new = fdr.fit_transform(X, y)
fdr.get_feature_names_out(feature_names)

array(['mean Alpha', 'mean Theta', 'mean Low Beta', 'mean Gamma',
       'std Raw EEG', 'std Alpha', 'std Delta', 'std Low Beta',
       'std High Beta', 'std Gamma', 'sample_entropy Low Beta',
       'sample_entropy High Beta', 'sample_entropy Gamma',
       'petrosian_fd High Beta', 'petrosian_fd Gamma'], dtype=object)

In [34]:
# Keep the 8 best-scoring columns of X, using SelectKBest's default score function.
kbest = SelectKBest(k=8)
X_k = kbest.fit(X, y).transform(X)
kbest.get_feature_names_out(feature_names)

array(['mean Alpha', 'mean Low Beta', 'std Raw EEG', 'std Delta',
       'std High Beta', 'std Gamma', 'sample_entropy Low Beta',
       'sample_entropy Gamma'], dtype=object)