# EEG Processor
  
**Requirements**: The FFT band export files and the Segment Info file should be in the EEG Export folder, and the participant's Twin Processed file should be in their Data folder, all named according to the processing documentation  
**Input**: the files mentioned above  
**Output**: .csv (if export=True) of the input files merged, with the z-score of each band-channel pair as well as whether the trial contained an artifact or whether its position was estimated (two factors for excluding trials when making the model)

# Import Packages

In [2]:
# For finding the correct files
import glob
import re

# for datastructures and tools
import numpy as np
import pandas as pd

#for zscoring
from scipy import stats

# Initial Editable Parameters
Edit these variables before processing each file:

- **participant_num**: an integer, the number of the participant whose files you are going to process
- **sart_sequence**: a string ('A' or 'B'), selecting which SART file of the participant to process
- **export**: boolean; if True, the final processed file will be saved (probably what you want)

In [41]:
# Participant number (an integer) whose files are going to be processed
participant_num = 1

# Which SART file of the participant to process: 'A' or 'B'
sart_sequence = 'A'

# Export? If True, the final processed file is saved as a .csv
export = True

# Fail fast on a mistyped sequence: otherwise the mistake only shows up later as an
# unexplained "file not found" style error when the input files are looked up.
if sart_sequence not in ('A', 'B'):
    raise ValueError("sart_sequence must be 'A' or 'B', got %r" % (sart_sequence,))

# Open, process and merge data

In [42]:
# Value of the 'event' column that marks a stimulus row in the twin processed file
stim_val = '20'

# number of channels there should be in final record
num_chns = 64
# number of bands analyzed
num_bands = 5

pathdict = {'pNum': participant_num, 'seq': sart_sequence}
base_path = 'X:/PROJECTS/14. Waking States and Memory'
participant_path = base_path + '/Data/%(pNum)02d' % pathdict
eeg_export_path = base_path + '/Analysis/EEG Export'

# Create a dataframe containing the correct sequence of events.
# pd.DataFrame.from_csv has been removed from pandas; read_csv with index_col=0 and
# parse_dates=True is the documented equivalent of its defaults.
correct_seq = pd.read_csv(base_path + '/Data/correct_seq.csv', index_col=0, parse_dates=True)

# Find all of the fft band export files (the asterisk matches every band) and sort them
# so the order is the same on every run.
fft_pattern = eeg_export_path + '/%(pNum)02d%(seq)s[SI]raw_FFT_FFTBandExport*.txt' % pathdict
fft_paths = sorted(glob.glob(fft_pattern))
if len(fft_paths) != num_bands:
    raise IOError("Expected %d FFT band export files but found %d matching: %s"
                  % (num_bands, len(fft_paths), fft_pattern))

# This is the path where the segment information file is.
# Check the glob result explicitly so a missing file gives a readable error
# instead of a bare IndexError.
seg_info_pattern = eeg_export_path + '/%(pNum)02d%(seq)s[SI]raw-SegmentInfo.csv' % pathdict
seg_info_matches = glob.glob(seg_info_pattern)
if not seg_info_matches:
    raise IOError("No segment info file found matching: %s" % seg_info_pattern)
seg_info_path = seg_info_matches[0]

# This is the path where the twin processed file is
twin_path = participant_path + '/%(pNum)02d%(seq)sTwinProcessed.csv' % pathdict

# Show which files are about to be processed
print("Processing:")
for path in fft_paths:
    print(path)
print(seg_info_path)
print(twin_path)

Processing:
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw_FFT_FFTBandExport_alpha.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw_FFT_FFTBandExport_beta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw_FFT_FFTBandExport_delta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw_FFT_FFTBandExport_slow.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw_FFT_FFTBandExport_theta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\01AIraw-SegmentInfo.csv
X:/PROJECTS/14. Waking States and Memory/Data/01/01ATwinProcessed.csv


In [43]:
# Read every band export file and collect the frames, then join them side by side
# with a single concat (instead of growing fft_df inside the loop).
band_frames = []
for band_file in fft_paths:
    # Get the name of the band from the file name only: the lookahead rejects any match
    # that is followed by a path separator, so a band-like word in a folder name is ignored.
    band_match = re.search(r'(beta|alpha|theta|delta|slow)(?=[^/\\]*$)', band_file)
    if band_match is None:
        raise Exception("Could not find a band name in the file name: %s" % band_file)
    band = band_match.group(0)
    # pd.DataFrame.from_csv has been removed from pandas; read_csv keeps a default
    # integer index, matching the old index_col=None call.
    band_df = pd.read_csv(band_file, sep="\t")
    # add the band name to every column
    band_df.columns = band_df.columns + "-" + band
    band_frames.append(band_df)

if not band_frames:
    raise Exception("No FFT band export files were found, so there is nothing to process")
fft_df = pd.concat(band_frames, axis=1)

# Create a dataframe for segment info file
seg_info_df = pd.read_csv(seg_info_path)

# Create a dataframe for twin processed file
twin_raw_df = pd.read_csv(twin_path)
# Get only the stimulus rows of the twin file. Also, only use the 'estimated' column.
# The index is reset so the rows line up positionally with the other two dataframes.
is_stim_row = twin_raw_df['event'] == int(stim_val)
twin_df = twin_raw_df.loc[is_stim_row, 'estimated'].reset_index(drop=True)

# Check that the fft, seg info and twin dfs are the correct size (make sure they have the same number of rows)
n_fft = len(fft_df.index)
n_seg = len(seg_info_df.index)
n_twin = len(twin_df.index)
if n_fft == n_seg == n_twin:
    print("All Good")
else:
    raise Exception("The files don't have the same size. Fix this before continuing"
                    " (fft: %d rows, segment info: %d rows, twin: %d rows)" % (n_fft, n_seg, n_twin))

All Good


In [44]:
# Make some adjustments to the FFT dataframe
# Drop the redundant columns (they all start with 'Unnamed: ').
# errors='ignore' skips any that are absent; catching ValueError is not reliable because
# current pandas raises KeyError for a missing label.
band_list = ['beta', 'alpha', 'delta', 'theta', 'slow']
redundant_cols = ['Unnamed: 0-' + band for band in band_list]
fft_df = fft_df.drop(redundant_cols, axis=1, errors='ignore')

# Double check that there are the right number of columns
expected_col_count = num_chns * len(band_list)
if len(fft_df.columns) != expected_col_count:
    raise Exception("There's a problem, it looks like there's in correct number of channel-band pairs... there was probably a naming error in BVA"
                    " (found %d columns, expected %d)" % (len(fft_df.columns), expected_col_count))

# rename the fft_df column names (channels) to ensure all original channels and interpolated channels are the same naming format
fft_df.columns = [chn_name.replace('-Cz', '').upper() for chn_name in fft_df.columns]
# reorder the columns alphabetically to make it consistent across participants
fft_df = fft_df.sort_index(axis=1)

# zscore each channel-band pair (column by column)
# NOTE(review): this includes the last row, which the merge cell later drops as
# "not an actual trial" - confirm that including it in the z-score is intended.
fft_df = fft_df.apply(stats.zscore)

print("All Good")

All Good


In [45]:
# Put the z-scored band data, the segment info and the 'estimated' flags side by side
merged_parts = [fft_df, seg_info_df, twin_df]
eeg_df = pd.concat(merged_parts, axis=1)

# The SART program logs one extra trial at the end that is not an actual trial; remove that last row
last_row_label = eeg_df.tail(1).index
eeg_df = eeg_df.drop(last_row_label)

eeg_df

Unnamed: 0,AF3-ALPHA,AF3-BETA,AF3-DELTA,AF3-SLOW,AF3-THETA,AF4-ALPHA,AF4-BETA,AF4-DELTA,AF4-SLOW,AF4-THETA,...,TP7-SLOW,TP7-THETA,TP8-ALPHA,TP8-BETA,TP8-DELTA,TP8-SLOW,TP8-THETA,segmentNumber,artifact,estimated
0,-0.542712,1.423740,0.191836,0.695377,1.517007,-1.079612,0.719396,1.051525,2.725234,-0.388770,...,0.651130,2.372643,-1.060069,-0.515184,2.035321,0.254989,-0.470338,1,good,False
1,-0.669889,1.291721,0.763979,0.333149,0.001115,-1.025998,2.118830,1.457984,-0.489141,-0.758280,...,0.259099,0.409915,-0.930916,-0.206595,0.120064,0.894412,0.344357,2,good,False
2,1.151183,0.095180,-1.371685,-0.559142,-0.742861,1.151687,-0.125520,-1.169786,0.090325,-1.127087,...,-0.546308,-1.199061,-0.943667,0.126251,0.232164,1.423848,0.429525,3,good,False
3,-1.918654,0.784429,1.152782,2.067034,1.539345,-1.380705,2.194269,1.512989,-0.334451,1.659668,...,3.245994,-0.530414,-2.090509,-0.922316,1.384699,1.703007,-0.255239,4,good,False
4,-0.345832,-1.088498,-0.677581,1.851749,0.312415,-0.924396,-0.040351,-0.574918,1.703907,-1.051536,...,0.047295,-0.813322,-0.561269,-0.908680,0.286949,0.496679,0.980001,5,good,False
5,-0.472380,0.315716,0.553943,-0.391288,1.139546,-0.143998,0.903093,0.183129,1.040340,0.403650,...,1.132256,0.098688,-1.255031,-0.043401,0.188285,1.174409,0.100555,6,good,False
6,-0.515697,-0.279861,0.616034,-0.040097,-0.309413,-0.305554,0.005121,-0.940595,1.416190,-0.874585,...,1.975309,-0.462007,-0.344852,-0.770893,1.910118,-0.565612,0.168266,7,good,False
7,0.560752,0.708004,0.202262,-1.382107,0.133267,0.091045,1.075412,0.070006,-0.264055,1.254400,...,-0.291754,0.298090,-0.125152,0.243383,-0.710009,0.799961,-0.854702,8,good,False
8,1.302263,-0.582360,-1.574392,0.225403,-0.394583,0.716740,0.113571,-1.192500,0.117117,0.697879,...,0.323208,-0.163260,2.311871,-1.051299,-1.534884,-0.382717,-1.542317,9,good,False
9,-1.796147,0.987406,2.767413,0.169625,0.334637,-2.137133,-1.277951,2.267911,0.466223,1.380327,...,0.864920,2.568635,-0.705661,0.284468,1.778342,-0.999254,-0.061620,10,good,False


In [46]:
# Quick look at the quality of the participant's EEG:
# number of trials for each (artifact status, estimated flag) combination
print('Frequency of good/bad artifact as well as estimated/not estimated:')
quality_keys = [' artifact', 'estimated']
eeg_df.groupby(quality_keys).size()

Frequency of good/bad artifact as well as estimated/not estimated:


 artifact  estimated
 good      False        304
 reject    False         20
dtype: int64

# Export Data

In [47]:
# Save the merged dataframe as a .csv in the participant's data folder, only when exporting is enabled
if export:
    export_path = participant_path + '/%(pNum)02d%(seq)sEEGProcessed.csv' % pathdict
    eeg_df.to_csv(path_or_buf=export_path)