# EEG Processor
  
**Requirements**: The FFT band export files and the Segment Info file must be in the EEG Export folder, and the Twin Processed file must be in the participant's Data folder, all named according to the processing documentation  
**Input**: the files mentioned above  
**Output**: a .csv (if export=True) of the input files merged, containing the z-score of each band-channel pair as well as whether each trial contained an artifact or had its position estimated (two factors for excluding trials when building the model)

# Import Packages

In [10]:
# For finding the correct files
import glob
import re

# for datastructures and tools
import numpy as np
import pandas as pd

#for zscoring
from scipy import stats

# Initial Editable Parameters
Edit these variables before processing each file  
 -**participant_num**: an integer, the participant number whose files you are going to process   
 -**sart_sequence**: a string ('A' or 'B'), for which sart file of the participant  
 -**export**: boolean, if True, the final processed file will be saved (probably what you want)

In [56]:
# Participant number (an integer) whose files will be processed.
# It is zero-padded to two digits when the file paths are built.
participant_num = 3

# Which SART file of the participant to process: 'A' or 'B'
sart_sequence = 'B'

# Export?
# If True, the merged dataframe is written to a .csv in the last cell
export = True

# Open, process and merge data

In [57]:
# Event code that marks stimulus rows in the twin processed file
# (kept as a string here; it is converted with int() when the twin file is filtered)
stim_val = '20'

# number of channels there should be in final record
num_chns = 64
# number of bands analyzed
num_bands = 5

pathdict = {'pNum': participant_num, 'seq': sart_sequence}
base_path = 'X:/PROJECTS/14. Waking States and Memory'
participant_path = base_path + '/Data/%(pNum)02d' % pathdict
export_dir = base_path + '/Analysis/EEG Export'

# Create a dataframe containing the correct sequence of events.
# read_csv(index_col=0, parse_dates=True) is the documented replacement for the
# removed DataFrame.from_csv and reproduces its defaults.
correct_seq = pd.read_csv(base_path + '/Data/correct_seq.csv', index_col=0, parse_dates=True)

# Find all of the fft band export files. The asterisk lets the pattern match every
# band file; sorting keeps the order stable from run to run.
fft_pattern = export_dir + '/%(pNum)02d%(seq)s[SI]raw_FFT_FFTBandExport*.txt' % pathdict
fft_paths = sorted(glob.glob(fft_pattern))
if len(fft_paths) != num_bands:
    raise IOError("Expected %d FFT band export files matching '%s' but found %d"
                  % (num_bands, fft_pattern, len(fft_paths)))

# Find the segment information file. Fail with a readable message instead of an
# IndexError when it is missing; sorting makes the choice deterministic if more
# than one file matches.
seg_info_pattern = export_dir + '/%(pNum)02d%(seq)s[SI]raw-SegmentInfo.csv' % pathdict
seg_info_matches = sorted(glob.glob(seg_info_pattern))
if not seg_info_matches:
    raise IOError("No Segment Info file found matching '%s'" % seg_info_pattern)
seg_info_path = seg_info_matches[0]

# This is the path where the twin processed file is
twin_path = participant_path + '/%(pNum)02d%(seq)sTwinProcessed.csv' % pathdict

print("Processing:")
for path in fft_paths:
    print(path)
print(seg_info_path)
print(twin_path)

Processing:
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw_FFT_FFTBandExport_alpha.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw_FFT_FFTBandExport_beta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw_FFT_FFTBandExport_delta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw_FFT_FFTBandExport_slow.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw_FFT_FFTBandExport_theta.txt
X:/PROJECTS/14. Waking States and Memory/Analysis/EEG Export\03BIraw-SegmentInfo.csv
X:/PROJECTS/14. Waking States and Memory/Data/03/03BTwinProcessed.csv


In [58]:
# Load every FFT band export file into its own frame, tagging each column with
# the band name, then join all bands side by side in a single concat.
band_pattern = re.compile(r'(beta|alpha|theta|delta|slow)')
band_frames = []
for band_file in fft_paths:
    # Search the file name only, so a folder name can never be mistaken for the band
    file_name = re.split(r'[\\/]', band_file)[-1]
    band_match = band_pattern.search(file_name)
    if band_match is None:
        raise ValueError("Could not determine the band from file name: " + band_file)
    band = band_match.group(0)
    # read_csv replaces the removed DataFrame.from_csv(index_col=None, sep="\t")
    band_df = pd.read_csv(band_file, sep="\t")
    # add the band name to every column
    band_df.columns = band_df.columns + "-" + band
    band_frames.append(band_df)

# An empty list would make pd.concat raise; fall back to an empty frame so the
# size check below reports the problem instead.
fft_df = pd.concat(band_frames, axis=1) if band_frames else pd.DataFrame()

# Create a dataframe for segment info file
seg_info_df = pd.read_csv(seg_info_path)

# Create a dataframe for twin processed file
twin_df = pd.read_csv(twin_path)
# Keep only the stimulus rows of the twin file and only the 'estimated' column,
# renumbering the rows from 0 so they line up with the other two frames
is_stimulus = twin_df.loc[:, 'event'] == int(stim_val)
twin_df = twin_df.loc[is_stimulus, 'estimated'].reset_index(drop=True)

# Check that the fft, seg info and twin dfs are the correct size (make sure they have the same number of rows)
n_fft, n_seg, n_twin = len(fft_df.index), len(seg_info_df.index), len(twin_df.index)
if n_fft == n_seg == n_twin:
    print("All Good")
else:
    # Show the individual sizes so the mismatching file is easy to identify
    print("Rows - FFT: %d, Segment Info: %d, Twin: %d" % (n_fft, n_seg, n_twin))
    raise Exception("The files don't have the same size. Fix this before continuing")

All Good


In [59]:
# Make some adjustments to the FFT dataframe
# Drop the redundant 'Unnamed: 0-<band>' column of each band file.
# errors='ignore' skips bands whose column is absent; the previous
# try/except ValueError did not catch the KeyError that current pandas raises.
band_list = ['beta', 'alpha', 'delta', 'theta', 'slow']
redundant_cols = ['Unnamed: 0-' + band for band in band_list]
fft_df = fft_df.drop(redundant_cols, axis=1, errors='ignore')

# Double check that there are the right number of columns
if len(fft_df.columns) != num_chns*len(band_list):
    raise Exception("There's a problem, it looks like there's in correct number of channel-band pairs... there was probably a naming error in BVA")

# rename the fft_df column names (channels) to ensure all original channels and interpolated channels are the same naming format
fft_df.columns = [chn_name.replace('-Cz', '').upper() for chn_name in fft_df.columns]
# reorder the columns alphabetically to make it consistent across participants
fft_df = fft_df.sort_index(axis=1)

# zscore each channel-band pair (column by column)
# NOTE(review): with scipy's defaults a NaN or a constant column yields NaNs - confirm the inputs never contain either
fft_df = fft_df.apply(stats.zscore)

print("All Good")

All Good


In [60]:
# Join the z-scored FFT data, the segment info and the 'estimated' flags
# side by side (axis=1) into one dataframe, then display it
eeg_df = pd.concat(
    objs=[fft_df, seg_info_df, twin_df],
    axis=1,
)
eeg_df

Unnamed: 0,AF3-ALPHA,AF3-BETA,AF3-DELTA,AF3-SLOW,AF3-THETA,AF4-ALPHA,AF4-BETA,AF4-DELTA,AF4-SLOW,AF4-THETA,...,TP7-SLOW,TP7-THETA,TP8-ALPHA,TP8-BETA,TP8-DELTA,TP8-SLOW,TP8-THETA,segmentNumber,artifact,estimated
0,-0.209203,-0.293494,0.739240,0.000946,-0.271662,-0.677762,-0.536834,0.635519,1.010809,-0.311089,...,-0.271430,0.525753,-1.337385,1.379136,0.182127,-0.209635,-0.941244,1,good,False
1,-1.164042,0.511018,2.716569,-1.149318,-0.298689,-0.997617,-0.147690,2.799810,-0.786062,-0.374881,...,-0.324667,-0.449790,-0.913569,0.657248,0.876166,-0.901426,-1.084109,2,good,False
2,-0.381855,1.571421,0.529648,-0.296978,0.211260,-0.723508,-0.770928,0.053491,2.387433,-0.702599,...,2.953353,-1.173948,-1.161745,1.509712,0.117912,0.201018,-0.965282,3,good,False
3,-0.112583,0.591649,-0.682801,-0.024654,0.861946,-0.173439,0.071546,-1.198913,0.243072,0.591687,...,-0.127749,0.262959,0.030204,-0.357890,-0.987211,1.130815,-0.686844,4,good,False
4,0.529854,-0.796222,1.628782,-0.111841,-1.432700,0.978598,-0.820766,1.517940,0.002576,-1.547700,...,-0.566655,-1.251698,-0.136855,-0.046806,1.867878,-1.058283,-0.895502,5,good,False
5,-0.901242,0.481583,0.517921,0.412660,-0.425623,0.095073,0.401775,1.274775,0.208579,-1.255287,...,-1.490396,-0.569726,-0.052981,-0.174363,1.013993,0.190585,-0.260063,6,good,False
6,-0.474698,-0.202021,1.261313,-1.060922,0.196911,-0.722280,0.797518,0.081253,-0.526833,1.569429,...,0.088487,0.076929,-0.243802,0.824969,1.005654,-0.973339,-0.525405,7,good,False
7,0.360281,1.103767,1.237741,-0.782948,-0.365061,-0.004844,0.300816,0.861296,-0.330786,1.378674,...,-0.429279,0.531640,-1.105880,0.669368,0.001188,0.788790,0.436029,8,good,False
8,-0.225627,1.378618,-0.551483,-0.122271,0.030252,0.095552,0.768955,-0.748604,-0.509417,1.206194,...,-1.034295,0.084550,1.263143,-0.925980,-0.371122,-0.622047,1.435413,9,good,False
9,-0.221279,0.937061,0.623137,-0.693111,-0.219642,-0.150466,1.293918,-1.216168,-0.544650,1.993781,...,0.495181,1.042082,0.537957,-0.970247,2.820639,-1.225802,-1.062026,10,good,False


In [61]:
# Quick look at the quality of the participant's EEG: number of trials for each
# artifact / estimated combination.
# The ' artifact' label carries a leading space, as shown in the output below.
print('Frequency of good/bad artifact as well as estimated/not estimated:')
eeg_df.groupby(by=[' artifact', 'estimated']).size()

Frequency of good/bad artifact as well as estimated/not estimated:


 artifact  estimated
 good      False        316
 reject    False          9
dtype: int64

# Export Data

In [62]:
if export:
    # Save the merged dataframe as <pNum><seq>EEGProcessed.csv in the participant's data folder
    eeg_df.to_csv(
        participant_path + '/%(pNum)02d%(seq)sEEGProcessed.csv' % pathdict
    )