In [1]:
from os.path import dirname, join as pjoin
import scipy.io as spio
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
from pandas.api.types import is_numeric_dtype
import matplotlib.pyplot as plt
import os, fnmatch
from operator import itemgetter

This script reads, from the working directory, the protocol files containing the times at which each scene starts and the button is pressed, and the brain data files containing the ERSP time courses at each EEG sensor for all scenes in three frequency bands (theta, alpha, and beta).

We perform the following steps:

1) For each subject and each scene, we aggregate the ERSP over time between the scene start and the button press using the median value.
2) We exclude scenes with extreme ERSP values using a z-score threshold.
3) We aggregate the ERSP over all remaining scenes using the median value.

As a result, for each subject we obtain one ERSP value at each EEG sensor (the sensors are listed in the file 'EEG_SENSOR_NAMES.xlsx').

Note that the same procedure is applied to all three frequency bands: theta, alpha, and beta.
Note that this script implements the procedure for Picture-based scenes. The same procedure should be applied to Video-based scenes.

# Reading Protocol and ERSP Files from Directory (Picture)

In [2]:
# Collect the protocol workbooks found in the working directory.
# Each record is the file name split on "_" with the complete file name
# appended as its last element, so the name can be recovered after sorting.
listOfFiles = os.listdir('.')
pattern = "*_Text.xlsx"
protocol_files = [
    file_name.split('_') + [file_name]
    for file_name in listOfFiles
    if fnmatch.fnmatch(file_name, pattern)
]
# Order the records by their third field (index 2), which the aggregation
# step later reads as the participant identifier.
protocol_files.sort(key=itemgetter(2))

# Collect the alpha-band ERSP files found in the working directory.
# Each record is the file name split on "_" followed by the complete file name.
listOfFiles = os.listdir('.')
pattern = "*alpha_Text.mat"
data_files_alpha = [
    file_name.split('_') + [file_name]
    for file_name in listOfFiles
    if fnmatch.fnmatch(file_name, pattern)
]
# Order the records by the first "_"-separated field of the file name.
data_files_alpha.sort(key=itemgetter(0))

# Collect the theta-band ERSP files found in the working directory.
# Each record is the file name split on "_" followed by the complete file name.
listOfFiles = os.listdir('.')
pattern = "*theta_Text.mat"
data_files_theta = [
    file_name.split('_') + [file_name]
    for file_name in listOfFiles
    if fnmatch.fnmatch(file_name, pattern)
]
# Order the records by the first "_"-separated field of the file name.
data_files_theta.sort(key=itemgetter(0))

# Collect the beta-band ERSP files found in the working directory.
# Each record is the file name split on "_" followed by the complete file name.
listOfFiles = os.listdir('.')
pattern = "*beta_Text.mat"
data_files_beta = [
    file_name.split('_') + [file_name]
    for file_name in listOfFiles
    if fnmatch.fnmatch(file_name, pattern)
]
# Order the records by the first "_"-separated field of the file name.
data_files_beta.sort(key=itemgetter(0))

# Load the sensor table; its 'channels' column supplies the list of names
# that is used as the column labels of every ERSP table built below.
channels = pd.read_excel('EEG_SENSOR_NAMES.xlsx')
ch = channels['channels'].values.tolist()

# Aggregating ERSP Data over the time of trial using median value (Picture)

In [3]:
# Scenes whose |z-score| reaches this value on any sensor are discarded.
Z_SCORE_THRESHOLD = 3


def _aggregate_band_over_time(protocol_files, band_files, mat_key, channel_names,
                              z_threshold=Z_SCORE_THRESHOLD):
    """Build the scene-level ERSP table of one frequency band for all participants.

    For every participant the band's .mat file and the protocol workbook are
    loaded, the ERSP of each scene is reduced over time with the median, and
    scenes with extreme values are removed sensor by sensor.

    Parameters
    ----------
    protocol_files : list of list of str
        Protocol records; element 2 is read as the participant id and
        element 4 as the workbook file name.
    band_files : list of list of str
        ERSP records of one band; element 3 is read as the .mat file name.
        Records are paired with ``protocol_files`` purely by list position.
    mat_key : str
        Name of the variable holding the ERSP array inside the .mat file.
    channel_names : list of str
        Column labels for the second axis of the ERSP array.
    z_threshold : float, optional
        Scenes are kept only while ``|z| < z_threshold`` on each sensor.

    Returns
    -------
    pandas.DataFrame
        One row per retained scene, one column per sensor plus an integer
        ``participant`` column. Empty if ``protocol_files`` is empty.

    Raises
    ------
    ValueError
        If a protocol has fewer rows than the ERSP array has scenes, or if a
        scene has a missing or non-positive latency difference.
    """
    per_participant = []
    for k, protocol_entry in enumerate(protocol_files):
        participant = protocol_entry[2]
        protocol_file = protocol_entry[4]
        band_file = band_files[k][3]

        ersp = spio.loadmat(band_file)[mat_key]
        # The code indexes the array as (scene, sensor, time).
        n_scenes, n_sensors = ersp.shape[0], ersp.shape[1]

        protocol = pd.read_excel(protocol_file)
        RT = protocol['BP Latency'] - protocol['SS Latency']
        # NOTE(review): assumes protocol rows are in the same order as the
        # scenes of the ERSP array, and that the latency difference is an
        # index on the ERSP time axis starting at scene onset -- confirm.
        if len(RT) < n_scenes:
            raise ValueError(
                "{}: {} protocol rows but {} scenes in {}".format(
                    protocol_file, len(RT), n_scenes, band_file))

        matrix = np.zeros((n_scenes, n_sensors))
        for j in range(n_scenes):
            latency = RT.iloc[j]
            if pd.isna(latency) or int(latency) < 1:
                raise ValueError(
                    "{}: scene {} has no usable latency window ({!r})".format(
                        protocol_file, j, latency))
            # Each scene gets its own window (scene start -> button press).
            # Previously the first scene's window, RT[0], was applied to all
            # scenes.
            matrix[j, :] = np.median(ersp[j, :, 0:int(latency)], axis=1)

        df = pd.DataFrame(matrix, columns=channel_names)
        # Sequential filtering: z-scores of each sensor are computed on the
        # scenes that survived the previous sensors.
        for cname in channel_names:
            df = df[abs(stats.zscore(df[cname])) < z_threshold]
        # assign() returns a new frame instead of writing into a filtered slice.
        per_participant.append(df.assign(participant=int(participant)))

    # Concatenate once instead of re-copying the growing table per participant.
    return pd.concat(per_participant) if per_participant else pd.DataFrame()


DATA_ALPHA = _aggregate_band_over_time(protocol_files, data_files_alpha, 'alpha_', ch)

DATA_BETA = _aggregate_band_over_time(protocol_files, data_files_beta, 'beta_', ch)

DATA_THETA = _aggregate_band_over_time(protocol_files, data_files_theta, 'theta_', ch)

# Aggregating ERSP across trials using median value (Picture)

In [4]:
# Reduce the scene-level theta table to one row per participant: the median of
# every sensor column over that participant's retained scenes.
theta_rows = []
for participant in DATA_THETA['participant'].unique():
    scenes = DATA_THETA[DATA_THETA['participant'] == participant]
    # median() yields a Series indexed by sensor; transpose it into one row.
    row = scenes[ch].median().to_frame().T
    row.columns = ch
    row['participant'] = participant
    theta_rows.append(row)
# Concatenate once rather than re-copying the accumulated table per participant.
DATA_THETA_AV = pd.concat(theta_rows) if theta_rows else pd.DataFrame()

# Reduce the scene-level alpha table to one row per participant: the median of
# every sensor column over that participant's retained scenes.
alpha_rows = []
for participant in DATA_ALPHA['participant'].unique():
    scenes = DATA_ALPHA[DATA_ALPHA['participant'] == participant]
    # median() yields a Series indexed by sensor; transpose it into one row.
    row = scenes[ch].median().to_frame().T
    row.columns = ch
    row['participant'] = participant
    alpha_rows.append(row)
# Concatenate once rather than re-copying the accumulated table per participant.
DATA_ALPHA_AV = pd.concat(alpha_rows) if alpha_rows else pd.DataFrame()

# Reduce the scene-level beta table to one row per participant: the median of
# every sensor column over that participant's retained scenes.
beta_rows = []
for participant in DATA_BETA['participant'].unique():
    scenes = DATA_BETA[DATA_BETA['participant'] == participant]
    # median() yields a Series indexed by sensor; transpose it into one row.
    row = scenes[ch].median().to_frame().T
    row.columns = ch
    row['participant'] = participant
    beta_rows.append(row)
# Concatenate once rather than re-copying the accumulated table per participant.
DATA_BETA_AV = pd.concat(beta_rows) if beta_rows else pd.DataFrame()

# Export Subject-specific ERSP Data (Picture)

In [5]:
# Write one CSV per frequency band into the working directory
# (the DataFrame index is written as the first column, the to_csv default).
for band_table, csv_name in (
    (DATA_ALPHA_AV, 'ERSP_TEXT_ALPHA.csv'),
    (DATA_BETA_AV, 'ERSP_TEXT_BETA.csv'),
    (DATA_THETA_AV, 'ERSP_TEXT_THETA.csv'),
):
    band_table.to_csv(csv_name)