In [6]:
import json
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from library import *

In [7]:
# Directory holding the uploaded per-participant JSON recordings.
# (alternate local checkout: /Users/matt/git/hci_mw/data/uploaded)
DATA_ROOT = '/home/mrussell/mw_hci/data/uploaded'

# Every JSON recording in the upload directory.
files = [fname for fname in os.listdir(DATA_ROOT) if fname.endswith('.json')]

# The participant id is the part of the filename before the first underscore.
pids = np.unique([fname.split('_')[0] for fname in files])

# Task-name fragment (as it appears in the filename) -> number of recordings
# expected per participant: 3 for each n-back level, 4 for stroop and rotation.
tasks = {
    **{f'nback_{level}_task': 3 for level in range(4)},
    'stroop_task': 4,
    'rotation_task': 4,
}

###
### Build one event-annotated, filtered EEG dataframe per recording file, split it into
### trials, then export everything to pdata.csv and an MNE epochs file.
###
### NOTE(review): mne_filt and separate_trials_and_bin_mw are defined in cells that appear
### further down this notebook, so those cells must have been executed before this one.
### SAMPLING_RATE and create_mne_epochs are presumably provided by `from library import *`
### -- confirm.
###

###
### for the sake of simplicity, these keys are ALL of the possible jspsych trial fields;
### we can filter later. Kept as an ordered tuple rather than a set so the resulting
### column order (and therefore pdata.csv) is the same on every run.
### ('blockType' is intentionally left out.)
###
unique_keys = (
    'isFirstThree', 'success', 'isNBack', 'correct', 'stimfile',
    'colorIgnore', 'response', 'internal_node_id', 'reflected',
    'stimulus', 'time_elapsed', 'experiment', 'letter', 'angle',
    'block', 'trial_index', 'congruent', 'imnum',
    'colorDraw', 'trial_type', 'subject_id', 'rt',
)

dfs = []
for pid in pids:
    print(pid)

    # all non-practice recordings belonging to this participant
    pfiles = [fname for fname in files if fname.startswith(pid) and 'practice' not in fname]

    # running trial counter, threaded through separate_trials_and_bin_mw for this participant
    pid_trial_idx = 0

    for task in tasks:

        task_files = [fname for fname in pfiles if task in fname]

        # make sure participant completed experiment
        if len(task_files) < tasks[task]:
            print(f'ERROR - missing tasks for {pid} with {task}; {len(task_files)} found, need {tasks[task]}')

        # nothing recorded for this task: skip it (the n-back selection below would
        # otherwise fail on an empty list).
        if not task_files:
            continue

        # don't double-count nbacks; the files save frequently during this one.
        # keep only the largest file (the first one, if several share the largest size).
        if 'nback' in task:
            task_files = [max(task_files,
                              key=lambda fname: os.stat(os.path.join(DATA_ROOT, fname)).st_size)]

        for task_file in task_files:
            print(task_file)

            # read the recording; the context manager closes the file handle promptly
            with open(os.path.join(DATA_ROOT, task_file)) as recording_file:
                recording = json.load(recording_file)

            ###
            ### load and process the eeg data
            ###
            eeg_df = pd.DataFrame(recording['_eeg'])

            ###
            ### musejs timestamps are often unreliable, so they are dropped when we widen below
            ### and regenerated afterwards. however, the first timestamp is always good, so we
            ### save it for compatibility with the 'events' dataframe (both in ms since epoch).
            ###
            init_time = eeg_df['timestamp'].iloc[0]

            ###
            ### widen so one row has data from all probes; drops timestamps
            ###
            eeg_df = eeg_df.pivot(index='index', columns='electrode', values='samples')

            ###
            ### give the electrode columns flat, readable names
            ###
            eeg_df.columns = [f'probe-{electrode}' for electrode in eeg_df.columns.values]

            ###
            ### the occasional row (like 1 per participant) will have na values
            ### drop them preemptively. this will technically slightly shift the
            ### regenerated timestamps, but by a very small amount overall, so it
            ### is not a major concern.
            ###
            eeg_df = eeg_df.dropna()

            ###
            ### 'explode' the sample arrays at each row into one row per sample
            ### (12 new rows per packet, per the original note)
            ###
            eeg_df = eeg_df.explode(list(eeg_df.columns)).reset_index(drop=True)

            ###
            ### regenerate timestamps: samples are assumed to arrive in order at SAMPLING_RATE,
            ### so sample i sits at i / SAMPLING_RATE seconds, expressed here in ms from the
            ### first sample.
            ###
            eeg_df['timestamp'] = np.arange(eeg_df.shape[0]) * (1/SAMPLING_RATE) * 1000

            ###
            ### PROCESS EVENTS
            ### Events come from two disparate places
            ###     A) _events    [we've manually placed these here]
            ###     B) _trialData [automatic from jspsych]
            ###
            ### Unfortunately we've got to merge these two.
            ### Good news is that the _events is basically the same format, except there's always a
            ### start/end paired together. There are few minor exceptions, but this makes life pretty
            ### straightforward, all things considered.
            ###

            ###
            ### first start with the _trialData events
            ### start at #3 b/c that's the first instruction trial (others are muse connection, etc.)
            ### a field that a trial does not have is recorded as None
            ###
            trials = recording['_trialData'][3:]
            all_trialdata = pd.DataFrame(
                {key: [trial.get(key) for trial in trials] for key in unique_keys}
            )

            all_trialdata['nback_block'] = all_trialdata['block']

            ###
            ### we don't need 'time_elapsed', as time is taken care of more accurately re: muse
            ###
            del all_trialdata['time_elapsed']

            ###
            ### unpack the events from the ; separated list in the 'value' column
            ### each entry is "KEY: VALUE"; split on the first ':' only so a value may
            ### itself contain a colon.
            ###
            all_events = []
            for event in recording['_events']:
                for pair in event['value'].split(';'):
                    info_key, info_value = pair.split(':', 1)
                    event[info_key.strip()] = info_value.strip()
                del event['value']
                all_events.append(event)
            events_df = pd.DataFrame(all_events)

            ###
            ### transform the time column of events to ms from the first muse reading
            ###
            events_df['timestamp'] = events_df['timestamp'].astype(int) - init_time

            ###
            ### for any timestamps that are the same in the events, they are always of the order:
            ###      [0] (prev) trial finish    -> increment by 0
            ###      [1] (next) trial start     -> increment by 3 * 1 == 3ms
            ### Go by index offset for simplicity [and technically vectorization].
            ### 3ms b/c that will go over the 2ms resolution in the merge_asof fn used below.
            ### Note: no events happen 3ms after the start of the trial, so this is safe.
            ###
            events_df['timestamp'] = events_df.groupby('timestamp')['timestamp'].transform(
                lambda ts: ts + np.array(ts.index - ts.index[0]) * 3
            )

            ###
            ### use to test if above works; will print any rows that have the same timestamp.
            ###
            for name, time_share_df in events_df.groupby(['timestamp']):
                if time_share_df.shape[0] == 1:
                    continue
                print(time_share_df)
                raise Exception(f'ERROR - duplicate timestamps in events: {name}')

            ###
            ### block start/end events are superfluous here.
            ### these often conflict with start/end trial timestamps, and are not reflected in
            ### jspsych data, so drop them.
            ###
            events_df = events_df[(events_df['STATUS'] != 'block_started') &
                                  (events_df['STATUS'] != 'block_ended')
                                  ].reset_index(drop=True)

            ###
            ### first event is always a 'trial_finished' pressEnter trial after muse connect
            ### now we will start with the first instruction instead
            ### this lines up with the all_trialdata df.
            ###
            events_df = events_df.iloc[1:, :].reset_index(drop=True)

            ###
            ### there are some odd cases where the instruction trial is dropped from the events_df
            ### manually add some fake instruction loading here, which is just a placeholder.
            ###
            if events_df.shape[0] != all_trialdata.shape[0] * 2:
                print(f"events_df: {events_df.shape} - all_trialdata: {all_trialdata.shape}")
                first_event_time = events_df['timestamp'].iloc[0]
                placeholder_rows = pd.DataFrame({
                    'timestamp': [first_event_time - 100, first_event_time - 50],
                    'STATUS': ['trial_loaded', 'trial_finished'],
                    'TRIAL_TYPE': ['undefined', 'undefined'],
                    'CORRECT': ['NaN', 'undefined'],
                    'BLOCK_TYPE': ['instruction', 'instruction'],
                    'TASK_NAME': ['NaN', 'NaN']
                })
                events_df = pd.concat([placeholder_rows, events_df], ignore_index=True)

            # every trial must now have exactly two event rows
            assert events_df.shape[0] == all_trialdata.shape[0] * 2, (
                f'{task_file}: {events_df.shape[0]} event rows for {all_trialdata.shape[0]} trials'
            )

            ###
            ### trialdata needs to be represented twice, so concat it with itself, and reset based on original index.
            ###
            all_trialdata = pd.concat([all_trialdata, all_trialdata]).sort_index().reset_index(drop=True)

            ###
            ### now events contains all metadata
            ###
            events_df = pd.concat([events_df, all_trialdata], axis=1)

            ###
            ### merge the metadata with the eeg data
            ### each eeg sample picks up the nearest event within 2ms, if there is one
            ###
            df = pd.merge_asof(eeg_df, events_df, on="timestamp", direction="nearest", tolerance=2.0)

            ###
            ### after merge, some status rows will be duplicated as a given status row might match multiple eeg times
            ### only keep the first of such duplicates
            ### NOTE(review): events_df.keys() includes 'timestamp', so the eeg timestamp of the
            ### blanked rows is cleared as well -- confirm whether that is intended.
            ###
            mask = (df['STATUS'] == df['STATUS'].shift())
            df.loc[mask, events_df.keys()] = None

            df['pid'] = pid
            df['filename'] = task_file
            df['task'] = task

            df = mne_filt(df)
            trial_dfs, pid_trial_idx = separate_trials_and_bin_mw(df, pid_trial_idx)

            dfs.append(trial_dfs)

# combine every recording and persist the flat table
df = pd.concat(dfs)
df.to_csv('pdata.csv', index=False)

# build the MNE epochs (plus the per-epoch `pads` table) and save them
epochs, pads = create_mne_epochs(df)
epochs.save('WLOAD_notch_bp_avg_mastoid_annotated_ica_stroop_nback_length_segments-epo.fif', overwrite=True)



0cbaf29f
0cbaf29f_nback_0_task_210_1683300581366.955.json
Creating RawArray with float64 data, n_channels=4, n_times=69180
    Range : 0 ... 69179 =      0.000 ...   270.230 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_nback_1_task_158_1683300512396.763.json
Creating RawArray with float64 data, n_channels=4, n_times=51528
    Range : 0 ... 51527 =      0.000 ...   201.277 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_nback_2_task_210_1683300893203.869.json
Creating RawArray with float64 data, n_channels=4, n_times=71328
    Range : 0 ... 71327 =      0.000 ...   278.621 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_nback_3_task_158_1683300818741.493.json
Creating RawArray with float64 data, n_channels=4, n_times=52272
    Range : 0 ... 52271 =      0.000 ...   204.184 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_stroop_task_154_1683298863291.1082.json
Creating RawArray with float64 data, n_channels=4, n_times=38676
    Range : 0 ... 38675 =      0.000 ...   151.074 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_stroop_task_154_1683299032205.96.json
Creating RawArray with float64 data, n_channels=4, n_times=39648
    Range : 0 ... 39647 =      0.000 ...   154.871 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window wit

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_stroop_task_154_1683299224857.077.json
Creating RawArray with float64 data, n_channels=4, n_times=38460
    Range : 0 ... 38459 =      0.000 ...   150.230 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window wi

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_stroop_task_154_1683299393064.574.json
Creating RawArray with float64 data, n_channels=4, n_times=37800
    Range : 0 ... 37799 =      0.000 ...   147.652 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window wi

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_rotation_task_52_1683298115287.054.json
Creating RawArray with float64 data, n_channels=4, n_times=39804
    Range : 0 ... 39803 =      0.000 ...   155.480 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_rotation_task_52_1683298289399.014.json
Creating RawArray with float64 data, n_channels=4, n_times=29724
    Range : 0 ... 29723 =      0.000 ...   116.105 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_rotation_task_52_1683298438482.7158.json
Creating RawArray with float64 data, n_channels=4, n_times=31608
    Range : 0 ... 31607 =      0.000 ...   123.465 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


0cbaf29f_rotation_task_52_1683298563548.863.json
Creating RawArray with float64 data, n_channels=4, n_times=27828
    Range : 0 ... 27827 =      0.000 ...   108.699 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


239022dd
239022dd_nback_0_task_210_1683239928922.439.json
Creating RawArray with float64 data, n_channels=4, n_times=67656
    Range : 0 ... 67655 =      0.000 ...   264.277 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


239022dd_nback_1_task_210_1683239644229.0771.json
Creating RawArray with float64 data, n_channels=4, n_times=67512
    Range : 0 ... 67511 =      0.000 ...   263.715 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


239022dd_nback_2_task_106_1683239518067.785.json
Creating RawArray with float64 data, n_channels=4, n_times=35208
    Range : 0 ... 35207 =      0.000 ...   137.527 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


239022dd_nback_3_task_210_1683239347080.8242.json
Creating RawArray with float64 data, n_channels=4, n_times=67560
    Range : 0 ... 67559 =      0.000 ...   263.902 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


239022dd_stroop_task_154_1683238286548.9868.json
Creating RawArray with float64 data, n_channels=4, n_times=39588
    Range : 0 ... 39587 =      0.000 ...   154.637 secs
Ready.
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 59 - 61 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 59.35
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 59.10 Hz)
- Upper passband edge: 60.65 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 60.90 Hz)
- Filter length: 1691 samples (6.605 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window w

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
# Report the mean of the 'numpads' column for every (task, workload) combination in `pads`.
for group_key, group_rows in pads.groupby(['task', 'workload']):
    task_name, workload_level = group_key
    mean_numpads = group_rows['numpads'].mean()
    print(f"{task_name} - {workload_level} - {mean_numpads}")

In [1]:
from library import *
import mne
from alice_ml.features import get_features_from_mne
from alice_ml.models import predict_mne

def mne_filt(df):
    """Filter, re-reference and ICA-clean the four EEG probe columns of ``df``.

    Processing order:
      1. Wrap the 'probe-0' .. 'probe-3' columns in an ``mne.io.RawArray``
         declared at 256 Hz.
      2. Notch-filter at 60 Hz, then band-pass 0.1-50 Hz.
      3. Re-reference to channels 'probe-0' and 'probe-3'.
      4. Fit an ICA, score its components with ``predict_mne`` and exclude
         every component whose ``flag_muscles`` or ``flag_eyes`` value is
         above 0.5.
      5. Write the cleaned signal back into the probe columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'probe-0', 'probe-1', 'probe-2', 'probe-3'.
        NOTE: the frame is modified in place (the probe columns are
        overwritten) and the same object is returned.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the probe columns replaced by the cleaned data.
    """
    CH_NAMES = ['probe-0', 'probe-1', 'probe-2', 'probe-3']
    SAMPLING_RATE = 256

    # MNE expects (n_channels, n_times), hence the transpose.
    data = df[CH_NAMES].T.values

    info = mne.create_info(ch_names=CH_NAMES, sfreq=SAMPLING_RATE, ch_types='eeg')
    raw = mne.io.RawArray(data, info)

    raw.notch_filter(freqs=60, filter_length='auto')
    # Original author's note on this band: "from 1-40.; 77%ML in r."
    raw.filter(l_freq=.1, h_freq=50, filter_length='auto')
    raw.set_eeg_reference(ref_channels=['probe-0', 'probe-3'], projection=False)

    # Fixed random_state keeps the decomposition reproducible between runs.
    ica = mne.preprocessing.ICA(n_components=None, random_state=97)
    ica.fit(raw)

    # The original also called get_features_from_mne(raw, ica) here and then
    # discarded the result; that unused computation has been dropped.
    predicted_df = predict_mne(raw, ica, model='lr', flags= 'all')
    print(f"predicted_df - {predicted_df}")

    is_artifact = (predicted_df.flag_muscles > 0.5) | (predicted_df.flag_eyes > 0.5)
    artifact_indices = predicted_df[is_artifact].index
    print(f"artifact indices - {artifact_indices}")

    # ICA.exclude is documented as a list of component indices, so hand it a
    # plain list rather than the pandas Index object.
    ica.exclude = artifact_indices.tolist()
    raw = ica.apply(raw)

    # Back to (n_times, n_channels) so the values line up with the rows of df.
    df[CH_NAMES] = raw.get_data().T

    return df


In [2]:
def separate_trials_and_bin_mw(df, p_unique_trial_idx):
    """Tag each sample row with its trial id, trial metadata and workload level.

    Two kinds of event rows drive the labelling:

    * ``STATUS == 'trial_loaded'`` with ``TRIAL_TYPE == 'fixation'`` marks the
      first row of a trial;
    * ``STATUS == 'trial_finished'`` with any other ``TRIAL_TYPE`` marks its
      last row.

    (Per the original note: the trial-load signal sometimes did not come
    through, but the fixation start and the regular finish do, so a trial is
    taken to run from the fixation's ``trial_loaded`` row to the non-fixation
    ``trial_finished`` row.)

    All rows from the most recent start through the finishing row receive the
    id ``"<pid>_<counter>"``, a copy of the finishing row's metadata columns,
    and a workload value:

    * ``'nback'``    -> the row's ``block`` value
    * ``'stroop'``   -> ``not congruent`` (True for incongruent trials)
    * ``'rotation'`` -> ``int(angle) / 50``
    * anything else  -> ``-1`` (the original comment names 'instruction' and
      'rest' tasks)

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'pid', 'STATUS' and every name in ``mdata_cols``.
        Modified in place: 'trial_id' and 'workload' are (re)created and the
        metadata columns are overwritten inside each trial span. Row labels
        are used in ``.loc`` slices, so they are presumably a sorted integer
        index — TODO confirm against the caller.
    p_unique_trial_idx : int
        Next free trial counter for this participant.

    Returns
    -------
    tuple
        ``(df, p_unique_trial_idx)`` where the counter has been advanced by
        the number of trials labelled here.
    """
    mdata_cols = [
                    'TRIAL_TYPE', 'CORRECT', 'angle', 'block',
                    'isFirstThree', 'success', 'congruent', 'isNBack',
                    'imnum', 'correct', 'stimfile', 'colorIgnore', 'response',
                    'colorDraw', 'experiment', 'letter', 'rt', 'filename'
                ]

    df['trial_id'] = None
    df['workload'] = None
    pid = df['pid'].iloc[0]

    # NOTE(review): start_idx is only moved by a fixation 'trial_loaded' row.
    # If a trial finishes without a new fixation start since the previous
    # trial, the span beginning at the old start is relabelled — confirm this
    # is acceptable for 'instruction' / 'rest' events.
    start_idx = 0

    # Only these two statuses can change state, so iterate over the event rows
    # alone instead of building a Series for every EEG sample.
    event_rows = df[df['STATUS'].isin(['trial_loaded', 'trial_finished'])]

    for index, row in event_rows.iterrows():
        trial_type = row['TRIAL_TYPE']
        status = row['STATUS']

        # a new trial starts!
        if status == 'trial_loaded' and trial_type == 'fixation':
            start_idx = index
        elif status == 'trial_finished' and trial_type != 'fixation':
            # .loc label slices include BOTH endpoints, so the span ends at
            # `index`; the previous `index+1` also tagged the first row after
            # the trial had finished.
            df.loc[start_idx:index, 'trial_id'] = f"{pid}_{p_unique_trial_idx}"
            df.loc[start_idx:index, mdata_cols] = row[mdata_cols].values

            if trial_type == 'nback':
                workload = row['block']
            elif trial_type == 'stroop':
                workload = not row['congruent']
            elif trial_type == 'rotation':
                workload = int(row['angle']) / 50 # 0, 1, 2
            else:
                # these are 'instruction' or 'rest' tasks
                workload = -1

            df.loc[start_idx:index, 'workload'] = workload

            p_unique_trial_idx += 1

    return df, p_unique_trial_idx

In [3]:
import mne 

def create_mne_epochs(df):
    """Turn a trial-labelled sample table into fixed-length MNE epochs.

    Trials longer than the 95th-percentile length of the non-rotation trials
    are discarded. Each remaining trial is zero-padded at the end up to the
    longest remaining trial, wrapped in a ``RawArray`` with a single
    'fixation' annotation at onset 0, and all trials are concatenated and
    epoched on those annotations.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'probe-0' .. 'probe-3', 'trial_id', 'TRIAL_TYPE',
        'workload', 'correct', 'filename' and 'pid'.

    Returns
    -------
    epochs : mne.Epochs
        One epoch per kept trial, with per-trial metadata attached.
    pads : pandas.DataFrame
        Columns 'task', 'numpads', 'workload': how many zero samples were
        appended to each trial.

    Raises
    ------
    ValueError
        If no trial survives the length filter.
    """
    all_raws = []
    all_metadata = []
    CH_NAMES = ['probe-0', 'probe-1', 'probe-2', 'probe-3']
    SAMPLING_RATE = 256

    # Everything below reads the task name from 'TRIAL_TYPE'; the length filter
    # used to index 'trialtype', which in this function is only a metadata key.
    # Keep honouring a 'trialtype' column if the caller supplies one, otherwise
    # fall back to 'TRIAL_TYPE' instead of raising KeyError.
    type_col = 'trialtype' if 'trialtype' in df.columns else 'TRIAL_TYPE'

    # 95th percentile of the trial lengths, measured on non-rotation trials.
    # NOTE(review): the original comment read "ONLY DO THIS WITH
    # STROOP/ROTATION", yet the code excludes rotation — confirm which is meant.
    trial_lengths = df[df[type_col] != 'rotation'].groupby(['trial_id']).size()
    threshold_length = trial_lengths.quantile(0.95)

    # The threshold is then applied to every trial, rotation included.
    trial_lengths = df.groupby(['trial_id']).size()
    valid_trial_ids = trial_lengths[trial_lengths <= threshold_length].index
    df_filtered = df[df['trial_id'].isin(valid_trial_ids)]

    if df_filtered.empty:
        raise ValueError('create_mne_epochs: no trials left after the length filter')

    # Every kept trial is padded up to this many samples.
    max_len_samples = df_filtered.groupby(['trial_id']).size().max()

    info = mne.create_info(
        ch_names=CH_NAMES,
        ch_types=['eeg'] * len(CH_NAMES),
        sfreq=SAMPLING_RATE
    )

    pads = {'task':[], 'numpads':[], 'workload':[]}

    # The list-style key is kept on purpose: the group key stored under
    # "trial_id" keeps the form that the export cell unwraps with x[0].
    for trial_id, group in df_filtered.groupby(['trial_id']):
        # (n_channels, n_times), zero-padded on the right to the common length
        data = group.loc[:, CH_NAMES].values.T
        n_pad = max_len_samples - data.shape[1]
        padded_data = np.pad(data, ((0, 0), (0, n_pad)), 'constant')

        pads['task'].append(group['TRIAL_TYPE'].iloc[0])
        pads['numpads'].append(n_pad)
        pads['workload'].append(group['workload'].iloc[0])

        raw = mne.io.RawArray(padded_data, info)

        # Per-trial metadata, taken from the first row of the trial.
        metadata = {
            "trialtype": group['TRIAL_TYPE'].iloc[0],
            "correct": group['correct'].iloc[0],
            "workload": group['workload'].iloc[0],
            "filename": group['filename'].iloc[0],
            "pid": group['pid'].iloc[0],
            "trial_id": trial_id
        }

        # One zero-length marker at the start of each trial; it becomes the
        # epoch onset after concatenation.
        annotations = mne.Annotations(onset=[0], duration=[0], description=["fixation"])
        raw.set_annotations(annotations)

        all_metadata.append(metadata)
        all_raws.append(raw)

    metadata_df = pd.DataFrame(all_metadata)

    all_raws = mne.concatenate_raws(all_raws)
    events, event_id = mne.events_from_annotations(all_raws, event_id={"fixation": 1})

    # tmax is inclusive, so stop one sample short of the padded length.
    tmax_duration = max_len_samples / SAMPLING_RATE - 1/SAMPLING_RATE
    epochs = mne.Epochs(all_raws, events, event_id, tmin=0, tmax=tmax_duration, baseline=None)

    epochs.metadata = metadata_df

    pads = pd.DataFrame(pads)
    return epochs, pads

In [4]:
# Long-format sample table for all epochs, without the 'condition' column.
data_df = epochs.to_data_frame().drop(columns=['condition'])

# Work on a copy of the metadata and keep only the first element of each
# stored trial_id value.
metadata_df = epochs.metadata.copy()
metadata_df['trial_id'] = metadata_df['trial_id'].map(lambda trial_key: trial_key[0])

# Attach each epoch's metadata row to its samples and write the result out.
combined_df = data_df.merge(metadata_df, left_on='epoch', right_index=True)
combined_df.to_csv('mw_data_raw_short_segments.csv', index=False)

# Earlier debugging / export variants, left disabled:
# print(data_df)
# print(metadata_df)
# data_df.to_csv('mental_workload_braindata.csv')
# metadata_df.to_csv('mental_workload_metadata.csv')

NameError: name 'epochs' is not defined

In [None]:
# List every metadata column together with the distinct values it holds.
for column_name in metadata_df.columns:
    distinct_values = metadata_df[column_name].unique()
    print(column_name)
    print(distinct_values)

Frequency Transformation

In [None]:
import mne
from mne.time_frequency import psd_array_multitaper

epochs = mne.read_epochs('WLOAD_notch_bp_avg_mastoid_annotated_ica-epo.fif', preload=True)

# Frequency range (Hz) over which the multitaper PSD is estimated. The
# frequency grid itself is returned by psd_array_multitaper, so no manual
# np.arange grid is needed (the old one was overwritten immediately).
fmin, fmax = 0.5, 45

psds, freqs = psd_array_multitaper(
    epochs.get_data(),
    sfreq=epochs.info['sfreq'],
    fmin=fmin,
    fmax=fmax,
    bandwidth=2,
    adaptive=True,
    low_bias=True,
)

# psds is unpacked as (epochs, channels, frequencies)
num_epochs, num_channels, num_freqs = psds.shape

# Flatten to one row per epoch: (epochs, channels x frequencies)
psds_reshaped = psds.reshape(num_epochs, -1)

# Column order matches the reshape: all frequencies of channel 0, then channel 1, ...
column_names = [f"ch_{ch}_freq_{freq:.2f}Hz" for ch in range(num_channels) for freq in freqs]

psd_df = pd.DataFrame(psds_reshaped, columns=column_names)

metadata_df = epochs.metadata.copy()

# pd.concat(axis=1) aligns on index labels. psd_df has a fresh 0..n-1 index,
# while the metadata keeps whatever index it was stored/assigned with, so
# align by position to avoid shifted or NaN-filled rows.
assert len(metadata_df) == len(psd_df), "metadata and PSD rows must match one-to-one"
df = pd.concat([psd_df, metadata_df.reset_index(drop=True)], axis=1)

df.to_csv('WLOAD_notch_bp_avg_mastoid_annotated_ica_fft.csv', index=False)
df


In [None]:
# Write `df` to CSV without the index column. This targets the same path as
# the export at the end of the PSD cell above, so running it overwrites that file.
df.to_csv('WLOAD_notch_bp_avg_mastoid_annotated_ica_fft.csv', index=False)