# Progress Summary
This document checks which processing steps have been completed for each subject and provides some utilities for generating bash scripts to run the needed pipelines. It can only be run if you have access to all of the raw data and are reproducing the full preprocessing methodology.

The rendered tables below still show the exact bad EEG channels and the ICA components dropped for each recording.

It should be regarded as a rough draft/work in progress to be tweaked for a particular need, but is a good starting point.

In [4]:
import os
import pandas as pd

# Value embedded in the ICA-related file names checked by process()
# (e.g. "<session>_0.9999_ica.fif") and in the ICA column header.
# NOTE(review): presumably the ICA n_components setting used upstream -- confirm.
ICA_COMPS = 0.9999

# Base directory under which the raw/preprocessed/processed/ica/notes trees are looked up.
root = "/Volumes/eeg"
# Sub-directory of `root` holding connectivity results. With the empty string,
# os.path.join(root, connectivity_dir) resolves to `root` itself.
# NOTE(review): confirm that an empty value is intentional.
connectivity_dir = ""

def _read_first_line(path):
    """Return the first line of the file at ``path``, or ``None`` if the path does not exist.

    The line is returned exactly as ``readline()`` yields it, so it keeps any
    trailing newline and is ``''`` for an empty file.
    """
    if not os.path.exists(path):
        return None
    with open(path, 'r') as file:
        return file.readline()


def process(df, raw_dir, preprocessed_dir, processed_dir, ica_dir, connectivity_dir, notes_dir):
    """Add one status column per pipeline step to ``df`` and return it.

    For every row (one ``id`` / ``session`` pair) the expected output files of
    each step are looked up on disk. Existence checks become booleans; the
    bads, ICA-drops and notes files contribute their first line (or ``None``
    when the file is missing).

    Args:
        df: DataFrame with an ``"id"`` and a ``"session"`` column. It is
            modified in place (the new columns are assigned on it).
        raw_dir: Directory containing ``<id>/<session>.fdt``, ``.set`` and
            ``<session>_raw.fif``.
        preprocessed_dir: Directory containing ``<id>/<session>_raw.fif`` and
            ``<id>/<session>_bads.txt``.
        processed_dir: Directory containing
            ``<id>/<session>_<ICA_COMPS>_raw.fif`` and
            ``<id>/<session>_<ICA_COMPS>_ica_drops.txt``.
        ica_dir: Directory containing ``<id>/<session>_<ICA_COMPS>_ica.fif``.
        connectivity_dir: Directory containing the per-``id`` connectivity
            ``.npy`` files.
        notes_dir: Directory containing ``<id>/info.txt``.

    Returns:
        The same ``df`` object with the status columns added.
    """
    # Connectivity results are stored per subject (not per session).
    connectivity_files = {
        "demo_NoG": "NoG_connectivity.npy",
        "demo_WiG": "WiG_connectivity.npy",
        "BL_NoG": "BL_NoG_connectivity.npy",
        "BL_WiG": "BL_WiG_connectivity.npy",
    }

    ica_column = f'{ICA_COMPS} Comp ICA Generated'
    # Insertion order of this dict is the column order of the resulting table.
    columns = {
        "Channel Locs Added via EEGLAB": [],
        "Converted to MNE": [],
        "Bads Removed": [],
        "Preprocessed": [],
        ica_column: [],
        "ICA Dropped Channels": [],
        "Processed": [],
        "Connectivity Calculated": [],
        "Notes": [],
    }

    for _, row in df.iterrows():
        subject = row["id"]
        session = row["session"]

        fdt_path = os.path.join(raw_dir, subject, f'{session}.fdt')
        set_path = os.path.join(raw_dir, subject, f'{session}.set')
        raw_path = os.path.join(raw_dir, subject, f'{session}_raw.fif')
        preprocessed_path = os.path.join(preprocessed_dir, subject, f'{session}_raw.fif')
        bads_path = os.path.join(preprocessed_dir, subject, f'{session}_bads.txt')
        ica_path = os.path.join(ica_dir, subject, f'{session}_{ICA_COMPS}_ica.fif')
        # The drops file sits next to the processed recording, not next to the ICA solution.
        ica_drops_path = os.path.join(processed_dir, subject, f'{session}_{ICA_COMPS}_ica_drops.txt')
        processed_path = os.path.join(processed_dir, subject, f'{session}_{ICA_COMPS}_raw.fif')
        note_path = os.path.join(notes_dir, subject, 'info.txt')

        # Both EEGLAB files must be present for this step to count as done.
        columns["Channel Locs Added via EEGLAB"].append(
            os.path.exists(fdt_path) and os.path.exists(set_path)
        )
        columns["Converted to MNE"].append(os.path.exists(raw_path))
        columns["Preprocessed"].append(os.path.exists(preprocessed_path))
        columns["Bads Removed"].append(_read_first_line(bads_path))
        columns[ica_column].append(os.path.exists(ica_path))
        columns["ICA Dropped Channels"].append(_read_first_line(ica_drops_path))
        columns["Processed"].append(os.path.exists(processed_path))

        found = [
            key
            for key, file_name in connectivity_files.items()
            if os.path.exists(os.path.join(connectivity_dir, subject, file_name))
        ]
        # Collapse the complete case to a short label; otherwise list what was found.
        columns["Connectivity Calculated"].append("All 4" if len(found) == 4 else found)

        columns["Notes"].append(_read_first_line(note_path))

    for column_name, values in columns.items():
        df[column_name] = values

    return df

def color_code(val):
    """Return the CSS background rule for a status cell.

    Truthy values get a light green background, falsy values a light red one.
    """
    background = '#82ffa3' if val else '#FF9999'
    return f'background-color: {background}'

def style(df):
    """Return a pandas Styler for ``df`` with the index hidden and status cells colored.

    Coloring and black text are applied to every column except the first two
    and the last one.
    """
    status_columns = df.columns[2:-1]
    header_style = [{'selector': 'th', 'props': [('font-size', '12pt')]}]

    # Each Styler method below returns the Styler itself, so the calls can be chained.
    return (
        df.style
        .hide(axis='index')
        .set_table_styles(header_style)
        .map(color_code, subset=status_columns)
        .set_properties(subset=status_columns, color='black')
    )

def _build_group_df(group, max_sessions=4):
    """Build the progress table for one subject group (e.g. ``"expert"``).

    Every sub-directory of ``<root>/raw/<group>`` is treated as a subject id.
    Each subject gets ``max_sessions`` candidate rows (sessions 1..max_sessions),
    which are passed through ``process`` and then reduced to the sessions whose
    EEGLAB ``.fdt``/``.set`` files actually exist.
    """
    raw_group = os.path.join(root, "raw", group)
    subject_ids = [
        str(entry)
        for entry in os.listdir(raw_group)
        if os.path.isdir(os.path.join(raw_group, entry))
    ]

    group_df = pd.DataFrame(
        [
            (subject, session)
            for subject in subject_ids
            for session in range(1, max_sessions + 1)
        ],
        columns=["id", "session"],
    )

    group_df = process(
        group_df,
        raw_group,
        os.path.join(root, "preprocessed", group),
        os.path.join(root, "processed", group),
        os.path.join(root, "ica", group),
        os.path.join(root, connectivity_dir, group),
        os.path.join(root, "notes", group),
    )

    # Keep only sessions that exist on disk. To exclude a specific recording,
    # filter on "id"/"session" here as well.
    # .copy() makes the result an independent frame so the column assignment
    # below does not write into a slice of the unfiltered frame.
    has_recording = group_df["Channel Locs Added via EEGLAB"].astype(bool)
    group_df = group_df[has_recording].copy()

    # Missing notes are stored as None; show them as empty cells instead.
    group_df["Notes"] = group_df["Notes"].fillna('')

    return group_df


def build_dfs():
    """Scan the data tree under ``root`` and build the progress tables.

    Returns:
        A tuple ``(expert_df, novice_df)`` with one row per existing
        subject/session recording and one column per processing step.
    """
    expert_df = _build_group_df("expert")
    novice_df = _build_group_df("novice")
    return expert_df, novice_df

In [5]:
# Scan the data directories and render the styled progress table for the expert group.
expert_df, novice_df = build_dfs()
display(style(expert_df))


id,session,Channel Locs Added via EEGLAB,Converted to MNE,Bads Removed,Preprocessed,0.9999 Comp ICA Generated,ICA Dropped Channels,Processed,Connectivity Calculated,Notes
9b,1,True,True,"Bad channels: ['M1', 'AFF1']",True,True,"ICA Components Dropped: [2, 0, 3]",True,All 4,
9b,2,True,True,"Bad channels: ['M1', 'AFF1', 'ECG']",True,True,"ICA Components Dropped: [1, 2, 5, 15, 9]",True,All 4,
10b,1,True,True,"Bad channels: ['M1', 'VEOGU', 'FTT10h', 'FT10']",True,True,"ICA Components Dropped: [1, 0, 8, 6]",True,All 4,
10b,2,True,True,"Bad channels: ['M1', 'FTT10h', 'VEOGU', 'FT10']",True,True,"ICA Components Dropped: [2, 0]",True,All 4,
10c1,1,True,True,"Bad channels: ['M1', 'T7', 'FC5', 'M2', 'FT8', 'FTT10h', 'FFT8h', 'OI1h']",True,True,"ICA Components Dropped: [3, 0]",True,All 4,
10c1,2,True,True,"Bad channels: ['M1', 'T7', 'M2', 'FTT10h', 'FC5', 'POz', 'OI1h']",True,True,"ICA Components Dropped: [5, 4]",True,All 4,
7c,1,True,True,"Bad channels: ['M1', 'FT7']",True,True,ICA Components Dropped: [5],True,All 4,
7c,2,True,True,"Bad channels: ['M1', 'FT7', 'M2', 'TPP10h', 'POO3h', 'PPO9h']",True,True,"ICA Components Dropped: [1, 3, 8]",True,All 4,
6,1,True,True,"Bad channels: ['M1', 'ECG', 'AF3']",True,True,"ICA Components Dropped: [3, 2]",True,All 4,No BL in 1 or 2
6,2,True,True,"Bad channels: ['M1', 'ECG', 'AF3']",True,True,"ICA Components Dropped: [2, 0, 6, 23]",True,All 4,No BL in 1 or 2


In [6]:
# Re-scan the data directories (build_dfs() is run again, so the table reflects
# the current state on disk) and render the styled table for the novice group.
expert_df, novice_df = build_dfs()
display(style(novice_df))

id,session,Channel Locs Added via EEGLAB,Converted to MNE,Bads Removed,Preprocessed,0.9999 Comp ICA Generated,ICA Dropped Channels,Processed,Connectivity Calculated,Notes
1,1,True,True,"Bad channels: ['FTT9h', 'FTT10h', 'ECG', 'M1', 'M2', 'FT9', 'FT10']",True,True,"ICA Components Dropped: [0, 1, 3]",True,All 4,Only 2 recordings. Have 2020-09-25_16-28-38 (BLs) and 17-55-11.(BISTs). Missing middle recording with Wig and NoG. Still missing after latest round of data.
1,2,True,True,"Bad channels: ['M1', 'M2', 'FT9', 'FTT9h', 'ECG', 'FT10', 'FTT10h']",True,True,"ICA Components Dropped: [0, 1]",True,All 4,Only 2 recordings. Have 2020-09-25_16-28-38 (BLs) and 17-55-11.(BISTs). Missing middle recording with Wig and NoG. Still missing after latest round of data.
3b,1,True,True,"Bad channels: ['M1', 'ECG']",True,True,"ICA Components Dropped: [1, 0, 10]",True,All 4,All needed events present but BL_NoG beg is in recording 1 and BL_NoG end is in recording 2. Okay to use?
3b,2,True,True,"Bad channels: ['M1', 'ECG']",True,True,"ICA Components Dropped: [2, 1, 0]",True,All 4,All needed events present but BL_NoG beg is in recording 1 and BL_NoG end is in recording 2. Okay to use?
4,1,True,True,"Bad channels: ['M1', 'PO3']",True,True,"ICA Components Dropped: [5, 2]",True,All 4,Only 1 recording. Have 2020-10-15_09-56-22 (BLs). Missing additional recordings with WiG/NoG. Still missing after latest round of data.
4,2,True,True,"Bad channels: ['M1', 'PO3', 'TPP9h']",True,True,"ICA Components Dropped: [3, 0]",True,All 4,Only 1 recording. Have 2020-10-15_09-56-22 (BLs). Missing additional recordings with WiG/NoG. Still missing after latest round of data.
5b,1,True,True,"Bad channels: ['M1', 'FTT9h', 'ECG', 'M2']",True,True,"ICA Components Dropped: [0, 1]",True,All 4,
5b,2,True,True,"Bad channels: ['M1', 'FTT9h', 'ECG']",True,True,"ICA Components Dropped: [0, 2]",True,All 4,
6,1,True,True,"Bad channels: ['FT8', 'ECG', 'TPP7h', 'FFC2h']",True,True,"ICA Components Dropped: [0, 1]",True,All 4,BL_WiG beg and end too close together to generate epochs
6,2,True,True,"Bad channels: ['TPP7h', 'ECG', 'FFC2h', 'FT8']",True,True,"ICA Components Dropped: [0, 1, 3]",True,All 4,BL_WiG beg and end too close together to generate epochs
