In [1]:
import os
import pandas as pd
import subprocess 

# Value embedded in the ICA / ICA-drops / processed file names that process()
# looks for, and passed to the pipeline scripts as --num_ica_comps.
# NOTE(review): presumably the ICA component setting used by 1_preproc.py
# (a fraction like this may mean "explained variance") - confirm there.
ICA_COMPS = 0.9999

# Base directory of the EEG data. build_dfs() joins it with "raw",
# "preprocessed", "processed", "ica" and "notes".
root = "/Volumes/eeg"

def _read_first_line(path):
    """Return the first line of the file at ``path``, or ``None`` if it is absent.

    The line is returned exactly as ``readline()`` yields it, so a trailing
    newline is kept and an empty file gives ``''``.
    """
    if not os.path.exists(path):
        return None
    with open(path, 'r') as file:
        return file.readline()


def process(df, raw_dir, preprocessed_dir, processed_dir, ica_dir, notes_dir):
    """Annotate ``df`` with the pipeline status of every (id, session) row.

    For each row the function checks which artefacts exist on disk and reads
    the first line of the bads, ICA-drops and notes files when present.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``"id"`` and a ``"session"`` column. Ids are converted
        with ``str()`` before building paths, so numeric ids are accepted.
    raw_dir : str
        Directory holding ``<id>/<session>.fdt``, ``<id>/<session>.set`` and
        ``<id>/<session>_raw.fif``.
    preprocessed_dir : str
        Directory holding ``<id>/<session>_raw.fif`` and
        ``<id>/<session>_bads.txt``.
    processed_dir : str
        Directory holding ``<id>/<session>_<ICA_COMPS>_raw.fif`` and
        ``<id>/<session>_<ICA_COMPS>_ica_drops.txt``.
    ica_dir : str
        Directory holding ``<id>/<session>_<ICA_COMPS>_ica.fif``.
    notes_dir : str
        Directory holding ``<id>/info.txt``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. The status columns are added in place, in a
        fixed order that ``style()`` relies on when it slices the columns.
    """
    ica_column = f'{ICA_COMPS} Comp ICA Generated'

    # Insertion order of this dict is the order in which columns are added.
    status = {
        "Channel Locs Added via EEGLAB": [],
        "Converted to MNE": [],
        "Bads Removed": [],
        "Preprocessed": [],
        ica_column: [],
        "ICA Dropped Channels": [],
        "Processed": [],
        "Notes": [],
    }

    for subject, session in zip(df["id"], df["session"]):
        # os.path.join rejects non-string components, so normalise the id.
        subject = str(subject)

        fdt_path = os.path.join(raw_dir, subject, f'{session}.fdt')
        set_path = os.path.join(raw_dir, subject, f'{session}.set')
        raw_path = os.path.join(raw_dir, subject, f'{session}_raw.fif')
        preprocessed_path = os.path.join(preprocessed_dir, subject, f'{session}_raw.fif')
        bads_path = os.path.join(preprocessed_dir, subject, f'{session}_bads.txt')
        ica_path = os.path.join(ica_dir, subject, f'{session}_{ICA_COMPS}_ica.fif')
        ica_drops_path = os.path.join(processed_dir, subject, f'{session}_{ICA_COMPS}_ica_drops.txt')
        processed_path = os.path.join(processed_dir, subject, f'{session}_{ICA_COMPS}_raw.fif')
        note_path = os.path.join(notes_dir, subject, 'info.txt')

        # The EEGLAB step counts as done only when both files of the pair exist.
        status["Channel Locs Added via EEGLAB"].append(
            os.path.exists(fdt_path) and os.path.exists(set_path)
        )
        status["Converted to MNE"].append(os.path.exists(raw_path))
        # This column stores the first line of the bads file, not a boolean.
        status["Bads Removed"].append(_read_first_line(bads_path))
        status["Preprocessed"].append(os.path.exists(preprocessed_path))
        status[ica_column].append(os.path.exists(ica_path))
        status["ICA Dropped Channels"].append(_read_first_line(ica_drops_path))
        status["Processed"].append(os.path.exists(processed_path))
        status["Notes"].append(_read_first_line(note_path))

    for column, values in status.items():
        df[column] = values

    return df




def color_code(val):
    """Return the CSS background for one status cell.

    A cell is "present" (light green, ``#82ffa3``) when its value is truthy
    and "missing" (medium-light red, ``#FF9999``) otherwise. Missing-value
    markers (``None``, ``NaN``, ``pd.NA``) always count as missing: ``NaN`` is
    truthy in Python, so a plain truthiness test would colour it green.

    Parameters
    ----------
    val : object
        The cell value: a boolean, a string read from a status file, or a
        missing-value marker.

    Returns
    -------
    str
        A ``background-color: ...`` CSS declaration.
    """
    # Only ask pandas about scalars; isna() on a list-like returns an array.
    is_missing = pd.api.types.is_scalar(val) and pd.isna(val)
    is_present = (not is_missing) and bool(val)

    color = '#82ffa3' if is_present else '#FF9999'
    return f'background-color: {color}'

def style(df):
    """Return a pandas Styler that renders ``df`` as a colour-coded status table.

    The index is hidden and header cells use a 12pt font. Every column except
    the first two and the last one is coloured by ``color_code`` and has its
    text forced to black.
    """
    # Columns from the third up to, but not including, the last.
    status_columns = df.columns[2:-1]
    header_font = {'selector': 'th', 'props': [('font-size', '12pt')]}

    return (
        df.style
        .hide(axis='index')
        .set_table_styles([header_font])
        .map(color_code, subset=status_columns)
        .set_properties(subset=status_columns, **{'color': 'black'})
    )

def _build_group_df(group):
    """Build the status table for one participant group sub-directory.

    ``group`` is the directory name used under each stage directory of
    ``root`` (``"expert"`` or ``"novice"``).
    """
    raw_dir, preprocessed_dir, processed_dir, ica_dir, notes_dir = (
        os.path.join(root, stage, group)
        for stage in ("raw", "preprocessed", "processed", "ica", "notes")
    )

    # Every sub-directory of the raw group directory is one participant id.
    ids = [d for d in os.listdir(raw_dir) if os.path.isdir(os.path.join(raw_dir, d))]

    # Three rows per participant, numbered 1..3 via the per-id running count.
    group_df = pd.DataFrame([str(item) for item in ids for _ in range(3)], columns=["id"])
    group_df["session"] = group_df.groupby("id").cumcount() + 1

    group_df = process(group_df, raw_dir, preprocessed_dir, processed_dir, ica_dir, notes_dir)

    # Hide any row where session == 3. Copy the filtered frame so the column
    # assignment below writes to an independent object, not a slice of group_df.
    visible_df = group_df[group_df["session"] != 3].copy()

    # Replace missing notes with empty strings in the Notes column.
    visible_df["Notes"] = visible_df["Notes"].fillna('')

    return visible_df


def build_dfs():
    """Scan the data directories under ``root`` and build both status tables.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(expert_df, novice_df)``. Each has one row per participant id and
        session (sessions 1 and 2 only) with the status columns added by
        ``process``; missing notes are empty strings.
    """
    expert_df = _build_group_df("expert")
    novice_df = _build_group_df("novice")

    return expert_df, novice_df

In [2]:
# Build both status tables from the files on disk and render the expert one.
expert_df, novice_df = build_dfs()
display(style(expert_df))

id,session,Channel Locs Added via EEGLAB,Converted to MNE,Bads Removed,Preprocessed,0.9999 Comp ICA Generated,ICA Dropped Channels,Processed,Notes
9b,1,True,True,"Bad channels: ['M1', 'AFF1']",True,True,"ICA Components Dropped: [2, 0, 3]",True,
9b,2,True,True,"Bad channels: ['M1', 'AFF1', 'ECG']",True,True,"ICA Components Dropped: [1, 2, 5, 15, 9]",True,
10b,1,True,True,"Bad channels: ['M1', 'VEOGU', 'FTT10h', 'FT10']",True,True,"ICA Components Dropped: [1, 0, 8, 6]",True,
10b,2,True,True,"Bad channels: ['M1', 'FTT10h', 'VEOGU', 'FT10']",True,True,"ICA Components Dropped: [2, 0]",True,
10c1,1,True,True,"Bad channels: ['M1', 'T7', 'FC5', 'M2', 'FT8', 'FTT10h', 'FFT8h', 'OI1h']",True,True,,False,
10c1,2,True,True,"Bad channels: ['M1', 'T7', 'M2', 'FTT10h', 'FC5', 'POz', 'OI1h']",True,True,,False,
7c,1,True,True,"Bad channels: ['M1', 'FT7']",True,True,,False,
7c,2,True,True,"Bad channels: ['M1', 'FT7', 'M2', 'TPP10h', 'POO3h', 'PPO9h']",True,True,,False,
6,1,True,True,"Bad channels: ['M1', 'ECG']",True,True,,False,
6,2,True,True,"Bad channels: ['M1', 'ECG', 'AF3']",True,True,,False,


In [3]:
# build_dfs() is called again here, so both tables are rebuilt from disk
# before the novice one is rendered.
expert_df, novice_df = build_dfs()
display(style(novice_df))

id,session,Channel Locs Added via EEGLAB,Converted to MNE,Bads Removed,Preprocessed,0.9999 Comp ICA Generated,ICA Dropped Channels,Processed,Notes
1,1,False,False,,False,False,,False,Only 2 recordings
1,2,False,False,,False,False,,False,Only 2 recordings
3b,1,False,False,,False,False,,False,Only 2 recordings
3b,2,False,False,,False,False,,False,Only 2 recordings
4,1,False,False,,False,False,,False,Only 2 recordings
4,2,False,False,,False,False,,False,Only 2 recordings
5b,1,True,True,"Bad channels: ['M1', 'FTT9h', 'ECG', 'M2']",True,True,,False,
5b,2,True,True,"Bad channels: ['M1', 'FTT9h', 'ECG']",True,True,,False,
6,1,True,True,"Bad channels: ['FT8', 'ECG', 'TPP7h', 'FFC2h']",True,True,,False,
6,2,True,True,"Bad channels: ['TPP7h', 'ECG', 'FFC2h', 'FT8']",True,True,,False,


In [5]:
# Participant group the command-generation cells below operate on.
# The same string is written into every generated command line.
target = "expert"

# Select the table by name. An unknown target raises KeyError rather than
# silently using expert_df while the commands carry the mistyped group.
working_df = {"expert": expert_df, "novice": novice_df}[target]

In [9]:
# Print the 0_mark_bads.py command for every row whose EEGLAB .fdt/.set pair
# exists but whose converted MNE raw file does not. Nothing is executed here.
pending_rows = zip(
    working_df["id"],
    working_df["session"],
    working_df["Channel Locs Added via EEGLAB"],
    working_df["Converted to MNE"],
)
for subject, session, has_channel_locs, is_converted in pending_rows:
    if not has_channel_locs or is_converted:
        continue
    print("python", "0_mark_bads.py", target, subject, str(session))
    

In [10]:
# Write run_all_1.sh with one 1_preproc.py command per row whose EEGLAB files
# exist but whose ICA file for the current ICA_COMPS value does not.
with open("run_all_1.sh", "w") as file:
    file.writelines(
        f"python 1_preproc.py {target} {subject} {session} --num_ica_comps {ICA_COMPS}\n"
        for subject, session, has_channel_locs, has_ica in zip(
            working_df["id"],
            working_df["session"],
            working_df["Channel Locs Added via EEGLAB"],
            working_df[f'{ICA_COMPS} Comp ICA Generated'],
        )
        if has_channel_locs and not has_ica
    )

In [8]:
# Write run_all_2.sh with one 2_select_ica.py command per row whose ICA file
# exists but for which no ICA-drops file was found.
# The commands go into a shell script; they are not run from the notebook.
with open("run_all_2.sh", "w") as file:
    for _, row in working_df.iterrows():
        # pd.isna matches None and NaN alike. Missing entries can be stored as
        # NaN depending on the column dtype, and "is None" would skip those.
        if row[f'{ICA_COMPS} Comp ICA Generated'] and pd.isna(row["ICA Dropped Channels"]):
            file.write(f"python 2_select_ica.py {target} {row['id']} {row['session']} --num_ica_comps {ICA_COMPS}\n")
        