# Progress Summary
This document checks which processing steps have been completed for each subject and provides utilities for generating bash scripts that run the pipelines still needed.

It should be regarded as a rough draft/work in progress to be tweaked for a particular need, but is a good starting point.

In [None]:
import os
import pandas as pd

# Value embedded in the ICA-related file names checked by `process`
# (e.g. f'{session}_{ICA_COMPS}_ica.fif') and passed as `--num_ica_comps`
# in the script-generation helpers further down.
# NOTE(review): 0.9999 presumably is an explained-variance fraction rather
# than a component count -- confirm against 1_preproc.py.
ICA_COMPS = 0.9999

# Base directory of the dataset; every stage directory is joined onto it.
root = "/Volumes/eeg"
# Name of the sub-directory of `root` that holds the connectivity results.
connectivity_dir = "connectivity_scores_shannon_entropy_dur2_ovlp1"

def _read_first_line(path):
    """Return the first line of the file at `path`, or None if `path` does not exist.

    The line is returned exactly as `readline()` yields it: a trailing
    newline is kept and an empty file gives "". Callers rely on the
    truthiness of that raw value, so it is deliberately not stripped.
    """
    if not os.path.exists(path):
        return None
    with open(path, 'r') as handle:
        return handle.readline()


def process(df, raw_dir, preprocessed_dir, processed_dir, ica_dir, connectivity_dir, notes_dir):
    """Add one status column per pipeline step to `df` by probing the filesystem.

    Parameters:
        df: DataFrame with an "id" and a "session" column; one row per
            subject/session. It is modified in place and also returned.
        raw_dir, preprocessed_dir, processed_dir, ica_dir, connectivity_dir,
        notes_dir: stage directories, each containing one sub-directory per
            subject id.

    Columns added (in this order):
        "Channel Locs Added via EEGLAB": True when both `<session>.fdt` and
            `<session>.set` exist under `raw_dir/<id>`.
        "Converted to MNE": True when `raw_dir/<id>/<session>_raw.fif` exists.
        "Bads Removed": first line of `preprocessed_dir/<id>/<session>_bads.txt`,
            or None when that file is missing.
        "Preprocessed": True when `preprocessed_dir/<id>/<session>_raw.fif` exists.
        f'{ICA_COMPS} Comp ICA Generated': True when
            `ica_dir/<id>/<session>_<ICA_COMPS>_ica.fif` exists.
        "ICA Dropped Channels": first line of
            `processed_dir/<id>/<session>_<ICA_COMPS>_ica_drops.txt`, or None.
        "Processed": True when
            `processed_dir/<id>/<session>_<ICA_COMPS>_raw.fif` exists.
        "Connectivity Calculated": "All 4" when all four connectivity files
            exist for the subject, otherwise the list of labels that do
            (possibly empty). These files are per subject, not per session.
        "Notes": first line of `notes_dir/<id>/info.txt`, or None.

    Returns:
        The same `df` object with the columns above added.
    """
    # Label -> file name of each connectivity result expected per subject.
    connectivity_files = {
        "demo_NoG": "NoG_connectivity.npy",
        "demo_WiG": "WiG_connectivity.npy",
        "BL_NoG": "BL_NoG_connectivity.npy",
        "BL_WiG": "BL_WiG_connectivity.npy",
    }

    ica_column = f'{ICA_COMPS} Comp ICA Generated'

    # Insertion order here is the order in which the columns are added to
    # `df`; the styling code slices columns by position, so keep it stable.
    columns = {
        "Channel Locs Added via EEGLAB": [],
        "Converted to MNE": [],
        "Bads Removed": [],
        "Preprocessed": [],
        ica_column: [],
        "ICA Dropped Channels": [],
        "Processed": [],
        "Connectivity Calculated": [],
        "Notes": [],
    }

    for _, row in df.iterrows():
        subject_id = row["id"]
        session = row["session"]

        raw_subject = os.path.join(raw_dir, subject_id)
        preprocessed_subject = os.path.join(preprocessed_dir, subject_id)
        processed_subject = os.path.join(processed_dir, subject_id)

        fdt_path = os.path.join(raw_subject, f'{session}.fdt')
        set_path = os.path.join(raw_subject, f'{session}.set')
        columns["Channel Locs Added via EEGLAB"].append(
            os.path.exists(fdt_path) and os.path.exists(set_path)
        )
        columns["Converted to MNE"].append(
            os.path.exists(os.path.join(raw_subject, f'{session}_raw.fif'))
        )

        columns["Bads Removed"].append(
            _read_first_line(os.path.join(preprocessed_subject, f'{session}_bads.txt'))
        )
        columns["Preprocessed"].append(
            os.path.exists(os.path.join(preprocessed_subject, f'{session}_raw.fif'))
        )

        columns[ica_column].append(
            os.path.exists(os.path.join(ica_dir, subject_id, f'{session}_{ICA_COMPS}_ica.fif'))
        )
        # The drops file is looked up under processed_dir, not ica_dir.
        columns["ICA Dropped Channels"].append(
            _read_first_line(
                os.path.join(processed_subject, f'{session}_{ICA_COMPS}_ica_drops.txt')
            )
        )
        columns["Processed"].append(
            os.path.exists(os.path.join(processed_subject, f'{session}_{ICA_COMPS}_raw.fif'))
        )

        found = [
            label
            for label, filename in connectivity_files.items()
            if os.path.exists(os.path.join(connectivity_dir, subject_id, filename))
        ]
        # Collapse the complete case to a short marker; an empty list stays
        # falsy so that "nothing computed" is distinguishable downstream.
        columns["Connectivity Calculated"].append(
            "All 4" if len(found) == len(connectivity_files) else found
        )

        columns["Notes"].append(
            _read_first_line(os.path.join(notes_dir, subject_id, 'info.txt'))
        )

    for name, values in columns.items():
        df[name] = values

    return df

def color_code(val):
    """Return the CSS background rule for a status cell.

    Truthy values get a light green background (#82ffa3); falsy values
    (False, None, "", an empty list, ...) get a light red one (#FF9999).
    """
    if val:
        return 'background-color: #82ffa3'
    return 'background-color: #FF9999'

def style(df):
    """Return a pandas Styler for `df` with the status columns colour-coded.

    The index is hidden and header cells are set to a 12pt font. Every
    column from the third through the second-to-last is given a background
    colour by `color_code` and black text; the first two columns and the
    last column are left unstyled.
    """
    status_cols = df.columns[2:-1]
    header_rule = {'selector': 'th', 'props': [('font-size', '12pt')]}

    return (
        df.style
        .hide(axis='index')
        .set_table_styles([header_rule])
        .map(color_code, subset=status_cols)
        # Black text keeps the cells readable on the pastel backgrounds.
        .set_properties(subset=status_cols, color='black')
    )

def _build_group_df(group, max_sessions):
    """Build the status table for one subject group ("expert" or "novice").

    Subject ids are the sub-directory names of `root/raw/<group>`. Each id
    gets one row per session number 1..max_sessions, the status columns are
    filled in by `process`, and only rows whose EEGLAB `.fdt`/`.set` pair
    exists are kept.
    """
    raw_dir = os.path.join(root, "raw", group)

    # os.listdir returns entries in arbitrary order; sort so the table is
    # identical from run to run.
    subject_ids = sorted(
        entry for entry in os.listdir(raw_dir)
        if os.path.isdir(os.path.join(raw_dir, entry))
    )

    rows = [
        (subject_id, session)
        for subject_id in subject_ids
        for session in range(1, max_sessions + 1)
    ]
    df = pd.DataFrame(rows, columns=["id", "session"])

    df = process(
        df,
        raw_dir,
        os.path.join(root, "preprocessed", group),
        os.path.join(root, "processed", group),
        os.path.join(root, "ica", group),
        os.path.join(root, connectivity_dir, group),
        os.path.join(root, "notes", group),
    )

    # Keep only sessions that actually have EEGLAB data. Candidate rows are
    # generated for every session number, so sessions that were never
    # recorded are removed here. Any additional per-subject/session
    # exclusions belong right after this filter.
    has_channel_locs = df["Channel Locs Added via EEGLAB"].astype(bool)
    # .copy() makes the result an independent frame, so the column
    # assignment below does not raise SettingWithCopyWarning.
    df = df.loc[has_channel_locs].copy()

    # Missing notes are shown as empty strings rather than None.
    df["Notes"] = df["Notes"].fillna('')

    return df


def build_dfs(max_sessions=4):
    """Scan the data directories and build the expert and novice status tables.

    Parameters:
        max_sessions: number of candidate sessions generated per subject
            (session numbers 1..max_sessions). Defaults to 4.

    Returns:
        A tuple `(expert_df, novice_df)`. Each frame has one row per
        subject/session for which the EEGLAB `.fdt` and `.set` files exist,
        with the "id" and "session" columns followed by the status columns
        added by `process`.
    """
    expert_df = _build_group_df("expert", max_sessions)
    novice_df = _build_group_df("novice", max_sessions)
    return expert_df, novice_df

In [None]:
# Scan the data directories to build both status tables, then render the
# styled expert table.
expert_df, novice_df = build_dfs()
display(style(expert_df))


In [None]:
# Re-scan the data directories (both tables are rebuilt from scratch), then
# render the styled novice table.
expert_df, novice_df = build_dfs()
display(style(novice_df))

In [None]:
# Helper for saving to csv if needed
# cols_of_interest = ["id", "session", "Connectivity Calculated", "Notes"]

# # Isolate cols of interest
# novice_df_summary = novice_df[cols_of_interest]
# # Keep only rows where session is 2
# novice_df_summary = novice_df_summary[novice_df_summary["session"] == 2]
# # Dump to csv
# novice_df_summary.to_csv("novice_summary.csv", index=False)

In [None]:
# Helpers for automatically generating bash scripts to run pipelines for missing data
# target = "expert"
# working_df = novice_df if target == "novice" else expert_df
# expert_drop_ids = ["13b", "11", "1"]
# novice_drop_ids = ["3b", "6", "13"]

# if target == "expert":
#     working_df = working_df[~working_df["id"].isin(expert_drop_ids)]

# if target == "novice":
#     working_df = working_df[~working_df["id"].isin(novice_drop_ids)]

In [None]:
# For every row where "Channel Locs Added via EEGLAB" is True but "Converted to MNE" is False, run $ python 0_mark_bads.py expert id session
# with open("run_all_0.sh", "w") as file:
#     for _, row in working_df.iterrows():
#         if row["Channel Locs Added via EEGLAB"] and not row["Converted to MNE"]:
#             print("python", "0_mark_bads.py", target, row["id"], str(row["session"]))
#             file.write(f'python 0_mark_bads.py {target} {row["id"]} {str(row["session"])}\n')
        

In [None]:
# For every row where "Channel Locs Added via EEGLAB" is True but the f'{ICA_COMPS} Comp ICA Generated' column is False, run $ python 1_preproc.py expert id session --num_ica_comps ICA_COMPS
# with open("run_all_1.sh", "w") as file:
#     for _, row in working_df.iterrows():
#         if row["Channel Locs Added via EEGLAB"] and not row[f'{ICA_COMPS} Comp ICA Generated']:
#             print("python", "1_preproc.py", target, row["id"], str(row["session"]), "--num_ica_comps", ICA_COMPS)
#             file.write(f"python 1_preproc.py {target} {row['id']} {row['session']} --num_ica_comps {ICA_COMPS}\n")

In [None]:
# For every row where the f'{ICA_COMPS} Comp ICA Generated' column is True but "ICA Dropped Channels" is None, run $ python 2_select_ica.py expert id session --num_ica_comps ICA_COMPS

# with open("run_all_2.sh", "w") as file:
#     for _, row in working_df.iterrows():
#         if row[f'{ICA_COMPS} Comp ICA Generated'] and row["ICA Dropped Channels"] is None:
#             print("python 2_select_ica.py", target, row["id"], row["session"], "--num_ica_comps", ICA_COMPS)
#             file.write(f"python 2_select_ica.py {target} {row['id']} {row['session']} --num_ica_comps {ICA_COMPS}\n")
        

In [None]:
# # For every subject that has a row where "Processed" is True but "Connectivity Calculated" is empty, run $ python 3_compute_connectivity.py expert id --num_ica_comps ICA_COMPS once for each of the four --baseline/--WiG combinations
# ids_added = []
# with open("run_all_3.sh", "w") as file:
#     for _, row in working_df.iterrows():
        
#         if row["Processed"] and not row["Connectivity Calculated"]:
#             # Skip if id already added
#             if row["id"] in ids_added:
#                 pass
#             else:
#                 ids_added.append(row["id"])
#                 print("python", "3_compute_connectivity.py", target, row["id"], "--num_ica_comps", ICA_COMPS, "--baseline True --WiG False")
#                 print("python", "3_compute_connectivity.py", target, row["id"], "--num_ica_comps", ICA_COMPS, "--baseline False --WiG False")
#                 print("python", "3_compute_connectivity.py", target, row["id"], "--num_ica_comps", ICA_COMPS, "--baseline True --WiG True")
#                 print("python", "3_compute_connectivity.py", target, row["id"], "--num_ica_comps", ICA_COMPS, "--baseline False --WiG True")
                
#                 file.write(f"python 3_compute_connectivity.py {target} {row['id']} --num_ica_comps {ICA_COMPS} --baseline True --WiG False\n")
#                 file.write(f"python 3_compute_connectivity.py {target} {row['id']} --num_ica_comps {ICA_COMPS} --baseline False --WiG False\n")
#                 file.write(f"python 3_compute_connectivity.py {target} {row['id']} --num_ica_comps {ICA_COMPS} --baseline True --WiG True\n")
#                 file.write(f"python 3_compute_connectivity.py {target} {row['id']} --num_ica_comps {ICA_COMPS} --baseline False --WiG True\n")


