In [39]:
import os
import polars as pl

In [46]:
ROOT_PATH = "/home/bobby/repos/latent-neural-dynamics-modeling"
DATA_PATH = os.path.join(ROOT_PATH, "data")

In [103]:
participants = pl.read_csv(os.path.join(DATA_PATH, "participants.tsv"), separator="\t", null_values="n/a")

In [104]:
def list_files(folder_path: str, root_: bool = False) -> list:
    if root_:
        return os.listdir(folder_path)
    else:
        return  os.listdir(os.path.join(DATA_PATH, folder_path))

In [105]:
participants = participants.with_columns(
    pl.col("participant_id").map_elements(lambda pid: list_files(pid), return_dtype=pl.List(pl.String)).alias("session")
).explode(pl.col("session"))

In [106]:
participants_ieeg = participants.with_columns(
    pl.concat_str([
        pl.lit(DATA_PATH),
        pl.col("participant_id"),
        pl.col("session"),
        pl.lit("ieeg")
    ], separator="/").alias("ieeg_path"),
)

In [107]:
participants_ieeg = participants_ieeg.with_columns(pl.col("ieeg_path").map_elements(lambda ieeg_path: list_files(ieeg_path, root_=True), return_dtype=pl.List(pl.String)).alias("ieeg_file")).explode(pl.col("ieeg_file"))

In [108]:
participants_ieeg = participants_ieeg.with_columns(
    pl.col("ieeg_file").str.split(by="_").alias("splitted_file")
).with_columns(pl.col("splitted_file").list.get(-1).str.split(".").list.get(0).alias("type"),
               pl.col("splitted_file").list.get(-1).str.split(".").list.get(-1).alias("data_format"),
               pl.col("splitted_file").list.get(-2).alias("run")).drop("splitted_file")

In [110]:
channel_metadata_schema = pl.List(pl.Struct(
    [
        pl.Field("name", pl.Utf8),
        pl.Field("type", pl.Utf8),
        pl.Field("units", pl.Utf8),
        pl.Field("low_cutoff", pl.Float64),
        pl.Field("high_cutoff", pl.Float64),
        pl.Field("sampling_frequency", pl.Float64),
    ]
))

In [115]:
def read_csv_(row: dict[str, str]) -> pl.Series:
    df = pl.read_csv(os.path.join(row["ieeg_path"], row["ieeg_file"]), separator="\t", null_values="n/a")
    return df.to_struct()

In [None]:
channels_df = participants_ieeg.filter(
    (pl.col("type") == "channels") & (pl.col("data_format") == "tsv")
).select(
    "participant_id", "session", "run",
    pl.struct(["ieeg_path", "ieeg_file"])
    .map_elements(
        read_csv_,
        return_dtype=channel_metadata_schema
    )
    .alias("channels_info")
)


In [113]:
participants_ieeg = participants_ieeg.join(
    channels_df, on=["participant_id", "session", "run"], how="left"
).filter( ~((pl.col("type") == "channels") & (pl.col("data_format") == "tsv")))

In [118]:
events_schema = pl.List(pl.Struct(
    [
        pl.Field("onset", pl.Float64),
        pl.Field("duration", pl.Float64),
        pl.Field("trial_tyoe", pl.Float64),
        pl.Field("value", pl.Int64),
        pl.Field("sample", pl.Int64),
    ]
))

In [119]:
events_df = participants_ieeg.filter(
    (pl.col("type") == "events") & (pl.col("data_format") == "tsv")
).select(
    "participant_id", "session", "run",
    pl.struct(["ieeg_path", "ieeg_file"])
    .map_elements(
        read_csv_,
        return_dtype=events_schema
    )
    .alias("events")
)


In [120]:
participants_ieeg = participants_ieeg.join(
    events_df, on=["participant_id", "session", "run"], how="left"
).filter(~((pl.col("type") == "events") & (pl.col("data_format") == "tsv")))

In [125]:
participants_ieeg = participants_ieeg.filter(~(pl.col("data_format") == "json"))

In [127]:
import mne

def load_vhdr(row: dict):
    file_path = os.path.join(row["ieeg_path"], row["ieeg_file"])
    # read the BrainVision vhdr file (preload is set to False to avoid loading the full data into memory)
    raw = mne.io.read_raw_brainvision(file_path, preload=False)
    return raw

vhdr_files_df = participants_ieeg.filter(
    (pl.col("data_format") == "vhdr") & (pl.col("type") == "ieeg")
).with_columns(
    pl.struct(["ieeg_path", "ieeg_file"]).map_elements(
        lambda s: load_vhdr(s),
        return_dtype=pl.Object
    ).alias("raw_vhdr")
)

vhdr_files_df

Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/data/sub-PDI4/ses-3/ieeg/sub-PDI4_ses-3_task-copydraw_run-7_ieeg.vhdr...
Setting channel info structure...
Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/data/sub-PDI4/ses-3/ieeg/sub-PDI4_ses-3_task-copydraw_run-8_ieeg.vhdr...
Setting channel info structure...
Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/data/sub-PDI4/ses-3/ieeg/sub-PDI4_ses-3_task-copydraw_run-2_ieeg.vhdr...
Setting channel info structure...
Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/data/sub-PDI4/ses-3/ieeg/sub-PDI4_ses-3_task-copydraw_run-11_ieeg.vhdr...
Setting channel info structure...
Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/data/sub-PDI4/ses-3/ieeg/sub-PDI4_ses-3_task-copydraw_run-6_ieeg.vhdr...
Setting channel info structure...
Extracting parameters from /home/bobby/repos/latent-neural-dynamics-modeling/da

participant_id,age,sex,hand,weight,height,session,ieeg_path,ieeg_file,type,data_format,run,channels_info,events,raw_vhdr
str,str,str,str,str,str,str,str,str,str,str,str,list[struct[6]],list[struct[5]],object
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-7""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{6.91,1.652091,null,25,2073}, {8.563333,9.126364,null,1,2569}, … {200.97,17.894227,null,15,60291}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-7_ieeg.eeg, 24 x 65659 (218.9 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-8""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{9.426667,1.755,null,25,2828}, {11.183333,9.032591,null,1,3355}, … {207.833333,23.003455,null,15,62350}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-8_ieeg.eeg, 24 x 69251 (230.8 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-2""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{8.006667,1.752273,null,25,2402}, {9.756667,9.015818,null,1,2927}, … {185.78,32.906273,null,15,55734}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-2_ieeg.eeg, 24 x 65605 (218.7 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-11""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{25.2,3.1705,null,25,7560}, {28.37,9.000364,null,1,8511}, … {222.836667,18.552,null,15,66851}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-11_ieeg.eeg, 24 x 72417 (241.4 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-6""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{12.356667,3.842682,null,25,3707}, {16.2,9.009409,null,1,4860}, … {203.396667,30.345227,null,15,61019}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-6_ieeg.eeg, 24 x 70122 (233.7 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-4""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{6.033333,2.190818,null,25,1810}, {8.223333,9.112364,null,1,2467}, … {207.183333,29.352909,null,15,62155}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-4_ieeg.eeg, 24 x 70961 (236.5 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-12""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{10.873333,2.071727,null,25,3262}, {12.946667,8.972773,null,1,3884}, … {218.49,175.53,null,15,65547}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-12_ieeg.eeg, 24 x 118206 (394.0 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-10""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{10.396667,2.184591,null,25,3119}, {12.58,8.995864,null,1,3774}, … {204.013333,244.918636,null,15,61204}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-10_ieeg.eeg, 24 x 134680 (448.9 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-3""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{7.226667,4.922045,null,25,2168}, {12.15,9.118227,null,1,3645}, … {193.126667,56.071045,null,15,57938}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-3_ieeg.eeg, 24 x 74759 (249.2 s), ~24 KiB, data not loaded>"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-5""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{8.11,4.156818,null,25,2433}, {12.266667,9.090409,null,1,3680}, … {199.966667,24.289955,null,15,59990}]","<RawBrainVision | sub-PDI4_ses-3_task-copydraw_run-5_ieeg.eeg, 24 x 67277 (224.3 s), ~24 KiB, data not loaded>"


In [126]:
participants_ieeg

participant_id,age,sex,hand,weight,height,session,ieeg_path,ieeg_file,type,data_format,run,channels_info,events
str,str,str,str,str,str,str,str,str,str,str,str,list[struct[6]],list[struct[5]]
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-7""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{6.91,1.652091,null,25,2073}, {8.563333,9.126364,null,1,2569}, … {200.97,17.894227,null,15,60291}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-7""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{6.91,1.652091,null,25,2073}, {8.563333,9.126364,null,1,2569}, … {200.97,17.894227,null,15,60291}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-11""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{25.2,3.1705,null,25,7560}, {28.37,9.000364,null,1,8511}, … {222.836667,18.552,null,15,66851}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-3""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{7.226667,4.922045,null,25,2168}, {12.15,9.118227,null,1,3645}, … {193.126667,56.071045,null,15,57938}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-8""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{9.426667,1.755,null,25,2828}, {11.183333,9.032591,null,1,3355}, … {207.833333,23.003455,null,15,62350}]"
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-5""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{8.11,4.156818,null,25,2433}, {12.266667,9.090409,null,1,3680}, … {199.966667,24.289955,null,15,59990}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-8""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{9.426667,1.755,null,25,2828}, {11.183333,9.032591,null,1,3355}, … {207.833333,23.003455,null,15,62350}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""eeg""","""run-6""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{12.356667,3.842682,null,25,3707}, {16.2,9.009409,null,1,4860}, … {203.396667,30.345227,null,15,61019}]"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…","""ieeg""","""vhdr""","""run-5""","[{""LFP_1"",""DBS"",""V"",0.0,150.0,300.0}, {""LFP_2"",""DBS"",""V"",0.0,150.0,300.0}, … {""EOG_4"",""EOG"",""V"",0.0,150.0,300.0}]","[{8.11,4.156818,null,25,2433}, {12.266667,9.090409,null,1,3680}, … {199.966667,24.289955,null,15,59990}]"


In [None]:
participants_motion = participants.with_columns(
    pl.concat_str([
        pl.lit(DATA_PATH),
        pl.col("participant_id"),
        pl.col("session"),
        pl.lit("motion")
    ], separator="/").alias("motion_path"),
)

In [54]:
participants_motion = participants_motion.with_columns(pl.col("motion_path").map_elements(lambda motion_path: list_files(motion_path, root_=True), return_dtype=pl.List(pl.String)).alias("motion_file")).explode(pl.col("motion_file"))

In [55]:
participants_motion

participant_id,age,sex,hand,weight,height,session,motion_path,motion_file
str,str,str,str,str,str,str,str,str
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
…,…,…,…,…,…,…,…,…
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
"""sub-PDI4""",,,,,,"""ses-3""","""/home/bobby/repos/latent-neura…","""sub-PDI4_ses-3_task-copydraw_r…"
