In [1]:
from pathlib import Path
import nibabel as nib
import pandas as pd

In [2]:
bids_dir = Path("../bids")

Data counts

In [3]:
subjects = sorted([p for p in bids_dir.iterdir() if p.is_dir() and p.name.startswith("sub-")])
total_subjects = len(subjects)

In [6]:
subs_with_t1w = set()
subs_with_bold_200TR = set()

for sub_dir in subjects:
    sub = sub_dir.name.replace("sub-", "")
    has_t1w = False
    has_bold_200TR = False

    for ses_dir in sub_dir.glob("ses-*"):
        # T1w check
        if not has_t1w:
            anat_dir = ses_dir / "anat"
            if anat_dir.exists() and any(anat_dir.glob("*_T1w.nii.gz")):
                has_t1w = True

        # BOLD check
        if not has_bold_200TR:
            func_dir = ses_dir / "func"
            if func_dir.exists():
                for bold_file in func_dir.glob("*_bold.nii.gz"):
                    img = nib.load(bold_file, mmap=True)
                    n_trs = img.header.get_data_shape()[-1]
                    if n_trs >= 200:
                        has_bold_200TR = True
                        break  # stop checking other BOLD files in this session

        # If both criteria met, no need to check further sessions
        if has_t1w and has_bold_200TR:
            break

    # Add to sets if criteria satisfied
    if has_t1w:
        subs_with_t1w.add(sub)
    if has_bold_200TR:
        subs_with_bold_200TR.add(sub)

subs_with_both_200TR = subs_with_t1w & subs_with_bold_200TR

In [7]:
print(f"Total subjects: {total_subjects}")
print(f"Subjects with >=1 T1w: {len(subs_with_t1w)}")
print(f"Subjects with >=1 BOLD and >=200 TRs: {len(subs_with_bold_200TR)}")
print(f"Subjects with T1w and BOLD >=200 TRs: {len(subs_with_both_200TR)}")

Total subjects: 1482
Subjects with >=1 T1w: 890
Subjects with >=1 BOLD and >=200 TRs: 1330
Subjects with T1w and BOLD >=200 TRs: 758


In [8]:
print("Subject ids with T1w and BOLD >=200 TRs (first 5):")
print(list(subs_with_both_200TR)[:5])

Subject ids with T1w and BOLD >=200 TRs (first 5):
['170190', '101179', '141135', '226051', '162793']


Subject list for curation

In [18]:
metadata_dir = Path("../metadata")
# Read the master metadata CSV
master_list = metadata_dir / "PPMI_T1+fMRI_11_07_2025.csv"
subject_list = metadata_dir / "PPMI_T1+fMRI_gt200TR_valid_subjects.csv"

In [19]:
df_meta = pd.read_csv(master_list)
df_meta["Subject"] = df_meta["Subject"].astype(str)
# Filter only eligible subjects (with >=1 T1w & >=1 BOLD >=200 TRs)
eligible_subjects = sorted(subs_with_both_200TR)
df_subj = df_meta[df_meta["Subject"].isin(eligible_subjects)].copy()
# Sort the filtered dataframe by Subject and Session if present
if "Visit" in df_subj.columns:
    df_subj.sort_values(by=["Subject", "Visit"], inplace=True)
else:
    df_subj.sort_values(by="Subject", inplace=True)
# Save filtered subject list CSV
df_subj.to_csv(subject_list, index=False)

In [20]:
print(f"Saved curated CSV → {subject_list}")
print(f"Eligible subjects: {df_subj['Subject'].nunique()}")

Saved curated CSV → ../metadata/PPMI_T1+fMRI_gt200TR_valid_subjects.csv
Eligible subjects: 758


The End