### Notebook to generate mr_proc manifest(s)
- METAL_PD
- SERB_PD

In [1]:
import os
import pandas as pd

### Paths

In [2]:
dataset_dir = "/home/nikhil/projects/Parkinsons/nimhans/data/PD_METAL"

PD_participants_file = f"{dataset_dir}/scratch/T1/pd/T1_MetalPD.xlsx"
CTRL_participants_file = f"{dataset_dir}/scratch/T1/control/Metal_PD_HC.ods"

### Read raw tabular data

In [3]:
## PD
PD_participants_df = pd.read_excel(PD_participants_file,engine='openpyxl')
PD_participants_df = PD_participants_df.rename(columns={'Unnamed: 0':'participant_id',"Age":"age"})
PD_participants_df["sex"] = PD_participants_df["Gender(male-1)"].replace({1:"M", 2:"F"})
PD_participants_df["group"] = "PD"
PD_particpants = PD_participants_df['participant_id'].unique()
n_participants = len(PD_particpants)
print(f"n_participants: {n_participants}")
PD_participants_df.head()

n_participants: 67


Unnamed: 0,participant_id,Gender(male-1),age,Age at onset,Duration,UPDRS -III OFF,sex,group
0,MPD_T1_1,2.0,66.0,58.0,8.0,57.0,F,PD
1,MPD_T1_2,1.0,55.0,53.0,2.0,33.0,M,PD
2,MPD_T1_3,,,,,,,PD
3,MPD_T1_5,2.0,37.0,33.0,4.0,28.5,F,PD
4,MPD_T1_6,2.0,46.0,43.0,3.0,39.0,F,PD


In [4]:
## CTRL
CTRL_participants_df = pd.read_excel(CTRL_participants_file,engine='odf')
CTRL_participants_df = CTRL_participants_df.rename(columns={'Code':'participant_id',"Age":"age"})
CTRL_participants_df["sex"] = CTRL_participants_df["Gender (Male:1; Female:2)"].replace({1:"M", 2:"F"})
CTRL_participants_df["group"] = "control"

# Zero pad single digit participant ids
CTRL_participants_df[["participant_prefix", "id"]] = CTRL_participants_df["participant_id"].str.rsplit("_", n=1, expand=True)
CTRL_participants_df["id"] = CTRL_participants_df["id"].str.zfill(2)
CTRL_participants_df["participant_id"] = CTRL_participants_df["participant_prefix"] + "_" + CTRL_participants_df["id"]

control_particpants = CTRL_participants_df['participant_id'].unique()
n_participants = len(control_particpants)
print(f"n_participants: {n_participants}")

CTRL_participants_df.head()

n_participants: 65


Unnamed: 0,participant_id,age,Gender (Male:1; Female:2),sex,group,participant_prefix,id
0,MPD_HC_T1_01,50,1,M,control,MPD_HC_T1,1
1,MPD_HC_T1_02,40,2,F,control,MPD_HC_T1,2
2,MPD_HC_T1_03,38,2,F,control,MPD_HC_T1,3
3,MPD_HC_T1_04,60,1,M,control,MPD_HC_T1,4
4,MPD_HC_T1_05,42,2,F,control,MPD_HC_T1,5


### Generate nipoppy manifest
- available_datatypes: ["anat"]
- manifest_cols = ["participant_id","visit","session","datatype","bids_id"]

In [5]:
participant_cols = ["participant_id","group","age","sex"]
participants_df = pd.concat([CTRL_participants_df[participant_cols], PD_participants_df[participant_cols]],axis=0)
participants_df["visit"] = "V01"
participants_df["session"] = "ses-01"
participants_df["datatype"] = "['anat']"
participants_df["id"] = participants_df["participant_id"].str.rsplit("_", n=1, expand=True)[1]
participants_df["id"] = participants_df["id"].str.zfill(3)
participants_df.loc[participants_df["group"]=="control", "id"] = "HC" + participants_df.loc[participants_df["group"]=="control", "id"].astype(str)
participants_df["bids_id"] = "sub-METAL" + participants_df["id"]
participants_df.head()

Unnamed: 0,participant_id,group,age,sex,visit,session,datatype,id,bids_id
0,MPD_HC_T1_01,control,50.0,M,V01,ses-01,['anat'],HC001,sub-METALHC001
1,MPD_HC_T1_02,control,40.0,F,V01,ses-01,['anat'],HC002,sub-METALHC002
2,MPD_HC_T1_03,control,38.0,F,V01,ses-01,['anat'],HC003,sub-METALHC003
3,MPD_HC_T1_04,control,60.0,M,V01,ses-01,['anat'],HC004,sub-METALHC004
4,MPD_HC_T1_05,control,42.0,F,V01,ses-01,['anat'],HC005,sub-METALHC005


In [6]:
participants_df.tail()

Unnamed: 0,participant_id,group,age,sex,visit,session,datatype,id,bids_id
62,MPD_T1_76,PD,39.0,M,V01,ses-01,['anat'],76,sub-METAL076
63,MPD_T1_77,PD,44.0,F,V01,ses-01,['anat'],77,sub-METAL077
64,MPD_T1_78,PD,50.0,M,V01,ses-01,['anat'],78,sub-METAL078
65,MPD_T1_79,PD,50.0,M,V01,ses-01,['anat'],79,sub-METAL079
66,MPD_T1_80,PD,55.0,M,V01,ses-01,['anat'],80,sub-METAL080


In [7]:
# ### Save the manifest file
manifest_cols = ["participant_id","visit","session","datatype"]
manifest_df = participants_df[manifest_cols].copy()
manifest_file = f"{dataset_dir}/tabular/manifest.csv"
manifest_df.to_csv(manifest_file, index=False)

manifest_df.head()

Unnamed: 0,participant_id,visit,session,datatype
0,MPD_HC_T1_01,V01,ses-01,['anat']
1,MPD_HC_T1_02,V01,ses-01,['anat']
2,MPD_HC_T1_03,V01,ses-01,['anat']
3,MPD_HC_T1_04,V01,ses-01,['anat']
4,MPD_HC_T1_05,V01,ses-01,['anat']


### Generate demographics file

In [8]:
demo_cols = ["participant_id","visit","group","age","sex"]
demo_df = participants_df[demo_cols].copy()
demo_df.head()

Unnamed: 0,participant_id,visit,group,age,sex
0,MPD_HC_T1_01,V01,control,50.0,M
1,MPD_HC_T1_02,V01,control,40.0,F
2,MPD_HC_T1_03,V01,control,38.0,F
3,MPD_HC_T1_04,V01,control,60.0,M
4,MPD_HC_T1_05,V01,control,42.0,F


In [9]:
# ## Save the manifest file
# demo_file = f"{dataset_dir}/tabular/demographics/demographics.csv"
# demo_df.to_csv(demo_file, index=False)

### Create participant --> dicom_dir mapping (this is used in doughnut generation)
- Although there are no DICOMs for METAL_PD, dicom_id is used to create cleaner bids_id

In [13]:
# dicom_map_cols = ["participant_id","session","bids_id"]

# participant_dicom_dir_map_df = participants_df[dicom_map_cols].copy()
# participant_dicom_dir_map_df["dicom_id"] = participant_dicom_dir_map_df["bids_id"].str.replace("sub-","")

# participant_dicom_dir_map_file = f"{dataset_dir}/scratch/participant_id_bids_id_map.csv"
# participant_dicom_dir_map_df.to_csv(participant_dicom_dir_map_file, index=False)

# participant_dicom_dir_map_df.head()

Unnamed: 0,participant_id,session,bids_id,dicom_id
0,MPD_HC_T1_01,ses-01,sub-METALHC001,METALHC001
1,MPD_HC_T1_02,ses-01,sub-METALHC002,METALHC002
2,MPD_HC_T1_03,ses-01,sub-METALHC003,METALHC003
3,MPD_HC_T1_04,ses-01,sub-METALHC004,METALHC004
4,MPD_HC_T1_05,ses-01,sub-METALHC005,METALHC005
