## Scratch notebook to keep track of all the subjects

- QPN DICOMs (will) live on the bic DICOM server bic:/data/dicom
- from there, we will create symlinks to bic:/data/pd/qpn/dicom

In [1]:
import pandas as pd
import numpy as np

### Compare subject counts across neuropsych and imaging 

In [2]:
sheet_names = ["Parkinson patients", "Control"]
subject_id_list = []
dx_list = []

demographics_file = "/home/nikhil/projects/QPN_processing/clinical/BD_RPQ_UPDATE_Neuropsy_2_June_2022.xlsx"

neuropsy_df_concat = pd.DataFrame()

for sheet_name in sheet_names:
    print(sheet_name)

    neuropsy_df = pd.read_excel(demographics_file,sheet_name=sheet_name, engine='openpyxl',header=1)
    neuropsy_df = neuropsy_df.dropna(how='all')

    # Get rid of "/T1" from subject ID (timepoint)
    neuropsy_df["Patient #"] = neuropsy_df["Patient #"].str.split("/",expand=True)[0]
    subject_ids = list(neuropsy_df["Patient #"].values)
    print(f"Number of participants: {len(subject_ids)}")

    subject_id_list +=subject_ids
    dx_list += list(np.tile(sheet_name,len(subject_ids)))

    neuropsy_df_concat = neuropsy_df_concat.append(neuropsy_df)

print(f"Total number of neuropsy subjects: {len(subject_id_list)}")
neuropsy_df_concat.head()

Parkinson patients
Number of participants: 292
Control
Number of participants: 46
Total number of neuropsy subjects: 338


Unnamed: 0,Patient #,Administered by,Parkinson Disease or Control subject,Sex (1=men; 2=women),Language,Date of assessment,Date of birth,Age at time of assessment,Date of apparition of first symptom,Date of diagnosis,...,"Stroop - D-Kefs, COLORS (condition 1): Time (sec) (Raw score)",BNT sans indices,"Subjective Complaint (yes/no) DO you have feel that you issues with your memory, judgement, concentration, planning, etc?",Comments,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80
0,PD00209,Sabrina,Parkinson,1.0,french,no record,21/05/1960,59.0,,2013,...,,,,,,,,,,
1,PD00119,Erika,Parkinson,1.0,english,2017-07-12 00:00:00,12/11/1951,66.0,,2008,...,,,,,,,,,,
2,PD00820,Erika,Parkinson,1.0,french,2017-08-01 00:00:00,20/08/1947,69.0,,2009,...,,,,,,,,,,
3,PD00262,Erika,Parkinson,2.0,999,2017-08-12 00:00:00,26/07/1947,71.0,2011.0,2011,...,,,,,,,,,,
4,PD00523,Erika,Parkinson,2.0,999,2017-12-14 00:00:00,01/10/1933,84.0,,999,...,,,,,,,,,,


In [5]:
neuropsy_df_concat[neuropsy_df_concat["Patient #"]=="PD00215"]

Unnamed: 0,Patient #,Administered by,Parkinson Disease or Control subject,Sex (1=men; 2=women),Language,Date of assessment,Date of birth,Age at time of assessment,Date of apparition of first symptom,Date of diagnosis,...,"Stroop - D-Kefs, COLORS (condition 1): Time (sec) (Raw score)",BNT sans indices,"Subjective Complaint (yes/no) DO you have feel that you issues with your memory, judgement, concentration, planning, etc?",Comments,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80
34,PD00215,Sabrina,Parkinson,1.0,french,2019-04-10 00:00:00,1971-05-22 00:00:00,48.0,2012,2014,...,,,,,,,,,,
247,PD00215,Marie,Parkinson,1.0,french,2022-03-11 00:00:00,1971-05-22 00:00:00,50.0,2012,2014,...,,,,,,,,,,


In [3]:
save_csv = False

partcipants_df = neuropsy_df_concat[["Patient #","Parkinson Disease or Control subject","Sex (1=men; 2=women)","Age at time of assessment"]].copy()

partcipants_df = partcipants_df.rename(columns={"Patient #":"participant_id",
                                                "Parkinson Disease or Control subject":"group",
                                                "Sex (1=men; 2=women)":"sex",
                                                "Age at time of assessment":"age"})

partcipants_df = partcipants_df.dropna(how="all")

partcipants_df['participant_id'] = partcipants_df['participant_id'].str.strip()
partcipants_df["age"] = np.round(partcipants_df["age"],1)
partcipants_df["sex"] = partcipants_df["sex"].replace({1:"M",2:"F"})

partcipants_df = partcipants_df[["participant_id","age","sex","group"]]
partcipants_df = partcipants_df.sort_values(by=["participant_id","age"])

# Keep only unique participant ids (age refers to the baseline visit)
print(f"number of participants entries (includes duplicates from multiple visits): {len(partcipants_df)}")
partcipants_df = partcipants_df.drop_duplicates(subset=["participant_id"],keep="first")
print(f"number of unique participants: {len(partcipants_df)}")

if save_csv:
    partcipants_df.to_csv("../metadata/participants.csv", index=False)

partcipants_df.head()


number of participants entries (includes duplicates from multiple visits): 319
number of unique participants: 296


Unnamed: 0,participant_id,age,sex,group
174,MNI0056,79.0,M,Parkinson
183,MNI0058,57.0,M,Parkinson
175,MNI0059,63.0,M,Parkinson
182,MNI0068,69.0,M,Parkinson
227,MNI0079,50.0,F,Parkinson


### Compare qpn.loris imaging subject master list with BIC 
- BIC will have some duplicates due to failed acq


In [38]:
ID_list_dir = "/home/nikhil/projects/QPN_processing/metadata/ID_lists/subjects/"

# QPN/COPN LORIS
qpn_loris_imaging_subs_file = f"{ID_list_dir}QPN_loris_imaging_subject_list_14_Feb_2022.csv"
bic_data_qpn_dicom_subs_file = "qpn_dicom_v1.txt"

qpn_loris_imaging_subs_df = pd.read_csv(qpn_loris_imaging_subs_file)
qpn_loris_imaging_subs_df.head()

Unnamed: 0,Site,PSCID,DCCID,Project,Vist Label,Visit QC Status,First Acquisition,First Insertion,Last QC,New Data,Links,SessionID,Sequence Type,Pending New
0,Montreal Neurological Institute,MNI0056,864854,COPN,MRI01,,2021-08-18,2021-08-20 23:54:00,,new,native,12041,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
1,Montreal Neurological Institute,MNI0058,197308,QPN,MRI01,,2021-08-18,2021-08-20 23:28:39,,new,native,12073,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
2,Montreal Neurological Institute,MNI0068,842090,QPN,MRI01,,2021-08-27,2021-08-28 00:04:44,,new,native,12027,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
3,Montreal Neurological Institute,MNI0079,760662,COPN,MRI01,,2021-12-22,2022-01-03 14:26:44,,new,native,12070,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
4,Montreal Neurological Institute,MNI0103,369057,COPN,MRI01,,2021-11-08,2021-11-19 23:13:54,,new,native,12069,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N


In [39]:
bic_data_qpn_dicom_subs_df = pd.read_csv(bic_data_qpn_dicom_subs_file, header=None)[0].str.split("_",expand=True)
cols = ["PSCID","DCCID","Vist Label","Site","First Acquisition","Unknown Col"]
bic_data_qpn_dicom_subs_df.columns = cols

bic_data_qpn_dicom_subs_df["DCCID"] = bic_data_qpn_dicom_subs_df["DCCID"].astype("int64")

bic_data_qpn_dicom_subs_df

Unnamed: 0,PSCID,DCCID,Vist Label,Site,First Acquisition,Unknown Col
0,MNI0056,864854,MRI01,MNI,20210818,151510608
1,MNI0058,197308,MRI01,MNI,20210818,105219098
2,MNI0068,842090,MRI01,MNI,20210827,150412426
3,MNI0103,369057,MRI01,MNI,20211116,132143505
4,MNI0109,584299,MRI01,MNI,20210924,135512466
...,...,...,...,...,...,...
199,PD75,20210730,134904606,,,
200,PD76,20211104,094045499,,,
201,PD77,20211202,110105862,,,
202,PD78,20211122,132024191,,,


In [42]:
image_proc_visit_01_df = pd.merge(qpn_loris_imaging_subs_df[cols[:5]], bic_data_qpn_dicom_subs_df, on=["PSCID","DCCID","Vist Label"], how="left")

In [62]:
image_proc_visit_01_df["dicom_name"] = image_proc_visit_01_df["PSCID"] + "_" + image_proc_visit_01_df["DCCID"].astype(str) + "_" + \
    image_proc_visit_01_df["Vist Label"] + "_MNI"

image_proc_visit_01_df["bids_name"] = image_proc_visit_01_df["PSCID"] + "D" + image_proc_visit_01_df["DCCID"].astype(str) 

image_proc_visit_01_df.head()

Unnamed: 0,PSCID,DCCID,Vist Label,Site_x,First Acquisition_x,Site_y,First Acquisition_y,Unknown Col,dicom_name,bids_name
0,MNI0056,864854,MRI01,Montreal Neurological Institute,2021-08-18,MNI,20210818.0,151510608.0,MNI0056_864854_MRI01_MNI,MNI0056D864854
1,MNI0058,197308,MRI01,Montreal Neurological Institute,2021-08-18,MNI,20210818.0,105219098.0,MNI0058_197308_MRI01_MNI,MNI0058D197308
2,MNI0068,842090,MRI01,Montreal Neurological Institute,2021-08-27,MNI,20210827.0,150412426.0,MNI0068_842090_MRI01_MNI,MNI0068D842090
3,MNI0079,760662,MRI01,Montreal Neurological Institute,2021-12-22,,,,MNI0079_760662_MRI01_MNI,MNI0079D760662
4,MNI0103,369057,MRI01,Montreal Neurological Institute,2021-11-08,MNI,20211116.0,132143505.0,MNI0103_369057_MRI01_MNI,MNI0103D369057


## Filter out missing / bad subjects
- These are listed in: /home/nikhil/projects/QPN_processing/bids/empty_dicom_dirs.txt


In [68]:
!cat /home/nikhil/projects/QPN_processing/bids/empty_dicom_dirs.txt

/data/dicom/PD01746_898695_MRI01_MNI_20210702_135421425
/data/dicom/PD01713_406442_MRI01_MNI_20210630_135929223
/data/dicom/PD01674_700552_MRI01_MNI_20210707_134910496
/data/dicom/PD01626_466737_MRI01_MNI_20210709_125654756
/data/dicom/PD00953_971810_MRI01_MNI_20210630_103326275


In [71]:
remove_subjects = ["PD01746","PD01713","PD01674","PD01626","PD00953"]
image_proc_visit_01_df = image_proc_visit_01_df[~image_proc_visit_01_df["PSCID"].isin(remove_subjects)]

In [79]:
# image_proc_visit_01_df[["dicom_name","bids_name"]].reset_index(drop=True).to_csv("qpn_imaging_subject_list_v1.csv")

### PyBIDS tests

In [2]:
from bids import BIDSLayout
from bids.tests import get_test_data_path

In [3]:
data_path = "/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test"
layout = BIDSLayout(data_path)



In [16]:
layout.get_fieldmap("/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/func/sub-PD01369D713546_ses-01_task-rest_run-1_bold.nii.gz",
return_list=True)
# layout.get(subject='PD01369D713546', extension='nii.gz', return_type='filename')

[{'epi': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_dir-AP_run-1_epi.nii.gz',
  'suffix': 'epi'},
 {'epi': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_dir-PA_run-1_epi.nii.gz',
  'suffix': 'epi'},
 {'phasediff': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_phasediff.nii.gz',
  'magnitude1': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_magnitude1.nii.gz',
  'suffix': 'phasediff',
  'magnitude2': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_magnitude2.nii.gz'}]