## Scratch notebook to keep track of all the subjects

- QPN DICOMs (will) live on the bic DICOM server bic:/data/dicom
- from there, we will create symlinks to bic:/data/pd/qpn/dicom

In [2]:
import pandas as pd
import numpy as np

### Compare subject counts across neuropsych and imaging 

In [6]:
sheet_names = ["Parkinson patients", "Control"]
subject_id_list = []
dx_list = []

for sheet_name in sheet_names:
    print(sheet_name)

    neuropsy_df = pd.read_excel("BD_RPQ_UPDATE_Neuropsy.xlsx",sheet_name=sheet_name, engine='openpyxl',header=1)
    neuropsy_df = neuropsy_df.dropna(how='all')

    # Get rid of "/T1" from subject ID (timepoint)
    subject_ids = list(neuropsy_df["Patient #"].str.split("/",expand=True)[0].values)
    print(f"Number of participants: {len(subject_ids)}")

    subject_id_list +=subject_ids
    dx_list += list(np.tile(sheet_name,len(subject_ids)))

print(f"Total number of neuropsy subjects: {len(subject_id_list)}")
neuropsy_df.head()

Parkinson patients
Number of participants: 233
Control
Number of participants: 37
Total number of neuropsy subjects: 270


Unnamed: 0,Patient #,Administered by,Parkinson Disease or Control subject,Sex (1=men; 2=women),language,Date of assessment,Date of birth,Age at time of assessment,Patient cognitive complaint,"Education level (primary school = 6 years, high school = 12, cegep = 14, bachelor = 17, master = 19, phD = 23",...,"Normal cognition (superior or equal to 26 on MoCa, yes=1, no=2)","Subjective Complaint (yes/no) DO you have feel that you issues with your memory, judgement, concentration, planning, etc?",Functional Impact (yes/no) Do these cognitive complaints affect your daily activities?,Comments,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80
0,PD01100,Sabrina,Control,2.0,french,2018-02-23,1949-12-12,68.0,999,université,...,,,,pas de MoCA,,,,,,
1,PD01062,Sabrina,Control,2.0,english,2018-09-10,1946-07-17,72.0,No,14,...,,,,pas de MoCA,,,,,,
2,PD00952,Sabrina,Control,1.0,english,2018-09-24,1965-04-27,53.0,Yes,6,...,,,,indiqué qu'il a un impact fonctionnel,,,,,,
3,PD01232,Sabrina,Control,2.0,french,2019-04-01,1955-11-11,63.0,999,université,...,,,,,,,,,,
4,PD01289,Sabrina,Control,2.0,french,2019-04-10,1956-12-15,63.0,999,6,...,,,,,,,,,,


### Year-wise subject list
- This is grabbed from dagher8/dagher dir from BIC

In [5]:
subject_ids_yearwise = ["IDs_2018.txt","IDs_2019.txt","IDs_2020.txt","IDs_2021.txt"]
id_yearwise_list = []
year_list = []
for year in subject_ids_yearwise:
    f = f"./ID_lists/dagher/{year}"
    id_yearwise = np.squeeze(pd.read_csv(f,header=None).values)
    print(f"year: {f}, n_subjects: {len(id_yearwise)}")
    id_yearwise_list += list(id_yearwise)
    year = f.split("_")[1].split(".")[0]
    year_list += list(np.tile(year,len(id_yearwise)))

    
years_df = pd.DataFrame()
years_df["subject_id"] = id_yearwise_list
years_df["year"] = year_list

print(f"Total number of year-wise subjects: {len(years_df)}")
years_df.head()

year: ./ID_lists/dagher/IDs_2018.txt, n_subjects: 10
year: ./ID_lists/dagher/IDs_2019.txt, n_subjects: 73
year: ./ID_lists/dagher/IDs_2020.txt, n_subjects: 12
year: ./ID_lists/dagher/IDs_2021.txt, n_subjects: 11
Total number of year-wise subjects: 106


Unnamed: 0,subject_id,year
0,PD00020,lists/dagher/IDs
1,PD00119,lists/dagher/IDs
2,PD00435,lists/dagher/IDs
3,PD00457,lists/dagher/IDs
4,PD00458,lists/dagher/IDs


In [5]:
BIDS_df = pd.DataFrame()
BIDS_df["subject_id"] = subject_id_list
BIDS_df["Dx"] = dx_list

BIDS_df = pd.merge(BIDS_df,years_df,on="subject_id",how="left")
BIDS_df.head()

Unnamed: 0,subject_id,Dx,year
0,PD00209,Parkinson patients,
1,PD00119,Parkinson patients,2018.0
2,PD00820,Parkinson patients,
3,PD00262,Parkinson patients,
4,PD00523,Parkinson patients,


In [24]:
subjects_neuropsy = BIDS_df["subject_id"].values
n_subjects_neuropsy = len(subjects_neuropsy)
subjects_processed = BIDS_df[~BIDS_df["year"].isna()]
n_subjects_processed = len(subjects_processed)
subjects_TODO = BIDS_df[BIDS_df["year"].isna()]
n_subjects_TODO = len(subjects_TODO)

print(f"Number of subjects in neuropsy sheet: {n_subjects_neuropsy}\n"
      f"Number of subjects processed thru BIDS: {n_subjects_processed}\n"
      f"Number of subjects in neuropsy yet to be processed: {n_subjects_TODO}")

Number of subjects in neuropsy sheet: 270
Number of subjects processed thru BIDS: 98
Number of subjects in neuropsy yet to be processed: 172


### Compare qpn.loris imaging subject master list with BIC 
- BIC will have some duplicates due to failed acq


In [38]:
ID_list_dir = "/home/nikhil/projects/QPN_processing/metadata/ID_lists/subjects/"

# QPN/COPN LORIS
qpn_loris_imaging_subs_file = f"{ID_list_dir}QPN_loris_imaging_subject_list_14_Feb_2022.csv"
bic_data_qpn_dicom_subs_file = "qpn_dicom_v1.txt"

qpn_loris_imaging_subs_df = pd.read_csv(qpn_loris_imaging_subs_file)
qpn_loris_imaging_subs_df.head()

Unnamed: 0,Site,PSCID,DCCID,Project,Vist Label,Visit QC Status,First Acquisition,First Insertion,Last QC,New Data,Links,SessionID,Sequence Type,Pending New
0,Montreal Neurological Institute,MNI0056,864854,COPN,MRI01,,2021-08-18,2021-08-20 23:54:00,,new,native,12041,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
1,Montreal Neurological Institute,MNI0058,197308,QPN,MRI01,,2021-08-18,2021-08-20 23:28:39,,new,native,12073,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
2,Montreal Neurological Institute,MNI0068,842090,QPN,MRI01,,2021-08-27,2021-08-28 00:04:44,,new,native,12027,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
3,Montreal Neurological Institute,MNI0079,760662,COPN,MRI01,,2021-12-22,2022-01-03 14:26:44,,new,native,12070,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N
4,Montreal Neurological Institute,MNI0103,369057,COPN,MRI01,,2021-11-08,2021-11-19 23:13:54,,new,native,12069,"2DFLAIRFS,3DT1,BOLDRSgrefieldmappingTE1,BOLDRS...",N


In [39]:
bic_data_qpn_dicom_subs_df = pd.read_csv(bic_data_qpn_dicom_subs_file, header=None)[0].str.split("_",expand=True)
cols = ["PSCID","DCCID","Vist Label","Site","First Acquisition","Unknown Col"]
bic_data_qpn_dicom_subs_df.columns = cols

bic_data_qpn_dicom_subs_df["DCCID"] = bic_data_qpn_dicom_subs_df["DCCID"].astype("int64")

bic_data_qpn_dicom_subs_df

Unnamed: 0,PSCID,DCCID,Vist Label,Site,First Acquisition,Unknown Col
0,MNI0056,864854,MRI01,MNI,20210818,151510608
1,MNI0058,197308,MRI01,MNI,20210818,105219098
2,MNI0068,842090,MRI01,MNI,20210827,150412426
3,MNI0103,369057,MRI01,MNI,20211116,132143505
4,MNI0109,584299,MRI01,MNI,20210924,135512466
...,...,...,...,...,...,...
199,PD75,20210730,134904606,,,
200,PD76,20211104,094045499,,,
201,PD77,20211202,110105862,,,
202,PD78,20211122,132024191,,,


In [42]:
image_proc_visit_01_df = pd.merge(qpn_loris_imaging_subs_df[cols[:5]], bic_data_qpn_dicom_subs_df, on=["PSCID","DCCID","Vist Label"], how="left")

In [62]:
image_proc_visit_01_df["dicom_name"] = image_proc_visit_01_df["PSCID"] + "_" + image_proc_visit_01_df["DCCID"].astype(str) + "_" + \
    image_proc_visit_01_df["Vist Label"] + "_MNI"

image_proc_visit_01_df["bids_name"] = image_proc_visit_01_df["PSCID"] + "D" + image_proc_visit_01_df["DCCID"].astype(str) 

image_proc_visit_01_df.head()

Unnamed: 0,PSCID,DCCID,Vist Label,Site_x,First Acquisition_x,Site_y,First Acquisition_y,Unknown Col,dicom_name,bids_name
0,MNI0056,864854,MRI01,Montreal Neurological Institute,2021-08-18,MNI,20210818.0,151510608.0,MNI0056_864854_MRI01_MNI,MNI0056D864854
1,MNI0058,197308,MRI01,Montreal Neurological Institute,2021-08-18,MNI,20210818.0,105219098.0,MNI0058_197308_MRI01_MNI,MNI0058D197308
2,MNI0068,842090,MRI01,Montreal Neurological Institute,2021-08-27,MNI,20210827.0,150412426.0,MNI0068_842090_MRI01_MNI,MNI0068D842090
3,MNI0079,760662,MRI01,Montreal Neurological Institute,2021-12-22,,,,MNI0079_760662_MRI01_MNI,MNI0079D760662
4,MNI0103,369057,MRI01,Montreal Neurological Institute,2021-11-08,MNI,20211116.0,132143505.0,MNI0103_369057_MRI01_MNI,MNI0103D369057


## Filter out missing / bad subjects
- These are listed in: /home/nikhil/projects/QPN_processing/bids/empty_dicom_dirs.txt


In [68]:
!cat /home/nikhil/projects/QPN_processing/bids/empty_dicom_dirs.txt

/data/dicom/PD01746_898695_MRI01_MNI_20210702_135421425
/data/dicom/PD01713_406442_MRI01_MNI_20210630_135929223
/data/dicom/PD01674_700552_MRI01_MNI_20210707_134910496
/data/dicom/PD01626_466737_MRI01_MNI_20210709_125654756
/data/dicom/PD00953_971810_MRI01_MNI_20210630_103326275


In [71]:
remove_subjects = ["PD01746","PD01713","PD01674","PD01626","PD00953"]
image_proc_visit_01_df = image_proc_visit_01_df[~image_proc_visit_01_df["PSCID"].isin(remove_subjects)]

In [79]:
# image_proc_visit_01_df[["dicom_name","bids_name"]].reset_index(drop=True).to_csv("qpn_imaging_subject_list_v1.csv")

### PyBIDS tests

In [2]:
from bids import BIDSLayout
from bids.tests import get_test_data_path

In [3]:
data_path = "/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test"
layout = BIDSLayout(data_path)



In [16]:
layout.get_fieldmap("/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/func/sub-PD01369D713546_ses-01_task-rest_run-1_bold.nii.gz",
return_list=True)
# layout.get(subject='PD01369D713546', extension='nii.gz', return_type='filename')

[{'epi': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_dir-AP_run-1_epi.nii.gz',
  'suffix': 'epi'},
 {'epi': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_dir-PA_run-1_epi.nii.gz',
  'suffix': 'epi'},
 {'phasediff': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_phasediff.nii.gz',
  'magnitude1': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_magnitude1.nii.gz',
  'suffix': 'phasediff',
  'magnitude2': '/home/nikhil/projects/QPN_processing/tmp/BIDS_dir_test/sub-PD01369D713546/ses-01/fmap/sub-PD01369D713546_ses-01_run-1_magnitude2.nii.gz'}]