## Notebook to keep track of tabular data
- Raw tabular data is here
    - `/home/nikhil/projects/Parkinsons/QPN_processing/tabular` 
        - `release_<>/QPN_Imaging_Codes.xlsx`: UPDRS, MoCA, Neuropsych etc
- Currently collating and tracking following assessments:
    - UPDRS
    - MoCA


In [1]:
import pandas as pd
import numpy as np

In [61]:
tabular_dir = "/home/nikhil/projects/Parkinsons/QPN_processing/tabular/"

current_release = "release_Nov2022"

# Current mr_proc manifest
current_mr_proc_manifest_csv = f"{tabular_dir}{current_release}/mr_proc_manifest.csv"

# Current PD-assessment CSV
clinical_assessment_xls = f"{tabular_dir}/{current_release}/QPN_Imaging_Codes.xlsx"

# NeuroBagel CSV
bagel_csv = f"{tabular_dir}/{current_release}/clinical_scores_summary.csv"


### Read mr_proc manifest

In [54]:
current_mr_proc_manifest_df = pd.read_csv(current_mr_proc_manifest_csv)
n_participants = len(current_mr_proc_manifest_df["participant_id"].unique())
print(f"n_participants: {n_participants}")
current_mr_proc_manifest_df.head()

n_participants: 208


Unnamed: 0,participant_id,sex,dob,group,visit_01,visit_02,bids_id
0,MNI0056,M,1942-05-21,PD,2021-08-18,,sub-MNI0056D864854
1,MNI0058,M,1964-03-14,PD,2021-08-18,,sub-MNI0058D197308
2,MNI0068,M,1952-08-05,PD,2021-08-27,,sub-MNI0068D842090
3,MNI0079,F,1971-11-25,PD,2021-12-22,,sub-MNI0079D760662
4,MNI0103,M,1939-08-17,PD,2021-08-11,,sub-MNI0103D369057


### Read UPDRS and MoCA for imaging subjects

In [25]:
MoCA_df =  pd.read_excel(clinical_assessment_xls,sheet_name="MoCA", engine='openpyxl')
MoCA_df.head()

Unnamed: 0,participant_id,delete,delete.1,moca_date,moca_vse,moca_nam,moca_att,moca_lang,moca_abs,moca_del,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,MNI0056,,T1,11-06-2021,2.0,3.0,4.0,1.0,2.0,3.0,...,,,,,,,,,,
1,MNI0058,,T1,23-07-2021,4.0,3.0,5.0,2.0,0.0,4.0,...,,,,,,,,,,
2,MNI0079,,,,,,,,,,...,,,,,,,,,,
3,MNI0079,,T2,22-12-2021,2.0,3.0,5.0,3.0,2.0,5.0,...,,,,,,,,,,
4,MNI0103,,T1,08-11-2021,1.0,3.0,5.0,2.0,1.0,2.0,...,,,,,,,,,,


In [26]:
UPDRS_df =  pd.read_excel(clinical_assessment_xls,sheet_name="MDS-UPDRS", engine='openpyxl')
UPDRS_df.head()

Unnamed: 0,participant_id,updrs_date,updrs_staff,updrs_in_person,updrs_1_a,updrs_1_1,updrs_1_2,updrs_1_3,updrs_1_4,updrs_1_5,...,updrs_4_1,updrs_4_2,updrs_4_3,updrs_4_4,updrs_4_5,updrs_4_6,updrs_1_total,updrs_2_total,updrs_3_total,updrs_4_total
0,record_id,Date,Saff,Method: 1= virtually; 2= in-person,Source of information1=Patient;\n 2=Caregiver...,Cognitive impairment,Hallucination and psychosis,Depressed mood,Anxious mood,Apathy,...,Time spent w ith dyskinesias,Functional impact of dyskinesias,Time spent in the OFF state,Functional impact of fluctuations,Complexity of motor fluctuations,Painful OFF-state dystonia,Total_score_part_1,Total_score_part_2,Total_score_part_3,Total_score_part_4
1,MNI0056,2021-11-06 00:00:00,Sarah Bogard,2,1,0,0,0,1,0,...,0,0,0,0,0,0,12,22,58,0
2,MNI0058,23-07-2021,Sarah Bogard,2,1,0,0,0,0,0,...,0,0,0,0,0,0,0,6,26,0
3,MNI0068,27-08-2021,Sarah Bogard,2,3,2,0,1,1,0,...,0,0,0,0,0,0,11,8,28,0
4,MNI0079,21-01-2022,Roozbeh Sattari,2,1,0,0,0,1,0,...,1,1,1,2,1,2,13,10,22,8


### Merge assessments into NeuroBagel format
- MoCA column(s): [`moca_total`]
- UPDRS column(s): [`updrs_1_total`,`updrs_2_total`,`updrs_3_total`,`updrs_4_total`]

In [50]:
moca_cols = ["moca_total"]
updrs_cols = ["updrs_1_total","updrs_2_total","updrs_3_total","updrs_4_total"]

bagel_df = pd.merge(current_mr_proc_manifest_df["participant_id"],MoCA_df[["participant_id"] + moca_cols],
                    on="participant_id",how="left")
                    
bagel_df = pd.merge(bagel_df,UPDRS_df[["participant_id"] + updrs_cols],
                      on="participant_id",how="left")

bagel_df.head()

Unnamed: 0,participant_id,moca_total,updrs_1_total,updrs_2_total,updrs_3_total,updrs_4_total
0,MNI0056,21.0,12,22,58,0
1,MNI0058,24.0,0,6,26,0
2,MNI0068,,11,8,28,0
3,MNI0079,,13,10,22,8
4,MNI0079,26.0,13,10,22,8


### Assign availability status per assessment column

In [56]:
print(f"n_mr_proc_participants: {n_participants}")

for col in moca_cols + updrs_cols:
    bagel_df[f"{col}_status"] = ~bagel_df[col].isna()
    n_available_participants = np.sum(bagel_df[f"{col}_status"])
    print(f"{col}, n_available_participants: {n_available_participants}")

bagel_df.head()

n_mr_proc_participants: 208
moca_total, n_available_participants: 178
updrs_1_total, n_available_participants: 108
updrs_2_total, n_available_participants: 108
updrs_3_total, n_available_participants: 114
updrs_4_total, n_available_participants: 109


Unnamed: 0,participant_id,moca_total,updrs_1_total,updrs_2_total,updrs_3_total,updrs_4_total,moca_total_status,updrs_1_total_status,updrs_2_total_status,updrs_3_total_status,updrs_4_total_status
0,MNI0056,21.0,12,22,58,0,True,True,True,True,True
1,MNI0058,24.0,0,6,26,0,True,True,True,True,True
2,MNI0068,,11,8,28,0,False,True,True,True,True
3,MNI0079,,13,10,22,8,False,True,True,True,True
4,MNI0079,26.0,13,10,22,8,True,True,True,True,True


### Save bagel_df

In [62]:
save_bagel = False

print(f"Save path: {bagel_csv}")
if save_bagel:
    bagel_df.to_csv(bagel_csv)

Save path: /home/nikhil/projects/Parkinsons/QPN_processing/tabular//release_Nov2022/clinical_scores_summary.csv
