In [1]:
from aidream_data.core import AidreamDatabase
from aidream_data import constants

import pandas as pd
import numpy as np


In [2]:
def sort_name_list(list_patients, prefix="AIDREAM_"):
    """Return the patient identifiers sorted by their numeric suffix.

    Identifiers are expected to look like ``"<prefix><number>"`` (for example
    ``"AIDREAM_12"``). They are ordered numerically, so ``AIDREAM_2`` comes
    before ``AIDREAM_10``, which a plain lexicographic sort would not give.

    Args:
        list_patients: iterable of identifier strings.
        prefix: literal prefix removed from each identifier before it is
            converted to an integer. Identifiers that do not start with it
            are converted as they are.

    Returns:
        A new list holding the same elements in ascending numeric order.
        Identifiers that map to the same number keep their input order.

    Raises:
        ValueError: if what remains after removing the prefix is not an integer.
    """

    def _patient_number(patient_id):
        # Remove the prefix explicitly: str.strip(chars) treats its argument as
        # a set of characters trimmed from both ends, not as a literal prefix,
        # and would silently accept malformed identifiers.
        if patient_id.startswith(prefix):
            patient_id = patient_id[len(prefix):]
        return int(patient_id)

    # sorted() is stable and returns a new list, so the input is left untouched.
    return sorted(list_patients, key=_patient_number)
    

# 1. Determine the list of AIDREAM perfusion patients

In [3]:
# Instantiate the AIDREAM database handle and call its connect() method.
# NOTE(review): connect() presumably opens the session that the later
# get_clinical_data() / get_cohort_info() calls rely on — confirm in aidream_data.core.
aidream_db = AidreamDatabase()
aidream_db.connect()


In [4]:
# Fetch the AIDREAM clinical data spreadsheet from the database handle.
df_clinical_data = aidream_db.get_clinical_data()

# Perfusion patients according to clinical_data: ids of the rows whose
# "avec_perf" value equals the string "TRUE", ordered by patient number.
# NOTE(review): the filter matches the literal string "TRUE" — confirm the
# column is not parsed as booleans.
list_patients_per_clinical_data = sort_name_list(
    df_clinical_data.loc[df_clinical_data["avec_perf"] == "TRUE", 'id_aidream'].tolist()
)


In [5]:
# Fetch the AIDREAM cohort info spreadsheet (two-level column header).
df_cohort_info = aidream_db.get_cohort_info()

# Perfusion patients according to cohort_info: ids of the rows whose
# "Perfusion?" value is exactly "oui", ordered by patient number.
# Any other spelling of that value is not matched by this equality test.
list_patients_per_cohort_info = sort_name_list(
    df_cohort_info['patient id']['patient id']
    .loc[df_cohort_info['']['Perfusion?'] == "oui"]
    .tolist()
)


In [9]:
# Patients flagged for perfusion in cohort_info but not in clinical_data.
# The list comes from a set, so its order is arbitrary.
cohort_info_diff_clinical_data = list(
    set(list_patients_per_cohort_info) - set(list_patients_per_clinical_data)
)

# Show the clinical_data rows of these patients (cell output).
df_clinical_data[df_clinical_data['id_aidream'].isin(cohort_info_diff_clinical_data)]


Unnamed: 0,id_aidream,Centre,Cohorte,id_cercare,IPP,AIDREAM_ID_300pat,avec_perf,IRM_preRT_ax,IRM_rechute_ax,Reperes_preRT,...,recidive_distance,recidive_distance_date,recidive_infield_distant,CT_date,Dosi_date,IRM__preRT_date,IRM__rechute_date,delai_IRMrechute_IRMpreRT_mois,delai_irmrechute_RTj1_jours,PTV_volume_total_cm3
161,AIDREAM_248,IGR,new400pat,AIDREAM_248,8003022898.0,300pat,False,nonAx,à axaliser,GE_repNATIV,...,na,na,na,7/6/2018,07/06/2018,13/06/2018,16/01/2019,7,208,198
209,AIDREAM_303,IGR,new400pat,AIDREAM_303,8014669160.0,AIDREAM_303,False,à axaliser,à axaliser,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,12/11/2020,12/11/2020,20/10/2020,20/01/2021,3,62,337
242,AIDREAM_334,IGR,new400pat,AIDREAM_334,,AIDREAM_334,False,à axaliser,nonAx,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,18/03/2019,18/03/2019,21/02/2019,07/12/2019,9,256,302
243,AIDREAM_335,IGR,new400pat,AIDREAM_335,,AIDREAM_335,False,à axaliser,nonAx,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,21/03/2019,21/03/2019,22/02/2019,13/05/2020,15,408,na
247,AIDREAM_339,IGR,new400pat,AIDREAM_339,,AIDREAM_339,False,à axaliser,à axaliser,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,06/09/2019,06/09/2019,17/08/2019,08/09/2020,13,357,564


In [11]:
# Patients flagged for perfusion in clinical_data but not in cohort_info.
# The list comes from a set, so its order is arbitrary.
clinical_data_diff_cohort_info = list(
    set(list_patients_per_clinical_data) - set(list_patients_per_cohort_info)
)

# Show the cohort_info rows of these patients (cell output).
df_cohort_info[df_cohort_info['patient id']['patient id'].isin(clinical_data_diff_cohort_info)]


Unnamed: 0_level_0,patient id,id,Center,Cohort,Unnamed: 5_level_0,Data at Thera,Data at Thera,Data at Thera,Data at Thera,Unnamed: 10_level_0,...,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd
Unnamed: 0_level_1,patient id,Unnamed: 2_level_1,Ce,Co,Perfusion?,CT&MR,Reg?,GTV in RTSS,CTV in RTSS,RTSS zones,...,Vox_X,Vox_Y,Vox_Z,PixelBandwidth_mr,Manufacturer_mr,ModelName_mr,MagneticField_mr,EchoNumbers_mr,EchoTime_mr,EchoTrainLength_mr
80,AIDREAM_134,134,exclu;img manquantes,xMMI_PROB_166pat,xoui,OK,OK,OK,OK,OK,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.892,1.0
84,AIDREAM_138,138,exclu;img manquantes,xMMI_PROB_166pat,xoui,OK,NO (exclu),OK,OK,OK,...,,,,,,,,,,
102,AIDREAM_154,154,IGR,MMI_PROB_166pat,xoui,OK,,NO,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.904,1.0
193,AIDREAM_236,236,IGR,xxMMI_PROB_166pat,xoui,OK,OK,OK,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.904,1.0
204,AIDREAM_246,246,exclu;img manquantes,xMMI_PROB,xoui,OK,OK,OK,OK,OK,...,0.5078,0.5078,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.992,1.0
663,AIDREAM_66,66,IGR,xxMMI_PROB_166pat,xoui,OK,,NO,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.892,1.0


In [17]:
# Option: define the perfusion cohort as the patients flagged in BOTH
# cohort_info and clinical_data, ordered by patient number.
list_patients = sort_name_list(
    list(set(list_patients_per_clinical_data).intersection(list_patients_per_cohort_info))
)

# Report the size of this cohort.
print(fr"the Number of Perfusion Patients (per intersection of cohort_info & clinical_data): {len(list_patients)}")


the Number of Perfusion Patients (per intersection of cohort_info & clinical_data): 229


In [22]:
# Option: define the perfusion cohort from cohort_info alone.
# This rebinds `list_patients`, replacing any value assigned to it earlier.
list_patients = sort_name_list(list_patients_per_cohort_info)

# Report the size of this cohort.
print(fr"the Number of Perfusion Patients (per cohort_info): {len(list_patients)}")


the Number of Perfusion Patients (per cohort_info): 234


In [25]:
# Write the current `list_patients` to a text file in the working directory,
# one identifier per line (whichever definition of the list was run last).
with open("list_patients.txt", 'w') as f:
    f.write("".join(f"{p}\n" for p in list_patients))
    

# 2. Describe the dataset per 