In [1]:
from aidream_data.core import AidreamDatabase
from aidream_data import constants

import pandas as pd
import numpy as np


In [2]:
def sort_name_list(list_patients):
    """Return the patient IDs ordered by their numeric suffix.

    IDs are expected to look like ``"AIDREAM_<number>"``. Sorting on the
    integer part puts ``AIDREAM_2`` before ``AIDREAM_10``, which a plain
    string sort would not.

    Parameters
    ----------
    list_patients : iterable of str
        Patient identifiers.

    Returns
    -------
    list of str
        A new list holding the same identifiers in ascending numeric order;
        the input is not modified.

    Raises
    ------
    ValueError
        If what follows the ``AIDREAM_`` prefix is not an integer.
    """
    prefix = "AIDREAM_"

    def _patient_number(patient_id):
        # str.strip("AIDREAM_") treats its argument as a *set of characters*
        # to trim from both ends, not as a prefix, so drop the prefix explicitly.
        if patient_id.startswith(prefix):
            patient_id = patient_id[len(prefix):]
        return int(patient_id)

    return sorted(list_patients, key=_patient_number)
    

# 1 - Determine the list of AIDREAM perfusion patients

In [3]:
# Load the AIDREAM database: create the project's database wrapper and open its
# connection. The spreadsheet loaders in the following cells are called on this object.
# NOTE(review): connect() takes no arguments here — presumably the connection
# settings come from the aidream_data package configuration; confirm.
aidream_db = AidreamDatabase()
aidream_db.connect()


In [4]:
# Fetch the clinical-data spreadsheet, then collect the IDs of the rows whose
# "avec_perf" flag is exactly the string "TRUE", ordered by patient number.
# NOTE(review): the frame displayed in cell 6 shows `False` in this column for
# other rows, so the flag is matched on one specific spelling — confirm that no
# other "true" spelling occurs in the spreadsheet.
df_clinical_data = aidream_db.get_clinical_data()
list_patients_per_clinical_data = sort_name_list(
    df_clinical_data.loc[df_clinical_data["avec_perf"] == "TRUE", "id_aidream"].tolist()
)


In [5]:
# Fetch the cohort-info spreadsheet (two-level column headers) and collect the
# IDs of the rows whose 'Perfusion?' cell is exactly "oui", ordered by patient
# number. Other spellings (the displayed rows in cell 7 show "xoui") are not selected.
df_cohort_info = aidream_db.get_cohort_info()
list_patients_per_cohort_info = sort_name_list(
    df_cohort_info['patient id']['patient id']
    .loc[df_cohort_info['']['Perfusion?'] == "oui"]
    .tolist()
)


In [6]:
# Patients flagged as perfusion in cohort_info but not in clinical_data.
# The set difference carries no ordering; it is only used for membership below.
cohort_info_diff_clinical_data = list(
    set(list_patients_per_cohort_info) - set(list_patients_per_clinical_data)
)

# Display the clinical-data rows of those patients to inspect why they disagree.
df_clinical_data[df_clinical_data["id_aidream"].isin(cohort_info_diff_clinical_data)]


Unnamed: 0,id_aidream,Centre,Cohorte,id_cercare,IPP,AIDREAM_ID_300pat,avec_perf,IRM_preRT_ax,IRM_rechute_ax,Reperes_preRT,...,recidive_distance,recidive_distance_date,recidive_infield_distant,CT_date,Dosi_date,IRM__preRT_date,IRM__rechute_date,delai_IRMrechute_IRMpreRT_mois,delai_irmrechute_RTj1_jours,PTV_volume_total_cm3
161,AIDREAM_248,IGR,new400pat,AIDREAM_248,8003022898.0,300pat,False,nonAx,à axaliser,GE_repNATIV,...,na,na,na,7/6/2018,07/06/2018,13/06/2018,16/01/2019,7,208,198
209,AIDREAM_303,IGR,new400pat,AIDREAM_303,8014669160.0,AIDREAM_303,False,à axaliser,à axaliser,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,12/11/2020,12/11/2020,20/10/2020,20/01/2021,3,62,337
242,AIDREAM_334,IGR,new400pat,AIDREAM_334,,AIDREAM_334,False,à axaliser,nonAx,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,18/03/2019,18/03/2019,21/02/2019,07/12/2019,9,256,302
243,AIDREAM_335,IGR,new400pat,AIDREAM_335,,AIDREAM_335,False,à axaliser,nonAx,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,21/03/2019,21/03/2019,22/02/2019,13/05/2020,15,408,na
247,AIDREAM_339,IGR,new400pat,AIDREAM_339,,AIDREAM_339,False,à axaliser,à axaliser,GE_repNativAXIALISEDnew_surAppliGE,...,na,na,na,06/09/2019,06/09/2019,17/08/2019,08/09/2020,13,357,564


In [7]:
# Patients flagged as perfusion in clinical_data but not in cohort_info.
# The set difference carries no ordering; it is only used for membership below.
clinical_data_diff_cohort_info = list(
    set(list_patients_per_clinical_data) - set(list_patients_per_cohort_info)
)

# Display the cohort-info rows of those patients to inspect why they disagree.
df_cohort_info[df_cohort_info['patient id']['patient id'].isin(clinical_data_diff_cohort_info)]


Unnamed: 0_level_0,patient id,id,Center,Cohort,Unnamed: 5_level_0,Data at Thera,Data at Thera,Data at Thera,Data at Thera,Unnamed: 10_level_0,...,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd,Technical characteristics - preRT T1gd
Unnamed: 0_level_1,patient id,Unnamed: 2_level_1,Ce,Co,Perfusion?,CT&MR,Reg?,GTV in RTSS,CTV in RTSS,RTSS zones,...,Vox_X,Vox_Y,Vox_Z,PixelBandwidth_mr,Manufacturer_mr,ModelName_mr,MagneticField_mr,EchoNumbers_mr,EchoTime_mr,EchoTrainLength_mr
80,AIDREAM_134,134,exclu;img manquantes,xMMI_PROB_166pat,xoui,OK,OK,OK,OK,OK,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.892,1.0
84,AIDREAM_138,138,exclu;img manquantes,xMMI_PROB_166pat,xoui,OK,NO (exclu),OK,OK,OK,...,,,,,,,,,,
102,AIDREAM_154,154,IGR,MMI_PROB_166pat,xoui,OK,,NO,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.904,1.0
179,AIDREAM_223,223,IGR,MMI_PROB_166pat,xoui,OK,OK,OK,OK,OK - 2 RTSS,...,0.5078,0.5078,3.0,61.0547,GE MEDICAL SYSTEMS,SIGNA EXCITE,1.5,1.0,5.168,1.0
193,AIDREAM_236,236,IGR,xxMMI_PROB_166pat,xoui,OK,OK,OK,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.904,1.0
204,AIDREAM_246,246,exclu;img manquantes,xMMI_PROB,xoui,OK,OK,OK,OK,OK,...,0.5078,0.5078,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.992,1.0
663,AIDREAM_66,66,IGR,xxMMI_PROB_166pat,xoui,OK,,NO,OK,OK - 2 RTSS,...,0.4688,0.4688,1.0,139.492,GE MEDICAL SYSTEMS,DISCOVERY MR750w,3.0,1.0,2.892,1.0


In [367]:
# Option A: perfusion patients = IDs flagged in BOTH cohort_info and clinical_data.
# NOTE(review): the following cell rebinds `list_patients` to the cohort_info-only
# list, so on a top-to-bottom run this value is overwritten before it is saved.
list_patients = sort_name_list(
    list(set(list_patients_per_clinical_data).intersection(list_patients_per_cohort_info))
)

print(f"the Number of Perfusion Patients (per intersection of cohort_info & clinical_data): {len(list_patients)}")


the Number of Perfusion Patients (per intersection of cohort_info & clinical_data): 228


In [8]:
# Option B: perfusion patients = every ID flagged in cohort_info.
# sort_name_list returns a new list, so list_patients_per_cohort_info is not aliased.
list_patients = sort_name_list(list_patients_per_cohort_info)

print(f"the Number of Perfusion Patients (per cohort_info): {len(list_patients)}")


the Number of Perfusion Patients (per cohort_info): 233


In [369]:
# Persist the selected perfusion patient IDs, one ID per line.
with open("list_patients.txt", 'w') as f:
    f.writelines(f"{p}\n" for p in list_patients)
    

# 2 - Create the metadata CSV

In [174]:
# Read the perfusion patient IDs back from disk (one ID per line) and drop the
# line terminators.
with open("list_patients.txt", 'r') as f:
    list_patients = [p.strip('\n') for p in f.readlines()]
    

In [370]:
# Keep only the cohort-info rows whose patient ID is in the perfusion list.
# This rebinds df_cohort_info, so the unfiltered spreadsheet is no longer
# reachable under that name once this cell has run.
df_cohort_info = df_cohort_info.loc[df_cohort_info['patient id']['patient id'].isin(list_patients)]


In [176]:
# Two-level (group header, column header) keys of the cohort-info columns that
# are kept for the perfusion metadata, in output order.
cols = [
    ("patient id", "patient id"),
    ("Cohort", "Co"),
    ("", "Type surgery"),
    ("Multi-tumor", "Multifocal?"),
    # Columns under the 'pre-RT' group header (label order as selected originally).
    *[
        ("pre-RT", field)
        for field in (
            "Autoseg?",
            "Label5", "Label3", "Label1", "Label4", "Label2",
            "Label6", "Label7", "Label8", "Label9", "Label10",
            "Export données corrigées",
            "Obs2_preRT_CS",
            "Obs3_preRT_CV",
        )
    ],
    # Columns under the 'Relapse' group header.
    *[
        ("Relapse", field)
        for field in (
            "Autoseg?",
            "Label5", "Label3", "Label1", "Label4", "Label2",
            "Label6", "Label7", "Label8", "Label9", "Label10",
            "OBS2_relapse_CS",
            "OBS3_relapse_CV",
        )
    ],
    ("Sex", "Sex"),
    ("Age at diag", "Age at diag"),
    ("IDH muted", "IDH muted"),
    ("MGMT methylation", "MGMT methylation"),
    ("Center", "Anatomic pathology/surgery"),
    # Columns under the 'RT treatment characteristics' group header.
    *[
        ("RT treatment characteristics", field)
        for field in (
            "RT_dose_prescrite_Gy",
            "RT_nr_fractions",
            "RT_dose_fraction_Gy",
            "RT_etalement_jours",
            "RT_Technique",
        )
    ],
]


In [177]:
# Restrict the cohort-info frame to the (group, column) keys listed in `cols`,
# in that order. This rebinds df_cohort_info to the narrowed frame.
df_cohort_info = df_cohort_info[cols]

In [178]:
# Maps each two-level cohort-info column key to the flat column name used in the
# exported CSVs. Insertion order defines the column order of the output frame.
# NOTE(review): 'pre_RT Obs3_preRt_CV' is spelled with a lowercase 't', unlike
# its Obs2 sibling; it is kept as is because it is an exported column name.
dict_cols = {
    ("patient id", "patient id"): "aidream_id",
    ("Cohort", "Co"): "cohort",
    ("", "Type surgery"): "surgery type",
    ("Multi-tumor", "Multifocal?"): "Multifocal ?",
    # 'pre-RT' group -> "pre_RT <name>"
    ("pre-RT", "Autoseg?"): "pre_RT Autoseg?",
    **{
        ("pre-RT", f"Label{n}"): f"pre_RT Label{n}"
        for n in (5, 3, 1, 4, 2, 6, 7, 8, 9, 10)
    },
    ("pre-RT", "Export données corrigées"): "pre_RT Export données corrigées",
    ("pre-RT", "Obs2_preRT_CS"): "pre_RT Obs2_preRT_CS",
    ("pre-RT", "Obs3_preRT_CV"): "pre_RT Obs3_preRt_CV",
    # 'Relapse' group -> "Rechute <name>"
    ("Relapse", "Autoseg?"): "Rechute Autoseg?",
    **{
        ("Relapse", f"Label{n}"): f"Rechute Label{n}"
        for n in (5, 3, 1, 4, 2, 6, 7, 8, 9, 10)
    },
    ("Relapse", "OBS2_relapse_CS"): "Rechute Obs2_relapse_CS",
    ("Relapse", "OBS3_relapse_CV"): "Rechute Obs3_relapse_CV",
    # Columns whose flat name equals the second-level header.
    ("Sex", "Sex"): "Sex",
    ("Age at diag", "Age at diag"): "Age at diag",
    ("IDH muted", "IDH muted"): "IDH muted",
    ("MGMT methylation", "MGMT methylation"): "MGMT methylation",
    ("Center", "Anatomic pathology/surgery"): "Anatomic pathology/surgery",
    **{
        ("RT treatment characteristics", name): name
        for name in (
            "RT_dose_prescrite_Gy",
            "RT_nr_fractions",
            "RT_dose_fraction_Gy",
            "RT_etalement_jours",
            "RT_Technique",
        )
    },
}


In [179]:
# Renumber the rows 0..n-1 now that the frame has been filtered to the perfusion patients.
df_cohort_info = df_cohort_info.reset_index(drop=True)

# Empty frame carrying the flat (single-level) output column names; it is
# filled column by column in the next cell.
df_cohort_info_new = pd.DataFrame(columns=dict_cols.values())


In [180]:
# Copy every selected two-level cohort-info column into the flat frame under
# its new single-level name (mapping defined in dict_cols).
for col, new_col in dict_cols.items():

    df_cohort_info_new[new_col] = df_cohort_info[col]


In [181]:
# Export the flattened cohort-info subset without the row index.
# At this point the ID column is still named "aidream_id" (it is renamed later, before the merge).
df_cohort_info_new.to_csv("cohort_info_perfusion.csv", index=False)


In [182]:
# Keep only the clinical-data rows of the perfusion patients and renumber them from 0.
# This rebinds df_clinical_data to the filtered frame.
df_clinical_data = df_clinical_data.loc[df_clinical_data['id_aidream'].isin(list_patients)].reset_index(drop=True)

In [183]:
# Clinical-data columns carried into the metadata table.
# Note: this rebinds `cols`, which earlier held the cohort-info (group, column) tuples.
cols = ['id_aidream', 'id_cercare', 'IRM_preRT_ax', 'IRM_rechute_ax', 'deces', 'TYPE_CHIR(Bx=0_STR=1_GTR=2)', 'delai_irmrechute_RTj1_jours', 'PTV_volume_total_cm3']

In [184]:
# Column subset of the filtered clinical data, in the order given by `cols`.
df_clinical_data_new = df_clinical_data[cols]

In [185]:
# Export the clinical-data subset built in the previous cell, without the row index.
# Fix: the cohort-info frame used to be written here, which produced a duplicate of
# cohort_info_perfusion.csv under the clinical-data file name.
df_clinical_data_new.to_csv("clinical_data_perfusion.csv", index=False)


In [190]:
# Align the ID column name with the clinical data so both frames can be merged on "id_aidream".
# The rename mutates df_cohort_info_new in place; the cohort_info_perfusion.csv
# file written earlier keeps the old "aidream_id" header.
df_cohort_info_new.rename(columns={"aidream_id": "id_aidream"}, inplace=True)

In [192]:
# Left-join the clinical columns onto the cohort-info rows: every cohort-info
# patient is kept, whether or not a clinical-data row matches its ID.
df_metadata = pd.merge(
    df_cohort_info_new,
    df_clinical_data_new,
    how="left",
    on="id_aidream",
)

# Persist the full perfusion metadata table without the row index.
df_metadata.to_csv("metadata_perfusion.csv", index=False)


In [194]:
# Load the interim perfusion patients list : 
with open("list_intermediary_patients.txt", 'r') as f:
    list_patients_interim = f.readlines()

list_patients_interim = [p.strip('\n') for p in list_patients_interim]


In [195]:
# Narrow the metadata table to the interim patients and renumber the rows from 0.
# This rebinds df_metadata: from here on it holds the interim subset only (the
# full table remains available in metadata_perfusion.csv).
df_metadata = df_metadata.loc[df_metadata["id_aidream"].isin(list_patients_interim)].reset_index(drop=True)

In [197]:
# Export the interim metadata subset without the row index.
df_metadata.to_csv("metadata_perfusion_interim.csv", index=False)

In [198]:
# Show which clinical-data columns were selected for the metadata table.
df_clinical_data_new.columns

Index(['id_aidream', 'id_cercare', 'IRM_preRT_ax', 'IRM_rechute_ax', 'deces',
       'TYPE_CHIR(Bx=0_STR=1_GTR=2)', 'delai_irmrechute_RTj1_jours',
       'PTV_volume_total_cm3'],
      dtype='object')

# 3 - Univariate analysis

In [214]:
import matplotlib.pyplot as plt
import seaborn as sns


In [215]:
# Reload both metadata tables from the CSV files written in section 2.
# This rebinds df_metadata to the full table read from disk.
df_metadata = pd.read_csv("metadata_perfusion.csv")
df_metadata_interim = pd.read_csv("metadata_perfusion_interim.csv")


In [228]:
# List the columns available in the merged metadata table.
df_metadata.columns

Index(['id_aidream', 'cohort', 'surgery type', 'Multifocal ?',
       'pre_RT Autoseg?', 'pre_RT Label5', 'pre_RT Label3', 'pre_RT Label1',
       'pre_RT Label4', 'pre_RT Label2', 'pre_RT Label6', 'pre_RT Label7',
       'pre_RT Label8', 'pre_RT Label9', 'pre_RT Label10',
       'pre_RT Export données corrigées', 'pre_RT Obs2_preRT_CS',
       'pre_RT Obs3_preRt_CV', 'Rechute Autoseg?', 'Rechute Label5',
       'Rechute Label3', 'Rechute Label1', 'Rechute Label4', 'Rechute Label2',
       'Rechute Label6', 'Rechute Label7', 'Rechute Label8', 'Rechute Label9',
       'Rechute Label10', 'Rechute Obs2_relapse_CS', 'Rechute Obs3_relapse_CV',
       'Sex', 'Age at diag', 'IDH muted', 'MGMT methylation',
       'Anatomic pathology/surgery', 'RT_dose_prescrite_Gy', 'RT_nr_fractions',
       'RT_dose_fraction_Gy', 'RT_etalement_jours', 'RT_Technique',
       'id_cercare', 'IRM_preRT_ax', 'IRM_rechute_ax', 'deces',
       'TYPE_CHIR(Bx=0_STR=1_GTR=2)', 'delai_irmrechute_RTj1_jours',
       'PT

In [335]:
# List the columns of the clinical-data frame (the display is abbreviated for wide frames).
df_clinical_data.columns

Index(['id_aidream', 'Centre', 'Cohorte', 'id_cercare', 'IPP',
       'AIDREAM_ID_300pat', 'avec_perf', 'IRM_preRT_ax', 'IRM_rechute_ax',
       'Reperes_preRT',
       ...
       'recidive_distance', 'recidive_distance_date',
       'recidive_infield_distant', 'CT_date', 'Dosi_date', 'IRM__preRT_date',
       'IRM__rechute_date', 'delai_IRMrechute_IRMpreRT_mois',
       'delai_irmrechute_RTj1_jours', 'PTV_volume_total_cm3'],
      dtype='object', length=136)

In [371]:
# Print every cohort-info column key on its own line; each key is a
# (group header, column header) tuple.
for col in df_cohort_info.columns:
    print(col)
    

('patient id', 'patient id')
('id', '')
('Center', 'Ce')
('Cohort', 'Co')
('', 'Perfusion?')
('Data at Thera', 'CT&MR')
('Data at Thera', 'Reg?')
('Data at Thera', 'GTV in RTSS')
('Data at Thera', 'CTV in RTSS')
('', 'RTSS zones')
('', 'Used AI')
('Statuts Surgery', 'Surgery?')
('', 'Type surgery')
('', 'Date surgery')
('', 'Residual disease?')
('Multi-tumor', 'Multifocal?')
('Multi-tumor', '#tumor')
('pre-RT', 'Autoseg?')
('pre-RT', 'Batch autoseg')
('pre-RT', 'On TPS?')
('pre-RT', 'Label5')
('pre-RT', 'Label3')
('pre-RT', 'Label1')
('pre-RT', 'Label4')
('pre-RT', 'Label2')
('pre-RT', 'Label6')
('pre-RT', 'Label7')
('pre-RT', 'Label8')
('pre-RT', 'Label9')
('pre-RT', 'Label10')
('pre-RT', 'Vérifié par Sophie')
('pre-RT', 'Export données corrigées')
('pre-RT', 'Contourer')
('pre-RT', 'Time Manual')
('pre-RT', 'Time GE')
('pre-RT', 'Issues')
('pre-RT', 'CAMI_JADE_verification-radiologue')
('pre-RT', 'Possible for 1st DL Thera')
('pre-RT', 'Possible for CERCARE')
('pre-RT', 'Obs2_preRT_C

In [354]:
# Distinct values found in `tmz_conco`; the output shows they are stored as the
# strings '1' and '0' rather than as numbers.
df_clinical_data['tmz_conco'].unique()

array(['1', '0'], dtype=object)

In [353]:
# Display the rows where `recidive_diametre_max_mm` holds the placeholder string 'na'.
df_clinical_data.loc[df_clinical_data['recidive_diametre_max_mm'] == 'na']

Unnamed: 0,id_aidream,Centre,Cohorte,id_cercare,IPP,AIDREAM_ID_300pat,avec_perf,IRM_preRT_ax,IRM_rechute_ax,Reperes_preRT,...,recidive_distance,recidive_distance_date,recidive_infield_distant,CT_date,Dosi_date,IRM__preRT_date,IRM__rechute_date,delai_IRMrechute_IRMpreRT_mois,delai_irmrechute_RTj1_jours,PTV_volume_total_cm3
0,AIDREAM_1,IGR,MIM_PROB,MMI-PROB_140,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,na,na,na,23/08/2012,23/08/2012,22/08/2012,07/08/2013,11,339,204
1,AIDREAM_10,IGR,MIM_PROB,MMI-PROB_050,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,0,na,na,18/02/2010,18/02/2010,26/02/2010,18/03/2011,13,374,208
2,AIDREAM_100,IGR,MIM_PROB,MMI-PROB_139,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,0,na,na,03/08/2012,03/08/2012,03/08/2012,08/04/2013,8,247,128
3,AIDREAM_101,IGR,MIM_PROB,MMI-PROB_130,,300pat,TRUE,à axaliser,à axaliser,AC_repATLAS,...,1,na,na,05/07/2012,05/07/2012,11/07/2012,05/06/2014,23,612,201
4,AIDREAM_102,IGR,MIM_PROB,MMI-PROB_085,,AIDREAM_102,TRUE,nonAx,à axaliser,AC_repATLAS,...,0,na,na,24/02/2011,24/02/2011,01/03/2011,08/08/2011,5,159,367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,AIDREAM_94,IGR,MIM_PROB,MMI-PROB_207,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,na,na,na,22/01/2015,22/01/2015,16/01/2015,05/06/2015,5,131,202
229,AIDREAM_96,IGR,MIM_PROB,MMI-PROB_024,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,na,na,na,20/05/2009,20/05/2009,19/05/2009,27/05/2013,48,1 457,191
230,AIDREAM_97,IGR,MIM_PROB,MMI-PROB_133,,300pat,TRUE,nonAx,à axaliser,AC_repATLAS,...,na,na,na,31/05/2012,31/05/2012,31/05/2012,15/03/2014,21,647,495
231,AIDREAM_98,IGR,MIM_PROB,MMI-PROB_201,,AIDREAM_98,TRUE,à axaliser,à axaliser,AC_repATLAS,...,na,na,na,28/08/2014,28/08/2014,29/08/2014,29/04/2015,8,232,123
