In [8]:
from aidream_registration import constants
import aidream_registration.utils.cohort_utils as cu

In [9]:
from tqdm import tqdm
import shutil
import os

In [10]:
# the list of perfusion patients :
# (retrieved through the project's cohort utilities; every later step of the
# notebook iterates over or checks against this list)
list_patients = cu.get_perfusion_patients()
print(fr"Number of patients: {len(list_patients)}")


Number of patients: 186


In [11]:
# the referential table :
# (built by the project helper for the selected patients; the last expression
# displays its first rows as a quick visual check)
df_ref = cu.get_referential_table(list_patients)
df_ref.head(5)


Unnamed: 0,AIDREAM_ID,cohort_ID,local_ID,perfusion,complete_validation,surgery_type,pre_RT_reference,pre_RT_TPS,valid_pre_RT_segmentation_model,valid_pre_RT_perfusion_model,Rechute_reference,Rechute_TPS,valid_Rechute_segmentation_model,valid_Rechute_perfusion_model,FREESURFER STATUS
0,AIDREAM_1,MMI-PROB_140,201211519BP,yes,1,2.0,AC_repATLAS,RS,yes,yes,AC_repATLAS,RS,yes,yes,ASEG
1,AIDREAM_10,MMI-PROB_050,201002036RR,yes,1,2.0,AC_repATLAS native,RS,yes,yes,AC_repATLAS,RS,yes,yes,ASEG
2,AIDREAM_100,MMI-PROB_139,201210738EE,yes,1,0.0,AC_repATLAS,RS,yes,yes,AC_repATLAS,RS,yes,no,ASEG
3,AIDREAM_102,MMI-PROB_085,201101726ZB,yes,1,2.0,AC_repATLAS,RS,yes,yes,AC_repATLAS,RS,yes,yes,ASEG
4,AIDREAM_103,MMI-PROB_166,201311898RR,yes,1,2.0,AC_repATLAS,RS,yes,yes,AC_repATLAS,RS,yes,yes,ASEG


In [12]:
# Config dictionary to map where each file should be copied
# (keys are source paths, values are destination paths) :
dict_config = {}

# SRC directory where data is stored.
# It is validated BEFORE any directory is created, so that a missing or
# unmounted drive makes the cell fail without leaving a freshly created
# (and empty) folder tree behind :
DIR_SRC = constants.DIR_DEFAULT_HARD_DRIVE / "AIDREAM DATA"
assert DIR_SRC.exists(), fr"{DIR_SRC} does not exist !"

# PROCESSED directory where data should be stored :
dir_processed = constants.DIR_DEFAULT_HARD_DRIVE / "PERFUSION_DATA" / "PROCESSED"
dir_processed.mkdir(parents=True, exist_ok=True)

# List bad patients (stored as a set, despite the name, to avoid duplicates) :
list_bad_patients = set()


In [18]:
# Step 1 : schedule the pre_RT MRI images (T1, T1CE, FLAIR) for copy.
# A patient is flagged as bad as soon as one modality is missing; the
# modalities that do exist are still scheduled.

for patient in list_patients:

    # Both folders only depend on the patient, not on the modality :
    dir_skullstripping = (DIR_SRC
                          / "MRI DATA"
                          / "REGISTERED MRI BY PIPELINE"
                          / "pre_RT"
                          / "OUTPUT_DIR"
                          / patient
                          / "skullstripping")
    dir_mri_dst = dir_processed / patient / "MRI"

    for imaging in ("T1", "T1CE", "FLAIR"):

        # Source file names use the lower-case modality name :
        path_src = dir_skullstripping / fr"image_n4_register_ss_{imaging.lower()}.nii.gz"

        if path_src.exists():
            path_dst = dir_mri_dst / fr"{patient}_pre_RT_{imaging}.nii.gz"
            dict_config[path_src] = path_dst
        else:
            list_bad_patients.add(patient)

print(fr"Number of bad patients after MRI step {len(list_bad_patients)}")


Number of bad patients after MRI step 0


In [19]:
# Step 2 : schedule the pre_RT T1 brain mask of every patient.
dir_pre_rt_output = (DIR_SRC
                     / "MRI DATA"
                     / "REGISTERED MRI BY PIPELINE"
                     / "pre_RT"
                     / "OUTPUT_DIR")

for patient in list_patients:

    path_src = (dir_pre_rt_output
                / patient
                / "skullstripping"
                / fr"image_n4_register_brain_mask_t1.nii.gz")

    if path_src.exists():
        path_dst = dir_processed / patient / "MRI" / fr"{patient}_pre_RT_T1_mask.nii.gz"
        dict_config[path_src] = path_dst
    else:
        # Missing mask : the patient is flagged and nothing is scheduled for it here.
        list_bad_patients.add(patient)

print(fr"Number of bad patients after T1 mask step {len(list_bad_patients)}")


Number of bad patients after T1 mask step 0


In [20]:
# Step 3 : schedule the CERCARE biomarker maps, for every interpolator.
# A missing map flags the patient as bad; the maps that exist are still scheduled.
list_biomarkers = ["COV", "CTH", "Delay", "rCBV", "rLeakage", "OEF", "rCMRO2"]
dir_cercare_src = DIR_SRC / "CERCARE DATA" / "REGISTERED CERCARE BY PADDING"

for patient in list_patients:
    for interpolator in constants.LIST_INTERPOLATORS:

        # Source and destination folders are shared by all the biomarkers :
        dir_src = dir_cercare_src / patient / interpolator
        dir_dst = dir_processed / patient / "CERCARE" / interpolator

        for biomarker in list_biomarkers:

            path_src = dir_src / fr"{patient}_{biomarker}_registered_by_padding_{interpolator}.nii.gz"

            if path_src.exists():
                # The "registered_by_padding" part is dropped from the destination name.
                path_dst = dir_dst / fr"{patient}_{biomarker}_{interpolator}.nii.gz"
                dict_config[path_src] = path_dst
            else:
                list_bad_patients.add(patient)

print(fr"Number of bad patients after CERCARE step {len(list_bad_patients)}")


Number of bad patients after CERCARE step 0


In [21]:
# Step 3 (continued) : schedule the CERCARE brain mask of every patient.
# Only the "genericLabel" version of the mask is looked up.
dir_cercare_src = DIR_SRC / "CERCARE DATA" / "REGISTERED CERCARE BY PADDING"

for patient in list_patients:

    path_src = (dir_cercare_src
                / patient
                / "genericLabel"
                / fr"{patient}_brainmask_registered_by_padding_genericLabel.nii.gz")

    if path_src.exists():
        path_dst = (dir_processed
                    / patient
                    / "CERCARE"
                    / fr"{patient}_CERCARE_brainmask_genericLabel.nii.gz")
        dict_config[path_src] = path_dst
    else:
        list_bad_patients.add(patient)

print(fr"Number of bad patients after CERCARE brainmask step {len(list_bad_patients)}")


Number of bad patients after CERCARE brainmask step 0


In [22]:
# Step 4 : add the Affine labels :
# The source tree is walked as <stage>/<subfolder>/<patient>/Affine/<file>.
# Every patient folder found must contain all the labels of the stage,
# otherwise the patient is flagged as bad. Patients of the cohort that are
# never met for a stage are flagged as bad as well.
list_patients_not_visited_per_stage = {"pre_RT": set(list_patients), "Rechute": set(list_patients)}

dict_labels = {"pre_RT": ["L1", "L2", "L3", "L4", "L5"],
               "Rechute": ["L1R", "L2R", "L3R", "L4R", "L5R"]}

for stage in ["pre_RT", "Rechute"]:
    dir_stage = (DIR_SRC / "LABELS DATA" / "REGISTERED LABELS ON PRE_RT T1" / stage)
    for subfolder in os.listdir(dir_stage):

        dir_subfolder = dir_stage / subfolder

        if not dir_subfolder.is_dir():
            continue

        for patient in os.listdir(dir_subfolder):

            # Only directories are patient folders. Without this guard a stray
            # file (hidden OS file, log, ...) would be taken for a patient and
            # wrongly reported in list_bad_patients.
            if not (dir_subfolder / patient).is_dir():
                continue

            # The patient has been met for this stage (no-op if not in the set).
            list_patients_not_visited_per_stage[stage].discard(patient)

            for label in dict_labels[stage]:

                path_src = dir_subfolder / patient / "Affine" / fr"{patient}_{stage}_{label}_Affine.nii.gz"

                if not path_src.exists():
                    list_bad_patients.add(patient)
                    continue

                path_dst = (dir_processed
                            / patient
                            / "LABELS"
                            / stage
                            / "Affine"
                            / fr"{patient}_{stage}_{label}_Affine.nii.gz")

                dict_config[path_src] = path_dst

# Patients never met in one of the two stages are bad patients too :
list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["pre_RT"])
list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["Rechute"])

print(fr"Number of bad patients after labels step {len(list_bad_patients)}")


Number of bad patients after labels step 0


In [23]:
# Step 5 : schedule the Affine registered SOURCE IMAGING (T1CE) of both stages.
# A missing image is simply skipped; only the patients that are never met in
# the folders of a stage are flagged as bad.

list_patients_not_visited_per_stage = {"pre_RT": set(list_patients), "Rechute": set(list_patients)}

for stage in ("pre_RT", "Rechute"):
    dir_stage = DIR_SRC / "LABELS DATA" / "REGISTERED LABELS ON PRE_RT T1" / stage

    for subfolder in os.listdir(dir_stage):
        dir_subfolder = dir_stage / subfolder

        # Plain files lying next to the subfolders are ignored.
        if dir_subfolder.is_dir():

            for patient in os.listdir(dir_subfolder):
                list_patients_not_visited_per_stage[stage].discard(patient)

                name_image = fr"{patient}_{stage}_T1CE_Affine.nii.gz"
                path_src = dir_subfolder / patient / "REGISTERED SOURCE IMAGING" / name_image

                if path_src.exists():
                    path_dst = dir_processed / patient / "SOURCE IMAGING" / name_image
                    dict_config[path_src] = path_dst

list_bad_patients = (list_bad_patients
                     | list_patients_not_visited_per_stage["pre_RT"]
                     | list_patients_not_visited_per_stage["Rechute"])

print(fr"Number of bad patients after Affine source imaging step {len(list_bad_patients)}")


Number of bad patients after Affine source imaging step 0


In [24]:
# Copy every scheduled file; the destination folder is created on the fly.
for src, dst in tqdm(dict_config.items()):
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(src, dst)


100%|██████████| 9622/9622 [14:06<00:00, 11.37it/s] 


In [25]:
# Step 5 (bis) : add SyN labels :
# A new copy batch is started here : the previous content of dict_config is dropped.
# SyN labels are optional file by file : a missing file is skipped, and a
# patient is flagged as bad for a stage only when none of its SyN labels exists.
dict_config = {}
list_patients_not_visited_per_stage = {"pre_RT": set(list_patients), "Rechute": set(list_patients)}

dict_labels = {"pre_RT": ["L1", "L2", "L3", "L4", "L5"],
               "Rechute": ["L1R", "L2R", "L3R", "L4R", "L5R"]}

for stage in ["pre_RT", "Rechute"]:
    dir_stage = (DIR_SRC / "LABELS DATA" / "REGISTERED LABELS ON PRE_RT T1" / stage)
    for subfolder in os.listdir(dir_stage):

        dir_subfolder = dir_stage / subfolder

        if not dir_subfolder.is_dir():
            continue

        for patient in os.listdir(dir_subfolder):
            for label in dict_labels[stage]:

                path_src = dir_subfolder / patient / "SyN" / fr"{patient}_{stage}_{label}_SyN.nii.gz"

                if not path_src.exists():
                    continue

                # One existing SyN label is enough to consider the patient as
                # visited for this stage (no-op if already removed from the set).
                list_patients_not_visited_per_stage[stage].discard(patient)

                path_dst = (dir_processed
                            / patient
                            / "LABELS"
                            / stage
                            / "SyN"
                            / fr"{patient}_{stage}_{label}_SyN.nii.gz")

                dict_config[path_src] = path_dst

# Patients without any SyN label in one of the two stages are bad patients :
list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["pre_RT"])
list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["Rechute"])

# The message now names the step that was actually run (it used to be a
# copy-paste of the "Affine source imaging" message).
print(fr"Number of bad patients after SyN labels step {len(list_bad_patients)}")



Number of bad patients after Affine source imaging step 45


In [26]:
# Step 6 : add SyN registered SOURCE IMAGING :
# The files are added to the dict_config batch started by the SyN labels cell.
# A patient is flagged as bad for a stage when its SyN registered T1CE is not found.

list_patients_not_visited_per_stage = {"pre_RT": set(list_patients), "Rechute": set(list_patients)}

for stage in ["pre_RT", "Rechute"]:
    dir_stage = (DIR_SRC / "LABELS DATA" / "REGISTERED LABELS ON PRE_RT T1" / stage)
    for subfolder in os.listdir(dir_stage):

        dir_subfolder = dir_stage / subfolder

        if not dir_subfolder.is_dir():
            continue

        for patient in os.listdir(dir_subfolder):

            path_src = dir_subfolder / patient / "REGISTERED SOURCE IMAGING"  / fr"{patient}_{stage}_T1CE_SyN.nii.gz"

            if not path_src.exists():
                continue

            # The patient is only considered as visited when the file exists.
            list_patients_not_visited_per_stage[stage].discard(patient)

            path_dst = (dir_processed
                        / patient
                        / "SOURCE IMAGING"
                        / fr"{patient}_{stage}_T1CE_SyN.nii.gz")

            dict_config[path_src] = path_dst

list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["pre_RT"])
list_bad_patients = list_bad_patients.union(list_patients_not_visited_per_stage["Rechute"])

# The message now names the step that was actually run (it used to be a
# copy-paste of the "Affine source imaging" message).
print(fr"Number of bad patients after SyN source imaging step {len(list_bad_patients)}")


Number of bad patients after Affine source imaging step 45


In [27]:
# Copy the SyN batch (labels + source imaging) to the PROCESSED tree.
for path_src in tqdm(dict_config):
    path_dst = dict_config[path_src]

    # Destination folders do not exist yet for a new kind of data :
    path_dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(path_src, path_dst)


100%|██████████| 1932/1932 [01:17<00:00, 25.09it/s] 


In [13]:
# Start again from an empty set of bad patients, so that the count printed by
# the next step only reflects the files checked from here on.
list_bad_patients = set()

In [14]:
# Step 7 : schedule the ventricle segmentation of the pre_RT T1.
# A new copy batch is started : the previous content of dict_config is dropped.

dict_config = {}
dir_ventricles = DIR_SRC / "VENTRICLES SEGMENTATION" / "SEGMENTED VENTRICLES"

for patient in list_patients:

    path_src = dir_ventricles / fr"{patient}_pre_RT_T1_ventricle_seg.nii.gz"

    if path_src.exists():
        # The file goes next to the MRI images and is renamed from
        # "*_ventricle_seg" to "*_ventricle_mask".
        path_dst = (dir_processed
                    / patient
                    / "MRI"
                    / fr"{patient}_pre_RT_T1_ventricle_mask.nii.gz")
        dict_config[path_src] = path_dst
    else:
        list_bad_patients.add(patient)

print(fr"Number of bad patients after ventricle segmentation step {len(list_bad_patients)}")


Number of bad patients after ventricle segmentation step 0


In [15]:
# Copy the ventricle masks to the PROCESSED tree.
for pair_src_dst in tqdm(dict_config.items()):
    file_src, file_dst = pair_src_dst
    file_dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(file_src, file_dst)


100%|██████████| 186/186 [00:00<00:00, 700.60it/s]
