In [1]:
import io
import os
import numpy as np
import pandas as pd
import pydicom
import nrrd
import matplotlib.pyplot as plt

import SimpleITK as sitk

from dotenv import load_dotenv
load_dotenv();

In [2]:
# Dataset root directory, read from the MAIN_PATH environment variable.
# Falls back to an empty string when the variable is not set.
data_path = os.environ.get("MAIN_PATH", "")

# Folder holding one example DICOM series.
DICOM_FOLDER = data_path + "/MRI_SEG_DICOM/Breast_MRI_086/01-01-1990-NA-BREASTROUTINE-86704/5.000000-ax dyn 1st pass-39884/"
# Spreadsheet with the annotation boxes.
# Alternative location: f"{data_path}/Supplemental-Data/Annotation_Boxes.xlsx"
ANNOTATION_BOX_FILE = data_path + "/Annotation_Boxes.xlsx"

# One example NRRD volume.
NRRD_FILE = data_path + "/nrrd_images/Breast_MRI_052/post_1.nrrd"

## Tumor masks

### convert DICOM to NRRD

In [3]:
def collect_patient_data(root_dir, max_patients=0):
    """Collect the non-segmentation series folders of the patients under ``root_dir``.

    The directory layout walked is ``root_dir/<patient>/<study>/<series>``. For
    every patient directory only the first (alphabetically) study sub-directory
    is inspected, and each series directory inside it whose name does not
    contain "segment" (case-insensitive) is collected.

    Args:
        root_dir: Directory containing one sub-directory per patient.
        max_patients: Maximum number of patients to collect series for.
            ``0`` (the default) or ``None`` means "no limit".

    Returns:
        list[str]: Series paths relative to ``root_dir``, truncated so they
        start at the first ``"Breast_MRI_"`` occurrence when that marker is
        present (otherwise the plain relative path is kept).
    """
    # A falsy limit (0 / None) used to stop the loop before the first patient,
    # so the default call always returned an empty list.
    unlimited = not max_patients

    collected_paths = []
    patient_count = 0

    # os.listdir order is arbitrary; sorting makes the selected subset reproducible.
    for patient_folder in sorted(os.listdir(root_dir)):
        patient_path = os.path.join(root_dir, patient_folder)

        if not os.path.isdir(patient_path):
            continue

        if not unlimited and patient_count >= max_patients:
            break

        # Each patient folder holds a single study folder with an opaque name;
        # take the first sub-directory found.
        study_folder = next(
            (
                name
                for name in sorted(os.listdir(patient_path))
                if os.path.isdir(os.path.join(patient_path, name))
            ),
            None,
        )
        if study_folder is None:
            # Patients without a study folder do not count towards the limit.
            continue

        study_path = os.path.join(patient_path, study_folder)

        for series_folder in sorted(os.listdir(study_path)):
            if not os.path.isdir(os.path.join(study_path, series_folder)):
                continue
            # Test only the series folder name, so a root directory that happens
            # to contain "segment" does not filter out every series.
            if "segment" in series_folder.lower():
                continue

            relative_path = os.path.join(patient_folder, study_folder, series_folder)
            marker_idx = relative_path.find("Breast_MRI_")
            # find() returns -1 when the marker is missing; slicing with -1 would
            # keep only the last character, so fall back to the whole relative path.
            collected_paths.append(relative_path[marker_idx:] if marker_idx > 0 else relative_path)

        patient_count += 1

    return collected_paths

# Root of the DICOM tree and how many patients to pick up from it.
MRI_DICOM_FOLDER = data_path + "/MRI_SEG_DICOM/"
max_patients_to_process = 2
valid_paths = collect_patient_data(
    root_dir=MRI_DICOM_FOLDER,
    max_patients=max_patients_to_process,
)

In [4]:
# Location of the file-path mapping table. Set the FILE_PATH_MAPPING_CSV
# environment variable to override it; the default is the original local path.
mapping_csv_path = os.getenv(
    "FILE_PATH_MAPPING_CSV",
    "/media/pedro-lima/HDD/downloads/File_Path_Mapping_Tables.csv",
)

# Build {descriptive series folder -> target NRRD file path}.
# The table is read in chunks and only the two columns used below are parsed.
mapping_paths = {}
for chunk in pd.read_csv(
    mapping_csv_path,
    usecols=["original_path_and_filename", "descriptive_path"],
    chunksize=1_000,
):
    # Folder of the original file, moved under <data_path>/MRI_NRRD/ and given a
    # .nrrd extension. regex=False keeps the replacement literal regardless of
    # the pandas version (data_path may contain characters a regex would interpret).
    nrrd_targets = (
        chunk["original_path_and_filename"]
        .apply(os.path.dirname)
        .str.replace("DICOM_Images/", data_path + "/MRI_NRRD/", regex=False)
        + ".nrrd"
    )

    # Folder of the descriptive path, with "BreastMRI<digits>" rewritten to
    # "Breast_MRI_<digits>" and everything before that marker removed.
    series_keys = (
        chunk["descriptive_path"]
        .apply(os.path.dirname)
        .str.replace(r'BreastMRI(\d+)', r'Breast_MRI_\1', regex=True)
        .apply(lambda path: path[path.find("Breast_MRI_"):])
    )

    # Many rows (one per file) collapse to the same pair; for a repeated key the
    # last row wins, exactly as with a dict built from the rows in order.
    mapping_paths.update(zip(series_keys, nrrd_targets))

# Keep only NRRD targets referenced by exactly one series key: a target shared
# by several keys is ambiguous. duplicated(keep=False) flags every member of a
# duplicated group in one pass instead of re-counting the values for each key.
nrrd_target_by_key = pd.Series(mapping_paths, dtype=object)
mapping_paths = nrrd_target_by_key[~nrrd_target_by_key.duplicated(keep=False)].to_dict()

In [5]:
# Look-up table keyed by (first path component, text after the last "-").
# When several mapping entries share a key, the last one in iteration order is kept.
nrrd_by_patient_and_suffix = {
    (series_key.split("/")[0], series_key.split("-")[-1]): nrrd_target
    for series_key, nrrd_target in mapping_paths.items()
}

# Map each collected DICOM series folder (absolute path) to its NRRD target.
new_dict = {}
for series_path in valid_paths:
    lookup_key = (series_path.split("/")[0], series_path.split("-")[-1])
    if lookup_key in nrrd_by_patient_and_suffix:
        new_dict[f"{data_path}/MRI_SEG_DICOM/{series_path}"] = nrrd_by_patient_and_suffix[lookup_key]

In [6]:
def dicom_to_nrrd(dicom_folder, nrrd_path):
    """Read the DICOM series found in ``dicom_folder`` and save it as one NRRD file.

    Args:
        dicom_folder: Directory containing the DICOM files of the series.
        nrrd_path: Destination file path. Missing parent directories are created.

    Raises:
        RuntimeError: If no DICOM series is found in ``dicom_folder``, or if
            SimpleITK fails to read the series or write the output.
    """
    dicom_reader = sitk.ImageSeriesReader()
    dicom_files = dicom_reader.GetGDCMSeriesFileNames(dicom_folder)
    if not dicom_files:
        # Fail here with the offending folder in the message instead of letting
        # the reader fail later on an empty file list.
        raise RuntimeError(f"No DICOM series found in {dicom_folder!r}")
    dicom_reader.SetFileNames(dicom_files)
    image_3d = dicom_reader.Execute()

    # os.makedirs("") raises, so only create the parent when the path has one.
    output_dir = os.path.dirname(nrrd_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    sitk.WriteImage(image_3d, nrrd_path)


In [None]:
# Convert every matched DICOM series folder into its mapped NRRD file.
for dicom_dir in new_dict:
    dicom_to_nrrd(dicom_dir, new_dict[dicom_dir])

In [4]:
### check whether the NRRD files were created
def count_files(main_path):
    """Return how many files exist under ``main_path``, including all sub-directories."""
    return sum(len(filenames) for _dirpath, _dirnames, filenames in os.walk(main_path))

# Count everything that was written below the NRRD output root.
nrrd_root = f"{data_path}/MRI_NRRD/"
file_count_nrrd = count_files(nrrd_root)
print("Total files: " + str(file_count_nrrd))

Total files: 5033


### get 3D bounding boxes from annotation boxes

In [None]:
# Create a 3D mask based on the annotation boxes:
# voxels inside the annotated box are set to 1, every other voxel to 0.
def create_3d_mask(nrrd_path, annotation_df, patient_id):
    """Write a box-shaped binary mask for ``patient_id`` next to the other masks.

    The volume at ``nrrd_path`` is loaded, a zero array of the same shape and
    dtype is created, and the region given by the patient's annotation row is
    set to 1. The mask copies the volume's spatial metadata and is saved to
    ``{data_path}/nrrd_masks/output_{patient_id}.seg.nrrd`` (``data_path`` is the
    notebook-level dataset root).

    Args:
        nrrd_path: Path of the image volume to read.
        annotation_df: DataFrame with a ``'Patient ID'`` column and the columns
            ``Start Row``, ``End Row``, ``Start Column``, ``End Column``,
            ``Start Slice`` and ``End Slice``.
        patient_id: Value matched against ``'Patient ID'``; the first matching
            row is used.

    Raises:
        IndexError: If ``annotation_df`` has no row for ``patient_id``.
    """
    image_3d = sitk.ReadImage(nrrd_path)

    image_array = sitk.GetArrayFromImage(image_3d)
    print("Image shape:", image_array.shape)

    mask_array = np.zeros_like(image_array)
    total_slices = image_array.shape[0]

    patient_rows = annotation_df[annotation_df['Patient ID'] == patient_id]
    if patient_rows.empty:
        # Same exception type .iloc[0] would raise, but with a usable message.
        raise IndexError(f"no annotation box found for patient {patient_id!r}")
    annotation = patient_rows.iloc[0]

    start_row = int(annotation["Start Row"])
    end_row = int(annotation["End Row"])
    start_column = int(annotation["Start Column"])
    end_column = int(annotation["End Column"])
    start_slice = int(annotation["Start Slice"])
    end_slice = int(annotation["End Slice"])
    print(start_row, end_row, start_column, end_column, start_slice, end_slice)

    slice_spacing = image_3d.GetSpacing()[2]

    # NOTE(review): ITK images normally report positive spacing and carry the
    # axis orientation in the direction matrix, so this condition may never be
    # true — confirm whether image_3d.GetDirection() should be checked instead.
    if slice_spacing < 0:  # negative spacing indicates descending order of height
        # Mirror the slice range. Both new bounds must be computed from the
        # ORIGINAL values; assigning them one after the other reused the
        # already-updated start_slice and left end_slice unchanged.
        start_slice, end_slice = (
            total_slices - end_slice + 1,
            total_slices - start_slice + 1,
        )

    # Array axes are indexed as (slice, row, column).
    # NOTE(review): the box values are used directly as 0-based, end-exclusive
    # indices, while the mirroring formula above matches 1-based inclusive
    # numbering — confirm the annotation convention.
    mask_array[start_slice:end_slice, start_row:end_row, start_column:end_column] = 1

    mask_image = sitk.GetImageFromArray(mask_array)
    mask_image.CopyInformation(image_3d)
    # The mask keeps the input volume's pixel type (np.zeros_like above).
    # mask_image = sitk.Cast(mask_image, sitk.sitkUInt8)  # ensure it's a label map (binary)

    mask_path = f'{data_path}/nrrd_masks/output_{patient_id}.seg.nrrd'
    # Create the output folder if needed, as dicom_to_nrrd does for its output.
    os.makedirs(os.path.dirname(mask_path), exist_ok=True)
    sitk.WriteImage(mask_image, mask_path)

# example
annotation_df = pd.read_excel(ANNOTATION_BOX_FILE)
patient_id = NRRD_FILE.split("/")[-2]
create_3d_mask(NRRD_FILE, annotation_df, patient_id)

Image shape: (128, 448, 448)
148 173 340 361 54 67
