This program assumes that the UCSF dataset has been downloaded to "UCSF_dataset" and the BraTS dataset to "BraTS_dataset". For both datasets, it locates the T1, T2, FLAIR and tumor segmentation volumes of each patient, converts the NIfTI files to PNG images (one image per slice), binarizes the segmentation images, and stores the outputs in new directories: "UCSF" for the UCSF dataset and "BraTS" for the BraTS dataset.

In [None]:
import os
import nibabel as nib
import numpy as np
import cv2

# Process UCSF

In [None]:
# Directory that receives the converted UCSF PNG slices
output_base_path = "UCSF"
# Directory holding the downloaded UCSF NIfTI data
dataset_path = "UCSF_dataset"

In [None]:
def get_folders(dataset_path):
    """
    Return the names of the immediate subdirectories of `dataset_path`,
    sorted in ascending order. Entries that are not directories are ignored.
    """
    subdirectories = []
    for entry in os.listdir(dataset_path):
        if os.path.isdir(os.path.join(dataset_path, entry)):
            subdirectories.append(entry)
    return sorted(subdirectories)

In [3]:
def find_files_ucsf(folder_path):
    """
    Locate the segmentation, T1, T2 and FLAIR volumes in a UCSF patient folder.

    Only names ending in ".nii.gz" are considered. Returns a dict keyed by
    "tumor_segmentation", "t1", "t2" and "flair"; each value is the full path
    of the matching file, or None when no file matched.
    """
    # Checked in this order; the first marker contained in a name wins.
    markers = (
        ("tumor_segmentation", "tumor_segmentation"),
        ("T1.nii", "t1"),
        ("T2.nii", "t2"),
        ("FLAIR.nii", "flair"),
    )
    found = dict.fromkeys(["tumor_segmentation", "t1", "t2", "flair"])
    for entry in os.listdir(folder_path):
        if not entry.endswith(".nii.gz"):
            continue
        for marker, key in markers:
            if marker in entry:
                found[key] = os.path.join(folder_path, entry)
                break
    return found

In [4]:
def process_files_ucsf(folder_path, output_patient_folder, files):
    """
    Convert each located UCSF volume into one PNG image per slice.

    :param folder_path: Patient folder the files came from (used only in the
        "not found" message).
    :param output_patient_folder: Directory that receives one subfolder per
        file type ("Segmentation", "T1", "T2", "Flair").
    :param files: Mapping of file type -> NIfTI file path, or a falsy value
        when that file type is missing.
    """
    # Output subfolder name for each file type
    subfolder_by_type = {
        "tumor_segmentation": "Segmentation",
        "t1": "T1",
        "t2": "T2",
        "flair": "Flair",
    }

    for file_type, file_path in files.items():
        if not file_path:
            print(f"{file_type} file not found in folder: {folder_path}")
            continue

        print(f"Processing {file_type} file: {file_path}")

        # Read the whole volume as a floating-point array via nibabel
        volume = nib.load(file_path).get_fdata()

        output_folder = os.path.join(output_patient_folder, subfolder_by_type[file_type])
        os.makedirs(output_folder, exist_ok=True)

        is_mask = file_type == "tumor_segmentation"

        # Slices are taken along the third array axis; file names are 1-based
        for slice_number in range(1, volume.shape[2] + 1):
            plane = volume[:, :, slice_number - 1]

            if is_mask:
                # Any non-zero label becomes 255, background stays 0
                image = (plane > 0).astype(np.uint8) * 255
            else:
                # Min-max rescale this slice to 0-255 before casting to uint8
                image = cv2.normalize(plane, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

            target = os.path.join(output_folder, f"{file_type}_slice_{slice_number}.png")
            cv2.imwrite(target, image)

        print(f"Saved slices to {output_folder}")

In [None]:
# Iterate patient folders through get_folders() rather than a raw
# os.listdir(): the listing is then sorted, so the running patient index is
# reproducible between runs, and stray non-directory entries are skipped
# instead of making os.listdir() fail inside find_files_ucsf().
for i, folder in enumerate(get_folders(dataset_path), start=1):
    folder_path = os.path.join(dataset_path, folder)
    output_patient_folder = os.path.join(output_base_path, f"{folder}_patient_{i}")
    os.makedirs(output_patient_folder, exist_ok=True)

    # Find the relevant volumes, then export them slice by slice
    files = find_files_ucsf(folder_path)
    process_files_ucsf(folder_path, output_patient_folder, files)

# Process BraTS

In [5]:
# Directory that receives the converted BraTS PNG slices
output_base_path = "BraTS"
# Directory holding the downloaded BraTS NIfTI data
dataset_path = "BraTS_dataset"

In [None]:
def find_files_brats(folder_path):
    """
    Locate the segmentation, T1, T2 and FLAIR volumes in a BraTS patient folder.

    Files are recognised by a marker contained in their name: "-seg.nii.gz",
    "-t1c.nii.gz", "-t2w.nii.gz" and "-t2f.nii.gz". Returns a dict keyed by
    "tumor_segmentation", "t1", "t2" and "flair"; each value is the full path
    of the matching file, or None when no file matched.
    """
    # Checked in this order; the first marker contained in a name wins.
    markers = (
        ("-seg.nii.gz", "tumor_segmentation"),
        ("-t1c.nii.gz", "t1"),
        ("-t2w.nii.gz", "t2"),
        ("-t2f.nii.gz", "flair"),
    )
    found = dict.fromkeys(key for _, key in markers)
    for entry in os.listdir(folder_path):
        for marker, key in markers:
            if marker in entry:
                found[key] = os.path.join(folder_path, entry)
                break
    return found

In [None]:
def process_files_brats(folder_path, output_patient_folder, files):
    """
    Convert each located BraTS volume into one PNG image per slice.

    :param folder_path: Patient folder the files came from (used only in the
        "not found" message).
    :param output_patient_folder: Directory that receives one subfolder per
        file type ("Segmentation", "T1", "T2", "Flair").
    :param files: Mapping of file type -> NIfTI file path, or a falsy value
        when that file type is missing.
    """
    # Output subfolder name for each file type
    subfolder_by_type = {
        "tumor_segmentation": "Segmentation",
        "t1": "T1",
        "t2": "T2",
        "flair": "Flair",
    }

    for file_type, file_path in files.items():
        if not file_path:
            print(f"{file_type} file not found in folder: {folder_path}")
            continue

        print(f"Processing {file_type} file: {file_path}")

        # Read the whole volume as a floating-point array via nibabel
        volume = nib.load(file_path).get_fdata()

        output_folder = os.path.join(output_patient_folder, subfolder_by_type[file_type])
        os.makedirs(output_folder, exist_ok=True)

        is_mask = file_type == "tumor_segmentation"

        # Slices are taken along the third array axis; file names are 1-based
        for slice_number in range(1, volume.shape[2] + 1):
            plane = volume[:, :, slice_number - 1]

            if is_mask:
                # Any non-zero label becomes 255, background stays 0
                image = (plane > 0).astype(np.uint8) * 255
            else:
                # Min-max rescale this slice to 0-255 before casting to uint8
                image = cv2.normalize(plane, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

            target = os.path.join(output_folder, f"{file_type}_slice_{slice_number}.png")
            cv2.imwrite(target, image)

        print(f"Saved slices to {output_folder}")

In [None]:
# Iterate patient folders through get_folders() rather than a raw
# os.listdir(): the listing is then sorted, so the running patient index is
# reproducible between runs, and stray non-directory entries are skipped
# instead of making os.listdir() fail inside find_files_brats().
for i, folder in enumerate(get_folders(dataset_path), start=1):
    folder_path = os.path.join(dataset_path, folder)
    output_patient_folder = os.path.join(output_base_path, f"{folder}_patient_{i}")
    os.makedirs(output_patient_folder, exist_ok=True)

    # Find the relevant volumes, then export them slice by slice
    files = find_files_brats(folder_path)
    process_files_brats(folder_path, output_patient_folder, files)

In [1]:
def rename_segmentation_files(base_folder):
    """
    Shorten segmentation slice file names below `base_folder`.

    Walks the directory tree and, inside every directory named
    'Segmentation', renames files starting with `tumor_segmentation_slice_`
    so that `tumor_segmentation_` becomes `tumor_`
    (e.g. `tumor_segmentation_slice_1` -> `tumor_slice_1`).
    Each rename is reported on standard output.

    :param base_folder: Path to the base directory containing the folders to traverse.
    """
    for current_dir, _subdirs, entries in os.walk(base_folder):
        # Only directories literally named 'Segmentation' are touched
        if os.path.basename(current_dir) != 'Segmentation':
            continue

        for entry in entries:
            if not entry.startswith("tumor_segmentation_slice_"):
                continue

            source = os.path.join(current_dir, entry)
            target = os.path.join(current_dir, entry.replace("tumor_segmentation_", "tumor_"))

            os.rename(source, target)
            print(f"Renamed: {source} -> {target}")

In [None]:
# Shorten the segmentation file names in both converted output trees
for converted_root in ('UCSF', 'BraTS'):
    rename_segmentation_files(converted_root)