# Slider-Mania comparing UPenn and BraTS

Name structure of files in BraTS:
"BraTS-MEN-00004-000-seg.nii.gz" (MEN/MET/GLI)
There are five files in each patient folder:
- Segmentation Mask (ending: "seg.nii.gz")
- T1n (ending: "t1n.nii.gz")
- T1c (ending: "t1c.nii.gz")
- T2w (ending: "t2w.nii.gz")
- T2f (ending: "t2f.nii.gz")

Name structure of files in UPenn:
- UPENN-GBM-00007_11_segm.nii.gz
- UPENN-GBM-00007_11_T1.nii.gz
- UPENN-GBM-00007_11_T1GD.nii.gz
- UPENN-GBM-00007_11_T2.nii.gz
- UPENN-GBM-00007_11_FLAIR.nii.gz

Assumed modality mapping (BraTS = UPenn): t1n = T1, t1c = T1GD, t2w = T2, and t2f = FLAIR.




In [1]:
# Install Dependencies.
# Register the gcsfuse apt source for the running Ubuntu codename (`lsb_release -c -s`),
# add the repository signing key, then install the gcsfuse package.
# NOTE(review): the functions in this notebook shell out to `gsutil`, not to a gcsfuse
# mount — confirm whether gcsfuse is still needed.
!echo "deb https://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
!apt -qq update && apt -qq install gcsfuse

# Python packages; versions are not pinned, so a re-run may pick up newer releases.
!pip install pydicom matplotlib transformers nibabel ipywidgets

# Authenticate.
# Uses the Colab auth helper; presumably this is what lets the later gsutil calls
# read the buckets — TODO confirm.
from google.colab import auth
auth.authenticate_user()

deb https://packages.cloud.google.com/apt gcsfuse-jammy main
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2659  100  2659    0     0  22677      0 --:--:-- --:--:-- --:--:-- 22726
OK
32 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mhttps://packages.cloud.google.com/apt/dists/gcsfuse-jammy/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), see the DEPRECATION section in apt-key(8) for details.[0m
The following NEW packages will be installed:
  gcsfuse
0 upgraded, 1 newly installed, 0 to remove and 32 not upgraded.
Need to get 5,561 kB of archives.
After this operation, 0 B of additional disk space will be used.
Selecting previously unselected package gcsfuse.
(Reading database ... 120880 files and directories currently installed.)
Preparing to unpack .../gcsfuse_1.2.1_amd64.deb ...
Unpacking gcsfuse (1.2.1) 

In [2]:
import os
import shutil

# Empty the working directory: delete every file and folder directly under folder_path.
folder_path = "/content"

# Iterate over all the items in the folder
for item_name in os.listdir(folder_path):
    item_path = os.path.join(folder_path, item_name)

    if os.path.islink(item_path) or os.path.isfile(item_path):
        # Symlinks are unlinked, never followed: shutil.rmtree refuses to operate on a
        # symlink to a directory, and a broken symlink is neither a file nor a directory.
        os.remove(item_path)
    elif os.path.ismount(item_path):
        # Never recurse into a mounted filesystem (for example a mounted drive or bucket):
        # rmtree would delete the mounted data itself, not just a local copy.
        continue
    elif os.path.isdir(item_path):
        shutil.rmtree(item_path)


# Search Folders to find URLs needed for Nifti Files

In [3]:
import subprocess
import re

def find_all_scans_and_segm_files_gcs(patient_folder, dataset):
    '''
    Finds the four scan types and the segmentation file of one patient in a GCS bucket,
    handling both 'brats' and 'upenn' naming schemes.

    The patient ID is the five-digit group extracted from `patient_folder`. The folder is
    listed recursively with `gsutil ls -r` and every listed path is matched against the
    dataset's file-name markers. Results are stored under dataset-independent keys
    (`<id>_t1`, `<id>_t1c`, `<id>_t2`, `<id>_flair`, `<id>_seg`).

    Args:
        patient_folder (string): GCS path of the patient folder (expected to end with '/')
        dataset (string): Type of dataset ('brats' or 'upenn')
    Returns:
        (scan_paths, patient_id) on success:
            scan_paths (dict): maps each key above to the matching GCS path, or None when
                no listed file matched
            patient_id (string): Extracted patient ID
        None (a single value, not a tuple) when the patient ID cannot be extracted or the
        listing fails; the reason is printed. Callers must check for None before unpacking.
    '''

    # Regex pattern for extracting patient ID
    patient_id_regex = r'-(\d{5})-' if dataset == 'brats' else r'UPENN-GBM-(\d{5})'
    match = re.search(patient_id_regex, patient_folder)
    if not match:
        print(f"Invalid bucket path format, unable to extract patient ID for {dataset}.")
        return None

    patient_id = match.group(1)  # Extract the patient ID

    # Unified keys: 't1' = t1n (BraTS) / T1 (UPenn), 't1c' = t1c / T1GD,
    # 't2' = t2w / T2, 'flair' = t2f / FLAIR, 'seg' = segmentation file.
    scan_paths = {f'{patient_id}_{key}': None for key in ('t1', 't1c', 't2', 'flair', 'seg')}

    # List the folder with an argument list instead of a shell string, so quotes, spaces
    # or shell metacharacters in the path cannot alter the command.
    try:
        result = subprocess.run(
            ['gsutil', 'ls', '-r', f'{patient_folder}**'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        # Raised when gsutil is not installed / not executable; report it like any other
        # listing failure instead of propagating.
        print("Error:", exc)
        return None

    if result.returncode != 0:
        print("Error:", result.stderr.decode('utf-8'))
        return None

    # Decode output and split into lines
    files = result.stdout.decode('utf-8').splitlines()

    # (marker contained in the file name, unified key); first match wins
    brats_markers = (
        ('t1n.nii.gz', 't1'),
        ('t1c.nii.gz', 't1c'),
        ('t2w.nii.gz', 't2'),
        ('t2f.nii.gz', 'flair'),
        ('seg.nii.gz', 'seg'),
    )
    # (UPenn modality tag, unified key)
    upenn_types = (('FLAIR', 'flair'), ('T1', 't1'), ('T1GD', 't1c'), ('T2', 't2'))

    # Search for the files
    for file in files:
        if dataset == 'brats':
            for marker, key in brats_markers:
                if marker in file:
                    scan_paths[f'{patient_id}_{key}'] = file
                    break
        elif dataset == 'upenn':
            if '_11_segm.nii' in file:
                scan_paths[f'{patient_id}_seg'] = file
                continue
            for upenn_type, key in upenn_types:
                stem = f'_11_{upenn_type}.nii'
                # Accept both uncompressed (.nii) and gzip-compressed (.nii.gz) volumes;
                # matching only '.nii' silently misses every '.nii.gz' scan.
                if file.endswith((stem, stem + '.gz')):
                    scan_paths[f'{patient_id}_{key}'] = file
                    break

    return scan_paths, patient_id




# Save to Colab

In [4]:
import os
import subprocess
import re

def save_to_colab(patient_id, scan_paths):
    """
    Downloads MRI scan files from Google Cloud Storage (GCS) paths to a Colab environment,
    organized in a directory named after the patient ID.

    Each file is copied with `gsutil cp` to `/content/<patient_id>/<scan_type>.nii[.gz]`,
    keeping the `.nii.gz` extension when the source has it and using `.nii` otherwise.
    Entries without a path are skipped with a message. A failed download is reported on
    stdout and the remaining files are still attempted; nothing is raised or returned.

    :param patient_id: Unique identifier for the patient.
    :param scan_paths: Dictionary mapping scan types to their GCS paths (a value may be
        None when that scan was not found).
    """

    # Create a directory named after the patient ID
    patient_dir = f'/content/{patient_id}'
    os.makedirs(patient_dir, exist_ok=True)

    # Download each file
    for scan_type, gcs_path in scan_paths.items():
        if not gcs_path:
            # No source path for this scan type; calling .endswith on None would crash.
            print(f"No GCS path for {scan_type}, skipping download.")
            continue

        # Determine the correct file extension (.nii or .nii.gz)
        file_extension = '.nii.gz' if gcs_path.endswith('.nii.gz') else '.nii'
        local_path = f'{patient_dir}/{scan_type}{file_extension}'

        # Argument list instead of a shell string: paths containing spaces or shell
        # metacharacters are passed to gsutil unchanged.
        try:
            result = subprocess.run(
                ['gsutil', 'cp', gcs_path, local_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as exc:
            # gsutil missing or not executable
            print(f"Error downloading {gcs_path}: {exc}")
            continue

        if result.returncode != 0:
            print(f"Error downloading {gcs_path}: {result.stderr.decode('utf-8')}")





# Read Nifti Files

In [5]:
import nibabel as nib
import matplotlib.pyplot as plt
import numpy as np

def read_nii(file_name):
    '''
    Load the NIfTI image stored at `file_name` and return its voxel data,
    as produced by nibabel's `get_fdata()`.
    '''
    return nib.load(file_name).get_fdata()

# Get segmentations and slices for every channel

In [6]:
import os

def load_patient_data(patient_id):
    """
    Read the downloaded volumes of one patient from /content/<patient_id>/.

    For each of t1, t1c, t2, flair and seg (in that order) the file
    `<patient_id>_<scan>.nii.gz` is tried first, then `<patient_id>_<scan>.nii`.
    Returns a 5-tuple in that order; an entry is None (and a message is printed)
    when neither file exists.
    """
    prefix = f'/content/{patient_id}/{patient_id}'
    volumes = []

    for modality in ('t1', 't1c', 't2', 'flair', 'seg'):
        # Compressed variant takes precedence over the uncompressed one.
        for suffix in ('.nii.gz', '.nii'):
            candidate = f'{prefix}_{modality}{suffix}'
            if os.path.exists(candidate):
                volumes.append(read_nii(candidate))
                break
        else:
            # Neither extension was present on disk.
            print(f"File for {modality} not found.")
            volumes.append(None)

    return tuple(volumes)


In [7]:
import numpy as np
import matplotlib.pyplot as plt

def mask_slices(original_data_t1, original_data_t1c, original_data_t2, original_data_flair, mask_data):
    '''
    Pick the slice (index along the third axis) whose mask has the most non-zero
    entries and extract that slice from every volume, plain and masked.

    Args:
        original_data_t1 (numpy array): T1 MRI scan
        original_data_t1c (numpy array): T1 contrast MRI scan
        original_data_t2 (numpy array): T2 MRI scan
        original_data_flair (numpy array): FLAIR MRI scan
        mask_data (numpy array): segmentation mask
    Returns:
        tuple: (slices, max_slice_index)
            slices (dict): 'mask_slice', '<scan>_slice' and '<scan>_segmented' for
                scan in t1, t1c, t2, flair; the segmented entries keep the scan values
                where the mask is > 0 and are 0 elsewhere
            max_slice_index (int): index of the chosen slice (lowest index on ties,
                0 when the mask is empty)
    '''
    # Non-zero entries of the mask, slice by slice.
    depth = mask_data.shape[2]
    voxel_counts = [np.count_nonzero(mask_data[:, :, z]) for z in range(depth)]

    # max() returns the first maximal element, so ties resolve to the lowest index.
    max_slice_index = max(range(depth), key=voxel_counts.__getitem__, default=0)

    print("Slice with the largest affected area:", max_slice_index)

    chosen_mask = mask_data[:, :, max_slice_index]
    modalities = (
        ('t1', original_data_t1),
        ('t1c', original_data_t1c),
        ('t2', original_data_t2),
        ('flair', original_data_flair),
    )

    slices = {'mask_slice': chosen_mask}
    # Plain slices first, then their masked counterparts (same key order as before).
    for label, volume in modalities:
        slices[f'{label}_slice'] = volume[:, :, max_slice_index]
    for label, volume in modalities:
        slices[f'{label}_segmented'] = np.where(chosen_mask > 0, volume[:, :, max_slice_index], 0)

    return slices, max_slice_index


In [8]:
import matplotlib.pyplot as plt
from ipywidgets import interactive, IntSlider
from IPython.display import display
import numpy as np

def nifti_slider(mask_data, original_data_t1n, original_data_t1c, original_data_t2w, original_data_t2f):
    '''
    Creates a slider to scroll through the slices of multiple 3D volumes including segmentation masks.

    For the selected slice (index along the third axis) a 3x4 grid is drawn: each of the
    four scans next to a copy in which everything outside the mask (mask <= 0) is set
    to 0, followed by the mask itself. The slider starts on the slice with the most
    non-zero mask voxels and ranges over the third axis of `original_data_t1n`.

    Args:
        mask_data (numpy array): segmentation mask
        original_data_t1n (numpy array): T1 scan
        original_data_t1c (numpy array): contrast-enhanced T1 scan
        original_data_t2w (numpy array): T2 scan
        original_data_t2f (numpy array): FLAIR scan
    Returns:
        int: index of the slice with the most non-zero mask voxels
    '''

    # Determine the slice with the largest tumor area.
    # Count labelled voxels rather than summing the mask: a sum weights every voxel by
    # its label value, so a small region with a high label could outrank a larger one.
    tumor_areas = np.count_nonzero(mask_data, axis=(0, 1))
    max_slice_index = int(np.argmax(tumor_areas))  # plain int for the widget and callers

    # Function to apply mask and isolate tumor
    def apply_mask(data, mask, slice_number):
        # Keep the scan values where the mask is set, zero everywhere else
        return np.where(mask[:, :, slice_number] > 0, data[:, :, slice_number], 0)

    scans = (
        ('T1', original_data_t1n),
        ('T1c', original_data_t1c),
        ('T2', original_data_t2w),
        ('Flair', original_data_t2f),
    )

    # Function to update plots; invoked by the widget with the current slider value
    def plot_slice(slice_number):
        fig, axs = plt.subplots(3, 4, figsize=(8, 6))  # 3 rows and 4 columns
        axs = axs.flatten()

        # First two rows: every scan followed by its masked ("Tumor") version
        panels = []
        for title, volume in scans:
            panels.append((title, volume[:, :, slice_number]))
            panels.append((f'{title} Tumor', apply_mask(volume, mask_data, slice_number)))

        for ax, (title, data) in zip(axs[:8], panels):
            ax.imshow(data, cmap='gray')
            ax.set_title(title)
            ax.axis('off')

        # Plot the segmentation mask in the third row
        axs[8].imshow(mask_data[:, :, slice_number], cmap='gray')
        axs[8].set_title('Mask')
        axs[8].axis('off')

        # Hide any remaining empty subplots
        for ax in axs[9:]:
            ax.axis('off')

        plt.tight_layout()
        plt.show()

    # Set up slider
    max_slice = original_data_t1n.shape[2] - 1
    slider = IntSlider(min=0, max=max_slice, step=1, value=max_slice_index)

    # Display the widget and the initial plot
    interactive_plot = interactive(plot_slice, slice_number=slider)
    display(interactive_plot)

    return max_slice_index


# Putting this together

In [9]:
def slider_mania(bucket_path, patient_folder, dataset):
    """
    Run the whole pipeline for one patient: locate the files in GCS, download them,
    load the volumes and show the interactive slice viewer.

    :param bucket_path: GCS prefix containing the patient folder (with trailing '/').
    :param patient_folder: Name of the patient folder (with trailing '/').
    :param dataset: Naming scheme of the files, 'brats' or 'upenn'.
    :return: None. Progress and problems are reported on stdout; the function stops
        early when the files cannot be listed or a volume is missing.
    """
    found = find_all_scans_and_segm_files_gcs(bucket_path+patient_folder, dataset)
    if found is None:
        # The finder returns a bare None on failure; unpacking it would raise an
        # unhelpful TypeError.
        print(f'Could not locate scans under {bucket_path+patient_folder}; nothing to display.')
        return
    scan_paths, patient_id = found

    # Only request files that were actually found in the listing.
    available_paths = {scan_type: path for scan_type, path in scan_paths.items() if path is not None}
    save_to_colab(patient_id, available_paths)

    volumes = load_patient_data(patient_id)
    if any(volume is None for volume in volumes):
        # The viewer indexes every volume, so it cannot run with a missing one.
        print(f'Patient ID: {patient_id}, incomplete data; viewer not shown.')
        return

    original_data_t1n, original_data_t1c, original_data_t2w, original_data_t2f, mask_data = volumes
    max_slice_index = nifti_slider(mask_data, original_data_t1n, original_data_t1c, original_data_t2w, original_data_t2f)
    print(f'Patient ID: {patient_id}, Slice with maximal Tumor: {max_slice_index}')

# Compare different patients with sliders

## Patient 00002, Diagnosis: Glioblastoma?, Dataset: UPenn

In [10]:
# Open the viewer for UPenn folder UPENN-GBM-00002; the files are listed under
# bucket_path + patient_folder, so both need their trailing '/'.
bucket_path = 'gs://test_nifti/' # Replace with actual bucket path
patient_folder = 'UPENN-GBM-00002/' # Replace with actual folder name
dataset="upenn"
slider_mania(bucket_path, patient_folder, dataset)

interactive(children=(IntSlider(value=67, description='slice_number', max=154), Output()), _dom_classes=('widg…

Patient ID: 00002, Slice with maximal Tumor: 67


### Patient 00021, Diagnosis: MEN, Dataset: BraTS

In [11]:
# Open the viewer for BraTS folder BraTS-MEN-00021-000; the files are listed under
# bucket_path + patient_folder, so both need their trailing '/'.
bucket_path = 'gs://test_brats/brats/' # Replace with actual bucket path
patient_folder = 'BraTS-MEN-00021-000/' # Replace with actual folder name
dataset="brats"
slider_mania(bucket_path, patient_folder, dataset)

interactive(children=(IntSlider(value=95, description='slice_number', max=154), Output()), _dom_classes=('widg…

Patient ID: 00021, Slice with maximal Tumor: 95


### Patient 00163, Diagnosis: MET, Dataset: BraTS

In [12]:
# Open the viewer for BraTS folder BraTS-MET-00163-000; the files are listed under
# bucket_path + patient_folder, so both need their trailing '/'.
bucket_path = 'gs://test_brats/brats/'
patient_folder = 'BraTS-MET-00163-000/'
dataset="brats"
slider_mania(bucket_path, patient_folder, dataset)

interactive(children=(IntSlider(value=37, description='slice_number', max=154), Output()), _dom_classes=('widg…

Patient ID: 00163, Slice with maximal Tumor: 37


### Patient 00006, Diagnosis: GLI, Dataset: BraTS

In [13]:
# Open the viewer for BraTS folder BraTS-GLI-00006-000; the files are listed under
# bucket_path + patient_folder, so both need their trailing '/'.
bucket_path = 'gs://test_brats/brats/'
patient_folder = 'BraTS-GLI-00006-000/'
dataset="brats"
slider_mania(bucket_path, patient_folder, dataset)


interactive(children=(IntSlider(value=69, description='slice_number', max=154), Output()), _dom_classes=('widg…

Patient ID: 00006, Slice with maximal Tumor: 69
