# BraTS2020 Brain Tumor Segmentation

This notebook uses the data from https://www.kaggle.com/datasets/awsaf49/brats20-dataset-training-validation, which comes from the BraTS2020 competition. <br><br> There are 4 goals of the project:
1. Manual segmentation labels of tumor sub-regions,
2. Clinical data of overall survival,
3. Clinical evaluation of progression status,
4. Uncertainty estimation for the predicted tumor sub-regions.

## 1. Download Dataset

In [None]:
# Install the Kaggle command-line client used below to download the dataset.
! pip install kaggle

In [None]:
# Download the BraTS2020 training/validation archive with the Kaggle CLI.
# Set your working directory first so the data downloads where you want
# NOTE(review): the Kaggle CLI presumably needs API credentials configured - confirm.
! kaggle datasets download awsaf49/brats20-dataset-training-validation

In [None]:
# Extract the downloaded archive into the current working directory.
! unzip brats20-dataset-training-validation.zip

## 2. Load/Explore Data

In [None]:
import monai
import os
import torch
import matplotlib.pyplot as plt
import shutil
import tempfile
import time
import onnxruntime
import random
import nibabel as nib
from sklearn.model_selection import train_test_split
import glob
from tqdm import tqdm

from monai.apps import DecathlonDataset
from monai.config import print_config
from monai.data import DataLoader, decollate_batch
from monai.handlers.utils import from_engine
from monai.losses import DiceLoss
from monai.inferers import sliding_window_inference
from monai.metrics import DiceMetric
from monai.networks.nets import SegResNet
from monai.transforms import (
    Activations,
    Activationsd,
    AsDiscrete,
    AsDiscreted,
    Compose,
    Invertd,
    LoadImaged,
    MapTransform,
    NormalizeIntensityd,
    Orientationd,
    RandFlipd,
    RandScaleIntensityd,
    RandShiftIntensityd,
    RandSpatialCropd,
    RandRotated,
    RandZoomd,
    Spacingd,
    EnsureTyped,
    EnsureChannelFirstd,
    ScaleIntensityd,
)
from monai.utils import set_determinism


# Print MONAI's configuration summary (helper imported from monai.config above).
# NOTE(review): presumably lists the installed library versions - confirm.
print_config()

In [None]:
# Tell MONAI where the data lives; replace the placeholder with a real path.
os.environ["MONAI_DATA_DIRECTORY"] = "Your path"
directory = os.environ.get("MONAI_DATA_DIRECTORY")

if directory is None:
    # Nothing configured: fall back to a freshly created temporary folder.
    root_dir = tempfile.mkdtemp()
else:
    # Make sure the configured folder exists before using it as the root.
    os.makedirs(directory, exist_ok=True)
    root_dir = directory

print(root_dir)

The segmentation file in the "BraTS20_Training_355" folder has an incorrect name. Before moving forward, rename it so that it follows the same naming convention as the other cases.

In [None]:
# Folder of training case 355, whose segmentation file needs renaming
rename_PATH = "".join(
    (root_dir, "/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/BraTS20_Training_355/")
)
print(rename_PATH)

In [None]:
# Give the segmentation of case 355 the same "<case>_seg.nii" name as every
# other case so it can be located by the shared naming pattern.
old_name = rename_PATH + "W39_1998.09.19_Segm.nii"
new_name = rename_PATH + "BraTS20_Training_355_seg.nii"

try:
    os.rename(old_name, new_name)
except FileNotFoundError:
    # The source file is gone. That is only fine when the target already
    # exists (the cell was run before); otherwise the dataset path is wrong
    # and the error must surface instead of being reported as "renamed".
    if not os.path.exists(new_name):
        raise
    print("File is already renamed!")
else:
    print("File has been re-named successfully!")

Now that the files are named correctly, we can explore the data.

In [None]:
# Read the FLAIR volume of case 355 from its .nii file into a numpy array
test_PATH = root_dir + "/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/"
flair_355_file = test_PATH + "BraTS20_Training_355/BraTS20_Training_355_flair.nii"
flair_355_image = nib.load(flair_355_file)
test_image_flair = flair_355_image.get_fdata()

# Show the array's dimensions and element type
for label, value in (("Shape: ", test_image_flair.shape), ("Dtype: ", test_image_flair.dtype)):
    print(label, value)

In [None]:
# Report the intensity range of the loaded FLAIR volume
flair_min, flair_max = test_image_flair.min(), test_image_flair.max()
print("Min: ", flair_min)
print("Max: ", flair_max)

## 3. Split the Data

In [None]:
# Define the path to your training data directory
train_data_dir = root_dir + "/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/"

# Get all patient directories in the training set. The isdir check drops any
# stray file whose name happens to match the case pattern.
train_dirs = sorted(
    path
    for path in glob.glob(os.path.join(train_data_dir, "BraTS20_Training_*"))
    if os.path.isdir(path)
)

# Build one dictionary per patient mapping each modality (and the
# segmentation) to its file path. The paths use the uncompressed ".nii"
# extension, matching the files referenced by the rename and exploration
# cells earlier in this notebook; ".nii.gz" paths would not match them.
train_files = []
for train_dir in train_dirs:
    case_id = os.path.basename(train_dir)
    train_files.append(
        {
            key: os.path.join(train_dir, f"{case_id}_{key}.nii")
            for key in ("t1", "t1ce", "t2", "flair", "seg")
        }
    )

# Print the number of training patients
print(f"Total number of training patients: {len(train_files)}")

Since we don't have a defined test set, we can randomly split the validation set in half to get a train/val/test split of about 75/12.5/12.5.

In [None]:
# Location of the validation cases (they are divided into val/test later on)
val_data_dir = root_dir + "/BraTS2020_ValidationData/MICCAI_BraTS2020_ValidationData/"

# Collect every validation patient folder in sorted order
val_pattern = os.path.join(val_data_dir, "BraTS20_Validation_*")
val_patient_dirs = glob.glob(val_pattern)
val_patient_dirs.sort()

# Report how many patients are available for the validation/test split
print(f"Total number of validation and test patients: {len(val_patient_dirs)}")


In [None]:
# Split validation into new validation and test sets (50/50 split).
# random_state is fixed so the split is reproducible between runs.
val_dirs, test_dirs = train_test_split(val_patient_dirs, test_size=0.5, random_state=42)

# Only the four image modalities are listed for these cases; no "seg" entry
# is built for the validation/test splits.
modalities = ("t1", "t1ce", "t2", "flair")

# Build one dictionary per patient mapping each modality to its file path.
# The paths use the uncompressed ".nii" extension, matching the files
# referenced by the rename and exploration cells earlier in this notebook.
val_files = []
test_files = []
for case_dirs, case_files in ((val_dirs, val_files), (test_dirs, test_files)):
    for case_dir in case_dirs:
        case_id = os.path.basename(case_dir)
        case_files.append(
            {
                modality: os.path.join(case_dir, f"{case_id}_{modality}.nii")
                for modality in modalities
            }
        )

# Print the sizes of each split
print(f"New Validation set size: {len(val_files)}")
print(f"Test set size: {len(test_files)}")

### Check the data before transforming

In [None]:
# Show how many cases each split holds before any transform is applied
for split_name, split_files in (
    ("train", train_files),
    ("val", val_files),
    ("test", test_files),
):
    print(f"Original {split_name} size: {len(split_files)}")

## 5. Transform the data

In [None]:
from monai.transforms import (
    LoadImage, EnsureChannelFirst, Compose, ScaleIntensity, RandFlip, RandRotate, RandZoom
)
from monai.data import Dataset, DataLoader

# Define transformations for training, validation, and test datasets.
# All transforms are the dictionary ("...d") variants because every sample is
# a dict of file paths keyed by modality.
image_keys = ["t1", "t1ce", "t2", "flair"]
all_keys = image_keys + ["seg"]

# One interpolation mode per key: the images keep each transform's default
# mode, while the label map uses nearest-neighbour so that resampling never
# blends neighbouring labels into values that are not valid classes.
rotate_modes = ["bilinear"] * len(image_keys) + ["nearest"]
zoom_modes = ["area"] * len(image_keys) + ["nearest"]

train_transforms = Compose(
    [
        LoadImaged(keys=all_keys),  # Load images from file paths
        EnsureChannelFirstd(keys=all_keys),  # Ensure channels are first
        ScaleIntensityd(keys=image_keys),  # Rescale image intensities (labels untouched)
        RandFlipd(keys=all_keys, spatial_axis=0, prob=0.5),  # Random flip
        RandRotated(
            keys=all_keys, range_x=0.4, prob=0.5, keep_size=True, mode=rotate_modes
        ),  # Random rotation
        RandZoomd(
            keys=all_keys, min_zoom=0.9, max_zoom=1.1, prob=0.5, mode=zoom_modes
        ),  # Random zoom
    ]
)

# Validation/test pipeline: no augmentation and no "seg" key. LoadImaged (not
# the array-based LoadImage) is required to read from dictionary samples.
val_transforms = Compose(
    [
        LoadImaged(keys=image_keys),
        EnsureChannelFirstd(keys=image_keys),
        ScaleIntensityd(keys=image_keys),
    ]
)

In [None]:
# Wrap each split's file list in a MONAI Dataset. Only the training set uses
# the augmenting pipeline; validation and test share the plain one.
train_ds = Dataset(data=train_files, transform=train_transforms)
val_ds = Dataset(data=val_files, transform=val_transforms)
test_ds = Dataset(data=test_files, transform=val_transforms)

# Build the DataLoaders from shared settings; only training data is shuffled
loader_options = {"batch_size": 2, "num_workers": 8}
train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
val_loader = DataLoader(val_ds, **loader_options)
test_loader = DataLoader(test_ds, **loader_options)

# Report the length of each loader
for split_label, loader in (
    ("Training", train_loader),
    ("Validation", val_loader),
    ("Test", test_loader),
):
    print(f"{split_label} DataLoader size: {len(loader)}")