What follows is **a** solution for the 2021 [RSNA-MICCAI Brain Tumor Radiogenomic Classification](https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/overview) Kaggle competition. This is definitely **not** a winning solution, but is offered as an example of an end-to-end solution. It uses TorchIO for data manipulation, NiBabel for reading NIfTI images, and TensorFlow to train a 3D Convolutional Neural Network.

The only **prerequisite** to running *this* kernel is to "Add data" from the "Notebook Output File" of "pip-download-torchio" (https://www.kaggle.com/ohbewise/pip-download-torchio) to this kernel.  This is needed to enable pip to install TorchIO offline.

Note that with `demo = True` only 10 patients are processed; with `demo = False` all patients are processed, but the notebook runs out of RAM.  For this reason I have provided, for each step of the solution, a link to a standalone kernel that can process all the patients without running out of RAM.

## Install and import libraries

In [None]:
# If this line fails please see the prerequisite above
!pip install --quiet --no-index --find-links ../input/pip-download-torchio/ --requirement ../input/pip-download-torchio/requirements.txt

In [None]:
# import libraries
import os
import csv
import pickle
import numpy as np
import pandas as pd
import nibabel as nib
import torchio as tio
import tensorflow as tf
from pathlib import Path
import matplotlib.pyplot as plt

# Runtime knobs that bound how much work the notebook does.
demo = True  # True -> patient lists are truncated to the first 10 entries
scan_types = ['FLAIR', 'T1w', 'T1wCE', 'T2w']  # every MRI sequence is used

## Preprocess data: DICOM to normalized NIfTI with TorchIO
This section uses TorchIO to convert folders of DICOM images into a NIfTI file. More importantly, it normalizes, resizes and rotates the MRI scans. For a standalone kernel of this section see https://www.kaggle.com/ohbewise/dicom-to-normalized-nifti-with-torchio

In [None]:
# Preprocess data: turn every patient's DICOM series into one preprocessed
# NIfTI file per scan type under `out_dir`.
data_dir   = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/'
out_dir    = '/kaggle/working/processed'

# Cases the competition host said to exclude
# https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/262046
excluded_patients = {'00109', '00123', '00709'}

# The pipeline is the same for every scan, so it is composed once up front
# instead of once per (patient, scan type).
transforms = [
    tio.ToCanonical(),
    tio.Resample(1),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
    tio.RescaleIntensity((-1, 1)),
    tio.CropOrPad((128, 128, 64)),  # must match the input shape the model is built with
]
transform = tio.Compose(transforms)

for dataset in ['train']:
    dataset_dir = f'{data_dir}{dataset}'

    # os.listdir returns entries in arbitrary order, so sort to make the demo
    # subset reproducible.  Excluded cases are dropped *before* truncating so
    # that demo mode always gets its full 10 patients when enough exist.
    patients = sorted(
        patient for patient in os.listdir(dataset_dir)
        if patient not in excluded_patients
    )
    if demo:
        patients = patients[:10]

    print(f'Total patients in {dataset} dataset: {len(patients)}')

    for count, patient in enumerate(patients, start=1):
        print(f'{dataset}: {count}/{len(patients)}')

        for scan_type in scan_types:
            scan_src  = f'{dataset_dir}/{patient}/{scan_type}/'
            scan_dest = f'{out_dir}/{dataset}/{patient}/{scan_type}/'
            Path(scan_dest).mkdir(parents=True, exist_ok=True)

            # ScalarImage is pointed at the folder holding the DICOM slices.
            image = tio.ScalarImage(scan_src)
            preprocessed = transform(image)
            preprocessed.save(f'{scan_dest}/{scan_type}.nii.gz')

## Build datasets: NIfTI to Split Dataset with NiBabel
This section uses NiBabel to read the NIfTI files in the "processed/train" folder and split them into a training and a validation dataset. For a standalone kernel of this section see https://www.kaggle.com/ohbewise/nifti-to-split-dataset-with-nibabel

In [None]:
# build datasets

# dataset processing functions
def read_nifti_file(filepath):
    """Load the NIfTI file at ``filepath`` and return its voxel data.

    The image is opened with ``nib.load`` and the array produced by its
    ``get_fdata()`` method is returned unchanged.
    """
    return nib.load(filepath).get_fdata()

def add_batch_channel(volume):
    """Return ``volume`` with a trailing and a leading axis of size 1 added.

    The trailing axis (``axis=-1``) is added first and the leading axis
    (``axis=0``) second, so an input of shape ``S`` comes back with shape
    ``(1, *S, 1)``.
    """
    with_channel = tf.expand_dims(volume, axis=-1)
    return tf.expand_dims(with_channel, axis=0)

def process_scan(filepath):
    """Read the NIfTI volume at ``filepath`` and add batch/channel axes to it.

    Equivalent to ``add_batch_channel(read_nifti_file(filepath))``.
    """
    return add_batch_channel(read_nifti_file(filepath))

# get labels (first CSV column becomes the index used for the lookups below)
labels_df = pd.read_csv(data_dir+'train_labels.csv', index_col=0)

# split patients
# os.listdir returns entries in arbitrary order; sorting first makes the
# random_state=42 split below reproducible from run to run.
patients = sorted(os.listdir(f'{out_dir}/train'))
from sklearn.model_selection import train_test_split
train, validation = train_test_split(patients, test_size=0.3, random_state=42)
print(f'{len(patients)} total patients.\n   {len(train)} in the train split.\n   {len(validation)} in the validation split')

splits_dict = {'train':train, 'validation':validation}

# One saved tf.data dataset is produced per (scan type, split) pair.
for scan_type in scan_types:
    print(f'{scan_type} start')
    for split_name, split_list in splits_dict.items():
        print(f'   {split_name} start')
        label_list = []
        filepaths = []
        for patient in split_list:
            # Patient folder names are strings; the label table is looked up by
            # integer id.  `.at` is the public scalar accessor (replaces the
            # private `_get_value`).
            label = labels_df.at[int(patient), 'MGMT_value']
            # Labels get the same extra axes as the volumes so both sides of
            # each dataset element carry a leading axis of size 1.
            label = add_batch_channel(label)
            label_list.append(label)
            filepath  = f'{out_dir}/train/{patient}/{scan_type}/{scan_type}.nii.gz'
            filepaths.append(filepath)

        # NOTE: this holds every volume of the split in memory at once.
        features = np.array([process_scan(filepath) for filepath in filepaths])
        labels = np.array(label_list, dtype=np.uint8)
        dataset = tf.data.Dataset.from_tensor_slices((features, labels))

        # save dataset
        tf_data_path = f'./datasets/{scan_type}_{split_name}_dataset'
        tf.data.experimental.save(dataset, tf_data_path, compression='GZIP')
        with open(tf_data_path + '/element_spec', 'wb') as out_:  # also save the element_spec to disk for future loading
            pickle.dump(dataset.element_spec, out_)
        print(f'   {split_name} done')
    print(f'{scan_type} done')

## Define, train, and evaluate model:  Dataset to Model with Tensorflow

This section uses TensorFlow to define, train, and evaluate a model.  For a standalone kernel of this section see https://www.kaggle.com/ohbewise/dataset-to-model-with-tensorflow

In [None]:
# Define, train, and evaluate model
# source: https://keras.io/examples/vision/3D_image_classification/
def get_model(width=128, height=128, depth=64, name='3dcnn'):
    """Build and compile a 3D convolutional binary classifier.

    Args:
        width, height, depth: spatial size of the single-channel input volume.
        name: name given to the Keras model.

    Returns:
        A compiled ``tf.keras.Model`` with one sigmoid output unit, trained with
        binary cross-entropy and reporting accuracy under the key ``"acc"``.
    """
    layers = tf.keras.layers

    inputs = tf.keras.Input((width, height, depth, 1))

    # Four identical stages (conv -> max-pool -> batch-norm); only the number
    # of convolution filters differs between them.
    x = inputs
    for n_filters in (64, 64, 128, 256):
        x = layers.Conv3D(filters=n_filters, kernel_size=3, activation="relu")(x)
        x = layers.MaxPool3D(pool_size=2)(x)
        x = layers.BatchNormalization()(x)

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs, outputs, name=name)

    # Adam with a staircase exponential decay starting at 1e-4.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        0.0001, decay_steps=100000, decay_rate=0.96, staircase=True
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["acc"])

    return model

In [None]:
# Keras ignores fit(shuffle=...) when the input is a tf.data.Dataset, so the
# training data is shuffled explicitly.  The buffer is kept small because each
# element is a whole volume.
shuffle_buffer_size = 32

# Train, save and plot one model per scan type.
for scan_type in scan_types:
    # load train_dataset dataset (the element_spec was pickled next to it when it was saved)
    tf_data_path = f'./datasets/{scan_type}_train_dataset'
    with open(tf_data_path + '/element_spec', 'rb') as in_:
        es = pickle.load(in_)
    train_dataset = tf.data.experimental.load(tf_data_path, es, compression='GZIP')
    train_dataset = train_dataset.shuffle(shuffle_buffer_size, reshuffle_each_iteration=True)

    # load validation_dataset
    tf_data_path = f'./datasets/{scan_type}_validation_dataset'
    with open(tf_data_path + '/element_spec', 'rb') as in_:
        es = pickle.load(in_)
    validation_dataset = tf.data.experimental.load(tf_data_path, es, compression='GZIP')

    # Get Model
    model = get_model(width=128, height=128, depth=64,name=scan_type)

    # Define callbacks.
    # ModelCheckpoint keeps its default monitored quantity; early stopping
    # watches validation accuracy.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        f'{scan_type}_3d_image_classification.h5', save_best_only=True
    )
    # restore_best_weights makes the model saved below carry the best
    # val_acc weights rather than those from `patience` epochs later.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_acc", patience=15, restore_best_weights=True
    )

    epochs = 100
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=epochs,
        verbose=2,
        callbacks=[checkpoint_cb, early_stopping_cb],
    )

    #save model
    model.save(f'./models/{scan_type}')

    # show metrics: training vs. validation curves for accuracy and loss
    fig, ax = plt.subplots(1, 2, figsize=(20, 3))
    ax = ax.ravel()

    for i, metric in enumerate(["acc", "loss"]):
        ax[i].plot(history.history[metric])
        ax[i].plot(history.history["val_" + metric])
        ax[i].set_title("{} Model {}".format(scan_type, metric))
        ax[i].set_xlabel("epochs")
        ax[i].set_ylabel(metric)
        ax[i].legend(["train", "val"])

## Write predictions to submission.csv: Model Prediction to Submission
This section uses the trained model to predict the test set and writes the results to submission.csv.  For a standalone kernel of this section see https://www.kaggle.com/ohbewise/model-prediction-to-submission

In [None]:
# write predictions to submission.csv

# Set up directories
data_dir   = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/'
test_dir   = f'{data_dir}test'
# os.listdir order is arbitrary; sort so the demo subset is reproducible.
patients = sorted(os.listdir(test_dir))
if demo:
    patients = patients[:10]
print(f'Total patients: {len(patients)}\n\n')

out_dir    = '/kaggle/working/processed'

# Only the T1wCE model is used for the submission.
scan_types = ['T1wCE']

# Same preprocessing as the training data; composed once because it does not
# depend on the patient or scan type.
transforms = [
    tio.ToCanonical(),
    tio.Resample(1),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
    tio.RescaleIntensity((-1, 1)),
    tio.CropOrPad((128,128,64)),
]
transform = tio.Compose(transforms)

for scan_type in scan_types:
    # tf model: loaded once per scan type instead of once per patient
    model = tf.keras.models.load_model(f'./models/{scan_type}')

    # The file is opened in 'w' mode inside this loop, so with more than one
    # scan type only the last one's predictions would remain in the file.
    # The context manager closes the file even if a patient fails to process.
    with open('/kaggle/working/submission.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['BraTS21ID','MGMT_value'])
        for patient in patients:
            # dicom to nifti
            scan_src  = f'{test_dir}/{patient}/{scan_type}/'
            scan_dest = f'{out_dir}/test/{patient}/{scan_type}/'
            Path(scan_dest).mkdir(parents=True, exist_ok=True)
            image = tio.ScalarImage(scan_src)  # subclass of Image
            preprocessed = transform(image)
            filepath = f'{scan_dest}/{scan_type}.nii.gz'
            preprocessed.save(filepath)

            # process_scan: reload the saved volume with batch/channel axes added
            case = process_scan(filepath)

            # get prediction
            prediction = model.predict(case)

            # write prediction (single sigmoid output of the single case)
            print(f'{patient},{prediction[0][0]}')
            writer.writerow([patient, prediction[0][0]])