In [None]:
!pip install ../input/rsnamiccai-btrc-dataset/packages/monai-0.7.0-202109240007-py3-none-any.whl

In [None]:
import os
from glob import glob
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import cv2
import scipy.ndimage
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import roc_auc_score, roc_curve, auc
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from monai.networks.nets import SegResNet, DenseNet

In [None]:
# Competition files: the DICOM series and sample_submission.csv are read from here
RAW_DATA_PATH = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

# Companion dataset; model weights and saved predictions are read from the same root
PROCESSED_DATA_PATH = '../input/rsnamiccai-btrc-dataset'
MODELS_PATH = PROCESSED_DATA_PATH
PREDICTIONS_PATH = f'{PROCESSED_DATA_PATH}/predictions'

In [None]:
# Column dtypes applied when reading both CSVs; the identifier is read as object, label and fold as uint8
train_test_dtypes = dict(BraTS21ID='object', MGMT_value=np.uint8, fold=np.uint8)

df_train = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/train_task2.csv',
    dtype=train_test_dtypes
)
# Only the case identifier column is taken from the sample submission
df_test = pd.read_csv(
    f'{RAW_DATA_PATH}/sample_submission.csv',
    usecols=['BraTS21ID'],
    dtype=train_test_dtypes
)

# Report shape and memory footprint (in MB) of both frames
print(f'Training Set Shape: {df_train.shape} - Memory Usage: {df_train.memory_usage().sum() / 1024 ** 2:.2f} MB')
print(f'Test Set Shape: {df_test.shape} - Memory Usage: {df_test.memory_usage().sum() / 1024 ** 2:.2f} MB')

## DICOM Preprocessing

In [None]:
def apply_manual_voi_lut(dicom_file, window_width, window_center):

    """
    Create a VOI LUT with given window width and window center and apply it to dicom file's pixel array

    The LUT covers every stored value between the minimum and the maximum of the pixel array (both included),
    so the brightest pixel is mapped as well, and negative stored values are supported.

    Parameters
    ----------
    dicom_file (pydicom.dataset.FileDataset): Dicom file read into memory
    window_width (int): Width of the modality pixel values
    window_center (int): Center of the modality pixel values

    Returns
    -------
    image [array-like of shape (width, height)]: Array of 2D image after manual VOI LUT applied to pixel array (uint8)
    """

    pixel_array = dicom_file.pixel_array
    min_pixel_value = int(np.amin(pixel_array))
    max_pixel_value = int(np.amax(pixel_array))

    # MONOCHROME1 output is flipped (255 - value); for every other photometric interpretation
    # the window center is mirrored inside the stored pixel range instead.
    # NOTE(review): the mirroring is applied to non-MONOCHROME1 images exactly as before - confirm this convention is intended
    invert = dicom_file.PhotometricInterpretation == 'MONOCHROME1'
    if not invert:
        window_center = (max_pixel_value - min_pixel_value) - window_center

    # One LUT entry per stored value in [min_pixel_value, max_pixel_value]
    stored_values = np.arange(min_pixel_value, max_pixel_value + 1, dtype=np.float64)
    modality_lut_values = stored_values * float(dicom_file.RescaleSlope) + float(dicom_file.RescaleIntercept)
    voi_lut_values = ((modality_lut_values - window_center) / window_width + 0.5) * 255.0
    clamped_values = np.clip(voi_lut_values, 0, 255)

    if invert:
        clamped_values = 255 - clamped_values

    voi_lut = np.round(clamped_values).astype(np.uint8)

    # Shift by the minimum so that the smallest stored value (possibly negative) maps to LUT entry 0
    return voi_lut[np.asarray(pixel_array, dtype=np.int64) - min_pixel_value]


def apply_auto_voi_lut(dicom_file):

    """
    Apply VOI LUT if it exists in the dicom file, otherwise use window width and window center given in the dicom file

    The result is min-max scaled to the 0-255 range. An image with a single constant value cannot be scaled
    (its range is zero), so it is returned as an all-zero image instead of dividing by zero.

    Parameters
    ----------
    dicom_file (pydicom.dataset.FileDataset): Dicom file read into memory

    Returns
    -------
    image [array-like of shape (width, height)]: Array of 2D image after automatic VOI LUT applied to pixel array (uint8)
    """

    image = apply_voi_lut(dicom_file.pixel_array, dicom_file)

    # Flip intensities for MONOCHROME1 photometric interpretation
    if dicom_file.PhotometricInterpretation == 'MONOCHROME1':
        image = np.amax(image) - image

    image = image - np.min(image)
    value_range = np.max(image)

    # Constant image: nothing to stretch, avoid 0 / 0
    if value_range == 0:
        return np.zeros(image.shape, dtype=np.uint8)

    image = image / value_range
    image = (image * 255).astype(np.uint8)

    return image


def get_plane(dicom_file):

    """
    Determine the image plane from the ImageOrientationPatient element (0020, 0037) of the dicom file

    Parameters
    ----------
    dicom_file (pydicom.dataset.FileDataset): Dicom file read into memory

    Returns
    -------
    plane (str or None): Image plane (Coronal, Sagittal or Axial), None if the orientation matches none of them
    """

    orientation = dicom_file[0x0020, 0x0037]

    # Rounded x and y components of the row vector (items 0, 1) and of the column vector (items 3, 4)
    rounded_components = tuple(round(orientation[position]) for position in (0, 1, 3, 4))

    # (row_x, row_y, col_x, col_y) -> plane
    plane_by_components = {
        (1, 0, 0, 0): 'Coronal',
        (0, 1, 0, 0): 'Sagittal',
        (1, 0, 0, 1): 'Axial',
    }

    return plane_by_components.get(rounded_components)


def change_spacing(mri, current_spacing, new_spacing):

    """
    Change spacing of height, width and depth

    Parameters
    ----------
    mri [array-like of shape (depth, width, height)]: Array of 3D mpMRI
    current_spacing [array-like of shape (3)]: Array of current spacings in Z, X, Y direction (millimeters)
    new_spacing [array-like of shape (3)]: Array of new spacings in Z, X, Y direction (millimeters)

    Returns
    -------
    mri [array-like of shape (depth, width, height)]: Array of 3D mpMRI after change spacings
    """

    # Accept plain lists/tuples as well as arrays
    current_spacing = np.asarray(current_spacing)
    new_spacing = np.asarray(new_spacing)
    original_shape = np.asarray(mri.shape)

    # Round the target shape to whole voxels, then recompute the factor that produces exactly that shape
    resize_factor = current_spacing / new_spacing
    normalized_shape = np.round(original_shape * resize_factor)
    resize_factor = normalized_shape / original_shape

    # scipy.ndimage.zoom is the public entry point; the scipy.ndimage.interpolation namespace is deprecated
    mri = scipy.ndimage.zoom(mri, resize_factor, mode='nearest')

    return mri


def load_mri(mri_path, window_width=None, window_center=None, voi_lut=None, new_spacing=None, reorder_plane=False, resize_shape=None, verbose=False):

    """
    Read slices of mpMRI into memory and apply preprocessing steps

    Steps, in order: read and sort the slices, apply the selected VOI LUT (or use raw pixels and drop
    constant slices), stack, optionally resample to a new spacing, optionally reorder the plane,
    crop to the bounding box of positive voxels and optionally resize to a cube.

    Parameters
    ----------
    mri_path (str): Directory of the mpMRI
    window_width (int or None): Width of the modality pixel values
    window_center (int or None): Center of the modality pixel values
    voi_lut (str or None): Whether to use manual or auto VOI LUT ("manual" or "auto")
    new_spacing [array-like of shape (3)] or None: Array of new spacings in Z, X, Y direction (millimeters)
    reorder_plane (bool): Whether reorder planes or not
    resize_shape (int or None): Resize shape of the planes
    verbose (bool): Verbosity flag

    Returns
    -------
    mri [np.ndarray of shape (depth, width, height)] or None: Array of 3D mpMRI, None if there is no
    usable slice or no positive voxel to crop around
    """

    # Slices are ordered by the integer found between the last "-" of the path and the file extension
    slice_paths = sorted(glob(f'{mri_path}/*.dcm'), key=lambda x: int(str(x).split('-')[-1].split('.')[0]))
    dicom_files = [pydicom.dcmread(slice_path) for slice_path in slice_paths]
    slices = []

    for dicom_file in dicom_files:

        if voi_lut == 'manual':
            # Applying manually created voi lut
            image = apply_manual_voi_lut(dicom_file=dicom_file, window_width=window_width, window_center=window_center)
        elif voi_lut == 'auto':
            # Applying voi lut of the dicom file
            image = apply_auto_voi_lut(dicom_file=dicom_file)
        else:
            # Not applying voi lut
            image = dicom_file.pixel_array

            if dicom_file.PhotometricInterpretation == 'MONOCHROME1':
                image = np.amax(image) - image

            # Exclude empty (constant) slices; this filter only runs in the raw pixel branch
            if np.all(image == np.min(image)):
                continue

        slices.append(image)

    # Not processing all zero mpMRIs
    if len(slices) == 0:
        return None

    mri = np.stack(slices)

    # Change spacing if new spacing is given (spacing metadata of the first slice is only read when needed)
    if new_spacing is not None:
        current_spacing = np.array([float(dicom_files[0].SliceThickness)] + list(dicom_files[0].PixelSpacing), dtype=np.float32)
        if np.any(np.array(new_spacing) != current_spacing):
            mri = change_spacing(mri=mri, current_spacing=current_spacing, new_spacing=new_spacing)

    # Reorder plane if it is set to True (position/orientation metadata is only read when needed)
    if reorder_plane:
        positions = [dicom_file.ImagePositionPatient for dicom_file in dicom_files]
        plane = get_plane(dicom_file=dicom_files[0])

        if plane == 'Coronal':
            if positions[0][1] < positions[-1][1]:
                mri = mri[::-1]
            mri = mri.transpose((1, 0, 2))
        elif plane == 'Sagittal':
            if positions[0][0] < positions[-1][0]:
                mri = mri[::-1]
            mri = mri.transpose((1, 2, 0))
            mri = np.rot90(mri, 2, axes=(1, 2))
        elif plane == 'Axial':
            if positions[0][2] > positions[-1][2]:
                mri = mri[::-1]
            mri = np.rot90(mri, 2)

    # Crop non-zero slices along Z-X, Z-Y and X-Y axes
    positive_voxels = np.array((mri > 0).nonzero())
    if positive_voxels.shape[1] == 0:
        # Nothing to crop around; treated like an all zero mpMRI
        return None

    mmin = positive_voxels.min(axis=1)
    mmax = positive_voxels.max(axis=1)
    mri = mri[
        mmin[0]:mmax[0] + 1,
        mmin[1]:mmax[1] + 1,
        mmin[2]:mmax[2] + 1,
    ]

    # Resize sampled planes from longest axis if resize shape is given
    if resize_shape is not None:

        resized_mri = np.zeros((resize_shape, resize_shape, resize_shape), dtype=np.int16)
        longest_axis = int(np.argmax(mri.shape))
        # Evenly spaced positions along the longest axis; int() truncates each one to a plane index
        sampled_positions = np.linspace(0, mri.shape[longest_axis] - 1, resize_shape)

        for i, s in enumerate(sampled_positions):
            if longest_axis == 0:
                resized_mri[i] = cv2.resize(mri[int(s)], (resize_shape, resize_shape), interpolation=cv2.INTER_LANCZOS4)
            elif longest_axis == 1:
                resized_mri[:, i] = cv2.resize(mri[:, int(s)], (resize_shape, resize_shape), interpolation=cv2.INTER_LANCZOS4)
            else:
                resized_mri[:, :, i] = cv2.resize(mri[:, :, int(s)], (resize_shape, resize_shape), interpolation=cv2.INTER_LANCZOS4)

        mri = resized_mri

    if verbose:
        print(f'{mri_path} - MRI Shape: {mri.shape} - Mean: {np.mean(mri):.2f} - Std: {np.std(mri):.2f} - Min: {np.min(mri):.2f} - Max: {np.max(mri):.2f} - Type: {mri.dtype}')

    return mri


## Tumor Segmentation

In [None]:
class SegmentationFeatureExtractor:

    """
    Add tumor size features to the training and test sets using one SegResNet segmentation model per MRI type

    For every case and MRI type the predicted mask is thresholded by rounding the sigmoid output, and the number
    of voxels set in each of the three output channels is stored in the columns
    {mri_type}_Whole_Tumor_Area, {mri_type}_Tumor_Core_Area and {mri_type}_Enhancing_Tumor_Area.
    """

    # MRI types for which a segmentation checkpoint is loaded
    MRI_TYPES = ('FLAIR', 'T1w')
    # Feature name suffixes, in the order of the model's output channels
    FEATURE_NAMES = ('Whole_Tumor_Area', 'Tumor_Core_Area', 'Enhancing_Tumor_Area')

    def __init__(self, df_train, df_test, train_features_path=None):

        """
        Parameters
        ----------
        df_train (pandas.DataFrame): Training set with a BraTS21ID column
        df_test (pandas.DataFrame): Test set with a BraTS21ID column
        train_features_path (str or None): CSV file of precomputed training set features, None to compute them
        """

        self.df_train = df_train
        self.df_test = df_test
        self.train_features_path = train_features_path
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    def load_segmentation_models(self):

        """
        Build one SegResNet per MRI type and load its weights from MODELS_PATH

        Returns
        -------
        models (dict): Dictionary of models in evaluation mode, keyed by MRI type
        """

        models = {}

        for mri_type in self.MRI_TYPES:
            model = SegResNet(
                spatial_dims=3,
                in_channels=1,
                out_channels=3,
                init_filters=8,
                dropout_prob=0.0,
                blocks_down=(1, 2, 2, 4),
                blocks_up=(1, 1, 1),
                upsample_mode='nontrainable'
            )
            # map_location lets checkpoints saved on another device load on the current one
            model.load_state_dict(torch.load(f'{MODELS_PATH}/segresnet/segresnet_{mri_type}.pt', map_location=self.device))
            model.to(self.device)
            model.eval()
            models[mri_type] = model
            print(f'Loaded segresnet_{mri_type}.pt model')

        return models

    def _predict_region_sizes(self, model, mri):

        """
        Run a segmentation model on a single volume and count the voxels predicted in each output channel

        Parameters
        ----------
        model (callable): Segmentation model returning logits of shape (1, 3, depth, width, height)
        mri [np.ndarray of shape (depth, width, height)]: Array of 3D mpMRI

        Returns
        -------
        sizes (tuple of 3 floats): Sum of the rounded sigmoid mask of channel 0, 1 and 2
        """

        # Standardize, then add batch and channel dimensions
        mri = (mri - mri.mean()) / mri.std()
        mri = torch.as_tensor(mri, dtype=torch.float)
        mri = torch.unsqueeze(mri, 0)
        mri = torch.unsqueeze(mri, 0)
        mri = mri.to(self.device)

        # Inference only: no autograd graph is needed
        with torch.no_grad():
            mask = torch.sigmoid(model(mri)).cpu().numpy()

        mask = np.squeeze(mask, axis=0)
        mask = np.round(mask)

        return np.sum(mask[0]), np.sum(mask[1]), np.sum(mask[2])

    def _add_segmentation_features(self, df, split, models):

        """
        Compute the segmentation features of every case in df and write them into df in place

        Parameters
        ----------
        df (pandas.DataFrame): Dataframe with a BraTS21ID column
        split (str): Sub-directory of RAW_DATA_PATH holding the cases ("train" or "test")
        models (dict): Dictionary of segmentation models keyed by MRI type
        """

        for case in tqdm(df['BraTS21ID'].values):
            case_rows = df['BraTS21ID'] == case
            for mri_type in self.MRI_TYPES:

                mri = load_mri(
                    mri_path=f'{RAW_DATA_PATH}/{split}/{case}/{mri_type}',
                    window_width=None,
                    window_center=None,
                    voi_lut=None,
                    new_spacing=None,
                    reorder_plane=True,
                    resize_shape=144,
                    verbose=False
                )

                # This block is required for a failproof pipeline
                # load_mri returns None for unusable mpMRIs; their features are left empty
                if mri is None:
                    continue

                region_sizes = self._predict_region_sizes(models[mri_type], mri)
                for feature_name, region_size in zip(self.FEATURE_NAMES, region_sizes):
                    df.loc[case_rows, f'{mri_type}_{feature_name}'] = region_size

    def extract_features(self):

        """
        Create (or load) training set features and create test set features

        Returns
        -------
        df_train (pandas.DataFrame): Deep copy of the training set with feature columns
        df_test (pandas.DataFrame): Deep copy of the test set with feature columns
        """

        models = self.load_segmentation_models()

        print('\nCreating training set features')
        if self.train_features_path:
            # Load precomputed training set features from the given file;
            # fall back to the default location when the given value is not an existing file
            features_path = self.train_features_path
            if not (isinstance(features_path, (str, os.PathLike)) and os.path.isfile(features_path)):
                features_path = f'{PROCESSED_DATA_PATH}/train_features.csv'
            df_train_features = pd.read_csv(features_path)
            self.df_train = pd.concat([self.df_train, df_train_features], axis=1)
        else:
            # Create training set features
            self._add_segmentation_features(self.df_train, 'train', models)

        print('Creating test set features')
        # Create test set features
        self._add_segmentation_features(self.df_test, 'test', models)

        return self.df_train.copy(deep=True), self.df_test.copy(deep=True)


In [None]:
# train_features_path=None: training set features are computed by the extractor, not loaded from a CSV
segmentation_feature_extractor = SegmentationFeatureExtractor(df_train=df_train, df_test=df_test, train_features_path=None)

# Feature extraction is switched off in this run; uncomment the line below to enable it
#df_train, df_test = segmentation_feature_extractor.extract_features()

## 3D Classification

In [None]:
def load_predictions_and_evaluate_model(df, model_name, mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=False):

    """
    Load saved training set predictions of a model into df and print fold and out-of-fold ROC AUC scores

    For every MRI type the file {PREDICTIONS_PATH}/train_{model_name}_{mri_type}_predictions.csv is read and
    its predictions are written, row by row, into the column {model_name}_{mri_type}_predictions of df.

    Parameters
    ----------
    df (pandas.DataFrame): Training set with MGMT_value and fold columns, modified in place
    model_name (str): Name of the model (e.g. "densenet121")
    mri_types (tuple of str): MRI types to load and evaluate
    sigmoid (bool): Whether to apply sigmoid to the loaded predictions or not
    """

    print(f'\n{"-" * 30}\nEvaluating {model_name}\n{"-" * 30}\n')
    for mri_type in mri_types:

        # The column inside the CSV is named without the last three characters of the model name
        # (e.g. "densenet121" -> "densenet_FLAIR_predictions")
        predictions_column_name = f'{model_name[:-3]}_{mri_type}_predictions'
        train_column_name = f'{model_name}_{mri_type}_predictions'
        print(f'{mri_type}\n{"-" * len(mri_type)}')

        df_train_predictions = pd.read_csv(f'{PREDICTIONS_PATH}/train_{model_name}_{mri_type}_predictions.csv')
        predictions = df_train_predictions[predictions_column_name].values
        if sigmoid:
            predictions = 1 / (1 + np.exp(-predictions))

        # Predictions are assigned by position, so the CSV rows must be in the same order as df
        df[train_column_name] = predictions

        for fold in sorted(df['fold'].unique()):
            # Validation rows of the fold, taken from df itself rather than from a global dataframe
            val_idx = df.index[df['fold'] == fold]
            fold_score = roc_auc_score(df.loc[val_idx, 'MGMT_value'], df.loc[val_idx, train_column_name])
            print(f'Fold {fold} - ROC AUC Score: {fold_score:.6f}')
        oof_score = roc_auc_score(df['MGMT_value'], df[train_column_name])
        print(f'{"-" * 30}\nOOF ROC AUC Score: {oof_score:.6}\n{"-" * 30}\n')


In [None]:
# Load the saved densenet121 training set predictions (sigmoid applied on load) and print their scores
load_predictions_and_evaluate_model(df=df_train, model_name='densenet121', mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=True)

In [None]:
# Load the saved densenet169 training set predictions (sigmoid applied on load) and print their scores
load_predictions_and_evaluate_model(df=df_train, model_name='densenet169', mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=True)

## Blending

In [None]:
# Every column whose name ends with "predictions"
prediction_columns = [col for col in df_train.columns if col.endswith('predictions')]

# Pairwise correlations between the prediction columns and the target
fig, ax = plt.subplots(figsize=(16, 16), dpi=100)
sns.heatmap(
    df_train[prediction_columns + ['MGMT_value']].corr(),
    annot=True,
    square=True,
    cmap='coolwarm',
    annot_kws={'size': 12},
    fmt='.4f',
    ax=ax
)

ax.tick_params(axis='x', labelsize=10, rotation=90)
ax.tick_params(axis='y', labelsize=10, rotation=0)
ax.set_title('Prediction Correlations', size=20, pad=20)

plt.show()

In [None]:
# Equal-weight blend (0.125 each) of the eight densenet121/densenet169 prediction columns,
# summed in the order densenet121 FLAIR, T1w, T1wCE, T2w, then densenet169 FLAIR, T1w, T1wCE, T2w
df_train['densenet_blend_predictions'] = sum(
    df_train[f'{model_name}_{mri_type}_predictions'] * 0.125
    for model_name in ('densenet121', 'densenet169')
    for mri_type in ('FLAIR', 'T1w', 'T1wCE', 'T2w')
)


print(f'Blend\n{"-" * 5}')
for fold in sorted(df_train['fold'].unique()):
    # Only the validation rows of the fold are scored; the name "_" is left untouched for IPython
    val_idx = df_train.index[df_train['fold'] == fold]
    fold_score = roc_auc_score(df_train.loc[val_idx, 'MGMT_value'], df_train.loc[val_idx, 'densenet_blend_predictions'])
    print(f'Fold {fold} - ROC AUC Score: {fold_score:.6f}')
oof_score = roc_auc_score(df_train['MGMT_value'], df_train['densenet_blend_predictions'])
print(f'{"-" * 30}\nOOF ROC AUC Score: {oof_score:.6}\n{"-" * 30}\n')

## Inference

In [None]:
class MRIClassificationDataset(Dataset):

    """
    Dataset that loads one preprocessed, standardized mpMRI volume per case with load_mri

    Parameters
    ----------
    cases (list of str): Case identifiers (directory names under the data split)
    targets (array-like or None): MGMT value of every case, None if targets are not available
    mri_type (str): MRI type to load (sub-directory of the case)
    transforms (callable or None): Optional transform applied to the loaded volume
    data_split (str): Sub-directory of RAW_DATA_PATH that holds the cases ("test" by default)
    """

    # Edge length of the cube every volume is resized to
    RESIZE_SHAPE = 144

    def __init__(self, cases, targets, mri_type, transforms=None, data_split='test'):

        self.cases = cases
        self.targets = targets
        self.mri_type = mri_type
        self.transforms = transforms
        self.data_split = data_split

    def __len__(self):
        return len(self.cases)

    def __getitem__(self, idx):

        """
        Get the idxth element in the dataset

        Parameters
        ----------
        idx (int): Index of the sample (0 <= idx < len(self.cases))

        Returns
        -------
        mri [torch.FloatTensor of shape (channel, depth, height, width)]: Preprocessed 4D mpMRI
        target [torch.FloatTensor of shape (1)]: MGMT value
        """

        mri = load_mri(
            mri_path=f'{RAW_DATA_PATH}/{self.data_split}/{self.cases[idx]}/{self.mri_type}',
            window_width=None,
            window_center=None,
            voi_lut=None,
            new_spacing=None,
            reorder_plane=True,
            resize_shape=self.RESIZE_SHAPE,
            verbose=False
        )

        if mri is None:
            # load_mri found nothing usable for this case; an all-zero volume keeps the sample
            # (and therefore the order of the predictions) in place instead of crashing the loader
            mri = np.zeros((self.RESIZE_SHAPE, self.RESIZE_SHAPE, self.RESIZE_SHAPE), dtype=np.float32)
        else:
            if self.transforms is not None:
                mri = self.transforms(mri)

            # Standardize; a constant volume has zero std and is only centered to avoid dividing by zero
            std = mri.std()
            if std > 0:
                mri = (mri - mri.mean()) / std
            else:
                mri = mri - mri.mean()

        mri = torch.as_tensor(mri, dtype=torch.float)
        mri = torch.unsqueeze(mri, 0)

        if self.targets is not None:
            target = self.targets[idx]
            target = torch.as_tensor(target, dtype=torch.float)
            target = torch.unsqueeze(target, 0)
            return mri, target
        else:
            return mri


In [None]:
class DenseNetModel(nn.Module):

    """
    Thin wrapper around a 3D, single input channel, single output DenseNet stored as self.backbone

    Parameters
    ----------
    init_features (int): Forwarded to DenseNet as init_features
    growth_rate (int): Forwarded to DenseNet as growth_rate
    block_config (tuple of int): Forwarded to DenseNet as block_config
    bn_size (int): Forwarded to DenseNet as bn_size
    dropout_prob (float): Forwarded to DenseNet as dropout_prob
    """

    def __init__(self, init_features, growth_rate, block_config, bn_size, dropout_prob):

        super().__init__()

        # Fixed settings first, then the tunable hyperparameters
        backbone_kwargs = dict(
            spatial_dims=3,
            in_channels=1,
            out_channels=1,
            init_features=init_features,
            growth_rate=growth_rate,
            block_config=block_config,
            bn_size=bn_size,
            dropout_prob=dropout_prob,
        )
        self.backbone = DenseNet(**backbone_kwargs)

    def forward(self, x):
        # The wrapper adds nothing on top of the backbone output
        output = self.backbone(x)
        return output
    

# Constructor arguments of DenseNetModel per model name; the two entries differ only in block_config.
# NOTE(review): (6, 12, 48, 32) is the layout usually listed for DenseNet-201 (DenseNet-169 is (6, 12, 32, 32));
# it presumably matches the saved checkpoints, so confirm before changing it
model_configs = {
    name: {
        'init_features': 64,
        'growth_rate': 32,
        'block_config': block_config,
        'bn_size': 4,
        'dropout_prob': 0
    }
    for name, block_config in (
        ('densenet121', (6, 12, 24, 16)),
        ('densenet169', (6, 12, 48, 32)),
    )
}

In [None]:
# Folds whose checkpoints take part in inference, per model and MRI type (all five folds everywhere).
# Every entry gets its own list so a single model/MRI type can be edited independently
folds_to_use = {
    model_name: {mri_type: list(range(1, 6)) for mri_type in ('FLAIR', 'T1w', 'T1wCE', 'T2w')}
    for model_name in ('densenet121', 'densenet169')
}


def inference(df, model_name, mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=True):

    """
    Predict every case of df with the fold models of a given architecture and store the fold average in df

    For every MRI type, the checkpoints {MODELS_PATH}/{model_name}/{model_name}_{mri_type}_fold{fold}.pt of the
    folds listed in folds_to_use[model_name][mri_type] are run over all cases. Their raw outputs are averaged,
    optionally passed through sigmoid, and written to the column {model_name}_{mri_type}_predictions of df.

    Parameters
    ----------
    df (pandas.DataFrame): Dataframe with a BraTS21ID column, modified in place
    model_name (str): Key of model_configs and folds_to_use (e.g. "densenet121")
    mri_types (tuple of str): MRI types to run inference for
    sigmoid (bool): Whether to apply sigmoid to the averaged outputs or not
    """

    print(f'\n{"-" * 30}\nRunning {model_name} for Inference ({mri_types})\n{"-" * 30}')

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    cases = df['BraTS21ID'].values.tolist()

    for mri_type in mri_types:

        predictions_column_name = f'{model_name}_{mri_type}_predictions'
        folds = sorted(folds_to_use[model_name][mri_type])

        # Dataset and loader do not depend on the fold, so they are built once per MRI type
        test_dataset = MRIClassificationDataset(
            cases=cases,
            targets=None,
            mri_type=mri_type,
            transforms=None
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=16,
            sampler=SequentialSampler(test_dataset),
            pin_memory=True,
            drop_last=False,
            num_workers=4,
        )

        summed_predictions = np.zeros(len(cases), dtype=np.float64)

        for fold in folds:

            model = DenseNetModel(**model_configs[model_name])
            # map_location lets checkpoints saved on another device load on the current one
            model.load_state_dict(torch.load(f'{MODELS_PATH}/{model_name}/{model_name}_{mri_type}_fold{fold}.pt', map_location=device))
            model.to(device)
            model.eval()

            predictions = []
            with torch.no_grad():
                for mri in tqdm(test_loader):
                    mri = mri.to(device)
                    output = model(mri)
                    predictions += output.detach().cpu().numpy().flatten().tolist()

            summed_predictions += np.asarray(predictions, dtype=np.float64)
            print(f'Finished Inference for {model_name} Model Fold {fold} ({mri_type})')

        # Average over the folds that were actually run (an empty fold list leaves the raw average at 0)
        averaged_predictions = summed_predictions / max(len(folds), 1)
        if sigmoid:
            averaged_predictions = 1 / (1 + np.exp(-averaged_predictions))

        df[predictions_column_name] = averaged_predictions

In [None]:
# Fold-averaged densenet121 test predictions for every MRI type, with sigmoid applied
inference(df=df_test, model_name='densenet121', mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=True)

In [None]:
# Fold-averaged densenet169 test predictions for every MRI type, with sigmoid applied
inference(df=df_test, model_name='densenet169', mri_types=('FLAIR', 'T1w', 'T1wCE', 'T2w'), sigmoid=True)

## Submission

In [None]:
# Final blend: FLAIR and T2w predictions of both models, 0.25 each, summed in the order
# densenet121 FLAIR, densenet121 T2w, densenet169 FLAIR, densenet169 T2w.
# The T1w and T1wCE prediction columns are not part of the submission blend
df_test['MGMT_value'] = sum(
    df_test[f'{model_name}_{mri_type}_predictions'] * 0.25
    for model_name in ('densenet121', 'densenet169')
    for mri_type in ('FLAIR', 'T2w')
)

submission_frame = df_test[['BraTS21ID', 'MGMT_value']]
submission_frame.to_csv('submission.csv', index=False)