Acknowledgements: <br>
https://www.kaggle.com/ohbewise/a-rsna-mri-solution-from-dicom-to-submission <br>
U.Baid, et al., “The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor Segmentation and Radiogenomic Classification”, arXiv:2107.02314, 2021.

## Install and import libraries

In [1]:
# Offline install of the requirements listed in requirements.txt:
# --no-index stops pip from contacting PyPI and --find-links makes it resolve
# packages from the local ../input/pip-download-torchio/ directory instead.
# If this line fails please see the prerequisite above
# NOTE(review): no prerequisite section is visible in this notebook; presumably
# the pip-download-torchio input must be attached to the notebook — confirm.
!pip install --quiet --no-index --find-links ../input/pip-download-torchio/ --requirement ../input/pip-download-torchio/requirements.txt



In [2]:
# import libraries
import os
import csv
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import nibabel as nib
import torchio as tio
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Parameters to limit the processing power needed.
# Only the scan types listed here are preprocessed and turned into datasets;
# each entry is used as the name of a sub-folder inside a patient directory.
# NOTE(review): the previous comment said "uses all scan types", but only two
# are listed — confirm whether further scan types exist and should be added.
scan_types    = ['FLAIR','T2w'] # scan types to process

2021-09-21 04:57:54.007320: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


## Preprocess data: DICOM to normalized NIfTI with TorchIO


In [None]:
# Preprocess data: read every patient's DICOM series for each selected scan
# type, normalize it with TorchIO and write the result as a compressed NIfTI
# file under out_dir/<dataset>/<patient>/<scan_type>/<scan_type>.nii.gz
data_dir   = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/'
out_dir    = '/kaggle/working/processed'

# Cases the competition host said to exclude
# https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/262046
excluded_patients = {'00109', '00123', '00709'}

# The pipeline is the same for every scan, so it is built once here instead of
# being re-created for each patient and scan type inside the loops.
transforms = [
    tio.ToCanonical(),                                           # reorient to the canonical orientation
    tio.Resample(1),                                             # resample to a voxel spacing of 1
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),  # z-score, statistics taken from voxels above the mean
    tio.CropOrPad((128,128,64)),                                 # crop or pad to a fixed spatial shape
]
transform = tio.Compose(transforms)

for dataset in ['train']:
    dataset_dir = f'{data_dir}{dataset}'
    patients = [patient for patient in os.listdir(dataset_dir)
                if patient not in excluded_patients]

    print(f'Total patients in {dataset} dataset: {len(patients)}')

    for count, patient in enumerate(patients, start=1):
        print(f'{dataset}: {count}/{len(patients)}')

        for scan_type in scan_types:
            # A directory of DICOM slices is passed to ScalarImage as one volume.
            scan_src  = f'{dataset_dir}/{patient}/{scan_type}/'
            # No trailing slash here, so the file path built below has no '//'.
            scan_dest = f'{out_dir}/{dataset}/{patient}/{scan_type}'
            Path(scan_dest).mkdir(parents=True, exist_ok=True)
            image = tio.ScalarImage(scan_src)
            preprocessed = transform(image)
            preprocessed.save(f'{scan_dest}/{scan_type}.nii.gz')

Total patients in train dataset: 582
train: 1/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000773065

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:7.6146e-05



train: 2/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000844631



train: 3/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000603393

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000551437



train: 4/582
train: 5/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00081274



train: 6/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000892558



train: 7/582
train: 8/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000708299

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000772679



train: 9/582
train: 10/582
train: 11/582
train: 12/582
train: 13/582
train: 14/582
train: 15/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000871493

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000765089



train: 16/582
train: 17/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000497143

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000119059



train: 18/582
train: 19/582
train: 20/582
train: 21/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000661485

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000904419



train: 22/582
train: 23/582
train: 24/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000613626



train: 25/582
train: 26/582
train: 27/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0004



train: 28/582
train: 29/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000931539

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0003



train: 30/582
train: 31/582
train: 32/582
train: 33/582
train: 34/582
train: 35/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000866044



train: 36/582
train: 37/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0002



train: 38/582
train: 39/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000392982

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000395294



train: 40/582
train: 41/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000632884

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000925408



train: 42/582
train: 43/582
train: 44/582
train: 45/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000836238

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00073222



train: 46/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000103474

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000102875



train: 47/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000682788

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000683343



train: 48/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000940841

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0001



train: 49/582
train: 50/582
train: 51/582
train: 52/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000959891



train: 53/582
train: 54/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000960751

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000897991



train: 55/582
train: 56/582
train: 57/582
train: 58/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000899893



train: 59/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000920703

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000484682



train: 60/582
train: 61/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000632946

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000662765



train: 62/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00079876

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:7.49456e-05



train: 63/582
train: 64/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000663613

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000575338



train: 65/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000634487

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000709338



train: 66/582
train: 67/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000392593

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000395062



train: 68/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000893224

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00091787



train: 69/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0001



train: 70/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000299034

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0003



train: 71/582
train: 72/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000709485

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000180181



train: 73/582
train: 74/582
train: 75/582
train: 76/582
train: 77/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000662967

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00063874



train: 78/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00049763

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000575334



train: 79/582
train: 80/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000985184



train: 81/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:1.51402



train: 82/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000739719

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000173117



train: 83/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000937524

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000846142



train: 84/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00100146

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000590929



train: 85/582
train: 86/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:8.12015e-05



train: 87/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000607788

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00065128



train: 88/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000681415

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000498771



train: 89/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000757016



train: 90/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000817082

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:7.36603e-05



train: 91/582
train: 92/582
train: 93/582
train: 94/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000699291



train: 95/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000669543

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000593539



train: 96/582
train: 97/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000820594

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000535881



train: 98/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199715

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000632266



train: 99/582
train: 100/582
train: 101/582
train: 102/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000922949

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000711477



train: 103/582
train: 104/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00077212

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000174916



train: 105/582
train: 106/582
train: 107/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000640664



train: 108/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000766265

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000808871



train: 109/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00057089

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000108729



train: 110/582
train: 111/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000757599

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000803631



train: 112/582
train: 113/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000803064



train: 114/582
train: 115/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000877669



train: 116/582
train: 117/582
train: 118/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000198995



train: 119/582
train: 120/582
train: 121/582
train: 122/582
train: 123/582
train: 124/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000998388

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000124396



train: 125/582
train: 126/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00097864

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000100987



train: 127/582
train: 128/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397436

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0005



train: 129/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000555953

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000858202



train: 130/582
train: 131/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000106332



train: 132/582
train: 133/582
train: 134/582
train: 135/582
train: 136/582
train: 137/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000619974



train: 138/582
train: 139/582
train: 140/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000530832

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000992739



train: 141/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000666519

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000623132



train: 142/582
train: 143/582
train: 144/582
train: 145/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000621815

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000655227



train: 146/582
train: 147/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397484



train: 148/582
train: 149/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000296471



train: 150/582
train: 151/582
train: 152/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000625487

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000163944



train: 153/582
train: 154/582
train: 155/582
train: 156/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199476

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000950422



train: 157/582
train: 158/582
train: 159/582
train: 160/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000942467



train: 161/582
train: 162/582
train: 163/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000509684



train: 164/582
train: 165/582
train: 166/582
train: 167/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000497487



train: 168/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000710576



train: 169/582
train: 170/582
train: 171/582
train: 172/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000625618



train: 173/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199015



train: 174/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000997436



train: 175/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000956469

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000238899



train: 176/582
train: 177/582
train: 178/582
train: 179/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000497717

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0005



train: 180/582
train: 181/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000979325

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00080326



train: 182/582
train: 183/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000955034

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000973752



train: 184/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000591055

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000955473



train: 185/582
train: 186/582
train: 187/582
train: 188/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000298462

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199467



train: 189/582
train: 190/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000662598

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000763409



train: 191/582
train: 192/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000571316

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000951542



train: 193/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000871636



train: 194/582
train: 195/582
train: 196/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000686132

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:8.88725e-05



train: 197/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000827345

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000609291



train: 198/582
train: 199/582
train: 200/582
train: 201/582
train: 202/582
train: 203/582
train: 204/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00110118

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000845892



train: 205/582
train: 206/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000765916

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000498771



train: 207/582
train: 208/582
train: 209/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000810772

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000649423



train: 210/582
train: 211/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000953709



train: 212/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000100512

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0002



train: 213/582
train: 214/582
train: 215/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000296875



train: 216/582
train: 217/582
train: 218/582
train: 219/582
train: 220/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199497

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000649727



train: 221/582
train: 222/582
train: 223/582
train: 224/582
train: 225/582
train: 226/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000567639

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000642232



train: 227/582
train: 228/582
train: 229/582
train: 230/582
train: 231/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000602461

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000637886



train: 232/582
train: 233/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000890803

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000470311



train: 234/582
train: 235/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000947886



train: 236/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000873118

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000676487



train: 237/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000298997

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00051541



train: 238/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:1.51518



train: 239/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397487

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0005



train: 240/582
train: 241/582
train: 242/582
train: 243/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397143



train: 244/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000786456

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000675432



train: 245/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000976392

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000399017



train: 246/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000653985



train: 247/582
train: 248/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00070657

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000848886



train: 249/582
train: 250/582
train: 251/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000842124

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000684121



train: 252/582
train: 253/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000684923

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000122862



train: 254/582
train: 255/582
train: 256/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000299248



train: 257/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397487

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0005



train: 258/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000398462



train: 259/582
train: 260/582
train: 261/582
train: 262/582
train: 263/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000398068

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0001



train: 264/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000937531



train: 265/582
train: 266/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000806725

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000803743



train: 267/582
train: 268/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000772112

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000821799



train: 269/582
train: 270/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000794832



train: 271/582
train: 272/582
train: 273/582
train: 274/582
train: 275/582
train: 276/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000814357

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000774241



train: 277/582
train: 278/582
train: 279/582
train: 280/582
train: 281/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000570295

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00101078



train: 282/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00086455

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000477789



train: 283/582
train: 284/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000957152



train: 285/582
train: 286/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000199517



train: 287/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:9.14411e-05

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00100209



train: 288/582
train: 289/582
train: 290/582
train: 291/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000716942

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000882742



train: 292/582
train: 293/582
train: 294/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000907459



train: 295/582
train: 296/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:2.53521



train: 297/582
train: 298/582
train: 299/582
train: 300/582
train: 301/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000854277

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000557215



train: 302/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000682392



train: 303/582
train: 304/582
train: 305/582
train: 306/582
train: 307/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00039884



train: 308/582
train: 309/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.0001



train: 310/582
train: 311/582
train: 312/582
train: 313/582
train: 314/582
train: 315/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000864861

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000552123



train: 316/582
train: 317/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000666595

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000120979



train: 318/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000688597

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000681919



train: 319/582
train: 320/582
train: 321/582
train: 322/582
train: 323/582
train: 324/582
train: 325/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000997326

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000400827



train: 326/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000497783

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00086439



train: 327/582
train: 328/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000397674

ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000498771



train: 329/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000497382



train: 330/582
train: 331/582


ImageSeriesReader (0x562faaaa2210): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000730786



## Build datasets: NIfTI to Split Dataset with NiBabel

In [None]:
# build datasets

# dataset processing functions
def read_nifti_file(filepath):
    """Load the image stored at ``filepath`` and return its voxel data.

    The file is opened with ``nib.load`` and whatever the loaded image's
    ``get_fdata()`` call produces is returned unchanged.
    """
    return nib.load(filepath).get_fdata()

def add_batch_channel(volume):
    """Wrap ``volume`` with a trailing channel axis and a leading batch axis.

    A new axis is first appended at position -1, then another one is
    inserted at position 0, so the result has two more axes than the input.
    """
    with_channel = tf.expand_dims(volume, axis=-1)
    return tf.expand_dims(with_channel, axis=0)

def process_scan(filepath):
    """Read the volume at ``filepath`` and return it with batch and channel axes added."""
    return add_batch_channel(read_nifti_file(filepath))

# get labels
labels_df = pd.read_csv(data_dir+'train_labels.csv', index_col=0)

# split patients
# os.listdir returns entries in arbitrary order; sorting first makes the
# seeded split below reproducible across runs and machines.
patients = sorted(os.listdir(f'{out_dir}/train'))
from sklearn.model_selection import train_test_split
train, validation = train_test_split(patients, test_size=0.3, random_state=42)
print(f'{len(patients)} total patients.\n   {len(train)} in the train split.\n   {len(validation)} in the validation split')

splits_dict = {'train':train, 'validation':validation}

# Build and save one tf.data dataset per (scan type, split) combination.
for scan_type in scan_types:
    print(f'{scan_type} start')
    for split_name, split_list in splits_dict.items():
        print(f'   {split_name} start')
        label_list = []
        filepaths = []
        for patient in split_list:
            # Public scalar accessor; patient folder names are converted to
            # int to look them up in the labels index.
            label = labels_df.at[int(patient), 'MGMT_value']
            label = add_batch_channel(label)
            label_list.append(label)
            filepath  = f'{out_dir}/train/{patient}/{scan_type}/{scan_type}.nii.gz'
            filepaths.append(filepath)

        # Every path is loaded: features and labels must stay index-aligned,
        # so a missing file should fail loudly rather than be skipped.
        features = np.array([process_scan(filepath) for filepath in filepaths])
        labels = np.array(label_list, dtype=np.uint8)
        dataset = tf.data.Dataset.from_tensor_slices((features, labels))
        
        # save dataset   
        tf_data_path = f'./datasets/{scan_type}_{split_name}_dataset'
        tf.data.experimental.save(dataset, tf_data_path, compression='GZIP')
        with open(tf_data_path + '/element_spec', 'wb') as out_:  # also save the element_spec to disk for future loading
            pickle.dump(dataset.element_spec, out_)
        print(f'   {split_name} done')
    print(f'{scan_type} done')

## Define, train, and evaluate model: Dataset to Model with TensorFlow


In [None]:
# Define, train, and evaluate model
# source: https://keras.io/examples/vision/3D_image_classification/
def get_model(width=128, height=128, depth=64, name='3dcnn'):
    """Create and compile a 3D CNN binary classifier.

    Architecture, in order:
      * input of shape ``(width, height, depth, 1)``
      * four blocks of Conv3D (3x3x3 kernel, ReLU) -> MaxPool3D(2) ->
        BatchNormalization, with 64, 64, 128 and 256 filters
      * GlobalAveragePooling3D -> Dense(512, ReLU) -> Dropout(0.3)
      * Dense(1, sigmoid) output

    The model is compiled with binary cross-entropy loss, an Adam optimizer
    driven by a staircase exponential-decay learning-rate schedule starting
    at 1e-4, and the ``"acc"`` metric.

    Args:
        width, height, depth: spatial dimensions of the input volume.
        name: name given to the Keras model.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers

    inputs = tf.keras.Input((width, height, depth, 1))

    # Convolutional feature extractor: identical blocks, only the filter count varies.
    x = inputs
    for n_filters in (64, 64, 128, 256):
        x = layers.Conv3D(filters=n_filters, kernel_size=3, activation="relu")(x)
        x = layers.MaxPool3D(pool_size=2)(x)
        x = layers.BatchNormalization()(x)

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs, outputs, name=name)

    # Optimizer with a decaying learning rate.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        0.0001, decay_steps=100000, decay_rate=0.96, staircase=True
    )
    model.compile(
        loss="binary_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=["acc"],
    )

    return model

In [None]:
# Train one model per scan type, save it, and plot its learning curves.
for scan_type in scan_types:
    # load train_dataset dataset
    # (element_spec was pickled by this notebook when the dataset was saved)
    tf_data_path = f'./datasets/{scan_type}_train_dataset'
    with open(tf_data_path + '/element_spec', 'rb') as in_:
        es = pickle.load(in_)
    train_dataset = tf.data.experimental.load(tf_data_path, es, compression='GZIP')

    # Model.fit ignores its `shuffle` argument for tf.data.Dataset inputs, so
    # shuffle the dataset itself (it is reshuffled on every epoch by default).
    # Each element is a whole volume, so the buffer is kept small to bound memory.
    shuffle_buffer_size = 32
    train_dataset = train_dataset.shuffle(shuffle_buffer_size)
    
    # load validation_dataset
    tf_data_path = f'./datasets/{scan_type}_validation_dataset'
    with open(tf_data_path + '/element_spec', 'rb') as in_:
        es = pickle.load(in_)
    validation_dataset = tf.data.experimental.load(tf_data_path, es, compression='GZIP')

    # Get Model
    model = get_model(width=128, height=128, depth=64,name=scan_type)
    
    # Define callbacks.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        f'{scan_type}_3d_image_classification.h5', save_best_only=True
    )
    # restore_best_weights: when early stopping triggers, roll the model back
    # to the best val_acc epoch so the model saved below is not the one from
    # `patience` epochs after the best.
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_acc", patience=15, restore_best_weights=True
    )

    epochs = 50
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=epochs,
        verbose=2,
        callbacks=[checkpoint_cb, early_stopping_cb],
    )
    
    #save model
    model.save(f'./models/{scan_type}')
    
    # show metrics
    fig, ax = plt.subplots(1, 2, figsize=(20, 3))
    ax = ax.ravel()

    for i, metric in enumerate(["acc", "loss"]):
        ax[i].plot(history.history[metric])
        ax[i].plot(history.history["val_" + metric])
        ax[i].set_title("{} Model {}".format(scan_type, metric))
        ax[i].set_xlabel("epochs")
        ax[i].set_ylabel(metric)
        ax[i].legend(["train", "val"])

## Write predictions to submission.csv: Model Prediction to Submission

In [None]:
# write predictions to submission.csv

# Set up directories
data_dir   = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/'
test_dir   = f'{data_dir}test'
# Sorted so that the processing order (and the demo subset) is deterministic.
patients = sorted(os.listdir(test_dir))
# `demo` is an optional switch that may not be defined on a fresh kernel;
# treat a missing value as False instead of raising NameError.
if globals().get('demo', False):
    patients = patients[:10]
print(f'Total patients: {len(patients)}\n\n')

out_dir    = '/kaggle/working/processed'

scan_types = ['FLAIR', 'T2w']

# The preprocessing pipeline is the same for every scan, so build it once.
transform = tio.Compose([
    tio.ToCanonical(),
    tio.Resample(1),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
    tio.CropOrPad((128,128,64)),
])

# patient -> list of predictions, one per scan-type model
predictions = {patient: [] for patient in patients}

for scan_type in scan_types:
    # tf model: load once per scan type rather than once per patient
    model = tf.keras.models.load_model(f'./models/{scan_type}')

    for patient in patients:
        # DICOM to NIfTI
        scan_src  = f'{test_dir}/{patient}/{scan_type}/'
        scan_dest = f'{out_dir}/test/{patient}/{scan_type}/'
        Path(scan_dest).mkdir(parents=True, exist_ok=True)
        image = tio.ScalarImage(scan_src)  # subclass of Image
        preprocessed = transform(image)
        filepath = f'{scan_dest}/{scan_type}.nii.gz'
        preprocessed.save(filepath)
        
        # process_scan
        case = process_scan(filepath)

        # get prediction
        prediction = model.predict(case)
        
        # record prediction
        print(f'{patient},{prediction[0][0]}')
        predictions[patient].append(float(prediction[0][0]))

# Write the submission once, averaging the per-scan-type predictions.
# (Re-opening the file in 'w' mode for each scan type would keep only the
# last model's predictions.)
with open('/kaggle/working/submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['BraTS21ID','MGMT_value'])
    for patient in patients:
        patient_predictions = predictions[patient]
        writer.writerow([patient, sum(patient_predictions) / len(patient_predictions)])