I'm reading through several existing notebooks and trying to distill down the information into a new notebook to help me understand the project.  All help appreciated!

# References

- [Advanced EDA - Brain Tumor Data](https://www.kaggle.com/smoschou55/advanced-eda-brain-tumor-data)
- [Team 9 Second Week](https://www.kaggle.com/evanyao27/team-9-second-week)
  - The only model that is working: `get_model02()`
- [Dataset to Model with Tensorflow](https://www.kaggle.com/ohbewise/dataset-to-model-with-tensorflow)
- [Brain Tumer Train Class Flair](https://www.kaggle.com/lucamtb/brain-tumer-train-class-flair)
  - Uses TPU
  - Generates a Tensorflow model: Brain_flair_model_effect_3e-05_0.0001.h5
- [Brain Tumor very basic inference](https://www.kaggle.com/lucamtb/brain-tumor-very-basice-inference)
  - Uses the above mentioned model: Brain_flair_model_effect_3e-05_0.0001.h5
  - Add this Kaggle Dataset: https://www.kaggle.com/lucamtb/effect0-brain

# Load Libraries

In [None]:
import os
import glob

import pandas as pd
import numpy as np
from pathlib import Path

import random
from tqdm import tqdm
import pydicom # Handle MRI images

import cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from pathlib import Path

# Configuration, Constants, Setup

In [None]:
# Root folder of the competition data, relative to the notebook's working directory.
data_dir = Path('../input') / 'rsna-miccai-brain-tumor-radiogenomic-classification'

# MRI sequence names that get_all_image_paths accepts as image_type.
mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]
# BraTS21ID values removed from the training labels (bad images).
excluded_images = [109, 123, 709]

# Load Datasets

In [None]:
labels_csv = data_dir / "train_labels.csv"
submission_csv = data_dir / "sample_submission.csv"

train_df = pd.read_csv(labels_csv)
# The sample submission is read twice: once as the list of test patients,
# once as the submission template.
test_df = pd.read_csv(submission_csv)
sample_submission = pd.read_csv(submission_csv)

# Drop the excluded patients first, then hold out the last 50 remaining rows
# for validation and train on everything before them.
keep_rows = ~train_df.BraTS21ID.isin(excluded_images)
train_df = train_df[keep_rows]
val_df, train_df = train_df[-50:], train_df[:-50]

print(f"train data: Rows={train_df.shape[0]}, Columns={train_df.shape[1]}")
print(f"val data: Rows={val_df.shape[0]}, Columns={val_df.shape[1]}")

# Utility Functions

### There's a version that converts into grayscale: 

- https://www.kaggle.com/smoschou55/advanced-eda-brain-tumor-data


In [None]:
def load_dicom(path, size = 224):
    '''
    Read one DICOM file and return its pixel data as a size x size uint8 image.

    The pixel array is divided by its maximum (skipped when the maximum is 0,
    which would otherwise divide by zero), multiplied by 255, cast to
    np.uint8 and finally resized with OpenCV.

    Parameters:
        path: location of the DICOM file.
        size: side length in pixels of the square output image.

    Returns:
        The array produced by cv2.resize for the rescaled pixel data.
    '''
    # dcmread is pydicom's reader; read_file was only a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    # The minimum is not subtracted (that step is disabled), so only the
    # upper end of the range is normalised.
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

In [None]:
def read_niifile(niifile,size=64,filtering=0.15):
    '''
    Load a NIfTI file and return a thinned, central run of its slices.

    The volume is resized to size x size with cv2.resize. A fraction
    `filtering` of the slices along the last axis is dropped at each end, and
    every third remaining slice is kept (every slice when the volume has
    fewer than 10 slices).

    Parameters:
        niifile: path handed to nib.load.
        size: side length used for the in-plane resize.
        filtering: fraction of slices discarded at each end of the last axis.

    Returns:
        A new numpy array holding the selected slices along its last axis.
    '''
    volume = cv2.resize(nib.load(niifile).get_fdata(), (size, size))

    depth = len(volume[0, 0, :])
    first = int(depth * filtering)
    last = int(depth * (1 - filtering))
    step = 1 if depth < 10 else 3

    return np.array(volume[:, :, first:last:step])

def load_niifile(niifile,size=64,filtering=0.35):
    '''
    Load a NIfTI file and return a thinned, central run of its slices.

    Same procedure as read_niifile but with a wider default `filtering`, and
    it prints the shape of the volume before resizing.

    Parameters:
        niifile: path handed to nib.load.
        size: side length used for the in-plane resize.
        filtering: fraction of slices discarded at each end of the last axis.

    Returns:
        A new numpy array holding the selected slices along its last axis.
    '''
    # The volume has to be read here; without these two lines img_fdata was
    # never defined and every call failed with a NameError.
    img = nib.load(niifile)
    img_fdata = img.get_fdata()

    print(img_fdata.shape,'1111')
    img_fdata = cv2.resize(img_fdata,(size,size))

    num_images = len(img_fdata[0,0,:])

    start = int(num_images * filtering)
    end = int(num_images * (1-filtering))

    # Keep every third slice, or every slice for very short volumes.
    interval = 1 if num_images < 10 else 3

    return np.array(img_fdata[:,:,start:end:interval])


In [None]:
from joblib import Parallel,delayed
import multiprocessing as mp
import nibabel as nib
from glob import glob
def get_all_image_paths(brats21id, image_type, folder='train',filtering=0.35): 
    '''
    Return the paths of a thinned, central subset of one patient's images.

    Files in the patient's `image_type` folder are ordered by the integer
    that follows the last "-" in the file name (the last four characters,
    i.e. the extension, are ignored). A fraction `filtering` of the ordered
    files is dropped at each end, and every third remaining file is kept
    (every file when there are fewer than 10).

    Parameters:
        brats21id: patient id; zero-padded to five digits to form the folder name.
        image_type: one of the names in mri_types.
        folder: sub-folder of the competition data ('train' by default).
        filtering: fraction of files discarded at each end of the ordered list.

    Returns:
        A numpy array of the selected file paths.
    '''
    # Imported under a private alias because this notebook rebinds the global
    # name `glob` to the function (`from glob import glob`), which makes
    # `glob.glob(...)` fail with an AttributeError.
    import glob as _glob

    assert(image_type in mri_types)
    
    patient_path = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/%s/" % folder, 
        str(brats21id).zfill(5),
    )

    paths = sorted(
        _glob.glob(os.path.join(patient_path, image_type, "*")), 
        key=lambda x: int(x[:-4].split("-")[-1]),
    )
    
    num_images = len(paths)

    start = int(num_images * filtering)
    end = int(num_images * (1-filtering))

    # Keep every third file, or every file for very short series.
    interval = 1 if num_images < 10 else 3
    
    return np.array(paths[start:end:interval])

def get_all_nifiti_paths(brats21id, folder='train',folername=None): 
    '''
    Build the path of the NIfTI file to read for one patient.

    `folername` takes precedence over `folder`:
      - folername == 'mask_img' -> ../input/segresult/mask_img/<id>.nii
      - folername == 'sample'   -> ../input/segresult/sample/<id>_0001.nii
      - folder == 'test'        -> ./rsna-preprocessed/test/<id>_T1wCE.nii.gz
    where <id> is `brats21id` zero-padded to five digits.

    Parameters:
        brats21id: patient id (anything str() can render).
        folder: data split; only 'test' selects a path on its own.
        folername: 'mask_img', 'sample' or None.

    Returns:
        The path as a string.

    Raises:
        ValueError: if the folder/folername combination matches none of the
            cases above (previously this surfaced as an UnboundLocalError).
    '''
    fullidname = str(brats21id).zfill(5)
    if folername == 'mask_img': 
        patient_path = f'../input/segresult/{folername}/{fullidname}.nii'
    elif folername == 'sample': 
        patient_path = f'../input/segresult/{folername}/{fullidname}_0001.nii'
    elif folder == 'test': 
        patient_path = f'./rsna-preprocessed/test/{fullidname}_T1wCE.nii.gz'
    else:
        raise ValueError(
            f"No NIfTI location defined for folder={folder!r}, folername={folername!r}"
        )

    return str(patient_path)

# def get_all_images(brats21id, image_type, folder='train', size=225):
#     return [load_dicom(path, size) for path in get_all_image_paths(brats21id, image_type, folder)]

def get_all_images(brats21id, image_type, folder='train', size=225):
    '''
    Load the selected DICOM slices of one patient and MRI type on a thread pool.

    The paths come from get_all_image_paths; each one is read with load_dicom
    at the requested `size`, using one joblib thread per CPU reported by
    multiprocessing.

    Returns:
        The list joblib returns, one loaded image per selected path.
    '''
    slice_paths = get_all_image_paths(brats21id, image_type, folder)
    jobs = (delayed(load_dicom)(slice_path, size) for slice_path in slice_paths)
    thread_pool = Parallel(n_jobs=mp.cpu_count(), prefer='threads')
    return thread_pool(jobs)

# def get_all_nifit(img_list, folder='train', size=225):
#     return Parallel(n_jobs=mp.cpu_count(),prefer='threads')(delayed(read_niifile)(path, size) for path in get_all_nifiti_paths(img_list,folder))
def get_all_nifit(img_list, folder='train',folername=None, size=225):
    '''
    Resolve one patient's NIfTI path and load it with read_niifile.

    Parameters:
        img_list: patient id, forwarded to get_all_nifiti_paths.
        folder, folername: forwarded to get_all_nifiti_paths to pick the file.
        size: in-plane size forwarded to read_niifile.

    Returns:
        Whatever read_niifile returns for the resolved path.
    '''
    return read_niifile(get_all_nifiti_paths(img_list, folder, folername), size)


# Load Images We Will Need

In [None]:
def crop_dim(voxel):
    '''
    Flag the positions along the last axis whose mean over axes 0 and 1 is positive.

    Returns:
        A boolean array with one entry per index of the remaining axis.
    '''
    per_slice_mean = voxel.mean(axis=(0, 1))
    return per_slice_mean > 0

def get_all_data_for_train(df, image_type, image_size=32,use_mask=True):
    '''
    Load the per-patient volumes (and optionally masks) with slice-level labels.

    For every row of `df` the 'sample' NIfTI volume is loaded through
    get_all_nifit. With `use_mask` the 'mask_img' volume is loaded too and
    crop_dim marks which of its slices are non-empty.

    Slice labels appended to `y` per patient:
      - use_mask and MGMT_value == 1: the crop_dim flags as 0/1;
      - use_mask and MGMT_value == 0: all zeros, one per mask slice;
      - not use_mask: MGMT_value repeated once per image slice.

    Parameters:
        df: DataFrame with 'BraTS21ID' and 'MGMT_value' columns.
        image_type: accepted for interface compatibility; not used here.
        image_size: in-plane size forwarded to get_all_nifit.
        use_mask: whether to load masks and derive tumour flags.

    Returns:
        use_mask=True:  (X, y, tumory, train_ids, M, gt) where X, y, tumory
            and M are per-patient lists, train_ids holds the patient id of
            every image slice and gt the MGMT_value of every mask slice.
        use_mask=False: (np.array(X), np.array(y), np.array(train_ids)).
    '''
    X = []
    y = []
    tumory = []
    M = []
    train_ids = []
    gt = []

    for i in tqdm(df.index):
        x = df.loc[i]
        patient_id = int(x['BraTS21ID'])
        label = x['MGMT_value']

        images = get_all_nifit(patient_id, 'train', 'sample',image_size)
        n_image_slices = len(images[0,0,:])
        X.append(images)
        train_ids += [patient_id] * n_image_slices

        if use_mask:
            maskes = get_all_nifit(patient_id,'train','mask_img',image_size)
            keepdim = crop_dim(maskes)
            n_mask_slices = len(maskes[0,0,:])
            M.append(maskes)
            tumory.append(keepdim)

            if label == 1:
                # A positive patient only counts as positive on slices that show tumour.
                y.append(keepdim*1)
            elif label == 0:
                y.append([label] * n_mask_slices)
            gt += [label] * n_mask_slices
        else:
            # Without masks there is nothing to crop by, so every slice takes
            # the patient's label. This path used to reference the undefined
            # keepdim/maskes and crashed.
            y.append([label] * n_image_slices)

    if use_mask:
        return X, y,tumory, np.array(train_ids), M , np.array(gt)
    # NOTE(review): np.array(X) / np.array(y) need every patient to have the
    # same number of slices to form a regular array - confirm before relying
    # on this branch.
    return np.array(X), np.array(y), np.array(train_ids)

In [None]:
from itertools import chain

In [None]:
# Load volumes, slice labels, tumour flags, per-slice patient ids, masks and
# per-slice MGMT labels for the training patients (six values because use_mask=True).
# The 'T1wCE' argument is not used by get_all_data_for_train itself.
X, y,tumory, trainidt,M,train_GT = get_all_data_for_train(train_df, 'T1wCE', image_size=64,use_mask=True)
# print(np.concatenate(X,axis=-1).shape,len(list(chain(*y)))),len(list(chain(*tumory)),np.concatenate(M,axis=-1).shape)

In [None]:
# Same loading as for the training patients, applied to the held-out validation rows.
X_val, y_val,y_val_tumor, validt,M_val,val_GT = get_all_data_for_train(val_df, 'T1wCE', image_size=64,use_mask=True)
# Sanity check: shape of all validation slices stacked on the last axis, number of
# patients in y_val, and total number of per-slice tumour flags.
print(np.concatenate(X_val,axis=-1).shape,len(y_val),len(list(chain(*y_val_tumor))))

In [None]:
# Join all patients along the last axis, then move that axis to the front so
# that the first index selects one slice.
M_valT = np.concatenate(M_val,axis=-1).transpose((2,0,1))
X_valT = np.concatenate(X_val,axis=-1).transpose((2,0,1))
# Flatten the per-patient label lists into one flat list per label type.
y_val_tumor = list(chain(*y_val_tumor))
mgmt_y_val = list(chain(*y_val))

In [None]:
import matplotlib.pyplot as plt 
fig,axs=plt.subplots(1,2,figsize=(10,10))
n = 0
w = 100  # index of the slice shown below
# Join all training patients along the last axis, then move that axis to the
# front so that the first index selects one slice.
MT = np.concatenate(M,axis=-1).transpose((2,0,1))
XT = np.concatenate(X,axis=-1).transpose((2,0,1))
# Flatten the per-patient label lists into one flat list per label type.
tumor_y = list(chain(*tumory))
mgmt_y = list(chain(*y))
# Show the mask (left) next to the image (right) for slice w, with its two labels.
axs[0].imshow(MT[w])
axs[1].imshow(XT[w])
print(f'MGMT:{mgmt_y[w]},Tumor:{tumor_y[w]*1}')

# Train/Validation Split

In [None]:
# Labels are stacked into two columns per slice: column 0 = MGMT label, column 1 = tumour flag.
# Only the first part of each split is kept; the held-out part (10% of the
# training slices, 1% of the validation slices) is discarded.
X_train, _, y_train, _, trainidt_train, _ = train_test_split(XT, np.stack([mgmt_y,tumor_y],axis=1), trainidt, test_size=0.1, random_state=3452)
X_valid, _, y_valid, _, trainidt_valid, _ = train_test_split(X_valT, np.stack([mgmt_y_val,y_val_tumor],axis=1), validt, test_size=0.01, random_state=3452)

In [None]:
# Shapes of the slices, the two-column labels and the per-slice patient ids for both splits.
print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape, trainidt_train.shape, trainidt_valid.shape)

## Add channel dimension

In [None]:
# Append a trailing axis of length 1 to each slice array (used as the model's input shape).
X_train = tf.expand_dims(X_train, axis=-1)
X_valid = tf.expand_dims(X_valid, axis=-1)
print(X_train.shape, X_valid.shape)

## One-hot encode labels

In [None]:
# Both label columns are binary, so the one-hot width is pinned to 2. Leaving
# it to be inferred from the data would yield a width of 1 (and a shape
# mismatch with the 2-unit model heads) if a split happened to contain only zeros.
y_train = to_categorical(y_train, num_classes=2)
y_valid = to_categorical(y_valid, num_classes=2)

# Tensorflow Models

## Model from:  https://www.kaggle.com/ohbewise/dataset-to-model-with-tensorflow

In [None]:
# # Define, train, and evaluate model
# # source: https://keras.io/examples/vision/3D_image_classification/
# def get_model01(width=128, height=128, depth=64, name='3dcnn'):
#     """Build a 3D convolutional neural network model."""

#     inputs = tf.keras.Input((width, height, depth, 1))

#     x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
#     x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
#     x = tf.keras.layers.BatchNormalization()(x)

#     x = tf.keras.layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
#     x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
#     x = tf.keras.layers.BatchNormalization()(x)

#     x = tf.keras.layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
#     x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
#     x = tf.keras.layers.BatchNormalization()(x)

#     x = tf.keras.layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
#     x = tf.keras.layers.MaxPool3D(pool_size=2)(x)
#     x = tf.keras.layers.BatchNormalization()(x)

#     x = tf.keras.layers.GlobalAveragePooling3D()(x)
#     x = tf.keras.layers.Dense(units=512, activation="relu")(x)
#     x = tf.keras.layers.Dropout(0.3)(x)

#     outputs = tf.keras.layers.Dense(units=1, activation="sigmoid")(x)

#     # Define the model.
#     model = tf.keras.Model(inputs, outputs, name=name)
    
#     # Compile model.
#     initial_learning_rate = 0.0001
#     lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#         initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
#     )
#     model.compile(
#         loss="binary_crossentropy",
#         optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
#         metrics=["acc"],
#     )
    
#     return model



## Model from: https://www.kaggle.com/evanyao27/team-9-second-week/notebook

- Validation AUC=0.9148664856146349

In [None]:
def get_model2D():
    '''
    Build the two-headed 2D CNN used for slice classification.

    The input shape is taken from the global X_train (everything after the
    batch axis). Inputs are rescaled by 1/255, passed through three
    Conv2D + MaxPool2D stages (6, 16 and 24 filters), dropout, and a
    24-unit dense layer that feeds two 2-unit sigmoid heads named
    'mgmt_output' and 'tumor_output'.

    The numpy, random and TensorFlow seeds are all reset to 42 on every call.

    Returns:
        An uncompiled keras.Model with outputs [mgmt_output, tumor_output].
    '''
    for set_seed in (np.random.seed, random.seed, tf.random.set_seed):
        set_seed(42)

    inpt = keras.Input(shape=X_train.shape[1:])
    print(inpt.shape)
    features = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inpt)

    # (filters, layer name) for each convolution stage, in order.
    conv_stages = ((6, "Conv_1"), (16, "Conv_2"), (24, "Conv_3"))
    for n_filters, stage_name in conv_stages:
        features = keras.layers.Conv2D(
            n_filters, kernel_size=(3, 3), activation="relu", name=stage_name
        )(features)
        features = keras.layers.MaxPool2D(pool_size=(2, 2))(features)
    features = keras.layers.Dropout(0.1)(features)

    features = keras.layers.Flatten()(features)
    features = keras.layers.Dense(24, activation="relu")(features)

    heads = [
        keras.layers.Dense(2, activation="sigmoid", name=head_name)(features)
        for head_name in ('mgmt_output', 'tumor_output')
    ]
    return keras.Model(inpt, heads)

## Set up Model Checkpoint

In [None]:
class PlotProgress(keras.callbacks.Callback):
    '''
    Keras callback that redraws a training/validation curve after each epoch.

    `entity` is the key looked up in the epoch logs (e.g. 'loss'); the
    validation curve uses the same key prefixed with 'val_'.
    '''

    def __init__(self, entity='loss'):
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.entity = entity

    def on_train_begin(self, logs=None):
        # Reset all history so that a new fit() starts from an empty plot.
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        # clear_output was never imported in this notebook, so the callback
        # failed with a NameError at the end of the first epoch. It only
        # exists under IPython/Jupyter, hence the guarded local import.
        try:
            from IPython.display import clear_output
        except ImportError:
            clear_output = None

        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('{}'.format(self.entity)))
        self.val_losses.append(logs.get('val_{}'.format(self.entity)))
        self.i += 1

        if clear_output is not None:
            # Replace the previous epoch's figure instead of stacking a new one below it.
            clear_output(wait=True)
        plt.plot(self.x, self.losses, label="{}".format(self.entity))
        plt.plot(self.x, self.val_losses, label="val_{}".format(self.entity))
        plt.legend()
        plt.show()

In [None]:
# One checkpoint file per epoch: chpt_1.h5, chpt_2.h5, ...
checkpoint_filepath = "chpt_{epoch}.h5"
first_decay_steps = 1000
# NOTE(review): LearningRateScheduler is presumably invoked once per epoch, in
# which case first_decay_steps=1000 is counted in epochs here, and this schedule
# (starting at 1e-3) replaces the learning rate passed to the optimizer - confirm.
scheduler = tf.keras.experimental.CosineDecayRestarts(1e-3,first_decay_steps,t_mul=2.0,m_mul=1.0)
plot_progress = PlotProgress(entity='loss')


# Despite its name this is a list of two callbacks: the LR scheduler and the checkpoint writer.
model_checkpoint_callback = [
    tf.keras.callbacks.LearningRateScheduler(scheduler),
    tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    # NOTE(review): with save_best_only=False a checkpoint is written every epoch,
    # so monitor/mode presumably have no effect - confirm.
    monitor="val_auc",
    mode="max",
    save_best_only=False,
    save_freq="epoch",
    verbose=10,
)]

### Note that rerunning the cell below will change val_acc to val_acc_N and the model will not be saved.

In [None]:
# Build a fresh, uncompiled model; its input shape is read from the global X_train.
model = get_model2D()

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

In [None]:

# Both heads use categorical cross-entropy on their one-hot targets; a single
# AUC metric is requested for the whole model.
model.compile(
    loss={
        'mgmt_output': "categorical_crossentropy",
        'tumor_output': "categorical_crossentropy",
    },
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-3),
    metrics=[tf.keras.metrics.AUC()],
)

In [None]:
# Index 0 on axis 1 holds the one-hot MGMT label, index 1 the one-hot tumour flag.
train_targets = {'mgmt_output': y_train[:, 0], 'tumor_output': y_train[:, 1]}
valid_targets = {'mgmt_output': y_valid[:, 0], 'tumor_output': y_valid[:, 1]}

history = model.fit(
    x=X_train,
    y=train_targets,
    epochs=50,
    callbacks=[model_checkpoint_callback],
    validation_data=(X_valid, valid_targets),
)

In [None]:
# Score every saved checkpoint on the full validation set at patient level
# and remember the epoch with the highest AUC.
X_val_test= tf.expand_dims(X_valT, axis=-1)
y_val_test = to_categorical(list(chain(*y_val)))
best = 0
epoch = -1
# Scan one checkpoint per epoch that training actually ran. The previous
# hard-coded range(30) silently ignored the checkpoints of later epochs.
n_checkpoints = len(history.epoch)
for i in range(n_checkpoints):
    model_best = tf.keras.models.load_model(filepath=f'chpt_{i + 1}.h5')
    y_pred = model_best.predict(X_val_test)

    # Keep only slices where the tumour head's second output exceeds 0.3.
    tumor_pred = y_pred[1][:,1] > 0.3 # filtering tumor

    # Hard MGMT decision per slice, paired with the patient each slice belongs to.
    pred = np.argmax(y_pred[0],axis=1)
    result = pd.DataFrame(validt)
    result[1] = pred
    result.columns = ["BraTS21ID", "MGMT_value"]
    result = result.iloc[tumor_pred]

    # Patient score = mean of its slice decisions; the merge adds the true
    # label as MGMT_value_x next to the prediction MGMT_value_y.
    result2 = result.groupby("BraTS21ID", as_index=False).mean()
    result2 = val_df.merge(result2, on="BraTS21ID")
    auc = roc_auc_score(
        result2.MGMT_value_x,
        result2.MGMT_value_y,
    )
    print(f"Validation AUC={auc}")
    if best < auc:
        best = auc
        epoch = i + 1

print(f'Best AUC: {best}, epoch: {epoch}')

# Load Our Best Model

# Predictions on Validation Set

In [None]:
!pip install --force-reinstall /kaggle/input/torchio/SimpleITK-2.1.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
!pip install /kaggle/input/torchio/Deprecated-1.2.13-py2.py3-none-any.whl
!pip install /kaggle/input/torchio/torchio-0.18.56-py2.py3-none-any.whl

In [None]:
# def preprocess_dataset(dataset, out_dir, parallel=True, demo=False):
#     change_num = {'T1w':'0','T1wCE':'1','T2w':'2','FLAIR':'3'}
#     import shutil
#     import multiprocessing as mp
#     from pathlib import Path
#     from tqdm import tqdm
#     if demo:  # just to showcase TorchIO
#         dataset._subjects = dataset._subjects[:5]
#     out_dir = Path(out_dir)
#     labels_name = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv'
#     out_dir.mkdir(exist_ok=True, parents=True)
#     subjects_dir = out_dir / ('train' if dataset.train else 'test')
#     if parallel:
#         loader = torch.utils.data.DataLoader(
#             dataset,
#             num_workers=mp.cpu_count(),
#             collate_fn=lambda x: x[0],
#         )
#         iterable = loader
#     else:
#         iterable = dataset
#     for subject in tqdm(iterable):
#         subject_dir = subjects_dir 
# #         print(subject_dir)
#         for name, image in tqdm(subject.get_images_dict().items(), leave=False):
#             image_dir = subject_dir 
# #             image_dir.mkdir(exist_ok=True, parents=True)
#             image_path = out_dir / f'{subject.BraTS21ID}_{change_num[name].zfill(4)}.nii.gz'
#             image.save(image_path)

# out_dir = './'
# # if not Path(out_dir).is_dir():
# preprocess_dataset(test_set, out_dir, parallel=True, demo=False)

In [None]:
from pathlib import Path
import torch
import torchio as tio
import matplotlib.pyplot as plt

root_dir = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

# Transforms applied, in this order, to every test subject when it is loaded.
preprocessing_transforms = (    
#     tio.ZNormalization(),
    tio.ToCanonical(),
    # NOTE(review): presumably resamples to 1 mm isotropic spacing - confirm against torchio docs.
    tio.Resample(1, image_interpolation='bspline'),
    # NOTE(review): presumably resamples every image onto the T1wCE image's grid - confirm.
    tio.Resample('T1wCE', image_interpolation='nearest'),
)
preprocess = tio.Compose(preprocessing_transforms)
# train=False selects the test split of the competition data.
test_set = tio.datasets.RSNAMICCAI(root_dir, train=False, transform=preprocess)


In [None]:

def preprocess_dataset(dataset, out_dir, parallel=True, demo=False):
    '''
    Save every subject's T1wCE image as <out_dir>/<split>/<id>_T1wCE.nii.gz.

    <split> is 'train' when dataset.train is truthy, otherwise 'test'. <id> is
    the subject's BraTS21ID zero-padded to five digits, the same padding
    get_all_nifiti_paths uses when it builds the path to read back.

    Parameters:
        dataset: torchio dataset whose subjects expose get_images_dict()
            and BraTS21ID.
        out_dir: root folder for the saved images.
        parallel: iterate through a torch DataLoader with one worker per CPU
            instead of iterating the dataset directly.
        demo: accepted for interface compatibility; not used.

    Returns:
        The `dataset` argument, unchanged.
    '''
    import multiprocessing as mp
    from pathlib import Path
    from tqdm import tqdm

    out_dir = Path(out_dir)
    subjects_dir = out_dir / ('train' if dataset.train else 'test')
    if parallel:
        # collate_fn unwraps the single-element batch so the loop receives the subject itself.
        iterable = torch.utils.data.DataLoader(
            dataset,
            num_workers=mp.cpu_count(),
            collate_fn=lambda x: x[0],
        )
    else:
        iterable = dataset
    for subject in tqdm(iterable):
        for name, image in tqdm(subject.get_images_dict().items(), leave=False):
            if name != 'T1wCE':
                continue
            subjects_dir.mkdir(exist_ok=True, parents=True)
            # Pad to five digits (was four) so the file name always matches the reader.
            patient_id = str(subject.BraTS21ID).zfill(5)
            image.save(subjects_dir / f'{patient_id}_{name}.nii.gz')

    return dataset

# get_all_nifiti_paths reads test volumes from ./rsna-preprocessed/test/, so
# this folder name has to stay in sync with it.
out_dir = 'rsna-preprocessed'
# if not Path(out_dir).is_dir():
# Writes the preprocessed T1wCE test volumes; the return value is the dataset itself.
dicom_images = preprocess_dataset(test_set,out_dir, parallel=True, demo=False)



In [None]:
def get_all_data_for_test(image_type, image_size=32):
    '''
    Load the preprocessed test volume of every patient listed in test_df.

    Parameters:
        image_type: accepted for interface compatibility; not used here.
        image_size: in-plane size forwarded to get_all_nifit.

    Returns:
        (volumes, ids): the list of per-patient volumes and a numpy array
        holding, for every slice of every volume, the owning patient id.
    '''
    volumes = []
    slice_owner_ids = []

    for row_label in tqdm(test_df.index):
        patient_id = int(test_df.loc[row_label]['BraTS21ID'])
        volume = get_all_nifit(patient_id, 'test', None, image_size)
        volumes.append(volume)
        # One id per slice so slice predictions can be grouped by patient later.
        slice_owner_ids.extend([patient_id] * len(volume[0, 0, :]))

    return volumes, np.array(slice_owner_ids)


In [None]:
# get_all_images = Parallel(n_jobs=mp.cpu_count(),prefer='threads')(delayed(load_niifile)(path.get_images_dict()['T1wCE']) for path in tqdm(dicom_images))

In [None]:
# Per-patient test volumes plus the patient id of every slice.
# The 'T1wCE' argument is not used by get_all_data_for_test itself.
X_test, testidt = get_all_data_for_test('T1wCE', image_size=64)

In [None]:
# Join all test patients along the last axis, move that axis to the front so the
# first index selects one slice, then append a trailing axis of length 1.
X_testT =  np.concatenate(X_test,axis=-1).transpose((2,0,1))
X_testT= tf.expand_dims(X_testT, axis=-1)

# Predictions on the Test Set

In [None]:
# X_val_test= tf.expand_dims(X_valT, axis=-1)
# y_val_test = to_categorical(list(chain(*y_val)))
# best = 0
# epoch = -1
# for i in range(30):
#     model_best = tf.keras.models.load_model(filepath=f'chpt_{i + 1}.h5')
#     y_pred = model_best.predict(X_val_test)
    

#     tumor_pred = y_pred[1][:,1] > 0.3 # filtering tumor

#     pred = np.argmax(y_pred[0],axis=1)
#     result = pd.DataFrame(validt)
#     result[1] = pred
#     result.columns = ["BraTS21ID", "MGMT_value"]
#     result = result.iloc[tumor_pred]
    
#     result2 = result.groupby("BraTS21ID", as_index=False).mean()
#     result2 = val_df.merge(result2, on="BraTS21ID")
#     auc = roc_auc_score(
#         result2.MGMT_value_x,
#         result2.MGMT_value_y,
#     )
#     print(f"Validation AUC={auc}")
#     if best < auc:
#         best = auc
#         epoch = i + 1
        
# print(f'Best AUC: {best}, epoch: {epoch}')

In [None]:
import os
import pandas as pd
import torchio as tio
from pathlib import Path

# Parameters to limit the processing power needed.
# When True, only the first 10 patient folders are processed.
demo  = False # if True limits to 10 patients
# Scan-type sub-folders processed for every patient.
scan_types    = ['FLAIR','T1w','T1wCE','T2w'] # uses all scan types


In [None]:
# NOTE: this cell rebinds data_dir (previously a Path) to a string and out_dir
# to a different folder than the one the test preprocessing wrote to.
data_dir   = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification/'
out_dir    = './processed'

# The pipeline is the same for every scan, so it is built once here instead of
# being rebuilt inside the innermost loop.
transforms = [
    tio.ToCanonical(),
    tio.Resample(1),
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
    tio.CropOrPad((128,128,64)),
    tio.RescaleIntensity((-1, 1)),
]
transform = tio.Compose(transforms)

for dataset in ['train']:
    dataset_dir = f'{data_dir}{dataset}'
    patients = os.listdir(dataset_dir)
    if demo:
        patients = patients[:10]
    
    # Remove cases the competition host said to exclude 
    # https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/262046
    for excluded_patient in ('00109', '00123', '00709'):
        if excluded_patient in patients:
            patients.remove(excluded_patient)
    
    print(f'Total patients in {dataset} dataset: {len(patients)}')

    count = 0
    for patient in patients:
        count = count + 1
        print(f'{dataset}: {count}/{len(patients)}')

        for scan_type in scan_types:
            scan_src  = f'{dataset_dir}/{patient}/{scan_type}/'
            scan_dest = f'{out_dir}/{dataset}/{patient}/{scan_type}/'
            Path(scan_dest).mkdir(parents=True, exist_ok=True)
            # The whole scan folder is handed to torchio as one image.
            image = tio.ScalarImage(scan_src)
            preprocessed = transform(image)
            preprocessed.save(f'{scan_dest}/{scan_type}.nii.gz')

In [None]:
# Reload the checkpoint of the epoch chosen by the validation scan (global `epoch`).
model_best = tf.keras.models.load_model(filepath=f'chpt_{epoch}.h5')

y_pred = model_best.predict(X_testT)
# tumor_filter = y_pred[1][:,1] > 0.3 # filtering tumor

# Hard MGMT decision per slice, taken from the first model output.
pred = np.argmax(y_pred[0], axis=1) #

# One row per slice: column 0 = owning patient id, column 1 = predicted class.
result = pd.DataFrame(testidt)
result[1] = pred
pred

# Submission File

In [None]:
# result = result.iloc[tumor_filter]
result.columns=['BraTS21ID','MGMT_value']

# Patient score = mean of its slice-level decisions.
patient_scores = result.groupby('BraTS21ID',as_index=False).mean()

# Align on the id itself rather than on row position: overwriting the id
# column positionally would attach scores to the wrong patients whenever the
# grouped frame differs from the sample submission in length or order.
result2 = sample_submission[['BraTS21ID']].merge(patient_scores, on='BraTS21ID', how='left')
# A patient without any predicted slice gets the neutral score 0.5.
result2['MGMT_value'] = result2['MGMT_value'].fillna(0.5)

# Rounding...
result2['MGMT_value'] = result2['MGMT_value'].apply(lambda x:round(x*10)/10)
result2.to_csv('submission.csv',index=False)
result2

In [None]:
import seaborn as sns
# Bar chart of how many test patients received each rounded MGMT_value.
sns.countplot(data=result2, x="MGMT_value")