## Use stacked images (3D) and Efficientnet3D model

Acknowledgements:

- https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling
- https://www.kaggle.com/furcifer/torch-efficientnet3d-for-mri-no-train
- https://github.com/shijianjian/EfficientNet-PyTorch-3D
    
    
Use models with only one MRI type, then ensemble the 4 models 


In [None]:
import os
import sys 
import json
import glob
import random
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

In [None]:

# Root folder of the competition data and location of the 3D EfficientNet package.
data_directory = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"
pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"

# MRI sequence names; one model per sequence is combined in the ensemble.
mri_types = ["FLAIR", "T1w", "T1wCE", "T2w"]
# Default slice side length (pixels) and default number of slices per volume.
SIZE, NUM_IMAGES = 256, 64

sys.path.append(pytorch3dpath)
from efficientnet_pytorch_3d import EfficientNet3D

## Functions to load images

In [None]:
def load_dicom_image(path, img_size=SIZE):
    """Read one DICOM slice and return it min-max scaled and resized.

    Args:
        path: Path of the ``.dcm`` file to read.
        img_size: Side length, in pixels, of the returned square image.

    Returns:
        A float array resized to ``(img_size, img_size)`` whose intensities
        were scaled to ``[0, 1]`` before resizing. A slice with a single
        constant intensity yields an all-zero image.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)
    # Work in floating point so that subtracting the minimum cannot wrap
    # around for integer pixel types.
    data = dicom.pixel_array.astype(np.float64)

    low, high = data.min(), data.max()
    if low == high:
        # Constant slice: there is no intensity range to normalise.
        return np.zeros((img_size, img_size))

    # high > low at this point, so the divisor is strictly positive.
    data = (data - low) / (high - low)
    return cv2.resize(data, (img_size, img_size))

def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
    """Build a fixed-depth volume from the middle slices of one scan.

    Args:
        scan_id: Name of the scan folder (callers pass a zero-padded id).
        num_imgs: Depth of the returned volume.
        img_size: Side length, in pixels, of every slice.
        mri_type: Name of the sequence sub-folder to read.
        split: Dataset sub-folder (for example ``"train"`` or ``"test"``).

    Returns:
        A float array of shape ``(1, img_size, img_size, num_imgs)``. When the
        selected window holds fewer than ``num_imgs`` slices the tail is
        zero-padded.

    Raises:
        ValueError: If no slice can be selected (no ``.dcm`` files were found,
            or ``num_imgs`` is smaller than 2).
    """
    pattern = f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"
    # NOTE(review): sorted() orders the file names as plain strings
    # (lexicographically) — confirm that this is the intended slice order.
    files = sorted(glob.glob(pattern))

    middle = len(files) // 2
    half = num_imgs // 2
    selected = files[max(0, middle - half):min(len(files), middle + half)]
    if not selected:
        # np.stack would fail here with an unhelpful message; keep the
        # exception type but say what is actually wrong.
        raise ValueError(f"No DICOM slices selected from {pattern!r} (num_imgs={num_imgs})")

    # Forward img_size so the slices match the padding block built below.
    # .T reverses the axes: (n, H, W) -> (W, H, n).
    img3d = np.stack([load_dicom_image(f, img_size=img_size) for f in selected]).T

    missing = num_imgs - img3d.shape[-1]
    if missing > 0:
        padding = np.zeros((img_size, img_size, missing))
        img3d = np.concatenate((img3d, padding), axis=-1)

    return np.expand_dims(img3d, 0)

#load_dicom_images_3d("00000").shape

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also stores *seed* in the ``PYTHONHASHSEED`` environment variable and,
    when CUDA is available, seeds all CUDA devices and switches cuDNN to
    deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

## Model and training classes

In [None]:
class Dataset(torch_data.Dataset):
    """Map-style dataset that loads one 3D MRI volume per scan id.

    Args:
        paths: Indexable collection of scan ids.
        targets: Per-scan labels, or ``None`` for unlabeled data.
        mri_type: Indexable collection holding the sequence name of each scan.
        label_smoothing: Amount by which each label is pulled away from its
            original value.
        split: Dataset sub-folder read when ``targets`` is ``None``.
    """

    def __init__(self, paths, targets=None, mri_type=None, label_smoothing=0.01, split="train"):
        self.paths, self.targets = paths, targets
        self.mri_type = mri_type
        self.label_smoothing = label_smoothing
        self.split = split

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        scan_id = self.paths[index]
        unlabeled = self.targets is None
        # Labeled samples are always read from "train", whatever `split` says.
        folder = self.split if unlabeled else "train"
        volume = load_dicom_images_3d(str(scan_id).zfill(5), mri_type=self.mri_type[index], split=folder)

        sample = {"X": torch.tensor(volume).float()}
        if unlabeled:
            sample["id"] = scan_id
        else:
            # abs(label - eps): 0 becomes eps and 1 becomes 1 - eps.
            smoothed = abs(self.targets[index] - self.label_smoothing)
            sample["y"] = torch.tensor(smoothed, dtype=torch.float)
        return sample


In [None]:
class Model(nn.Module):
    """Single-channel 3D ``efficientnet-b0`` with a one-output linear head."""

    def __init__(self):
        super().__init__()
        backbone = EfficientNet3D.from_name(
            "efficientnet-b0",
            override_params={'num_classes': 2},
            in_channels=1,
        )
        # Swap the final fully connected layer for a single-output linear layer.
        backbone._fc = nn.Linear(in_features=backbone._fc.in_features, out_features=1, bias=True)
        self.net = backbone

    def forward(self, x):
        """Return the wrapped network's output for *x*."""
        return self.net(x)
    

In [None]:
class Dataset(torch_data.Dataset):
    """Dataset yielding one 3D MRI volume per scan id.

    NOTE: this re-declares the ``Dataset`` class already defined earlier in
    this notebook.

    Args:
        paths: Indexable collection of scan ids.
        targets: Per-scan labels, or ``None`` for unlabeled data.
        mri_type: Indexable collection holding the sequence name of each scan.
        label_smoothing: Offset applied to each label via ``abs(label - offset)``.
        split: Dataset sub-folder read when ``targets`` is ``None``.
    """

    def __init__(self, paths, targets=None, mri_type=None, label_smoothing=0.01, split="train"):
        self.split = split
        self.label_smoothing = label_smoothing
        self.mri_type = mri_type
        self.targets = targets
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        scan_id = self.paths[index]
        padded_id = str(scan_id).zfill(5)
        modality = self.mri_type[index]

        if self.targets is None:
            # Unlabeled sample: read from the configured split and return the id.
            volume = load_dicom_images_3d(padded_id, mri_type=modality, split=self.split)
            return {"X": torch.tensor(volume).float(), "id": scan_id}

        # Labeled sample: always read from the "train" folder.
        volume = load_dicom_images_3d(padded_id, mri_type=modality, split="train")
        y = torch.tensor(abs(self.targets[index] - self.label_smoothing), dtype=torch.float)
        return {"X": torch.tensor(volume).float(), "y": y}

## Pretrained model checkpoints (one per MRI type)

In [None]:
# Checkpoint files, listed in the same order as `mri_types` (FLAIR, T1w, T1wCE, T2w).
modelfiles = [
    "../input/3deffmodels/FLAIR-e2-loss0.696-auc0.605.pth",
    "../input/3deffmodels/T1w-e2-loss0.718-auc0.579.pth",
    "../input/3deffmodels/T1wCE-e6-loss0.683-auc0.633.pth",
    "../input/3deffmodels/T2w-e8-loss0.658-auc0.677.pth",
]

## Predict function

In [None]:
def predict(modelfile, df, mri_type, split):
    """Score every scan listed in *df* with one saved model.

    Args:
        modelfile: Path of a checkpoint containing a ``"model_state_dict"`` entry.
        df: DataFrame whose index holds the scan ids. NOTE: an ``"MRI_Type"``
            column is written into this DataFrame as a side effect.
        mri_type: Name of the MRI sequence the model is applied to.
        split: Dataset sub-folder the scans are read from.

    Returns:
        A DataFrame indexed by ``"BraTS21ID"`` with one ``"MGMT_value"`` column
        holding the sigmoid of the model output for each scan.
    """
    print("Predict:", modelfile, mri_type, df.shape)
    df.loc[:, "MRI_Type"] = mri_type
    data_retriever = Dataset(
        df.index.values,
        mri_type=df["MRI_Type"].values,
        split=split,
    )
    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=1,
        shuffle=False,
        num_workers=8,
    )

    model = Model()
    model.to(device)

    # map_location lets a checkpoint saved from a GPU be restored on whatever
    # device is in use now (including a CPU-only session).
    checkpoint = torch.load(modelfile, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    y_pred = []
    ids = []
    n_batches = len(data_loader)
    with torch.no_grad():
        for e, batch in enumerate(data_loader, 1):
            print(f"{e}/{n_batches}", end="\r")
            probs = torch.sigmoid(model(batch["X"].to(device))).cpu().numpy()
            # Flatten to plain Python floats regardless of the batch size,
            # rather than appending 0-d arrays for single-sample batches.
            y_pred.extend(probs.reshape(-1).tolist())
            ids.extend(batch["id"].numpy().tolist())

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    return preddf.set_index("BraTS21ID")

## Ensemble for submission

In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The sample submission supplies the scan ids (as the index) to predict for.
submission = pd.read_csv(f"{data_directory}/sample_submission.csv", index_col="BraTS21ID")

# Accumulate the *sum* of the per-model predictions; no averaging happens here.
submission["MGMT_value"] = 0
# zip pairs checkpoints and MRI types by position and stops at the shorter list,
# so the two lists must be kept in the same order.
for m, mtype in zip(modelfiles, mri_types):
    pred = predict(m, submission, mtype, split="test")
    submission["MGMT_value"] += pred["MGMT_value"]

# Averaging and writing the CSV are disabled at this point:
#submission["MGMT_value"] /= len(modelfiles)
#submission["MGMT_value"].to_csv("submission.csv")

In [None]:
submission

In [None]:
# Copy the summed 3D-model predictions into a standalone NumPy array.
mgmt=np.array(submission["MGMT_value"])

In [None]:
sns.displot(submission["MGMT_value"])

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random as rd
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tensorflow import keras
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
import cv2
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Parameters

In [None]:
# Side length (pixels) that slices are resized to wherever IM_SIZE is used.
IM_SIZE = 256

## Model

In [None]:
# Bind the global `model` that get_prediction_per_case reads when it is called.
model = keras.models.load_model('../input/emodels/model (7).h5 (1)/model (7).h5')

In [None]:
model.summary()

## Helper functions

In [None]:
def mid_crop(img,c=25,c2=30):
    """Return the central window of *img*, ``2*c2`` rows by ``2*c`` columns.

    Args:
        img: Array whose first two axes are rows and columns.
        c: Half-width of the window along axis 1 (columns).
        c2: Half-height of the window along axis 0 (rows).

    Returns:
        The slice of *img* centred on the middle of both axes. The window is
        clipped to the image when it would extend past an edge.
    """
    # Centre the row window on the row axis and the column window on the
    # column axis (the two centres only coincide for square images).
    row_mid = img.shape[0] // 2
    col_mid = img.shape[1] // 2
    # Clamp the start at 0: a negative start would be interpreted as an
    # offset from the end of the axis.
    top = max(0, row_mid - c2)
    left = max(0, col_mid - c)
    return img[top:row_mid + c2, left:col_mid + c]
def read_xray(path, voi_lut = False, fix_monochrome = True):
    """Load one DICOM slice, scale it to 0-255, resize it and centre-crop it.

    Args:
        path: Path of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert slices whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        The result of ``mid_crop`` applied to the slice after min-max scaling
        to ``[0, 255]`` and resizing to 100x100. A constant slice yields zeros.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # Use floats from here on so the subtractions below cannot wrap around
    # for integer pixel types.
    data = data.astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    # else: constant slice — data is already all zeros, and dividing by the
    # zero peak would only produce NaNs.

    return mid_crop(cv2.resize(data*255,(100,100)))
def pad_images(imgs,img_shape=(28,28),max_frames=356):
    """Zero-pad (or truncate) every image sequence to ``max_frames`` frames.

    Args:
        imgs: Array whose first axis indexes sequences and whose second axis
            indexes the frames of a sequence.
        img_shape: ``(height, width)`` of a single frame.
        max_frames: Number of frames in every returned sequence.

    Returns:
        A float array of shape
        ``(len(imgs), max_frames, img_shape[0], img_shape[1], 1)``. Copying a
        sequence stops at the first frame that does not fit ``img_shape``; the
        remaining frames of that sequence stay zero.
    """
    padded = np.zeros((imgs.shape[0], max_frames, img_shape[0], img_shape[1]))
    for i, frames in enumerate(imgs):
        # Frames beyond max_frames would be discarded anyway, so skip them.
        for j, frame in enumerate(frames[:max_frames]):
            try:
                padded[i, j] = frame
            except (ValueError, TypeError):
                # The frame cannot be stored in an img_shape slot.
                break
    return padded[..., np.newaxis]
def get_prediction_per_case(patient, mri_type="FLAIR"):
    """Return the mean prediction of the global ``model`` over a patient's slices.

    Args:
        patient: Name of the patient folder inside the test set.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        The model output averaged over all slices that pass the brightness
        filter, as a Python float.

    Raises:
        ValueError: If no slice of the patient passes the filter.
    """
    path = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{patient}/{mri_type}/'

    imagess = []
    for file_name in os.listdir(path):
        img = read_xray(path + file_name)
        # Skip blank slices.
        # NOTE(review): read_xray returns values on the 0-255 scale, so the
        # 0.015 mean threshold is very permissive — confirm this is intended.
        if np.max(img) > 0 and np.mean(img) >= 0.015:
            # read_xray already returns a cropped slice; it is resized and
            # cropped a second time here before the final 50x50 resize.
            img = mid_crop(cv2.resize(img, (100, 100)))
            img = cv2.resize(img, (50, 50))
            # Replicate the single channel three times.
            img = cv2.merge((img, img, img))
            imagess.append(img / 255)

    if not imagess:
        raise ValueError(f"No usable {mri_type} slices found for patient {patient!r}")

    # .item() turns the size-1 mean into a Python float; calling float() on
    # an array with ndim > 0 is deprecated in NumPy.
    return np.mean(model.predict(np.array(imagess)), axis=0).item()

In [None]:
get_prediction_per_case('00047')

In [None]:
# Read every column as text so that ids such as '00047' keep their leading zeros.
df = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv',dtype="string")

In [None]:
# Add the current Keras model's per-patient prediction to the running sum.
# The addition is positional, so it relies on `df` listing patients in the same
# order as `submission` (both are read from sample_submission.csv).
mgmt = (mgmt+df['BraTS21ID'].apply(get_prediction_per_case))

In [None]:
mgmt=np.array(mgmt)

In [None]:
mgmt

In [None]:
sns.histplot(mgmt)

In [None]:
# Rebind the global `model` to the T1w Keras model.
model = keras.models.load_model('../input/emodels/T1w - model.h5') 

In [None]:
def mid_crop(img,c=25,c2=30):
    """Return the central window of *img*, ``2*c2`` rows by ``2*c`` columns.

    Args:
        img: Array whose first two axes are rows and columns.
        c: Half-width of the window along axis 1 (columns).
        c2: Half-height of the window along axis 0 (rows).

    Returns:
        The slice of *img* centred on the middle of both axes. The window is
        clipped to the image when it would extend past an edge.
    """
    # Centre the row window on the row axis and the column window on the
    # column axis (the two centres only coincide for square images).
    row_mid = img.shape[0] // 2
    col_mid = img.shape[1] // 2
    # Clamp the start at 0: a negative start would be interpreted as an
    # offset from the end of the axis.
    top = max(0, row_mid - c2)
    left = max(0, col_mid - c)
    return img[top:row_mid + c2, left:col_mid + c]
def read_xray(path, voi_lut = False, fix_monochrome = True):
    """Load one DICOM slice, scale it to 0-255, resize it and centre-crop it.

    Args:
        path: Path of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert slices whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        The result of ``mid_crop`` applied to the slice after min-max scaling
        to ``[0, 255]`` and resizing to 100x100. A constant slice yields zeros.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # Use floats from here on so the subtractions below cannot wrap around
    # for integer pixel types.
    data = data.astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    # else: constant slice — data is already all zeros, and dividing by the
    # zero peak would only produce NaNs.

    return mid_crop(cv2.resize(data*255,(100,100)))
def pad_images(imgs,img_shape=(28,28),max_frames=356):
    """Zero-pad (or truncate) every image sequence to ``max_frames`` frames.

    Args:
        imgs: Array whose first axis indexes sequences and whose second axis
            indexes the frames of a sequence.
        img_shape: ``(height, width)`` of a single frame.
        max_frames: Number of frames in every returned sequence.

    Returns:
        A float array of shape
        ``(len(imgs), max_frames, img_shape[0], img_shape[1], 1)``. Copying a
        sequence stops at the first frame that does not fit ``img_shape``; the
        remaining frames of that sequence stay zero.
    """
    padded = np.zeros((imgs.shape[0], max_frames, img_shape[0], img_shape[1]))
    for i, frames in enumerate(imgs):
        # Frames beyond max_frames would be discarded anyway, so skip them.
        for j, frame in enumerate(frames[:max_frames]):
            try:
                padded[i, j] = frame
            except (ValueError, TypeError):
                # The frame cannot be stored in an img_shape slot.
                break
    return padded[..., np.newaxis]
def get_prediction_per_case(patient, mri_type="T1w"):
    """Return the mean prediction of the global ``model`` over a patient's slices.

    Args:
        patient: Name of the patient folder inside the test set.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        The model output averaged over all slices that pass the brightness
        filter, as a Python float.

    Raises:
        ValueError: If no slice of the patient passes the filter.
    """
    path = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{patient}/{mri_type}/'

    imagess = []
    for file_name in os.listdir(path):
        img = read_xray(path + file_name)
        # Skip blank slices.
        # NOTE(review): read_xray returns values on the 0-255 scale, so the
        # 0.015 mean threshold is very permissive — confirm this is intended.
        if np.max(img) > 0 and np.mean(img) >= 0.015:
            # read_xray already returns a cropped slice; it is resized and
            # cropped a second time here before the final 50x50 resize.
            img = mid_crop(cv2.resize(img, (100, 100)))
            img = cv2.resize(img, (50, 50))
            # Replicate the single channel three times.
            img = cv2.merge((img, img, img))
            imagess.append(img / 255)

    if not imagess:
        raise ValueError(f"No usable {mri_type} slices found for patient {patient!r}")

    # .item() turns the size-1 mean into a Python float; calling float() on
    # an array with ndim > 0 is deprecated in NumPy.
    return np.mean(model.predict(np.array(imagess)), axis=0).item()

In [None]:
get_prediction_per_case('00047')

In [None]:
# Add the T1w Keras model's per-patient prediction to the running sum
# (positional addition: `df` must list patients in the same order as `mgmt`).
mgmt = (mgmt+df['BraTS21ID'].apply(get_prediction_per_case))

In [None]:
mgmt=np.array(mgmt)

In [None]:
# Rebind the global `model` to the T1wCE Keras model.
model = keras.models.load_model('../input/emodels/T1wCE - model.h5') 

In [None]:
def mid_crop(img,c=25,c2=30):
    """Return the central window of *img*, ``2*c2`` rows by ``2*c`` columns.

    Args:
        img: Array whose first two axes are rows and columns.
        c: Half-width of the window along axis 1 (columns).
        c2: Half-height of the window along axis 0 (rows).

    Returns:
        The slice of *img* centred on the middle of both axes. The window is
        clipped to the image when it would extend past an edge.
    """
    # Centre the row window on the row axis and the column window on the
    # column axis (the two centres only coincide for square images).
    row_mid = img.shape[0] // 2
    col_mid = img.shape[1] // 2
    # Clamp the start at 0: a negative start would be interpreted as an
    # offset from the end of the axis.
    top = max(0, row_mid - c2)
    left = max(0, col_mid - c)
    return img[top:row_mid + c2, left:col_mid + c]
def read_xray(path, voi_lut = False, fix_monochrome = True):
    """Load one DICOM slice, scale it to 0-255, resize it and centre-crop it.

    Args:
        path: Path of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert slices whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        The result of ``mid_crop`` applied to the slice after min-max scaling
        to ``[0, 255]`` and resizing to 100x100. A constant slice yields zeros.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # Use floats from here on so the subtractions below cannot wrap around
    # for integer pixel types.
    data = data.astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    # else: constant slice — data is already all zeros, and dividing by the
    # zero peak would only produce NaNs.

    return mid_crop(cv2.resize(data*255,(100,100)))
def pad_images(imgs,img_shape=(28,28),max_frames=356):
    """Zero-pad (or truncate) every image sequence to ``max_frames`` frames.

    Args:
        imgs: Array whose first axis indexes sequences and whose second axis
            indexes the frames of a sequence.
        img_shape: ``(height, width)`` of a single frame.
        max_frames: Number of frames in every returned sequence.

    Returns:
        A float array of shape
        ``(len(imgs), max_frames, img_shape[0], img_shape[1], 1)``. Copying a
        sequence stops at the first frame that does not fit ``img_shape``; the
        remaining frames of that sequence stay zero.
    """
    padded = np.zeros((imgs.shape[0], max_frames, img_shape[0], img_shape[1]))
    for i, frames in enumerate(imgs):
        # Frames beyond max_frames would be discarded anyway, so skip them.
        for j, frame in enumerate(frames[:max_frames]):
            try:
                padded[i, j] = frame
            except (ValueError, TypeError):
                # The frame cannot be stored in an img_shape slot.
                break
    return padded[..., np.newaxis]
def get_prediction_per_case(patient, mri_type="T1wCE"):
    """Return the mean prediction of the global ``model`` over a patient's slices.

    Args:
        patient: Name of the patient folder inside the test set.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        The model output averaged over all slices that pass the brightness
        filter, as a Python float.

    Raises:
        ValueError: If no slice of the patient passes the filter.
    """
    path = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{patient}/{mri_type}/'

    imagess = []
    for file_name in os.listdir(path):
        img = read_xray(path + file_name)
        # Skip blank slices.
        # NOTE(review): read_xray returns values on the 0-255 scale, so the
        # 0.015 mean threshold is very permissive — confirm this is intended.
        if np.max(img) > 0 and np.mean(img) >= 0.015:
            # read_xray already returns a cropped slice; it is resized and
            # cropped a second time here before the final 50x50 resize.
            img = mid_crop(cv2.resize(img, (100, 100)))
            img = cv2.resize(img, (50, 50))
            # Replicate the single channel three times.
            img = cv2.merge((img, img, img))
            imagess.append(img / 255)

    if not imagess:
        raise ValueError(f"No usable {mri_type} slices found for patient {patient!r}")

    # .item() turns the size-1 mean into a Python float; calling float() on
    # an array with ndim > 0 is deprecated in NumPy.
    return np.mean(model.predict(np.array(imagess)), axis=0).item()

In [None]:
get_prediction_per_case('00047')

In [None]:
# Add the T1wCE Keras model's per-patient prediction to the running sum
# (positional addition: `df` must list patients in the same order as `mgmt`).
mgmt = (mgmt+df['BraTS21ID'].apply(get_prediction_per_case))

In [None]:
mgmt=np.array(mgmt)

In [None]:
# Rebind the global `model` to the T2w Keras model.
model = keras.models.load_model('../input/emodels/T2w - model.h5') 

In [None]:
def mid_crop(img,c=25,c2=30):
    """Return the central window of *img*, ``2*c2`` rows by ``2*c`` columns.

    Args:
        img: Array whose first two axes are rows and columns.
        c: Half-width of the window along axis 1 (columns).
        c2: Half-height of the window along axis 0 (rows).

    Returns:
        The slice of *img* centred on the middle of both axes. The window is
        clipped to the image when it would extend past an edge.
    """
    # Centre the row window on the row axis and the column window on the
    # column axis (the two centres only coincide for square images).
    row_mid = img.shape[0] // 2
    col_mid = img.shape[1] // 2
    # Clamp the start at 0: a negative start would be interpreted as an
    # offset from the end of the axis.
    top = max(0, row_mid - c2)
    left = max(0, col_mid - c)
    return img[top:row_mid + c2, left:col_mid + c]
def read_xray(path, voi_lut = False, fix_monochrome = True):
    """Load one DICOM slice, scale it to 0-255, resize it and centre-crop it.

    Args:
        path: Path of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert slices whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        The result of ``mid_crop`` applied to the slice after min-max scaling
        to ``[0, 255]`` and resizing to 100x100. A constant slice yields zeros.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # Use floats from here on so the subtractions below cannot wrap around
    # for integer pixel types.
    data = data.astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    # else: constant slice — data is already all zeros, and dividing by the
    # zero peak would only produce NaNs.

    return mid_crop(cv2.resize(data*255,(100,100)))
def pad_images(imgs,img_shape=(28,28),max_frames=356):
    """Zero-pad (or truncate) every image sequence to ``max_frames`` frames.

    Args:
        imgs: Array whose first axis indexes sequences and whose second axis
            indexes the frames of a sequence.
        img_shape: ``(height, width)`` of a single frame.
        max_frames: Number of frames in every returned sequence.

    Returns:
        A float array of shape
        ``(len(imgs), max_frames, img_shape[0], img_shape[1], 1)``. Copying a
        sequence stops at the first frame that does not fit ``img_shape``; the
        remaining frames of that sequence stay zero.
    """
    padded = np.zeros((imgs.shape[0], max_frames, img_shape[0], img_shape[1]))
    for i, frames in enumerate(imgs):
        # Frames beyond max_frames would be discarded anyway, so skip them.
        for j, frame in enumerate(frames[:max_frames]):
            try:
                padded[i, j] = frame
            except (ValueError, TypeError):
                # The frame cannot be stored in an img_shape slot.
                break
    return padded[..., np.newaxis]
def get_prediction_per_case(patient, mri_type="T2w"):
    """Return the mean prediction of the global ``model`` over a patient's slices.

    Args:
        patient: Name of the patient folder inside the test set.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        The model output averaged over all slices that pass the brightness
        filter, as a Python float.

    Raises:
        ValueError: If no slice of the patient passes the filter.
    """
    path = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{patient}/{mri_type}/'

    imagess = []
    for file_name in os.listdir(path):
        img = read_xray(path + file_name)
        # Skip blank slices.
        # NOTE(review): read_xray returns values on the 0-255 scale, so the
        # 0.015 mean threshold is very permissive — confirm this is intended.
        if np.max(img) > 0 and np.mean(img) >= 0.015:
            # read_xray already returns a cropped slice; it is resized and
            # cropped a second time here before the final 50x50 resize.
            img = mid_crop(cv2.resize(img, (100, 100)))
            img = cv2.resize(img, (50, 50))
            # Replicate the single channel three times.
            img = cv2.merge((img, img, img))
            imagess.append(img / 255)

    if not imagess:
        raise ValueError(f"No usable {mri_type} slices found for patient {patient!r}")

    # .item() turns the size-1 mean into a Python float; calling float() on
    # an array with ndim > 0 is deprecated in NumPy.
    return np.mean(model.predict(np.array(imagess)), axis=0).item()

In [None]:
get_prediction_per_case('00047')

In [None]:

# Add the T2w Keras model's per-patient prediction to the running sum
# (positional addition: `df` must list patients in the same order as `mgmt`).
mgmt = (mgmt+df['BraTS21ID'].apply(get_prediction_per_case))

In [None]:
mgmt=np.array(mgmt)

In [None]:
# distplot is deprecated in seaborn; histplot with a density-scaled histogram
# and a KDE overlay is the documented replacement.
sns.histplot(mgmt, kde=True, stat="density")

In [None]:
# Rebind the global `model` to the second FLAIR model. custom_objects maps the
# saved layer name "FixedDropout" to the standard Keras Dropout layer.
model = keras.models.load_model('../input/effect0-brain/Brain_flair_model_effect.h5',custom_objects={"FixedDropout": keras.layers.Dropout})
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load one DICOM slice and min-max scale it to ``[0, 1]``.

    Args:
        path: Path of the DICOM file.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``.
        fix_monochrome: When true, invert slices whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        A float array with the slice's own dimensions and values in
        ``[0, 1]``. A constant slice yields zeros.
    """
    # dcmread is pydicom's supported reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # Use floats from here on so the subtractions below cannot wrap around
    # for integer pixel types.
    data = data.astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    # else: constant slice — data is already all zeros, and dividing by the
    # zero peak would only produce NaNs.

    return data

def get_prediction_per_case(patient, mri_type="FLAIR"):
    """Return the mean prediction of the global ``model`` over a patient's slices.

    Each slice that passes the brightness filter is resized to
    ``IM_SIZE`` x ``IM_SIZE``, replicated to three channels and scored on
    its own; the per-slice outputs are then averaged.

    Args:
        patient: Name of the patient folder inside the test set.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        Element ``[0][0]`` of the per-slice model outputs averaged over slices.

    Raises:
        ValueError: If no slice of the patient passes the filter.
    """
    path = f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{patient}/{mri_type}/'

    prediction = []
    for file_name in os.listdir(path):
        img = read_xray(path + file_name)

        # Skip blank or almost blank slices.
        if np.max(img) > 0 and np.mean(img) >= 0.015:
            img = cv2.resize(img, (IM_SIZE, IM_SIZE))
            # Replicate the single channel three times.
            img = cv2.merge((img, img, img))
            # Add the leading batch axis expected by model.predict.
            img = tf.reshape(img, (-1, IM_SIZE, IM_SIZE, 3))
            prediction.append(model.predict(img))

    if not prediction:
        # Averaging an empty list gives NaN, and indexing that NaN fails with
        # an unrelated-looking IndexError; report the real problem instead.
        raise ValueError(f"No usable {mri_type} slices found for patient {patient!r}")

    return np.mean(prediction, axis=0)[0][0]

In [None]:
get_prediction_per_case('00047')

In [None]:

# Add the second FLAIR model's per-patient prediction to the running sum
# (positional addition: `df` must list patients in the same order as `mgmt`).
mgmt = (mgmt+df['BraTS21ID'].apply(get_prediction_per_case))

In [None]:
mgmt=np.array(mgmt)

In [None]:
# Turn the running sum into a mean: 5 is the number of Keras models added to
# `mgmt` above, and len(modelfiles) is the number of PyTorch 3D models.
submission['MGMT_value']=mgmt/(5+len(modelfiles))

In [None]:
import seaborn as sns
# Count predictions by the first four characters of their string form
# (e.g. '0.53'), i.e. a histogram over values truncated to that prefix.
sns.histplot([x[0:4] for x in submission['MGMT_value'].astype(str)])

In [None]:
# distplot is deprecated in seaborn; histplot with a density-scaled histogram
# and a KDE overlay is the documented replacement.
sns.histplot(submission['MGMT_value'], kde=True, stat="density")

In [None]:
submission

In [None]:
# Write the averaged predictions; the Series index (BraTS21ID) is written as
# the first column of the CSV.
submission['MGMT_value'].to_csv('submission.csv')