CV: 0.6327

In [None]:
from brain_inference import generate_prediction

# generate_prediction is defined in the external brain_inference module and is
# not visible here. NOTE(review): it presumably returns a submission-style
# DataFrame -- later cells read its 'MGMT_value' column and group it by
# 'BraTS21ID' -- confirm against that module.
sams_sub = generate_prediction()
sams_sub.head()

In [None]:
sams_sub['MGMT_value'].hist(bins=50);

---

In [None]:
# Run the second inference pipeline as a stand-alone script; the next cell reads
# 'sub_yyama_cvbest.csv', which this script is presumably responsible for
# writing (TODO confirm).
! python ../input/brats21-bestcv-infer/brats21_yyama_cvbest_infer.py
# Remove the local ./test directory before the preprocessing below recreates it.
# NOTE(review): assumed to be scratch output of the script above -- confirm.
! rm -r ./test

In [None]:
import pandas as pd
# Load the script's predictions and restore the zero-padded, five-character
# string form of the patient IDs (reading the CSV may parse them as integers).
yyama_sub = pd.read_csv('sub_yyama_cvbest.csv').assign(
    BraTS21ID=lambda sub: sub['BraTS21ID'].astype(str).str.zfill(5)
)

yyama_sub.head()

In [None]:
yyama_sub['MGMT_value'].hist(bins=50);

In [None]:
import gc
gc.collect()

---

# Image processing: FLAIR DICOM series -> 256x256x256 voxel volume

In [None]:
from pathlib import Path
import numpy as np
import cv2
import pydicom
import matplotlib.pyplot as plt
import os, gc

# Sub-folder of data_root whose studies are processed.
DATASET = 'test'
# Name of the per-study series folder to read (a single series, despite the plural name).
scan_types = 'FLAIR'
# Root of the competition data set.
data_root = Path("../input/rsna-miccai-brain-tumor-radiogenomic-classification")


# https://www.kaggle.com/arnabs007/part-1-rsna-miccai-btrc-understanding-the-data
# https://www.kaggle.com/davidbroberts/determining-mr-image-planes
def get_image_plane(data):
    """Classify the acquisition plane of one DICOM slice.

    The six ``ImageOrientationPatient`` components are rounded to integers and
    the 1st, 2nd, 4th and 5th of them are matched against three known patterns.

    Args:
        data: object exposing ``ImageOrientationPatient`` (six numbers).

    Returns:
        ``'Coronal'``, ``'Axial'``, ``'Sagittal'``, or ``'Unknown'`` when the
        rounded components match none of the patterns.
    """
    ax, ay, _, bx, by, _ = (round(component) for component in data.ImageOrientationPatient)
    plane_by_pattern = {
        (1, 0, 0, 0): 'Coronal',
        (1, 0, 0, 1): 'Axial',
        (0, 1, 0, 0): 'Sagittal',
    }
    return plane_by_pattern.get((ax, ay, bx, by), 'Unknown')
    
    
def get_voxel(study_id, scan_type):
    """Load one DICOM series as a 3-D array and bring it into a common layout.

    Files are read from ``data_root/DATASET/study_id/scan_type`` and ordered by
    the integer that follows the last ``-`` in each file stem. The plane is
    taken from the last slice read. Depending on the plane, the slice stack is
    optionally reversed (based on the first and last ``ImagePositionPatient``)
    and then transposed and/or rotated.
    NOTE(review): the transposes/rotations presumably map every plane onto the
    same anatomical orientation -- confirm against the training pipeline.

    Args:
        study_id: name of the study folder.
        scan_type: name of the series folder inside the study.

    Returns:
        ``(voxel, plane)``: the re-oriented 3-D array and the plane name.

    Raises:
        FileNotFoundError: the series folder contains no ``*.dcm`` files
            (previously this surfaced as an unbound-variable error).
        ValueError: the plane could not be classified.
    """
    dcm_dir = data_root.joinpath(DATASET, study_id, scan_type)
    dcm_paths = sorted(dcm_dir.glob("*.dcm"), key=lambda x: int(x.stem.split("-")[-1]))
    if not dcm_paths:
        raise FileNotFoundError(f"No DICOM files found in {dcm_dir}")

    imgs = []
    positions = []
    for dcm_path in dcm_paths:
        img = pydicom.dcmread(str(dcm_path))
        imgs.append(img.pixel_array)
        positions.append(img.ImagePositionPatient)

    # The plane is classified from the last slice read.
    plane = get_image_plane(img)
    voxel = np.stack(imgs)

    # reorder planes if needed and rotate voxel
    if plane == "Coronal":
        if positions[0][1] < positions[-1][1]:
            voxel = voxel[::-1]
            print(f"{study_id} {scan_type} {plane} reordered")
        voxel = voxel.transpose((1, 0, 2))
    elif plane == "Sagittal":
        if positions[0][0] < positions[-1][0]:
            voxel = voxel[::-1]
            print(f"{study_id} {scan_type} {plane} reordered")
        voxel = voxel.transpose((1, 2, 0))
        voxel = np.rot90(voxel, 2, axes=(1, 2))
    elif plane == "Axial":
        if positions[0][2] > positions[-1][2]:
            voxel = voxel[::-1]
            print(f"{study_id} {scan_type} {plane} reordered")
        voxel = np.rot90(voxel, 2)
    else:
        raise ValueError(f"Unknown plane {plane}")
    return voxel, plane


def normalize_contrast(voxel):
    """Min-max scale a volume to the 0..255 range as ``uint8``.

    Args:
        voxel: numeric array.

    Returns:
        * the input unchanged (original dtype) when its sum is 0;
        * an all-zero ``uint8`` array of the same shape when every element has
          the same non-zero value (there is no contrast to stretch, and
          dividing by the zero range would produce NaNs);
        * otherwise ``(voxel - min) / (max - min) * 255`` truncated to ``uint8``.
    """
    if voxel.sum() == 0:
        return voxel
    shifted = voxel - np.min(voxel)
    span = np.max(shifted)
    if span == 0:
        # Constant volume: avoid 0/0 -> NaN -> undefined uint8 cast.
        return np.zeros(voxel.shape, dtype=np.uint8)
    scaled = shifted / span
    return (scaled * 255).astype(np.uint8)


def crop_voxel(voxel):
    """Drop every plane whose mean is not positive, one axis at a time.

    Axes are processed in the order 2, 1, 0. A volume whose sum is 0 is
    returned unchanged.

    Args:
        voxel: 3-D numeric array.

    Returns:
        The array restricted to the planes with a positive mean along each axis.
    """
    if voxel.sum() == 0:
        return voxel
    for axis in (2, 1, 0):
        other_axes = tuple(a for a in range(3) if a != axis)
        occupied = voxel.mean(axis=other_axes) > 0
        # Equivalent to voxel[:, :, occupied] / voxel[:, occupied] / voxel[occupied].
        voxel = voxel[(slice(None),) * axis + (occupied,)]
    return voxel


def resize_voxel(voxel, sz=256):
    """Resample a 3-D volume into a ``sz`` x ``sz`` x ``sz`` ``uint8`` cube.

    ``sz`` evenly spaced positions are taken along the longest axis (the first
    one on ties, positions truncated to integer indices), and the 2-D plane at
    each position is resized to ``(sz, sz)`` with ``cv2.resize``.

    Args:
        voxel: 3-D array.
        sz: edge length of the output cube.

    Returns:
        ``uint8`` array of shape ``(sz, sz, sz)``.
    """
    cube = np.zeros((sz, sz, sz), dtype=np.uint8)

    longest = int(np.argmax(voxel.shape))
    if longest > 2:
        # Only the first three axes are handled; anything else leaves the cube empty.
        return cube

    # Leading full slices select "every element" on the axes before `longest`.
    lead = (slice(None),) * longest
    for dst, src in enumerate(np.linspace(0, voxel.shape[longest] - 1, sz)):
        cube[lead + (dst,)] = cv2.resize(voxel[lead + (int(src),)], (sz, sz))

    return cube

In [None]:
# Convert every study folder under data_root/DATASET into a cached
# /kaggle/working/test/<study_id>/<scan_types>.npy volume.
lists = list(data_root.joinpath(DATASET).glob("*"))

for n, study_path in enumerate(lists):
    study_id = study_path.name

    # Stray files next to the study folders are ignored.
    if study_path.is_dir():
        voxel, plane = get_voxel(study_id, scan_types)
        # Rebind step by step so each intermediate volume can be freed early.
        voxel = normalize_contrast(voxel)
        voxel = crop_voxel(voxel)
        voxel = resize_voxel(voxel)

        save_dir = f'/kaggle/working/test/{study_id}'
        os.makedirs(save_dir, exist_ok=True)
        np.save(f'{save_dir}/{scan_types}', voxel)

        del voxel, plane
        gc.collect()

        print(f'{n}/{len(lists)} DONE!!')

# Segmentation

In [None]:
# Installing segmentation_models_pytorch without network access.
# The source archives are stored in the input data set with an '.xyz' extension;
# they are copied into a local pip cache under their '.tar.gz' names.
!mkdir -p /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/efficientnet_pytorch-0.6.3.xyz /tmp/pip/cache/efficientnet_pytorch-0.6.3.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/pretrainedmodels-0.7.4.xyz /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
!cp ../input/segmentationmodelspytorch/segmentation_models/segmentation-models-pytorch-0.1.2.xyz /tmp/pip/cache/segmentation_models_pytorch-0.1.2.tar.gz
# Two timm wheels are made available; pip picks whichever satisfies the requirements.
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.1.20-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodelspytorch/segmentation_models/timm-0.2.1-py3-none-any.whl /tmp/pip/cache/
# --no-index + --find-links: resolve packages only from the local cache directory.
!pip install --no-index --find-links /tmp/pip/cache/ efficientnet-pytorch
!pip install --no-index --find-links /tmp/pip/cache/ segmentation-models-pytorch

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold

from tqdm import tqdm
import os, gc
import random
from PIL import Image
import tifffile as tiff
import cv2
import zipfile
import collections
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from skimage import exposure
from bokeh.plotting import figure as bokeh_figure
from bokeh.io import output_notebook, show, output_file
from bokeh.models import ColumnDataSource, HoverTool, Panel
from bokeh.models.widgets import Tabs
from PIL import Image
from sklearn import preprocessing
from random import randint

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from segmentation_models_pytorch.unet import Unet
from segmentation_models_pytorch.encoders import get_preprocessing_fn

import torchvision
from torchvision import transforms
from albumentations import *
import albumentations as A
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.optim.lr_scheduler import CosineAnnealingLR

import warnings
warnings.filterwarnings("ignore")
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The generators are independent of each other, so one pass seeds them all.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
    
    
def transform(array, size=256, keep_ratio=False, resample=Image.LANCZOS):
    """Convert an array to a PIL image of at most / exactly ``size`` pixels per side.

    Args:
        array: image array accepted by ``Image.fromarray``.
        size: target edge length in pixels.
        keep_ratio: when true, shrink in place with ``Image.thumbnail`` (aspect
            ratio preserved); otherwise resize to exactly ``(size, size)``.
        resample: PIL resampling filter.

    Returns:
        The resulting ``PIL.Image``.
    """
    im = Image.fromarray(array)
    target = (size, size)
    if not keep_ratio:
        return im.resize(target, resample)
    im.thumbnail(target, resample)
    return im
    
    
# Seed all random generators once for this section.
seed = 2020
seed_everything(seed)
# Edge length of the pre-processed volumes and slices.
sz = 256
# Number of cross-validation folds.
NFOLDS = 5
# Folder holding the per-study .npy volumes written by the preprocessing step.
root = '/kaggle/working/test'

# ImageNet per-channel statistics, shaped (1, 1, 3) to broadcast over H x W x C images.
mean = np.array([[[0.485, 0.456, 0.406]]])
std = np.array([[[0.229, 0.224, 0.225]]])

In [None]:
# Build one row per (patient, slice index) pair for the segmentation model.
# The frame is assembled with a single pd.concat: growing it with
# DataFrame.append inside a loop is quadratic, and append no longer exists in
# pandas >= 2.0.
NUM_SLICES = 256  # every pre-processed volume has 256 slices along its first axis

sub_ids = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
sub_ids['BraTS21ID'] = sub_ids['BraTS21ID'].apply(lambda x: str(x).zfill(5))
sub_ids['type'] = 'FLAIR'

# Column order matters: the segmentation Dataset reads the patient ID from
# column 0 and the slice index from column 3.
test_df = pd.concat(
    [sub_ids.assign(img_idx=slice_idx) for slice_idx in range(NUM_SLICES)],
    ignore_index=True,
)

test_df = test_df.sort_values(['BraTS21ID', 'img_idx']).reset_index(drop=True)
test_df['seg_area'] = 0
test_df['brain_area'] = 0
test_df

In [None]:
# Applied to every slice before segmentation: transpose, then horizontal flip
# (both always applied, p=1.0).
transform1 = A.Compose([
    A.Transpose(p=1.0),
    A.HorizontalFlip(p=1.0),
    ])

# The same two operations in the opposite order, i.e. the inverse of
# transform1; used to map a predicted mask back to the slice's original layout.
transform2 = A.Compose([
    A.HorizontalFlip(p=1.0),
    A.Transpose(p=1.0),
    ])


class Dataset(torch.utils.data.Dataset):
    """Serves single slices of the cached FLAIR volumes to the segmentation model.

    The base class is referenced by its fully qualified name: subclassing the
    bare name ``Dataset`` made the class inherit from whatever ``Dataset`` was
    bound to at the time, so re-running the cell nested the class inside its
    previous definition.

    Args:
        df: frame whose column 0 holds the patient ID and column 3 the slice
            index (positional access).
        preprocess_input: optional callable taking ``image=`` and returning a
            dict with an ``'image'`` entry; applied after ``transform``.
        transform: optional callable with the same calling convention.
    """

    def __init__(self, df, preprocess_input=None, transform=None):
        self.df = df
        self.preprocess_input = preprocess_input
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(img, one_img, patient_id, img_idx)`` for row ``idx``.

        ``img`` is the slice replicated to three channels, transformed and laid
        out channels-first; ``one_img`` is the untouched single-channel slice.
        """
        patient_id = self.df.iloc[idx, 0]
        img_idx = self.df.iloc[idx, 3]
        # Memory-map the volume so that only the requested slice is read from
        # disk rather than the whole file for every item.
        volume = np.load(f'{root}/{patient_id}/FLAIR.npy', mmap_mode='r')
        one_img = np.array(volume[img_idx, :, :], dtype=np.float32)
        img = np.array([one_img, one_img, one_img])
        img = np.transpose(img, (1, 2, 0))

        if self.transform:
            sample = self.transform(image=img)
            img = sample['image']

        if self.preprocess_input:
            img = self.preprocess_input(image=img)['image']

        img = img.transpose((2, 0, 1))
        img = torch.from_numpy(img)
        one_img = torch.from_numpy(one_img)

        return img, one_img, patient_id, img_idx
    
    
# Encoder whose input normalisation is applied to the slices.
ENCODER_NAME = 'efficientnet-b0'
# Wrap the encoder's preprocessing function so it can be called like an
# albumentations transform (preprocessing_fn(image=...)['image']).
# `Lambda` comes from the `from albumentations import *` star import.
preprocessing_fn = Lambda(image=get_preprocessing_fn(encoder_name=ENCODER_NAME, pretrained='imagenet'))

class Model(nn.Module):
    """U-Net with an EfficientNet-B0 encoder producing a one-channel mask.

    The encoder is created without pretrained weights (they are loaded from a
    checkpoint afterwards) and no output activation, so ``forward`` returns raw
    logits.
    """

    def __init__(self):
        super().__init__()
        unet_options = dict(
            encoder_name='efficientnet-b0',
            encoder_weights=None,
            classes=1,
            activation=None,
        )
        self.model = Unet(**unet_options)

    def forward(self, images):
        """Return the mask logits for a batch of images."""
        return self.model(images)

In [None]:
# Segment every slice with the 5-fold U-Net ensemble and record, per slice,
# the segmented area and the area of non-zero (brain) pixels.
TH = 0.45  # a pixel is segmented when the fold-averaged probability is >= TH
SEG_WEIGHT_DIR = '../input/miccai-flair-efnetb0-unet-weight2'

test_ds = Dataset(df=test_df, preprocess_input=preprocessing_fn, transform=transform1)
test_dl = DataLoader(dataset=test_ds, batch_size=256, shuffle=False, num_workers=2)


def _load_seg_model(fold):
    """Build the U-Net and load one fold's weights, ready for inference."""
    seg_model = Model()
    # map_location lets checkpoints saved on a GPU load on a CPU-only kernel too.
    seg_model.load_state_dict(torch.load(f'{SEG_WEIGHT_DIR}/fold_{fold}.pth', map_location=device))
    seg_model.to(device)
    seg_model.eval()
    return seg_model


seg_models = [_load_seg_model(fold) for fold in range(NFOLDS)]

seg_areas = []
brain_areas = []

print('=====segmentation=====')
for img, one_img, _patient_ids, _slice_idxs in tqdm(test_dl):
    img = img.to(device, dtype=torch.float)

    with torch.no_grad():
        # Average the per-fold probabilities in fold order.
        prob = sum(torch.sigmoid(seg_model(img)) for seg_model in seg_models) / len(seg_models)

    # Drop only the channel axis; a bare squeeze would also drop the batch axis
    # for a batch holding a single slice.
    mask = prob[:, 0].cpu().numpy() >= TH

    # Pixel counts do not depend on orientation, so the masks are not mapped
    # back through transform2 (the per-slice mask export was already disabled).
    seg_areas.extend(np.count_nonzero(mask, axis=(1, 2)).tolist())
    brain_areas.extend(np.count_nonzero(one_img.numpy(), axis=(1, 2)).tolist())

    del img, one_img, prob, mask
    gc.collect()

# The loader is not shuffled, so the areas line up with the rows of test_df.
assert len(seg_areas) == len(test_df), 'expected one area per (patient, slice) row'
test_df['seg_area'] = np.asarray(seg_areas)
test_df['brain_area'] = np.asarray(brain_areas)
# Slices without any brain pixel yield NaN/inf here; the ratio filter applied
# before classification discards them.
test_df['seg_brain_ratio'] = test_df['seg_area'] / test_df['brain_area']

del seg_areas, brain_areas, seg_models
gc.collect()

In [None]:
# Persist the per-slice segmentation statistics for later inspection.
test_df.to_csv('test_segmentation.csv', index=False)

# MGMT status inference

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score
from tqdm.notebook import tqdm
import os, gc
import random
from PIL import Image
import tifffile as tiff
import cv2
import zipfile
import collections
from PIL import Image
from sklearn import preprocessing
from random import randint
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import torchvision
from torchvision import transforms
import albumentations as A
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.optim.lr_scheduler import CosineAnnealingLR
import warnings
warnings.filterwarnings("ignore")
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def seed_everything(seed):
    """Make the run repeatable by seeding every random generator in use.

    Seeds ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and enables deterministic cuDNN kernels.

    Args:
        seed: integer seed shared by all generators.
    """
    torch.backends.cudnn.deterministic = True
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    for torch_seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        torch_seed_fn(seed)
    
    
def slice_selection_all(df, TH):
    """Keep, per patient, the slices ranked at or above a quantile of segmented area.

    For each ``BraTS21ID`` (in order of first appearance) the rows are sorted by
    ``seg_area`` ascending, numbered 0..n-1 in an ``index`` column, and the rows
    whose number is >= the ``TH`` quantile of that numbering (truncated to an
    integer) are kept.

    The per-patient pieces are combined with one ``pd.concat``:
    ``DataFrame.append`` was removed in pandas 2.0, and appending to an empty
    object-dtype frame in a loop is quadratic.

    Args:
        df: frame with at least ``BraTS21ID`` and ``seg_area`` columns.
        TH: quantile in [0, 1]; 0 keeps every slice.

    Returns:
        New frame with the kept rows and an extra ``index`` column (rank within
        the patient). For an input without rows, an empty frame with ``df``'s
        columns.
    """
    id_list = df['BraTS21ID'].unique()

    selected = []
    for brain_id in tqdm(id_list):
        tmp = df[df['BraTS21ID'] == brain_id]
        tmp = tmp.sort_values('seg_area').reset_index(drop=True)
        tmp['index'] = tmp.index
        idx = tmp['index'].quantile(TH).astype(np.int32)
        selected.append(tmp[tmp['index'] >= idx])

    if not selected:
        return pd.DataFrame(columns=df.columns)
    return pd.concat(selected, ignore_index=True)


def slice_selection_partial(df, TH, num):
    """Pick up to ``num`` slices per patient, spread over the upper area quantiles.

    For each ``BraTS21ID`` the rows are sorted by ``seg_area`` ascending and
    numbered 0..n-1 in an ``index`` column. ``num`` quantiles evenly spaced
    between ``TH`` and 1.0 of that numbering are truncated to integers,
    de-duplicated, and the rows at those positions are kept.

    Positions are taken in ascending order (previously the order was whatever
    iterating a ``set`` produced), and the pieces are combined with one
    ``pd.concat`` because ``DataFrame.append`` was removed in pandas 2.0.

    Args:
        df: frame with at least ``BraTS21ID`` and ``seg_area`` columns.
        TH: lowest quantile to sample, in [0, 1].
        num: number of quantiles to sample per patient.

    Returns:
        New frame with the kept rows and an extra ``index`` column. For an
        input without rows, an empty frame with ``df``'s columns.
    """
    id_list = df['BraTS21ID'].unique()

    selected = []
    for brain_id in tqdm(id_list):
        tmp = df[df['BraTS21ID'] == brain_id]
        tmp = tmp.sort_values('seg_area').reset_index(drop=True)
        tmp['index'] = tmp.index
        positions = tmp['index'].quantile(np.linspace(TH, 1.0, num)).astype(np.int32).to_list()
        selected.append(tmp.iloc[sorted(set(positions))])

    if not selected:
        return pd.DataFrame(columns=df.columns)
    return pd.concat(selected, ignore_index=True)


def slice_selection_top(df, num):
    """Keep, per patient, the ``num`` slices with the largest segmented area.

    Patients are processed in order of first appearance; within a patient the
    rows are ordered by ``seg_area`` descending. The pieces are combined with
    one ``pd.concat`` because ``DataFrame.append`` was removed in pandas 2.0.

    Args:
        df: frame with at least ``BraTS21ID`` and ``seg_area`` columns.
        num: maximum number of rows to keep per patient.

    Returns:
        New frame with the kept rows and a fresh 0..n-1 index. For an input
        without rows, an empty frame with ``df``'s columns.
    """
    id_list = df['BraTS21ID'].unique()

    selected = []
    for brain_id in tqdm(id_list):
        tmp = df[df['BraTS21ID'] == brain_id]
        tmp = tmp.sort_values('seg_area', ascending=False).reset_index(drop=True)
        selected.append(tmp.head(num))

    if not selected:
        return pd.DataFrame(columns=df.columns)
    return pd.concat(selected, ignore_index=True)
    

# Re-seed all random generators for the classification stage.
seed = 2020
seed_everything(seed)
# Edge length of the pre-processed slices.
sz = 256
# Folder holding the per-study .npy volumes written by the preprocessing step.
root = '/kaggle/working/test'


# ImageNet per-channel statistics, shaped (1, 1, 3) to broadcast over H x W x C
# images; used by the classification Dataset to normalise each slice.
mean = np.array([[[0.485, 0.456, 0.406]]])
std = np.array([[[0.229, 0.224, 0.225]]])

In [None]:
# Derive the classification slice table from the segmentation statistics.
# test_df itself is left untouched so this cell can be re-run: the previous
# version dropped 'type' and merged 'max_brain_area' into test_df in place,
# which failed (or duplicated columns) on a second execution.
mgmt_df = test_df.drop(columns='type').assign(
    BraTS21ID=lambda d: d['BraTS21ID'].astype(str).str.zfill(5),
    # Largest brain area of any slice of the same patient (computed before filtering).
    max_brain_area=lambda d: d.groupby('BraTS21ID')['brain_area'].transform('max'),
)

# Ignore the outermost slices of every volume ...
central = mgmt_df['img_idx'].between(10, 245)
# ... and slices whose segmented fraction of the brain is implausibly small or large.
plausible = (mgmt_df['seg_brain_ratio'] > 0.01) & (mgmt_df['seg_brain_ratio'] < 0.45)
mgmt_df = mgmt_df[central & plausible].reset_index(drop=True)

# Per patient, keep the slices from the 30th percentile of segmented area upwards.
# Column order matters downstream: the classification Dataset reads the patient
# ID from column 0 and the slice index from column 2.
test_df2 = slice_selection_all(mgmt_df, 0.3)
test_df2

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Serves ImageNet-normalised FLAIR slices to the MGMT classifier.

    The base class is referenced by its fully qualified name: subclassing the
    bare name ``Dataset`` made the class inherit from whatever ``Dataset`` was
    bound to at the time, so re-running the cell nested the class inside its
    previous definition.

    Args:
        df: frame whose column 0 holds the patient ID and column 2 the slice
            index (positional access).
        transform: optional callable taking ``image=`` and returning a dict
            with an ``'image'`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the slice of row ``idx`` as a channels-first tensor.

        The slice is replicated to three channels, optionally transformed,
        scaled to 0..1 and normalised with the module-level ``mean``/``std``.
        """
        brain_id = self.df.iloc[idx, 0]
        img_idx = self.df.iloc[idx, 2]
        # Memory-map the volume so that only the requested slice is read from
        # disk rather than the whole file for every item.
        volume = np.load(f'{root}/{brain_id}/FLAIR.npy', mmap_mode='r')
        img = np.array(volume[img_idx, :, :], dtype=np.float32)
        img = np.transpose(np.array([img, img, img]), (1, 2, 0))
        if self.transform:
            sample = self.transform(image=img)
            img = sample['image']
        img = (img/255.0 - mean) / std
        img = np.transpose(img, (2, 0, 1))
        img = torch.from_numpy(img)
        return img
    
    
class Model(nn.Module):
    """timm backbone whose classifier is replaced by a single-logit linear head.

    Args:
        model_name: timm architecture name.
        pretrained: whether timm should load its pretrained weights.
    """

    def __init__(self, model_name='efficientnet_b0', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, in_chans=3, pretrained=pretrained)
        # Swap the stock classifier for one output unit (the MGMT logit).
        backbone.classifier = nn.Linear(backbone.classifier.in_features, 1)
        self.model = backbone

    def forward(self, x):
        """Return the raw logit for each image in the batch."""
        return self.model(x)
    

def inference_fn(data_loader, model, device):
    """Run ``model`` over ``data_loader`` and return sigmoid probabilities.

    Args:
        data_loader: iterable yielding image batches as tensors.
        model: module returning logits; it is switched to eval mode.
        device: device the batches are moved to before the forward pass.

    Returns:
        NumPy array with the per-batch probabilities concatenated along axis 0.
    """
    model.eval()
    batch_probs = []

    for batch in data_loader:
        inputs = batch.to(device, dtype=torch.float)

        with torch.no_grad():
            logits = model(inputs)
            batch_probs.append(torch.sigmoid(logits).detach().cpu().numpy())

    return np.concatenate(batch_probs)

In [None]:
# Average the slice-level probabilities of four cross-validated model families.
# Each family contributes 1/num_models, split evenly across its folds.
ENSEMBLE_MEMBERS = [
    # (weights directory, timm architecture, number of folds)
    ('../input/miccai-efnetb0-weight010', 'efficientnet_b0', 3),
    ('../input/miccai-efnetb0-weight011', 'efficientnet_b0', 4),
    ('../input/miccai-efnetb0-weight012-dup', 'efficientnet_b0', 5),
    ('../input/miccai-efnetb1-weight001', 'efficientnet_b1', 5),
]
num_models = len(ENSEMBLE_MEMBERS)

# The data does not change between folds, so one dataset/loader serves them all.
test_ds = Dataset(df=test_df2, transform=None)
test_dl = DataLoader(dataset=test_ds, batch_size=64, shuffle=False, num_workers=2)

predictions = 0
for weight_dir, model_name, n_folds in ENSEMBLE_MEMBERS:
    for i in range(n_folds):
        print(f'=====FOLD:{i}=====')
        model = Model(model_name=model_name)
        # map_location lets checkpoints saved on a GPU load on a CPU-only kernel too.
        model.load_state_dict(torch.load(f'{weight_dir}/fold_{i}.pth', map_location=device))
        model.to(device)
        predictions += inference_fn(test_dl, model, device) / (n_folds * num_models)
        del model
        gc.collect()

# inference_fn yields one column per row; flatten it to a plain 1-D column.
test_df2['MGMT_value'] = np.ravel(predictions)

In [None]:
# Collapse slice-level probabilities to one value per patient (median).
# Only the prediction column is aggregated: taking the median of every column
# first is wasted work and raises on non-numeric columns in pandas >= 2.0.
test = test_df2.groupby('BraTS21ID')['MGMT_value'].median().reset_index()
# Back to integer IDs so they match sample_submission.csv in the merge below.
test['BraTS21ID'] = test['BraTS21ID'].astype(int)

# Submission

In [None]:
# Start from the official ID list so every patient gets exactly one row.
sample = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
# Left-merge the per-patient predictions onto the ID column only; patients with
# no prediction (no row in `test`) fall back to the neutral value 0.5.
sample = sample.iloc[:, :1].merge(test[['BraTS21ID', 'MGMT_value']], on='BraTS21ID', how='left').fillna(0.5)
# Zero-pad the IDs to five characters, matching the other two submissions.
sample['BraTS21ID'] = sample['BraTS21ID'].astype(str).apply(lambda s: s.zfill(5))
sample.head()

In [None]:
sample['MGMT_value'].hist(bins=50);

---
Ensemble of all three submissions

In [None]:
# simple average of the three submissions
# Normalise the IDs of every frame to zero-padded five-character strings first:
# `sample` and `yyama_sub` are padded explicitly above, but `sams_sub` comes
# from an external module, and an int / unpadded ID would silently end up in a
# separate group (i.e. duplicate rows in the submission).
combined = pd.concat(
    [
        sub.assign(BraTS21ID=sub['BraTS21ID'].astype(str).str.zfill(5))
        for sub in (sample, sams_sub, yyama_sub)
    ],
    axis=0,
)

# sort=False keeps the patients in the order of `sample`.
combined = combined.groupby('BraTS21ID', sort=False)['MGMT_value'].mean().reset_index()

combined.to_csv('submission.csv', index=False)

In [None]:
# # weighted
# sample['MGMT_value'] = sample['MGMT_value'] * 0.36
# sams_sub['MGMT_value'] = sams_sub['MGMT_value'] * 0.44
# yyama_sub['MGMT_value'] = yyama_sub['MGMT_value'] * 0.2

# combined = pd.concat([sample , sams_sub , yyama_sub], axis=0)

# combined = combined.groupby('BraTS21ID', sort=False)['MGMT_value'].sum().reset_index()
    
# combined.to_csv('submission.csv', index=False)

In [None]:
combined.head(20)

In [None]:
combined['MGMT_value'].hist(bins=50);