Inference Notebook for: https://www.kaggle.com/pranshu15/skipping-slices

In [None]:
import os
import glob
from tqdm import tqdm_notebook as tqdm
import random 
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision import transforms, utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
import pickle
import albumentations as A
from albumentations.pytorch import ToTensorV2

import sys
sys.path.append('../input/monai-v060-deep-learning-in-healthcare-imaging/')
from monai.networks.nets import DenseNet121, DenseNet264

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Root directory of the competition data. The loading helpers below glob
# `<path>/<split>/<scan_id>/<sequence>/*.dcm` underneath it.
path = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification'

In [None]:
# Side length (in pixels) that every slice is resized to by dicom2array.
img_size = 256
# Number of slices each MRI sequence is reduced or padded to by load_sequence.
stack_size = 64

def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read one DICOM slice and return it as a min-max normalised square image.

    Args:
        path: Path of the ``.dcm`` file to read.
        voi_lut: If true, apply the VOI LUT stored in the file (when the
            device provided one) to map raw values to the "human-friendly"
            view; otherwise use the raw pixel array.
        fix_monochrome: If true, invert images whose photometric
            interpretation is ``MONOCHROME1``.

    Returns:
        A float64 array resized to ``(img_size, img_size)``. Values are
        min-max normalised to ``[0, 1]`` before resizing; a constant image
        comes back as all zeros. Assumes a single-frame (2-D) pixel array.
    """
    # `read_file` is a deprecated alias of `dcmread` and is gone in pydicom 3.
    dicom = pydicom.dcmread(path)
    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # Do the arithmetic in floating point: subtracting the minimum in the raw
    # integer dtype can overflow (e.g. int16 data spanning more than 32767).
    data = np.asarray(data, dtype=np.float64)

    # Depending on this value the image may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = data.max() - data

    data = data - data.min()
    peak = data.max()
    if peak > 0:
        data = data / peak
    return cv2.resize(data, (img_size, img_size))

def load_sequence(paths):
    """Turn the DICOM files of one MRI sequence into a fixed-depth volume.

    Args:
        paths: DICOM file paths of the sequence, already in slice order.

    Returns:
        An array of shape ``(img_size, img_size, stack_size)``. All-zero
        slices are dropped, long sequences are subsampled with a stride
        (starting at a randomly chosen offset), and short sequences are
        repeated until ``stack_size`` slices are available. If no slice
        carries any signal the result is all zeros.
    """
    # Keep only the slices that contain some signal.
    slices = [image for image in map(dicom2array, paths) if image.max() != 0]

    # Some scans have a completely empty sequence.
    if not slices:
        return np.zeros((img_size, img_size, stack_size))

    volume = np.dstack(slices)

    # Take every `step`-th slice so that at most `stack_size` remain, starting
    # from a random slice among the first `step`.
    step = volume.shape[2] // stack_size + 1
    offset = np.random.choice(list(range(step)))
    volume = volume[:, :, offset::step]

    # Tile the (possibly short) volume and top it up with its leading slices
    # so the depth is exactly `stack_size`.
    repeats, remainder = divmod(stack_size, volume.shape[2])
    pieces = [volume] * repeats + [volume[:, :, :remainder]]
    return np.concatenate(pieces, axis=2)

def load_3d_dicom_images(scan_id, split = "train"):
    """Load the four MRI sequences of one scan and join them along the depth.

    Args:
        scan_id: Name of the scan directory under ``<path>/<split>/``.
        split: Sub-directory of the data root to read from.

    Returns:
        The FLAIR, T1w, T1wCE and T2w volumes produced by ``load_sequence``
        (which tries to avoid all-zero slices where possible), concatenated
        in that order along the last axis.
    """
    def slice_number(filename):
        # File names end in "-<number>.dcm"; sort numerically on that number.
        return int(filename.split('/')[-1].split('-')[-1].split('.')[0])

    volumes = []
    for sequence in ("FLAIR", "T1w", "T1wCE", "T2w"):
        files = glob.glob(f"{path}/{split}/{scan_id}/{sequence}/*.dcm")
        files.sort(key=slice_number)
        volumes.append(load_sequence(files))

    return np.concatenate(volumes, axis = -1)

In [None]:
# A simple PyTorch Dataset that serves one scan (all four MRI sequences) per item.


class BrainTumor(Dataset):
    """Dataset serving the four MRI sequences of one scan per item.

    Args:
        df: Frame with a ``BraTS21ID`` column. For the "train" and "val"
            splits it must also provide ``MGMT_value`` and ``kfold``.
            The ``BraTS21ID`` column is rewritten IN PLACE to zero-padded
            5-character strings (the scan directory names).
        path: Data root whose ``<split>`` sub-directory is listed to find
            the scan IDs. NOTE(review): the pixel data itself is read by
            ``load_3d_dicom_images``, which uses the module-level ``path``
            rather than this argument - keep the two in sync.
        split: "train" keeps the scans whose ``kfold`` differs from
            ``validation_fold``; "val" keeps the scans of that fold (read
            from the train directory); any other value lists every scan in
            ``<path>/<split>`` and attaches no labels.
        validation_fold: Fold held out for validation.

    Items are ``(volume, label)`` float32 tensors, or just ``volume`` when
    the split is "test". ``volume`` stacks the four sequences along a new
    leading axis.
    """

    def __init__(self, df, path = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification', split = "train", validation_fold = 0):
        # Deliberately kept as an in-place update of the caller's frame: the
        # notebook later writes that same frame out as the submission file.
        df["BraTS21ID"] = df["BraTS21ID"].apply(lambda x: str(x).zfill(5))
        self.labels = {}
        # Validation scans are stored in the train directory.
        self.split = "train" if split == "val" else split

        scan_dirs = sorted(glob.glob(path + f"/{self.split}/" + "/*"))
        self.ids = [scan_dir.split("/")[-1] for scan_dir in scan_dirs]

        if split in ("train", "val"):
            if split == "val":
                rows = df[df.kfold == validation_fold]
            else:
                rows = df[df.kfold != validation_fold]
            self.labels = dict(zip(rows["BraTS21ID"], rows["MGMT_value"]))
            # Set lookup instead of scanning the ID column once per scan.
            wanted = set(self.labels)
            self.ids = [scan_id for scan_id in self.ids if scan_id in wanted]

    def __len__(self):
        return len(self.ids)

    def get_transforms(self):
        """Return the albumentations pipeline applied to every volume."""
        return A.Compose([
#                     A.OneOf([
#                         A.RandomBrightnessContrast (brightness_limit=0.1, contrast_limit=0.1, brightness_by_max=False),
#                         A.IAAAffine (shear=(-15,15), scale=(0.85,1.15), translate_percent=(0,0.15),rotate=(-25,25), mode='reflect'),
#                         A.GaussianBlur(blur_limit=(3,7)),
#                     ], p=0.5),
                    ToTensorV2()
                ])

    def __getitem__(self, idx):
        scan_id = self.ids[idx]
        volume = load_3d_dicom_images(scan_id, self.split)
        volume = self.get_transforms()(image=volume)['image']

        # The transformed tensor is indexed slice-first (ToTensorV2 is expected
        # to move the last axis to the front); cut it back into the four
        # per-sequence stacks and put them on a new leading axis.
        volume = torch.stack(torch.split(volume, stack_size, dim=0), dim=0)
        # `.to` converts the existing tensor; wrapping a tensor in
        # `torch.tensor(...)` is discouraged and emits a warning.
        volume = volume.to(torch.float32)

        if self.split == "test":
            return volume
        label = self.labels[scan_id]
        return volume, torch.tensor(label, dtype = torch.float32)

In [None]:
# The sample submission lists the test scan IDs; its MGMT_value column is
# overwritten with the predictions at the end of the notebook.
submission = pd.read_csv(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv"
)

# Constructing the dataset also zero-pads `submission.BraTS21ID` in place.
test_dataset = BrainTumor(submission, split='test')
# No shuffling, so predictions come out in the dataset's scan order.
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# FLAIR model (fold-0 checkpoint). torch.load unpickles the whole module, so
# only load checkpoints that come from a trusted source.
model_flair = torch.load(
    '../input/miccai-densenet-264/densenet-264-flair-fold-0.pt',
    map_location=gpu,
)
# Inference mode (fixed batch-norm statistics / no dropout), then onto the device.
model_flair = model_flair.eval().to(gpu)

In [None]:
# T1w model (fold-1 checkpoint). torch.load unpickles the whole module, so
# only load checkpoints that come from a trusted source.
model_t1w = torch.load(
    '../input/miccai-densenet-264/densenet-264-t1w-fold-1.pt',
    map_location=gpu,
)
# Inference mode (fixed batch-norm statistics / no dropout), then onto the device.
model_t1w = model_t1w.eval().to(gpu)

In [None]:
# T1wCE model (fold-2 checkpoint). torch.load unpickles the whole module, so
# only load checkpoints that come from a trusted source.
model_t1wce = torch.load(
    '../input/miccai-densenet-264/densenet-264-t1wce-fold-2.pt',
    map_location=gpu,
)
# Inference mode (fixed batch-norm statistics / no dropout), then onto the device.
model_t1wce = model_t1wce.eval().to(gpu)

In [None]:
# T2w model (fold-3 checkpoint). torch.load unpickles the whole module, so
# only load checkpoints that come from a trusted source.
model_t2w = torch.load(
    '../input/miccai-densenet-264/densenet-264-t2w-fold-3.pt',
    map_location=gpu,
)
# Inference mode (fixed batch-norm statistics / no dropout), then onto the device.
model_t2w = model_t2w.eval().to(gpu)

In [None]:
# Marker in the commit log: everything after this line is inference output.
print("Starting Predictions...")

In [None]:
# Collects one row per test scan holding the sigmoid output of each of the
# four single-sequence models.
y_pred = []
# Order matches the leading axis of the dataset's volumes: FLAIR, T1w, T1wCE, T2w.
sequence_models = (model_flair, model_t1w, model_t1wce, model_t2w)
n_batches = len(test_loader)
with torch.no_grad():
    for i, x in tqdm(enumerate(test_loader), total=n_batches):
        x = x.to(gpu)

        # Each model only sees its own sequence: pick that entry of axis 1 and
        # re-insert a singleton channel axis for the network input.
        out = [
            model(torch.unsqueeze(x[:, channel, ...], 1)).cpu().detach().sigmoid().numpy().reshape(-1).tolist()
            for channel, model in enumerate(sequence_models)
        ]

        # Change shape from (4, n) to (n, 4) so each row contains the
        # predictions of all 4 models for one scan (n is the batch size).
        out = np.array(out).transpose(1, 0)

        # Append the rows of this batch to the overall results.
        y_pred.extend(out.tolist())
        # Plain-text progress (visible in the commit log); scaled to a real
        # percentage rather than a 0-1 fraction.
        print(f'{100 * (i + 1) / n_batches:.1f}%')
# Kept as an (n_scans, 4) array: the stacking classifier in the next cell
# consumes the four per-model probabilities as features.
y_pred = np.array(y_pred)

In [None]:
# Alternative stacking model (logistic regression), kept for reference:
# stacking_classifier = pickle.load(open('../input/miccai-densenet-264/LR_Stacking_Classifier_DenseNet_264.sav', 'rb'))
# NOTE: unpickling executes arbitrary code - only load files from a trusted dataset.
# The context manager closes the file handle once the classifier is loaded.
with open('../input/miccai-densenet-264/XGB_Stacking_Classifier.sav', 'rb') as classifier_file:
    stacking_classifier = pickle.load(classifier_file)
# Probability of the positive class (column 1) for every scan, computed from
# the four per-model predictions.
y_pred_stacking_classifier = stacking_classifier.predict_proba(y_pred)[:,1]

In [None]:
# `y_pred` is the 2-D (n_scans, 4) matrix of per-model outputs and cannot be
# stored in a single column; the submission value is the stacked probability.
submission['MGMT_value'] = y_pred_stacking_classifier
submission.to_csv("submission.csv", index=False)
submission