## setup

In [None]:
# Install MONAI 0.3.0 from a wheel file stored in an attached input dataset (local path, not a package index).
!pip install -q ../input/monai030/monai-0.3.0-202010042353-py3-none-any.whl

In [None]:
import sys
import numpy as np
import pandas as pd
import cv2
import PIL.Image
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm as tqdm
from sklearn.metrics import cohen_kappa_score, confusion_matrix
from sklearn.model_selection import train_test_split, StratifiedKFold
import random

import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
from sklearn.metrics import roc_auc_score
import albumentations

import pydicom
import os, os.path as osp

import monai
from monai.transforms import LoadNifti, Randomizable, apply_transform
from monai.transforms import AddChannel, Compose, RandRotate90, Resize, ScaleIntensity, ToTensor
from monai.utils import get_seed

from scipy.ndimage.interpolation import zoom
from tqdm import tqdm
from glob import glob

# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
image_size = 160  # edge length passed to Resize in val_transforms: volumes become image_size^3
out_dim = 9       # number of model outputs; matches the 9 exam-level entries of target_cols
batch_size = 2    # NOTE(review): not referenced by the visible DataLoader, which hard-codes batch_size=8 — confirm which is intended

In [None]:
# Start from a copy of the sample submission; submission.csv is overwritten by sub.to_csv(...) at the end of the notebook.
!cp ../input/rsna-str-pulmonary-embolism-detection/sample_submission.csv submission.csv

In [None]:
# Competition tables: training table, submission template and test listing.
train = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/train.csv')
sub = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/sample_submission.csv')
test = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/test.csv')

# DEBUG is on when test.csv has exactly 146853 rows
# (presumably the size of the public test set seen during commit — confirm).
DEBUG = len(test) == 146853
DEBUG

In [None]:
# One row per study: the first (StudyInstanceUID, SeriesInstanceUID) pair listed in test.csv.
test_study = test[['StudyInstanceUID', 'SeriesInstanceUID']].drop_duplicates('StudyInstanceUID')

# save time during commit
test_study = test_study.head(25) if DEBUG else test_study
test_study.shape

## dataset

In [None]:
def load_dicom_array(f):
    """Load every ``*.dcm`` file in directory ``f`` as one rescaled volume.

    Slices are ordered by ascending z position (the last component of
    ``ImagePositionPatient``). Stored pixel values are rescaled with the
    ``RescaleSlope`` / ``RescaleIntercept`` of the first file read; those two
    values are applied to the whole series.

    Args:
        f: Directory containing the DICOM files of one series.

    Returns:
        Tuple ``(volume, files)``: the stacked, rescaled pixel arrays with the
        slice axis first, and the file paths as a NumPy array in the same
        (z-sorted) order.

    Raises:
        FileNotFoundError: If ``f`` contains no ``*.dcm`` files.
    """
    dicom_files = glob(osp.join(f, '*.dcm'))
    if not dicom_files:
        # Fail with a clear message instead of an IndexError on dicoms[0].
        raise FileNotFoundError(f'no .dcm files found in {f!r}')
    dicoms = [pydicom.dcmread(d) for d in dicom_files]
    slope = float(dicoms[0].RescaleSlope)
    intercept = float(dicoms[0].RescaleIntercept)
    # Assume all images are axial
    z_pos = [float(d.ImagePositionPatient[-1]) for d in dicoms]
    # Compute the sort order once and reuse it for both pixels and file names.
    order = np.argsort(z_pos)
    volume = np.asarray([d.pixel_array for d in dicoms])[order]
    volume = volume * slope + intercept
    return volume, np.asarray(dicom_files)[order]

def window(img, WL=50, WW=350):
    """Apply an intensity window and rescale the result to ``uint8``.

    Values are clipped to ``[WL - WW//2, WL + WW//2]``. The clipped data is
    then stretched so that its own minimum maps to 0 and its own maximum to
    255 (the stretch uses the observed range, not the window bounds).

    Args:
        img: Array of intensities; it is not modified.
        WL: Window level (centre).
        WW: Window width.

    Returns:
        ``uint8`` array of the same shape as ``img``. If every clipped value
        is identical the result is all zeros.
    """
    upper, lower = WL + WW // 2, WL - WW // 2
    # np.clip already returns a new array, so the input is left untouched.
    X = np.clip(img, lower, upper)
    X = X - np.min(X)
    peak = np.max(X)
    if peak == 0:
        # Constant input: avoid 0 / 0, which would yield NaN cast to uint8.
        return np.zeros(X.shape, dtype='uint8')
    X = X / peak
    return (X * 255.0).astype('uint8')

def read_dicom(dcm_path, image_size=256):
    """Read one DICOM series and return it as a 3-channel windowed volume.

    The series is loaded with ``load_dicom_array`` and passed through three
    intensity windows, stacked on a trailing channel axis in the order
    mediastinal, PE-specific, lung. The two axes after the slice axis are then
    resampled (linear interpolation) by ``image_size / max(shape[1:])``; the
    slice and channel axes keep their size.

    Args:
        dcm_path: Directory holding the ``.dcm`` files of the series.
        image_size: Target size used to derive the in-plane zoom factor.

    Returns:
        Array laid out as (slice, row, column, channel).
    """
    volume, _ = load_dicom_array(dcm_path)
    # Windows from https://pubs.rsna.org/doi/pdf/10.1148/rg.245045008
    mediastinal = window(volume, WL=40, WW=400)
    pe_specific = window(volume, WL=100, WW=700)
    lung = window(volume, WL=-600, WW=1500)
    stacked = np.stack([mediastinal, pe_specific, lung], axis=3)
    scale = image_size / np.max(stacked.shape[1:])
    return zoom(stacked, [1., scale, scale, 1.], prefilter=False, order=1)

In [None]:
class RSNADataset3D(Dataset, Randomizable):
    """Dataset yielding one 3-channel CT volume per row of ``csv``.

    Every row must provide ``StudyInstanceUID`` and ``SeriesInstanceUID``; the
    matching series is read from the competition ``test`` directory.
    """

    def __init__(self, csv, mode, transform=None):
        """
        Args:
            csv: DataFrame with one row per study/series to load.
            mode: Only ``'test'`` makes ``__getitem__`` return a sample.
            transform: Optional transform applied to the (ch, H, W, Z) array.
        """
        self.csv = csv.reset_index()
        self.mode = mode
        self.transform = transform

    def __len__(self):
        """Number of rows (series) in the dataset."""
        return self.csv.shape[0]

    def randomize(self) -> None:
        """Draw a new seed; it is handed to a Randomizable transform in ``__getitem__``."""
        MAX_SEED = np.iinfo(np.uint32).max + 1
        self._seed = self.R.randint(MAX_SEED, dtype="uint32")

    def __getitem__(self, index):
        """Load, reorder and transform the volume of row ``index``.

        If the series cannot be read, a warning is emitted and an all-zero
        volume of shape (144, 256, 256, 3) is used instead, so that one broken
        series does not abort the whole run.

        Returns:
            The transformed volume when ``mode == 'test'``, otherwise ``None``.
        """
        self.randomize()
        row = self.csv.iloc[index]
        try:
            img = read_dicom(os.path.join('../input/rsna-str-pulmonary-embolism-detection/test', row.StudyInstanceUID, row.SeriesInstanceUID))
        except Exception as exc:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate, and make the substitution visible.
            import warnings
            warnings.warn(
                f'RSNADataset3D: failed to read item {index} ({exc!r}); '
                'substituting an all-zero volume'
            )
            img = np.zeros((144, 256, 256, 3), dtype=np.uint8)

        # (144, 256, 256, 3)  Z, H, W, ch
        # Reverse the channel order, then move channels first and slices last.
        img = img[:, :, :, ::-1].transpose(3, 1, 2, 0)  # -> ch, H, W, Z
        if self.transform is not None:
            if isinstance(self.transform, Randomizable):
                self.transform.set_random_state(seed=self._seed)
            img = apply_transform(self.transform, img)
        if self.mode == 'test':
            return img
        # No other mode is implemented; the caller receives None.
        return None

In [None]:
# Inference pipeline: scale intensities, resize every volume to a cube of
# edge image_size, then convert to a torch tensor.
val_transforms = Compose([
    ScaleIntensity(),
    Resize((image_size,) * 3),
    ToTensor(),
])

In [None]:
if DEBUG:
    # Visual sanity check: for the first three studies, show the mean of the
    # transformed volume along each of its three spatial axes.
    dataset_show = RSNADataset3D(test_study.head(5), 'test', transform=val_transforms)
    plt.rcParams['figure.figsize'] = 12, 5
    for i in range(3):
        f, axarr = plt.subplots(1, 3)
        img = dataset_show[i]
        print(img.shape)
        # Move the channel axis last so each projection is an (a, b, ch) image.
        channels_last = img.numpy().transpose(1, 2, 3, 0)
        for j, ax in enumerate(axarr):
            ax.imshow(channels_last.mean(axis=j))
            ax.set_title(i)
        plt.show()

## load model trained locally

In [None]:
# List the fold-0 checkpoint file (long format) from the attached dataset.
!ls -lrt ../input/monai3d-160-3ch-1e-5-20ep-aug/monai3d_160_3ch_1e-5_20ep_aug_best_fold0.pth

In [None]:
# Checkpoint paths, one per fold; range(1) means only fold 0 is used.
model_files = [
    f'../input/monai3d-160-3ch-1e-5-20ep-aug/monai3d_160_3ch_1e-5_20ep_aug_best_fold{i}.pth'
    for i in range(1)
]

In [None]:
def load_model(model_file):
    """Build the 3D DenseNet-121 and load the weights stored in ``model_file``.

    The network takes 3 input channels and produces ``out_dim`` outputs. Both
    plain checkpoints and checkpoints whose keys carry the ``'module.'`` prefix
    (saved from a multi-GPU wrapper) are accepted.

    Args:
        model_file: Path to a saved ``state_dict``.

    Returns:
        The model on ``device``, switched to eval mode.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise for a missing file
        or for keys that do not match the architecture (``strict=True``).
    """
    model = monai.networks.nets.densenet.densenet121(spatial_dims=3, in_channels=3, out_channels=out_dim).to(device)

    # Read the checkpoint once, mapped onto the active device so that weights
    # saved from a GPU can also be loaded on a CPU-only machine.
    state_dict = torch.load(model_file, map_location=device)
    # Strip the multi-GPU 'module.' prefix where present; single-GPU keys pass
    # through unchanged, so no try/except retry is needed.
    state_dict = {
        (key[7:] if key.startswith('module.') else key): value
        for key, value in state_dict.items()
    }
    model.load_state_dict(state_dict, strict=True)

    model.eval()
    return model

# One network per checkpoint; the inference loop averages over this list.
models = list(map(load_model, model_files))
len(models)

In [None]:
dataset_test = RSNADataset3D(test_study, mode='test', transform=val_transforms)
# NOTE(review): batch_size is hard-coded to 8 here although the config cell
# defines batch_size = 2 — confirm which value is intended.
test_loader = DataLoader(
    dataset_test,
    batch_size=8,
    num_workers=2,
)

In [None]:
LOGITS = []
with torch.no_grad():
    for data in tqdm(test_loader):
        data = data.to(device)
        # Ensemble by averaging the raw logits of all loaded models.
        logits = torch.zeros((data.shape[0], out_dim), device=device)
        for model in models:
            logits += model(data)
        logits /= len(models)
        LOGITS.append(logits.cpu())
# Keep the 2-D (n_studies, out_dim) shape even when there is a single study:
# the submission cell indexes PROBS[:, i], which a squeezed 1-D array breaks.
PROBS = torch.sigmoid(torch.cat(LOGITS)).numpy()

In [None]:
# Sanity check: largest and smallest sigmoid output over all studies and targets.
PROBS.max(), PROBS.min()

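## weighted mean prediction per slice location for single images

The 3D model only predicts exam-level targets. Each individual image instead receives a fixed value looked up from its relative slice position within the study. The approach follows this notebook: https://www.kaggle.com/osciiart/baseline-with-no-image/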

In [None]:
# get dicom paths
# Work on a copy: `df_test = test` would only alias the frame, so the columns
# added from here on would silently appear in `test` as well.
df_test = test.copy()
df_test['path'] = ("../input/rsna-str-pulmonary-embolism-detection/test/"
                   + df_test['StudyInstanceUID'].values + "/"
                   + df_test['SeriesInstanceUID'].values + "/"
                   + df_test['SOPInstanceUID'].values + ".dcm"
                  )
print(df_test['path'][0])

In [None]:
# extract exam (study) level data
col_index = 'SOPInstanceUID'
col_groupby = 'StudyInstanceUID'

# First row of every study, in order of first appearance.
df_test_study = df_test.drop_duplicates(col_groupby).reset_index(drop=True)

# Number of image rows per study.
df_tmp = df_test.groupby(col_groupby)[col_index].size().rename('num_images').reset_index()

# Attach the count to the study table, then broadcast it back to every image row.
df_test_study = df_test_study.merge(df_tmp, on=col_groupby, how='left')
df_test = df_test.merge(df_test_study[[col_groupby, 'num_images']], on=col_groupby, how='left')
print(df_test.shape)
df_test.head()

In [None]:
# get series index of image
def task(i):
    """Return the z position of the image in row ``i`` of ``df_test``.

    Reads the file at ``df_test['path'][i]`` and returns the last component of
    its ``ImagePositionPatient`` as a plain float. Every 10000th call prints
    a progress line with the seconds elapsed since ``starttime``.

    Args:
        i: Row label into ``df_test``.

    Returns:
        The z coordinate as ``float``.
    """
    if (i+1)%10000==0:
        print("{}/{} {:.1f}".format(i+1, len(df_test), time.time()-starttime))
    path = df_test['path'][i]
    # Only a header field is needed, so stop reading before the pixel data.
    tmp_dcm = pydicom.dcmread(path, stop_before_pixels=True)
    # Plain float, consistent with how load_dicom_array treats the same field.
    return float(tmp_dcm.ImagePositionPatient[-1])

import time
import multiprocessing
from concurrent.futures import ProcessPoolExecutor

starttime = time.time()
# The context manager shuts the pool down once all results are collected, so
# the worker processes do not stay alive for the rest of the notebook.
with ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
    futures = [executor.submit(task, i) for i in range(len(df_test))]
    # Collect in submission order so result_list lines up with the rows of df_test.
    result_list = [future.result() for future in futures]
df_test['z_pos'] = result_list
df_test.head()

In [None]:
# calculate slice location
# series_index = rank of each image inside its study when ordered by z_pos
# (0 = smallest z). One grouped pass replaces the per-study filter loop and
# the merge, and re-running the cell no longer creates duplicate columns.
z_sorted = (
    df_test
    .assign(z_pos=df_test['z_pos'].astype(float))
    .sort_values([col_groupby, 'z_pos'], kind='mergesort')  # stable: ties keep row order
)
# cumcount keeps the original row labels, so the assignment aligns by index.
df_test['series_index'] = z_sorted.groupby(col_groupby).cumcount()

# Relative position in [0, 1]. A study with a single image yields 0 / 0 = NaN.
df_test['slice_location'] = df_test['series_index'] / (df_test['num_images'] - 1)
df_test.head()

In [None]:
# get weighted mean prediction per slice location
# One value per slice-location bin, lowest bin first
# (presumably taken from the notebook linked in the section header — confirm).
q_weighted_means = np.array([0.00326324, 0.05970682, 0.32645303, 0.67452216, 0.71344817, 0.4734337, 0.0740926, 0.00369781])

df_test = df_test.copy()
bins = 8
# Default to the last bin: it catches slice_location == 1.0 and NaN, which
# none of the half-open intervals [i/bins, (i+1)/bins) below can match.
df_test['bins'] = bins-1
for i in range(bins):
    in_bin = (df_test['slice_location']>=(i/bins)) & (df_test['slice_location']<((i+1)/bins))
    # .loc writes into df_test itself; the chained form
    # df_test['bins'][mask] = i may assign to a temporary copy instead.
    df_test.loc[in_bin, 'bins'] = i
# Vectorised lookup of the per-bin value.
df_test['q_weighted_means'] = q_weighted_means[df_test['bins'].to_numpy()]
df_test.head()

In [None]:
# Row/column count of the per-image table after all columns were added.
df_test.shape

## merge the study-level predictions

In [None]:
# Image-level rows: label = per-bin value of the image's slice location.
# Ids with no matching SOPInstanceUID are filled with 0.5.
sub = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/sample_submission.csv')
image_labels = df_test[['SOPInstanceUID', 'q_weighted_means']].rename(
    columns={'SOPInstanceUID': 'id', 'q_weighted_means': 'label'}
)
sub = sub[['id']].merge(image_labels, on='id', how='left').fillna(0.5)
sub

In [None]:
# Exam-level targets, in the order of the model's output columns.
target_cols = [
    'negative_exam_for_pe',  # exam level
    'rv_lv_ratio_gte_1',     # exam level
    'rv_lv_ratio_lt_1',      # exam level
    'leftsided_pe',          # exam level
    'chronic_pe',            # exam level
    'rightsided_pe',         # exam level
    'acute_and_chronic_pe',  # exam level
    'central_pe',            # exam level
    'indeterminate',         # exam level
]

# Exam-level rows have ids of the form '<StudyInstanceUID>_<target>';
# column k of PROBS is written to the rows of target k.
study_ids = test_study.StudyInstanceUID.values
sub = sub.set_index('id')
for col_idx, target in enumerate(target_cols):
    row_ids = [uid + '_' + target for uid in study_ids]
    sub.loc[row_ids, 'label'] = PROBS[:, col_idx]
sub = sub.reset_index()
sub.to_csv('submission.csv', index=False)

In [None]:
# Display the final submission table that was written to submission.csv.
sub