In [None]:
# The code in this cell is adapted from Quadcore/Richard Epstein's public notebook.
# This notebook installs GDCM without Internet access.
# GDCM is needed to read some compressed DICOM images.
# If a notebook has already hit the GDCM error, you must restart its kernel before the files can be read, even after installing GDCM.
# Note that you do not "import GDCM". You just "import pydicom".
# The dataset (gdcm-conda-install) was provided by Ronaldo S.A. Batista. Definitely deserves an upvote!

# Copy the GDCM archive from the attached input dataset into the working directory.
!cp ../input/gdcm-conda-install/gdcm.tar .
# Unpack it (x = extract, v = verbose, z = gzip-compressed, f = archive file).
!tar -xvzf gdcm.tar
# Install the unpacked conda package from the local file; --offline keeps conda off the network.
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

# NOTE(review): this message is printed unconditionally; it does not check that the install succeeded.
print("GDCM installed.")

In [None]:
import numpy as np, pandas as pd, os
import matplotlib.pyplot as plt
import glob
import datetime
import torch
import torchvision.transforms as transforms
import pydicom
from pydicom import dcmread
from tqdm import tqdm
from typing import Dict

import cv2
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import albumentations as albu

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Timestamp taken when this cell runs; no later cell visible in this notebook reads it.
startTime = datetime.datetime.now()

In [None]:
def to_device(x, cuda_id=0):
    """Move ``x`` to CUDA device ``cuda_id`` when CUDA is available.

    Args:
        x: Any object exposing a ``.cuda(device)`` method (e.g. a tensor or module).
        cuda_id: Index of the CUDA device to move ``x`` to.

    Returns:
        ``x.cuda(cuda_id)`` when CUDA is available, otherwise ``x`` itself, untouched.
    """
    if not torch.cuda.is_available():
        return x
    return x.cuda(cuda_id)


def load_jit_model(x, cuda_id=0):
    """Load a TorchScript model with ``torch.jit.load``.

    Args:
        x: Whatever ``torch.jit.load`` accepts as its first argument
            (a path or a file-like object).
        cuda_id: Index of the CUDA device used as ``map_location`` when CUDA
            is available.

    Returns:
        The loaded model, mapped to ``cuda:<cuda_id>`` if CUDA is available
        and to ``cpu`` otherwise.
    """
    if torch.cuda.is_available():
        target = f'cuda:{cuda_id}'
    else:
        target = 'cpu'
    return torch.jit.load(x, map_location=target)


# Load the competition's sample submission file.
# NOTE(review): `ss` is not referenced by any other cell visible in this notebook.
ss = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/sample_submission.csv')

In [None]:
class ClassificationDataset(Dataset):
    """Slice-level test dataset for the RSNA-STR pulmonary embolism data.

    Every row of ``test.csv`` is one DICOM slice. An item is that slice
    windowed three ways (see ``WINDOWS``), stacked as three channels, resized
    to a square image and returned channel-first together with the slice's
    ``SOPInstanceUID``.

    Args:
        fold: Unused; kept so existing call sites keep working.
        mode: Key selecting the pipeline in :meth:`transform`. Only ``'test'``
            is defined.
        data_dir: Directory containing ``test.csv`` and the ``test/`` DICOM
            tree (``test/<study>/<series>/<sop>.dcm``).
        image_size: Side length, in pixels, of the square output image.
    """

    # (window center, window width) pairs, one per output channel, applied to
    # the pixel values after the slope/intercept rescale.
    WINDOWS = (
        (-600, 1500),  # LUNG window
        (100, 700),    # PE window
        (40, 400),     # MEDIASTINAL window
    )

    def __init__(self,
                 fold: int = 0,
                 mode: str = 'test',
                 data_dir: str = '../input/rsna-str-pulmonary-embolism-detection',
                 image_size: int = 512):
        self.mode = mode
        self.data_dir = data_dir
        self.image_size = image_size
        self.df = pd.read_csv(os.path.join(data_dir, 'test.csv'))

        self.labels = ['pe_present_on_image']

    def __len__(self):
        """Return the number of slices (rows of ``test.csv``)."""
        return len(self.df)

    def transform(self, image):
        """Resize ``image`` with the pipeline registered for ``self.mode``.

        No intensity normalisation is applied. An earlier version built an
        ``albu.Normalize`` object here but never added it to the pipeline, so
        that dead local has been removed without changing the output.

        Args:
            image: Channel-last image array, as built in :meth:`__getitem__`.

        Returns:
            The resized image (``image_size`` x ``image_size``).

        Raises:
            KeyError: If ``self.mode`` has no pipeline defined.
        """
        pipeline = {
            "test": albu.Compose(
                [albu.Resize(self.image_size, self.image_size)],
                p=1.0,
            ),
        }

        result = pipeline[self.mode](image=image)
        return result["image"]

    @staticmethod
    def _preprocess_img(img):
        """Reorder a channel-last (H, W, C) array to channel-first (C, H, W)."""
        return np.transpose(img, (2, 0, 1))

    @staticmethod
    def window_image(img, window_center, window_width, intercept, slope, rescale=True):
        """Apply the linear rescale and an intensity window to raw pixel data.

        Args:
            img: Raw pixel array. It is not modified.
            window_center: Center of the intensity window.
            window_width: Width of the window; half of it (floor division) is
                taken on either side of the center.
            intercept: Added to the pixel values after scaling.
            slope: Multiplied into the raw pixel values.
            rescale: If true, map the clipped values linearly to ``[0, 1]``.

        Returns:
            A new ``float64`` array clipped to the window and, when
            ``rescale`` is true, scaled to ``[0, 1]``.
        """
        # Convert to floating point before any arithmetic. Pixel data may be
        # an unsigned integer array, and adding a negative intercept to such
        # an array raises OverflowError on NumPy >= 2.
        img = np.asarray(img, dtype=np.float64) * slope + intercept
        img_min = window_center - window_width // 2
        img_max = window_center + window_width // 2
        img = np.clip(img, img_min, img_max)

        if rescale:
            # Extra rescaling to 0-1, not in the original notebook
            img = (img - img_min) / (img_max - img_min)

        return img

    @staticmethod
    def get_first_of_dicom_field_as_int(x):
        """Return a DICOM field value as ``int``.

        For a ``pydicom.multival.MultiValue`` the first element is used;
        any other value is converted directly. ``int()`` truncates, so a
        fractional value loses its fractional part.
        """
        if isinstance(x, pydicom.multival.MultiValue):
            return int(x[0])
        return int(x)

    def get_windowing(self, data):
        """Read the windowing / rescale tags from a DICOM dataset.

        Returns:
            ``[window_center, window_width, intercept, slope]`` as ints.

        Raises:
            KeyError: If any of the four tags is missing from ``data``.
        """
        dicom_fields = [data[('0028', '1050')].value,  # window center
                        data[('0028', '1051')].value,  # window width
                        data[('0028', '1052')].value,  # intercept
                        data[('0028', '1053')].value]  # slope
        return [self.get_first_of_dicom_field_as_int(x) for x in dicom_fields]

    def __getitem__(self, item):
        """Load, window and resize the slice at row ``item``.

        Returns:
            A dict with ``'img'``, a ``float32`` array of shape
            ``(3, image_size, image_size)``, and ``'category'``, the slice's
            ``SOPInstanceUID``. If the slice cannot be read or processed, a
            warning is emitted and ``'img'`` is all zeros so that a row is
            still produced for every slice.
        """
        data = self.df.iloc[item]
        try:
            dcm = pydicom.dcmread(os.path.join(self.data_dir,
                                               'test',
                                               data.StudyInstanceUID,
                                               data.SeriesInstanceUID,
                                               f'{data.SOPInstanceUID}.dcm'))
            # The file's own window center/width are read but not used; the
            # fixed windows in WINDOWS are applied instead.
            _, _, intercept, slope = self.get_windowing(dcm)
            pixels = dcm.pixel_array
            channels = [self.window_image(pixels, center, width, intercept, slope)
                        for center, width in self.WINDOWS]
            img = self.transform(np.stack(channels, axis=-1)).astype(np.float32)
        except Exception as e:
            # Best-effort fallback. The shape must be passed as a tuple:
            # np.zeros(512, 512, 3) raises TypeError, which used to turn this
            # fallback itself into a crash.
            warnings.warn(f'Could not load slice {data.SOPInstanceUID} ({e!r}); '
                          'using an all-zero image instead.')
            img = np.zeros((self.image_size, self.image_size, 3), dtype=np.float32)

        return {'img': self._preprocess_img(img),
                'category': data.SOPInstanceUID}

In [None]:
# Build the test dataset and a sequential (unshuffled) loader over it.
test = ClassificationDataset()
# Ask for at most 16 workers but never more than the machine has CPUs;
# DataLoader warns that creating more workers than CPUs can slow down or freeze loading.
loader = DataLoader(dataset=test,
                    batch_size=16,
                    shuffle=False,
                    num_workers=min(16, os.cpu_count() or 1),
                    drop_last=False)
# Open the submission file and write its header row. The handle stays open
# because later cells append rows to it and close it.
sub = open('submission.csv', "w")
sub.write('id,label')
sub.write("\n")


In [None]:
# Write one `<SOPInstanceUID>,<score>` row per test slice.
with torch.no_grad():
    for batch in tqdm(loader):
        imgs = batch['img']#.cuda()
        names = batch['category']
        # Placeholder score for every slice; the real model call is disabled.
        # The list is sized from the batch itself rather than a hard-coded 16,
        # so a different batch size cannot cause an IndexError.
        y_preds = [0.5] * len(names)#model(imgs).data.cpu()
        for name, y_pred in zip(names, y_preds):
            sub.write(f'{name},{np.format_float_positional(y_pred, precision=10)}')
            sub.write("\n")

In [None]:
# Append one `<StudyInstanceUID>_<label>,0.5` row per study and study-level
# label, then close the submission file.
labels = ['negative_exam_for_pe',
               'rv_lv_ratio_gte_1',
               'rv_lv_ratio_lt_1',
               'leftsided_pe',
               'chronic_pe',
               'rightsided_pe',
               'acute_and_chronic_pe',
               'central_pe',
               'indeterminate']

try:
    test_df = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/test.csv')
    for study in tqdm(np.unique(test_df.StudyInstanceUID)):
        for label in labels:
            sub.write(f'{study}_{label},{0.5}')
            sub.write("\n")
finally:
    # Close (and flush) the file even if reading or writing fails part-way.
    sub.close()

In [None]:
# testDataDF = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/test.csv', dtype={'StudyInstanceUID':'string', 'SeriesInstanceUID':'string', 'SOPInstanceUID':'string'})
# testDataDF = testDataDF.set_index('SOPInstanceUID')

In [None]:
# listOfStudyID = testDataDF['StudyInstanceUID'].unique()
# print(len(listOfStudyID))

In [None]:
# Sanity Check
#thisStudyDF.head()
#print(len(thisStudyDF))

#thisImageIDlist = thisStudyDF.index.to_list()
#for eachItem in thisStudyDF.index:
#    print(type(eachItem))

In [None]:
# def window(img, WL=50, WW=350):
#     upper, lower = WL+WW//2, WL-WW//2
#     X = np.clip(img.copy(), lower, upper)
#     X = X - np.min(X)
#     X = X / np.max(X)
#     X = (X*255.0).astype('uint8')
#     return X

# data_transform = transforms.Compose([
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                              std=[0.229, 0.224, 0.225])
#     ])

In [None]:
# Disabled model-loading snippet kept as a bare string literal: none of the
# code inside it executes. As the last expression of the cell, the string
# itself is echoed as the cell's output.
'''
model_Path = '../input/firstbaselinemodel/baseMod4.pth' 
baseModel = torch.load(model_Path) 
baseModel.eval();
'''

In [None]:
# #scoreDF = pd.DataFrame(columns=['id','label'])
# #scoreDF = scoreDF.set_index('id')

# f = open('submission.csv', 'w')
# f.write('id,label\n')

# with torch.no_grad():

#     for eachStudyID in tqdm(listOfStudyID):
        
#         thisStudyDF = testDataDF[testDataDF['StudyInstanceUID']==eachStudyID]
        
#         for eachImageID in thisStudyDF.index:
            
#             '''
#             try:
#                 eachImagePath = '../input/rsna-str-pulmonary-embolism-detection/test/'+testDataDF.loc[eachImageID, 'StudyInstanceUID']+'/'+testDataDF.loc[eachImageID, 'SeriesInstanceUID']+'/'+eachImageID+'.dcm'
#                 dcm_data = dcmread(eachImagePath)
#                 image = dcm_data.pixel_array * int(dcm_data.RescaleSlope) + int(dcm_data.RescaleIntercept)
#                 image = np.stack([window(image, WL=-600, WW=1500),
#                                   window(image, WL=40, WW=400),
#                                   window(image, WL=100, WW=700)], 2)

#                 image = image.astype(np.float32)
#                 image = data_transform(image)
#                 toPred = image.unsqueeze(0).cuda()
#                 z = baseModel(toPred)
#                 pred = torch.sigmoid(z)
#                 pred = pred.cpu().detach().numpy().astype('float32')[0,0]
#             except:
#                 pred = defaultScore['_pe_present_on_image']
#             '''
            
#             #scoreDF.loc[imageID, 'label'] = 0.5
#             f.write(eachImageID+',0.5\n')
            
#         # Study level labels
#         listOfMetricLabels = ['_negative_exam_for_pe', '_rv_lv_ratio_gte_1', '_rv_lv_ratio_lt_1', '_leftsided_pe', '_chronic_pe', '_rightsided_pe', '_acute_and_chronic_pe', '_central_pe', '_indeterminate']

#         for eachMetric in listOfMetricLabels:
#             #scoreDF.loc[studyID+eachMetric, 'label'] = 0.5
#             f.write(eachStudyID+eachMetric+',0.5\n')
            
# f.close()

# #print("totalEntries",len(scoreDF))
# #scoreDF.to_csv('submission.csv', index=True)

# print('finish')

In [None]:
# submissionDF = pd.read_csv('submission.csv', dtype={'id':'string', 'label':'string'})
# submissionDF['label'].values
# print(len(submissionDF))