In [1]:
import os
from collections.abc import Sequence
from glob import glob
from random import randrange

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom
from PIL import Image
from torch.utils.data import Dataset

In [3]:
# Data root for the competition files; defined once and reused for the listing below.
rootDir = "D:\\kaggle\\rsna"
print(os.listdir(rootDir))

['stage_1_sample_submission.csv', 'stage_1_test_images', 'stage_1_train.csv', 'stage_1_train_images']


In [4]:
def window_image(img, window_center,window_width, intercept, slope):
    """Rescale raw pixel values and clamp them to an intensity window.

    The rescale ``img * slope + intercept`` is computed in float64 rather
    than in the input dtype. With unsigned integer pixel data a negative
    intercept cannot be represented in the array dtype, which either raises
    (NumPy 2 promotion rules) or depends on value-based casting (NumPy 1),
    so the input is promoted first.

    Args:
        img: array-like of raw pixel values; it is not modified.
        window_center: center of the window, in rescaled units.
        window_width: width of the window; ``window_width // 2`` is used as
            the half-width on each side of the center.
        intercept: additive rescale term.
        slope: multiplicative rescale term.

    Returns:
        A new float64 ndarray with every value clamped to
        ``[window_center - window_width // 2, window_center + window_width // 2]``.
    """
    rescaled = np.asarray(img, dtype=np.float64) * slope + intercept
    half_width = window_width // 2
    return np.clip(rescaled, window_center - half_width, window_center + half_width)

def get_first_of_dicom_field_as_int(x):
    """Convert a DICOM element value to an int, using the first item if multi-valued.

    Multi-valued fields are detected through the ``Sequence`` ABC instead of
    an exact ``type(x) == MultiValue`` comparison, so pydicom's ``MultiValue``
    (a ``MutableSequence``), its subclasses, and plain lists/tuples are all
    handled. Strings and bytes are sequences too, but are treated as scalars
    and passed to ``int`` directly.

    Args:
        x: a scalar value accepted by ``int`` or a sequence of such values.

    Returns:
        ``int(x[0])`` for a non-string sequence, otherwise ``int(x)``.

    Raises:
        IndexError: if ``x`` is an empty sequence.
    """
    is_multi_valued = isinstance(x, Sequence) and not isinstance(x, (str, bytes, bytearray))
    return int(x[0]) if is_multi_valued else int(x)

def get_windowing(data):
    """Read the windowing and rescale values of a DICOM dataset as ints.

    Looks up the elements tagged (0028,1050), (0028,1051), (0028,1052) and
    (0028,1053) and converts each value with
    ``get_first_of_dicom_field_as_int``.

    Returns:
        A list ``[window center, window width, intercept, slope]`` of ints.
    """
    tags = (
        ('0028', '1050'),  # window center
        ('0028', '1051'),  # window width
        ('0028', '1052'),  # intercept
        ('0028', '1053'),  # slope
    )
    # Fetch every raw value first, then convert, so a missing tag surfaces
    # before any conversion is attempted.
    raw_values = [data[tag].value for tag in tags]
    return list(map(get_first_of_dicom_field_as_int, raw_values))

In [10]:
def prepareDataframe(path, train=False, nrows=None, labelNames=None):
    """
    Load a label CSV and reshape it to one row per image.

    The CSV must provide an ``ID`` column of the form
    ``<image id>_<label name>`` and a ``Label`` column. Each value is placed
    in the column named by the suffix of its own ID, so the result does not
    depend on the CSV being sorted or on the row order within an image.

    Args:
        path: CSV file to read.
        train: if True, drop the hard-coded duplicate rows listed below
            before reshaping.
        nrows: optional row limit forwarded to ``pd.read_csv``.
        labelNames: label column names, in output order. Defaults to the
            notebook-level ``targets`` list, which must be defined before
            the call when this argument is omitted.

    Returns:
        DataFrame with an ``ImageID`` column (``<image id>.dcm``) followed by
        one column per label name, rows in order of first appearance in the
        CSV. A label missing for an image is NaN.

    Raises:
        ValueError: if the same (image, label name) pair occurs more than once.
    """
    df = pd.read_csv(path, nrows=nrows)
    if train:
        # Duplicates found from this kernel:
        # https://www.kaggle.com/akensert/resnet50-keras-baseline-model
        removeDuplicates = [1598538, 1598539, 1598540, 1598541, 1598542, 1598543,
                                312468,  312469,  312470,  312471,  312472,  312473,
                                2708700, 2708701, 2708702, 2708703, 2708704, 2708705,
                                3032994, 3032995, 3032996, 3032997, 3032998, 3032999]
        # errors='ignore': with a small nrows some of these rows are not loaded,
        # and a strict drop would raise KeyError.
        df = df.drop(index=removeDuplicates, errors='ignore').reset_index(drop=True)

    if labelNames is None:
        labelNames = targets

    # Split "<image id>_<label name>" once from the right; `n` must be passed
    # by keyword (positional use is rejected by pandas >= 2.0).
    idParts = df['ID'].str.rsplit('_', n=1)
    df = df.assign(ImageID=idParts.str[0] + '.dcm', LabelName=idParts.str[-1])

    # Keep images in order of first appearance; pivot alone would sort them.
    imageOrder = pd.Index(df['ImageID'].unique(), name='ImageID')
    newDf = (df.pivot(index='ImageID', columns='LabelName', values='Label')
               .reindex(index=imageOrder, columns=list(labelNames)))
    newDf.columns.name = None
    return newDf.reset_index()

In [11]:
# All labels that we have to predict in this competition;
# prepareDataframe uses this list for its label column names.
targets = ['epidural', 'intraparenchymal',
           'intraventricular', 'subarachnoid',
           'subdural', 'any']

# Label CSVs located under the data root
trainSheet = os.path.join(rootDir, 'stage_1_train.csv')
testSheet = os.path.join(rootDir, 'stage_1_sample_submission.csv')

# One row per image, indexed by ImageID so labels can be looked up by file name
trainDf = prepareDataframe(trainSheet, train=True).set_index("ImageID", drop=True)
testDf = prepareDataframe(testSheet).set_index("ImageID", drop=True)

In [None]:
class RsnaRIT(Dataset):
    """Dataset yielding windowed DICOM images together with their labels.

    Each item is a dict ``{'image': ndarray, 'label': list}``, optionally
    passed through ``transform``. Labels are looked up by file name in the
    notebook-level ``trainDf`` / ``testDf`` frames, which must exist before
    items are requested.

    Args:
        dataPartition: ``'train'`` or ``'test'``; selects the label frame and
            is also the sub-directory of ``dataPath`` that is listed.
        dataPath: directory containing the partition sub-directory.
        transform: optional callable applied to the item dict.
    """

    def __init__(self, dataPartition, dataPath, transform=None):
        # NOTE(review): images are read from dataPath/dataPartition, so the
        # partition name must match a directory name on disk -- confirm.
        self.dataPath = os.path.join(dataPath, dataPartition)
        self.dataList = os.listdir(self.dataPath)
        self.dataPartition = dataPartition
        self.transform = transform

    def __len__(self):
        """Return the number of files found in the partition directory."""
        return len(self.dataList)

    def __getitem__(self, img_id):
        """Load, window and scale the image at position ``img_id``.

        Raises:
            ValueError: if the partition is neither ``'train'`` nor ``'test'``.
        """
        imgID = self.dataList[img_id]
        imagePath = os.path.join(self.dataPath, imgID)

        data = pydicom.dcmread(imagePath)
        window_center, window_width, intercept, slope = get_windowing(data)
        # Take the pixels from the dataset that was just read instead of
        # reading a file a second time.
        img = window_image(data.pixel_array, window_center, window_width, intercept, slope)

        if self.dataPartition == 'train':
            labels = trainDf.loc[imgID, :].tolist()
        elif self.dataPartition == 'test':
            labels = testDf.loc[imgID, :].tolist()
        else:
            # Fail with a clear message rather than an unbound `labels`.
            raise ValueError(
                "dataPartition must be 'train' or 'test', got %r" % (self.dataPartition,)
            )

        img = np.divide(img, 255.)

        dataDict = {'image': img, 'label': labels}

        if self.transform:
            dataDict = self.transform(dataDict)

        return dataDict