In [54]:
import dicom
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import glob
import re
import cPickle as pickle

from skimage.transform import resize
from sklearn.model_selection import train_test_split
from dicom.contrib.pydicom_PIL import show_PIL
from keras.preprocessing.image import img_to_array, load_img


def getScaledDims(currshape, ratio):
    """ Scale images with current shape to new ratio """
    w,h = currshape
    currratio = h/float(w)
    
    if currratio > ratio: # Height is larger
        new_h = int(w*ratio)
        return (w,new_h)
    else:
        new_w = int(h/ratio)
        return (new_w, h)

def cropCenter(img,cropy,cropx):
    """ Center crop images to new height and new width, specified by cropy, cropx """
    y, x = img.shape
    startx = x//2-(cropx//2)
    starty = y//2-(cropy//2)    
    return img[starty:starty+cropy, startx:startx+cropx]

def loadImage(image_path, ratio):
    """ Returns a np matrix with ratio of dimensions specified by ratio """
    img = plt.imread(image_path)
    currshape = img.shape
    cropx, cropy = getScaledDims(currshape, ratio)
    newimg = cropCenter(img,cropx,cropy)
    return newimg

def resizeImage(image, dims):
    """ Resizes the image into dimensions specified by dims and converts img to 3 channels """
    newimg = resize(image, dims)
    
    # Convert from 1 channel to 3 channels
    newimg_3d = np.empty(dims + (3,))
    for i in range(3):
        newimg_3d[:,:,i] = newimg
        
    return newimg_3d

def loadTrueClass(path, number, dims):
    """ Path: Path to .png images
        Number: Number of images to run
        Dims: (height, width) of the final images that will be fed into the model """
    
    # Crop images to ratio specified.
    ratio = dims[1]/float(dims[0])
    x_True = []
    for i, image_path in enumerate(glob.glob(path)[:number]):
        if i % 50 == 0: print(i)
        img = loadImage(image_path, ratio)
    
        # Resize all images to that dims
        finalImg = resizeImage(img, dims)
        x_True.append(finalImg)
    
    x_True = np.array(x_True)
    
    return x_True

##### Load True Images

In [5]:
dirPath = '/enc_data/eddata/pacemaker'
dataPath = os.path.join(dirPath, 'organized-data')
pacemakerPath = os.path.join(dirPath,"png/full_image/*.png")

In [7]:
# Get a set of patient/clipnum of images that have pacemaker

pacemakerImgs = set()

for image_path in glob.glob(os.path.join(dirPath,"png/full_image/*.png")):
    patientid, clipnum = re.split('image\/(\d*)_(\d*)-', image_path)[1:3]
    pacemakerImgs.add((patientid, clipnum)) 

In [8]:
len(pacemakerImgs)

2145

In [55]:
allPatients = os.listdir(dataPath)
print('Number of patients: {}'.format(len(allPatients)))
numTrueFiles = len(glob.glob(pacemakerPath))
print('Number of pacemaker files: {}'.format(numTrueFiles))

x_True = loadTrueClass(pacemakerPath, numTrueFiles, (224, 224))
pickle.dump(x_True, open( "x_true.p", "wb" ) )

Number of patients: 46022
Number of pacemaker files: 2443
0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400


##### Load False Images

In [3]:
def get_PIL(dataset):
    """ Converts dicom dataset file to PIL files that can be imported into Keras """
    
    def get_LUT_value(data, window, level):
        """Apply the RGB Look-Up Table for the given data and window/level value."""
        
        ##### UNSURE IF I CAN DO THIS #####
        if isinstance(window, list):
            window = window[0]
        if isinstance(level, list):
            level = level[0]
            
        return np.piecewise(data, 
            [data <= (level - 0.5 - (window-1)/2),
                data > (level - 0.5 + (window-1)/2)],
                [0, 255, lambda data: ((data - (level - 0.5))/(window-1) + 0.5)*(255-0)])

    image = get_LUT_value(dataset.pixel_array, dataset.WindowWidth, dataset.WindowCenter)
    im = PIL.Image.fromarray(image).convert('L') # Convert mode to L since LUT has only 256 values: http://www.pythonware.com/library/pil/handbook/image.htm

    return im

In [46]:
def loadDicom(filename):
    ds = dicom.read_file(filename)
    img = get_PIL(ds)
    x = img_to_array(img)/255. # Normalize to 0-1
    return x[:,:,0]

In [15]:
import json

dirPath = '/enc_data/eddata/pacemaker'
dataPath = os.path.join(dirPath, 'organized-data')
allPatients = os.listdir(dataPath)
print('Number of patients: {}'.format(len(allPatients)))

with open(os.path.join(dirPath, 'regex_ann/neg.json'), 'r') as f:
    neg = json.load(f)
    print(len(neg))

Number of patients: 46022
43875


In [48]:
def loadDicomImg(image_path, dims):
    """ Takes path to dicom file and crops and resizes to the appropriate dimensions """
    img = loadDicom(filePath)
    currshape = img.shape
    
    ratio = dims[1]/float(dims[0])
    cropx, cropy = getScaledDims(currshape, ratio)
    newimg = cropCenter(img,cropx,cropy)

    finalimg = resizeImage(newimg, dims)
    return finalimg

In [53]:
dims = (224, 224)
x_Neg = []
count = 0

for patient in neg:
    if count > 3000: break
    if count % 50 == 0: print(count)
        
    patientPath = os.path.join(dataPath, patient)
    files = os.listdir(patientPath)
    
    for f in files:
        if not f.endswith('.dcm'): continue
            
        imgPath = os.path.join(patientPath, f)
        img = loadDicomImg(imgPath, dims)
        x_Neg.append(img)
        count += 1
        
x_Neg = np.array(x_Neg)
x_Neg.shape

0
150
250
450
750
850
1350
1500
1550
1650
1850
1900
2050
2550
2700


(3028, 224, 224, 3)

In [56]:
pickle.dump(x_Neg, open( "x_neg.p", "wb" ) )