In [1]:
import numpy as np
import pandas as pd
import dicom as dicom
import os
import scipy.ndimage
import pickle
import matplotlib.pyplot as plt
from skimage import measure
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

In [2]:
def load_dicom(path):
    """Read, order and validate every DICOM slice stored in one directory.

    Each file listed in ``path`` is read with ``dicom.read_file`` and the
    resulting slices are sorted by the z component of ``ImagePositionPatient``.
    If two or more slices share a z value, the slices are re-sorted by
    ``InstanceNumber`` and only those whose ``AcquisitionNumber`` equals the
    integer-truncated mean acquisition number are kept.

    The spacing between the first two slices is written to ``SliceThickness``
    on every slice, after which each slice is validated.

    Parameters
    ----------
    path : str
        Directory holding the DICOM files of one scan. Its last 32 characters
        are used to label diagnostic messages (presumably the patient ID).

    Returns
    -------
    list
        The slice datasets, each with ``SliceThickness`` overwritten.

    Raises
    ------
    AssertionError
        If the slice spacing is 0, the slices do not share the same x/y
        origin, a slice lacks ``SliceLocation``, ``|SliceLocation|`` differs
        from ``|ImagePositionPatient[2]|``, or the orientation is not
        ``[1, 0, 0, 0, 1, 0]``. The error is raised explicitly rather than via
        ``assert False`` so the checks still run under ``python -O``.
    IndexError
        If fewer than two slices remain when the spacing is computed.
    """
    patient_id = path[-32:]  # only used to label the diagnostics below

    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    slices.sort(key=lambda x: float(x.ImagePositionPatient[2]))

    # ImagePositionPatient holds the x, y and z coordinates of the upper
    # left-hand corner of the image.
    z_depths = [s.ImagePositionPatient[2] for s in slices]
    if len(set(z_depths)) < len(z_depths):
        print('Duplicate slices for same z-depth - ID:', patient_id)
        # NOTE(review): from here on the slices stay in InstanceNumber order
        # rather than z order - confirm that this is intended.
        slices.sort(key=lambda x: x.InstanceNumber)
        acq_num = int(np.mean([s.AcquisitionNumber for s in slices]))
        slices = [s for s in slices if s.AcquisitionNumber == acq_num]

    slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    if slice_thickness == 0:
        print('Patient with slice thickness of 0 - ID:', patient_id)
        raise AssertionError('slice thickness of 0')
    x, y = slices[0].ImagePositionPatient[:2]

    for s in slices:
        s.SliceThickness = slice_thickness
        if s.ImagePositionPatient[0] != x or s.ImagePositionPatient[1] != y:
            print('Patient x and y not aligned - ID:', patient_id)
            raise AssertionError('slices are not aligned in x and y')
        if 'SliceLocation' not in s:
            print('Patient slice has no SliceLocation - ID:', patient_id)
            raise AssertionError('slice has no SliceLocation')
        if np.abs(s.SliceLocation) != np.abs(s.ImagePositionPatient[2]):
            print('Patient SliceLocation does not equal Imageposition - ID:', patient_id)
            print('SliceLocation', s.SliceLocation, 'ImagePositionPatient[2]', s.ImagePositionPatient[2])
            raise AssertionError('SliceLocation does not match ImagePositionPatient[2]')

        orient = list(map(float, s.ImageOrientationPatient))
        if orient != [1, 0, 0, 0, 1, 0]:
            print('Patient has bad orientation - ID:', patient_id)
            raise AssertionError('unexpected ImageOrientationPatient')

    return slices

In [3]:
def dicom_HU(scan):
    """Build an int16 volume from a sequence of slices.

    Every slice contributes ``pixel_array * RescaleSlope + RescaleIntercept``.
    The per-slice results are stacked along a new third axis and cast to
    ``int16``. Any voxel below -1990 is then replaced by -1000.

    Parameters
    ----------
    scan : sequence
        Slice objects exposing ``pixel_array``, ``RescaleSlope`` and
        ``RescaleIntercept``.

    Returns
    -------
    numpy.ndarray
        A freshly allocated ``int16`` array with the slices on axis 2.
    """
    rescaled = []
    for current in scan:
        rescaled.append(current.pixel_array * current.RescaleSlope + current.RescaleIntercept)

    volume = np.stack(rescaled, axis=2).astype(np.int16)

    # np.where allocates a new int16 array, so the caller never aliases `volume`.
    return np.where(volume < -1990, np.int16(-1000), volume)

In [4]:
def dicom_resample(image, scan, new_spacing=[1,1,1]):
    """Resample a volume to (approximately) ``new_spacing`` and clip it.

    The current spacing is taken from the first slice of ``scan``: its
    ``PixelSpacing`` followed by its ``SliceThickness``. The target shape is
    rounded to whole voxels, so the zoom factor actually applied may differ
    slightly from ``spacing / new_spacing``.

    Parameters
    ----------
    image : numpy.ndarray
        3-D volume whose axes line up with
        ``PixelSpacing[0], PixelSpacing[1], SliceThickness``.
    scan : sequence
        Slices of the scan; only ``scan[0]`` is read.
    new_spacing : sequence of 3 numbers, optional
        Desired spacing per axis, in the same units as the scan's spacing.
        The default list is never mutated.

    Returns
    -------
    numpy.ndarray
        The zoomed volume, clipped to ``[-1000, 400]`` and with its first two
        axes swapped.
    """
    # Determine current pixel spacing
    spacing = np.array(scan[0].PixelSpacing + [scan[0].SliceThickness], dtype=np.float32)

    resize_factor = spacing / new_spacing
    # Round to an integer voxel count, then derive the factor that hits it exactly.
    new_shape = np.round(image.shape * resize_factor)
    real_resize_factor = new_shape / image.shape

    # Use the public scipy.ndimage.zoom; the scipy.ndimage.interpolation
    # namespace is deprecated.
    image = scipy.ndimage.zoom(image, real_resize_factor)

    # Clip after zooming so interpolation overshoot is bounded as well.
    image = np.clip(image, -1000, 400)
    return np.transpose(image, (1, 0, 2))

In [None]:
def main(path_raw='/home/gantos/data/stage1/', path_save='/home/gantos/stage1_arrays/'):
    """Preprocess every patient directory and save one array per patient.

    For each entry in ``path_raw`` the scan is loaded with ``load_dicom``,
    converted with ``dicom_HU``, resampled with ``dicom_resample`` to a
    spacing of ``[1, 1, 1]`` and written to ``<path_save>/<patient>.npy``.
    Patients whose scan fails to load are skipped and recorded.

    Two pickle files are written to ``path_save`` when the loop finishes:
    ``errors_preprocessing.txt`` (names of the skipped patients) and
    ``mins_preprocessing.txt`` (minimum value of each converted volume).

    Parameters
    ----------
    path_raw : str, optional
        Directory containing one sub-directory per patient.
    path_save : str, optional
        Existing directory that receives the ``.npy`` and pickle files.
    """
    patients = os.listdir(path_raw)

    errors = []
    mins = []

    # enumerate() replaces the O(n) patients.index() lookup on every iteration.
    for idx, patient in enumerate(patients):
        try:
            scan = load_dicom(os.path.join(path_raw, patient))
        except Exception:
            # Best effort: record the patient and move on. Unlike a bare
            # `except:`, this lets KeyboardInterrupt/SystemExit stop the run.
            errors.append(patient)
            continue
        hu = dicom_HU(scan)
        mins.append(np.min(hu))
        px_rescaled = dicom_resample(hu, scan, new_spacing=[1,1,1])

        np.save(os.path.join(path_save, patient + '.npy'), px_rescaled)

        if idx % 20 == 0:
            print(idx, 'of', len(patients))

    with open(os.path.join(path_save, 'errors_preprocessing.txt'), 'wb') as fp:
        pickle.dump(errors, fp)
    with open(os.path.join(path_save, 'mins_preprocessing.txt'), 'wb') as fp:
        # Previously `errors` was dumped here too, so the minima were lost.
        pickle.dump(mins, fp)


In [None]:
# Run the preprocessing pass over every patient; progress counters and the
# validation messages printed by load_dicom appear in the cell output.
main()

0 of 1595
20 of 1595
40 of 1595
Patient SliceLocation does not equal Imageposition - ID: ba71b330a16e8b4c852f9a8730ee33b9
SliceLocation +366.70 ImagePositionPatient[2] 1516.800000
Patient SliceLocation does not equal Imageposition - ID: 4b28f147cb82baba3edcdbd34ca19085
SliceLocation +321.00 ImagePositionPatient[2] 1383.500000
60 of 1595
Patient SliceLocation does not equal Imageposition - ID: 31136e50b7205e9184227f94cdea0090
SliceLocation +310.50 ImagePositionPatient[2] 1533.000000
Patient SliceLocation does not equal Imageposition - ID: 990fbe3f0a1b53878669967b9afd1441
SliceLocation 370.70 ImagePositionPatient[2] 1573.80005
80 of 1595
Patient SliceLocation does not equal Imageposition - ID: 5518b27f000e34cf2db5a362f4fac613
SliceLocation +351.00 ImagePositionPatient[2] 1470.500000
Patient SliceLocation does not equal Imageposition - ID: 7eb217c0444e5d866bd462ade5266a06
SliceLocation +357.00 ImagePositionPatient[2] 1249.000000
Patient SliceLocation does not equal Imageposition - ID: 3d5