In [6]:
#adapted from https://www.kaggle.com/sentdex/data-science-bowl-2017/first-pass-through-data-w-3d-convnet
#and https://github.com/pieper/pydicom/blob/master/source/dicom/test/test_filereader.py

import dicom
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np

In [7]:
import math

def chunks(l, n):
    """Lazily yield consecutive slices of ``l`` holding at most ``n`` items each.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))

def mean(l):
    """Return the arithmetic mean of the items in ``l``.

    ``l`` must support ``len`` and its items must support ``+`` and ``/``;
    an empty ``l`` raises ``ZeroDivisionError``.
    """
    total = sum(l)
    count = len(l)
    return total / count

# Value handed to process_data as ``img_px_size`` and written into the output file name.
IMG_PX_SIZE = 150
# Value handed to process_data as ``hm_slices`` and written into the output file name.
HM_SLICES = 20

In [8]:
def process_data(patient, img_px_size=50, hm_slices=20):
    """Load one DICOM series and reduce it to a fixed number of resized slices.

    Every file in the directory ``patient`` is read with ``dicom.read_file``,
    ordered by the third component of ``ImagePositionPatient``, resized to
    ``img_px_size`` x ``img_px_size`` and averaged in consecutive groups so
    that exactly ``hm_slices`` slices remain.

    Parameters
    ----------
    patient : str
        Directory holding the DICOM files of a single series.
    img_px_size : int
        Width and height passed to ``cv2.resize`` for every slice.
    hm_slices : int
        Number of slices in the returned volume.

    Returns
    -------
    numpy.ndarray
        The averaged slices stacked with ``np.array``; its first axis has
        length ``hm_slices``. For single-channel images the shape is
        presumably ``(hm_slices, img_px_size, img_px_size)`` -- confirm
        against the data.

    Raises
    ------
    ValueError
        If ``patient`` contains no files.
    """
    # Read from the ``patient`` argument. The previous version ignored it and
    # read the notebook-level global ``folder``, so it only worked when the
    # caller's loop variable happened to carry that name.
    slices = [
        dicom.read_file(os.path.join(patient, file_name))
        for file_name in os.listdir(patient)
    ]
    if not slices:
        # Previously this surfaced later as an opaque "range() arg 3 must not
        # be zero" ValueError raised from inside chunks().
        raise ValueError("no DICOM files found in {!r}".format(patient))

    # Sort on the full position value. Truncating to int made slices that lie
    # less than 1 apart compare equal, leaving them in os.listdir order.
    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]))

    resized = [
        cv2.resize(np.array(ds.pixel_array), (img_px_size, img_px_size))
        for ds in slices
    ]

    # Average groups of ``chunk_size`` neighbouring slices row by row.
    chunk_size = math.ceil(len(resized) / hm_slices)
    new_slices = [
        list(map(mean, zip(*group)))
        for group in chunks(resized, chunk_size)
    ]

    # Too few groups: repeat the last averaged slice until the depth matches.
    # This gives the same result as the former one- and two-slice special
    # cases and also covers larger shortfalls.
    while len(new_slices) < hm_slices:
        new_slices.append(new_slices[-1])

    # Too many groups: fold each surplus slice into the last kept one.
    # This gives the same result as the former +1 / +2 special cases.
    while len(new_slices) > hm_slices:
        merged = list(map(mean, zip(new_slices[hm_slices - 1], new_slices[hm_slices])))
        del new_slices[hm_slices]
        new_slices[hm_slices - 1] = merged

    return np.array(new_slices)

In [9]:
# Build the training array from the image series below one patient directory
# and store it as a .npy file in the current working directory.
data_dir = '../4D-Lung/'
#patient 0
path = data_dir + '100_HM10395/1.3.6.1.4.1.14519.5.2.1.6834.5010.465205689126985052184293614571/'

# os.walk yields (dirpath, dirnames, filenames); keep only dirpath.
folders = [x[0] for x in os.walk(path)]
# The first entry yielded by os.walk is ``path`` itself, so drop it.
del folders[0]
#folders[:10]

# Only the first ten sub-directories are processed here.
# NOTE(review): ``folder`` becomes a notebook-level global in this loop;
# check that process_data does not rely on that name before renaming it.
data = []
for num,folder in enumerate(folders[:10]):
    img_data=process_data(folder,img_px_size=IMG_PX_SIZE, hm_slices=HM_SLICES)
    #print(img_data.shape)
    data.append(img_data)
# The file name records the resize edge length (twice) and the slice count.
np.save('data-{}-{}-{}.npy'.format(IMG_PX_SIZE,IMG_PX_SIZE,HM_SLICES), data)

In [10]:
# Inspect the eleventh sub-directory (index 10); the slice yields at most one
# entry, so the loop body is skipped when fewer than eleven folders exist.
for folder in folders[10:11]:
    # Reads whichever file os.listdir returns first.
    # NOTE(review): presumably this folder holds a single RT structure set
    # file -- confirm, since os.listdir order is arbitrary.
    rtss = dicom.read_file(folder + '/' + os.listdir(folder)[0])
    #print(rtss)
    #expected = '1.2.840.10008.1.2'  # implVR little endian
    #got = rtss.file_meta.TransferSyntaxUID
    #msg = "Expected transfer syntax %r, got %r" % (expected, got)
    #frame_of_ref = rtss.ReferencedFrameOfReferenceSequence[0]
    #study = frame_of_ref.RTReferencedStudySequence[0]
    #uid = study.RTReferencedSeriesSequence[0].SeriesInstanceUID
    #expected = "1.2.826.0.1.3680043.8.498.2010020400001.2.1.1"
    #msg = "Expected Reference Series UID '%s', got '%s'" % (expected, uid)

    # NOTE(review): the original remark said "cancer is tenth contour", but
    # index 10 selects the eleventh item (0-based) -- confirm which ROI is meant.
    got = rtss.ROIContourSequence[10].ContourSequence[2]
    #print(got)
    #expected = 3
    #msg = "Expected Contour Number %d, got %r" % (expected, got)

    #obs_seq0 = rtss.RTROIObservationsSequence[0]
    #got = obs_seq0.ROIPhysicalPropertiesSequence[0].ROIPhysicalProperty
    #expected = 'REL_ELEC_DENSITY'
    #msg = "Expected Physical Property '%s', got %r" % (expected, got)
    #folder
    #os.listdir(folder)
    #slices = [dicom.read_file(folder + '/' + s) for s in os.listdir(folder)]
    #slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))
    #print(len(slices))
    #print(slices[0])