In [None]:
from __future__ import print_function

from glob import glob

import numpy as np

import matplotlib.pyplot as plt

import skimage.transform

import skimage.draw

import pydicom

import numpy.ma

import tensorflow as tf

# TODO: redundant with `import skimage.draw` above; settle on one import style for `polygon`.
from skimage.draw import polygon

# Notebook-only display setting; do not include this in the script version.
plt.rcParams['figure.figsize'] = [6, 6]

# GETTING DATA

In [None]:
def list_files(data_path, ext):
    """
    Return the sorted paths of the entries found directly inside data_path.

    If ext is None every entry is matched; otherwise only entries whose
    name ends with ext (for example ".dcm") are returned.
    """
    pattern = data_path + "/*"
    if ext is not None:
        pattern = pattern + ext
    return sorted(glob(pattern))

In [None]:
def read_dicom_files(file_names):
    """
    Read every path in file_names with pydicom and return the resulting
    datasets as a list, in the same order as file_names.
    """
    datasets = []
    for path in file_names:
        # force=True asks pydicom to read the file even when the standard
        # DICOM file header is missing.
        datasets.append(pydicom.dcmread(path, force=True))
    return datasets

In [None]:
def filter_dicom_files(dicom_files):
    """
    Split a list of DICOM datasets into four lists. Each dataset goes into
    the first group whose test it passes:

    dicom_series: datasets with an 'ImageType' attribute (CT image files).

    dicom_structures: datasets with a 'StructureSetName' attribute
    (RS structure set file).

    dicom_plan: datasets with a 'BeamSequence' attribute (RP plan file).

    dicom_dose: every remaining dataset (treated as the RD dose grid file).

    Returns the tuple (dicom_series, dicom_structures, dicom_plan, dicom_dose).
    """
    dicom_series, dicom_structures, dicom_plan, dicom_dose = [], [], [], []

    for dataset in dicom_files:
        if hasattr(dataset, 'ImageType'):
            bucket = dicom_series
        elif hasattr(dataset, 'StructureSetName'):
            bucket = dicom_structures
        elif hasattr(dataset, 'BeamSequence'):
            bucket = dicom_plan
        else:
            # Anything unrecognised falls through to the dose group.
            bucket = dicom_dose
        bucket.append(dataset)

    return dicom_series, dicom_structures, dicom_plan, dicom_dose

In [None]:
def add_transfer_syntax(dicom_series):
    """
    Make sure every dataset in dicom_series has a TransferSyntaxUID in its
    file_meta, filling in Implicit VR Little Endian where it is missing.
    Needed before the pixel_array attribute is accessed.

    The datasets are modified in place and the same list is returned.
    """
    for dataset in dicom_series:
        meta = dataset.file_meta
        if not hasattr(meta, 'TransferSyntaxUID'):
            meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
    return dicom_series

In [None]:
def get_pixel_array(dicom_series):
    """
    Collect the pixel_array of every dataset in dicom_series and return
    them stacked into one NumPy array, in list order.
    """
    slices = []
    for dataset in dicom_series:
        slices.append(dataset.pixel_array)
    return np.array(slices)

In [None]:
def read_structures(dicom_structures):
    """
    Extract the contours of every ROI in an RT structure set dataset.

    dicom_structures: a single structure set dataset providing
    ROIContourSequence and StructureSetROISequence.

    Returns a list with one dict per entry of ROIContourSequence, holding:
        'color':    the ROI display colour,
        'number':   the referenced ROI number,
        'name':     the ROI name with that number in StructureSetROISequence,
        'contours': the ContourData of each contour of the ROI (an empty
                    list when the ROI has no ContourSequence).

    Raises ValueError when a contour references an ROI number that is not
    listed in StructureSetROISequence.
    """
    # Pair names with contours through the ROI number rather than by list
    # position, so the two sequences do not have to share the same order.
    names_by_number = {
        roi.ROINumber: roi.ROIName
        for roi in dicom_structures.StructureSetROISequence
    }

    contours = []
    for roi_contour in dicom_structures.ROIContourSequence:
        number = roi_contour.ReferencedROINumber
        if number not in names_by_number:
            raise ValueError(
                "ROI number {} has contours but no entry in "
                "StructureSetROISequence".format(number))
        contours.append({
            'color': roi_contour.ROIDisplayColor,
            'number': number,
            'name': names_by_number[number],
            # ROIs without any drawn contour carry no ContourSequence.
            'contours': [
                s.ContourData
                for s in getattr(roi_contour, 'ContourSequence', [])
            ],
        })
    return contours

# MANIPULATING DATA

In [None]:
def resize_pixel_array(pixel_array, shape):
    """
    Resize pixel_array to the in-plane size given by the (x, y) tuple shape.

    For an array with more than two dimensions the length of the first
    axis (the axial slice index of a (z, x, y) volume) is kept and shape
    is applied to the remaining axes.
    """
    target_shape = shape
    if len(pixel_array.shape) >= 3:
        # Keep the number of slices; only the in-plane size changes.
        target_shape = (len(pixel_array),) + tuple(shape)
    return skimage.transform.resize(pixel_array, target_shape)

In [None]:
#def resize_pixel_array(pixel_array, shape, is_mask=False):
#    """
#    Resizes axial slices in the pixel_array volume to (x, y)tuple scale.
##    Assumes volume shape (z, x, y) where z indexes axial slices.
#    """
#    if len(pixel_array.shape) > 2:
#        shape = len(pixel_array), *shape
#    if is_mask:
#        return numpy.ma.resize(pixel_array, shape)
#    else:
#        return skimage.transform.resize(pixel_array, shape)

In [None]:
def normalise_pixel_array_volume(pixel_array_volume):
    """
    Return the volume divided by its global maximum value.

    If the maximum is 0 (for example a mask volume with nothing drawn in
    it) the values are returned unscaled as floats instead of dividing by
    zero, which would fill the result with NaN.
    """
    # TODO
    # Have changed from max of slice to max of array
    # But have not put much thought into effect different
    # maximum values from different patient cases may have (yet)!
    # This should probably be /max for type
    peak = np.max(pixel_array_volume)
    if peak == 0:
        return np.array(pixel_array_volume, dtype=float)
    return pixel_array_volume / peak

In [None]:
def transform_to_array(x, y, dicom_series):
    """
    Convert patient-coordinate points to image (row, column) coordinates.

    x, y: sequences of patient x and y coordinates.
    dicom_series: list of image datasets; ImagePositionPatient and
    PixelSpacing are read from the first one only.

    Returns (-r, c): the negated row coordinates and the column coordinates
    as NumPy arrays.
    NOTE(review): the row coordinate is returned negated, as in the
    original code; confirm this matches the orientation of the data.
    """
    origin = dicom_series[0].ImagePositionPatient
    spacing = dicom_series[0].PixelSpacing
    # DICOM PixelSpacing is (row spacing, column spacing): element 0 is the
    # distance between adjacent rows (the step along y) and element 1 the
    # distance between adjacent columns (the step along x).
    row_spacing = spacing[0]
    col_spacing = spacing[1]
    x = np.array(x)
    y = np.array(y)

    r = (y - origin[1]) / row_spacing
    c = (x - origin[0]) / col_spacing
    return -r, c

In [None]:
def get_simplified_names(names):
    """
    Return a new list with every name lower-cased and with spaces and
    underscores removed, e.g. "Vac_Bag" -> "vacbag".

    Works for any number of names, including an empty or single-item list.
    """
    return [name.lower().replace(' ', '').replace('_', '') for name in names]

In [None]:
def get_binary_masks(contours, slices, image, names):
    """
    Rasterise the selected structures into a label volume shaped like image.

    contours: list of dicts as built by read_structures; the keys 'name',
        'color' and 'contours' are read here.
    slices: image datasets, one per entry along the first axis of image;
        their ImagePositionPatient z value (rounded to 1 decimal) is used
        to find the slice a contour belongs to.
    image: pixel volume; only its shape is used.
    names: ROI names to rasterise (exact match against con['name']).
        A single string is accepted; None selects "Vacbag".

    Returns (label, colors): label is an int16 array shaped like image with
    1 inside the selected contours, colors is a tuple with the display
    colour of every entry of contours scaled to 0..1.

    Raises ValueError when a selected contour lies at a z position that
    matches no slice.
    """
    if names is None:
        selected = ("Vacbag",)
    elif isinstance(names, str):
        selected = (names,)
    else:
        selected = names

    z = [np.around(s.ImagePositionPatient[2], 1) for s in slices]
    label = np.zeros_like(image, dtype=np.int16)
    for con in contours:
        if con['name'] not in selected:
            continue
        for contour_data in con['contours']:
            nodes = np.array(contour_data).reshape((-1, 3))
            # A contour must lie in a single axial plane.
            assert np.all(nodes[:, 2] == nodes[0, 2])
            # Round like the slice positions above so the lookup does not
            # depend on the exact floating point value.
            z_index = z.index(np.around(nodes[0, 2], 1))
            r, c = transform_to_array(nodes[:, 0], nodes[:, 1], slices)
            rr, cc = polygon(r, c)
            try:
                label[z_index, rr, cc] = 1
            except IndexError:
                print(f"IndexError for contour {con['name']} at {z_index}")

    # Computed once, outside the loop, so it is defined for empty input too.
    colors = tuple(np.array([con['color'] for con in contours]) / 255.0)
    return label, colors

# CONSTRUCT TRAINING DATA

In [None]:
def get_padding(pixel_array_volume, index, padding=2):
    """
    Return the slices from index - padding to index + padding inclusive.

    Near the start of the volume the window is cut off at slice 0 instead
    of letting a negative start wrap around to the end of the volume; near
    the end it is cut off by normal slicing. The result can therefore hold
    fewer than 2 * padding + 1 slices.
    """
    start = max(index - padding, 0)
    return pixel_array_volume[start:index + padding + 1]

In [None]:
def get_training_data(data, names=None):
    """
    Load one patient directory and build its image and mask volumes.

    data: path of the directory holding the patient's ".dcm" files.
    names: ROI names to rasterise into the mask; defaults to ["Vacbag"].

    Returns (dicom_series, images, structures, masks, colors), where
    dicom_series is sorted by the z component of ImagePositionPatient.

    Raises IndexError when the directory contains no structure set file.
    """
    if names is None:
        names = ["Vacbag"]
    # Use the argument, not a notebook-level variable, as the directory.
    file_names = list_files(data, ".dcm")
    dicom_files = read_dicom_files(file_names)
    dicom_series, dicom_structures, *rest = filter_dicom_files(dicom_files)
    dicom_series = add_transfer_syntax(dicom_series)
    dicom_series.sort(key=lambda x: float(x.ImagePositionPatient[2]))
    images = get_pixel_array(dicom_series)
    structures = read_structures(dicom_structures[0])
    masks, colors = get_binary_masks(structures, dicom_series, images, names)
    return dicom_series, images, structures, masks, colors

In [None]:
def shape_model_data(shape, images, masks=None):
    """
    Resize and normalise an image volume and, optionally, its mask volume.

    Masks go through the same resize and normalise steps as the images and
    are then rounded to the nearest integer. Returns (images, masks);
    masks is returned as None when none were given.
    """
    images = normalise_pixel_array_volume(resize_pixel_array(images, shape))
    if masks is None:
        return images, masks
    masks = normalise_pixel_array_volume(resize_pixel_array(masks, shape))
    return images, np.round(masks)

In [None]:
def build_model_data(images, masks):
    """
    Placeholder for turning images and masks into model input data.

    Not implemented yet: the previous body returned a name that was never
    defined, so calling it failed with NameError. The failure is now
    explicit.
    """
    raise NotImplementedError("build_model_data is not implemented yet")

# HELPER FUNCTIONS

In [None]:
def plot_pixel_array(pixel_array, index=None):
    """
    Quick viewer: show pixel_array itself, or pixel_array[index] when an
    index is given, using the "bone" colour map.
    """
    # TODO
    # Scale pixel intensity for CT
    frame = pixel_array if index is None else pixel_array[index]
    plt.imshow(frame, cmap=plt.cm.bone)
    plt.show()

In [None]:
def plot_model_data(images, masks, index=90):
    """
    Show nine consecutive slices, starting at index, in a 3x3 grid with
    the contour of the matching mask drawn over each image.

    images, masks: volumes indexed by slice along the first axis.
    index: first slice to show; slices index .. index + 8 must exist.
    """
    plt.figure(figsize=(15, 15))
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i + index], cmap="gray")
        plt.contour(masks[i + index])
        # Hide the axes of every panel, not only of the last one drawn.
        plt.axis('off')

In [None]:
def print_contour_data(structures):
    """
    Print one line per structure showing its ROI number (as an int) and
    its name.
    """
    for entry in structures:
        num, name = int(entry['number']), entry['name']
        print(f"structures[{num}] = {name}")

--------

# LOAD DATA FOR MODEL

In [None]:
# NOTE(review): absolute, machine-specific path; consider making it configurable.
data_root = "/home/matthew/proj/masters-project/slim_DATASET"
# One entry per patient directory under data_root.
data = list_files(data_root, None)
# In-plane size every slice is resized to.
shape = 64, 64

loaded_data = []

#good_data = [1, 2, 4, 6, 7, 8 , 9, 10, 16, 17, 20, 21]

for index, patient in enumerate(data[0:1]):
    print(f"LOADING: {index+1}/{len(data)}")
    print(f"FILE: {patient}")
    try:
        dicom_series, images, structures, masks, colors = get_training_data(patient)
    except IndexError:
        # Loading failed, so there is nothing valid to resize or store for
        # this patient. Skip it instead of falling through with undefined
        # (or the previous patient's) images and masks.
        print(f"SKIPPED (IndexError while loading): {patient}")
        continue
    images, masks = shape_model_data(shape, images, masks)
    loaded_data.append([images, masks])
    print(f"------- FILE COMPLETE ---------")
          


In [None]:
# Convert the list of per-patient [images, masks] pairs into one NumPy array.
# NOTE(review): assumes every patient volume has the same shape; otherwise
# the entries cannot form a regular array - TODO confirm once more than one
# patient is loaded.
loaded_data = np.array(loaded_data)

In [None]:
# Persist the prepared array to disk (np.save appends ".npy" to the name).
# NOTE(review): absolute, machine-specific output path.
np.save("/home/matthew/proj/masters-project/test_data_array_2", loaded_data)

In [None]:
# Display the shape of the stacked array as a quick sanity check.
loaded_data.shape

In [None]:
# Pick one patient from loaded_data and plot nine slices starting at index.
# NOTE(review): `patient` and `index` rebind the names used as loop
# variables in the loading cell; `images` and `masks` are rebound too.
patient = 0
index = 90
images = loaded_data[patient][0]  # entry 0 of a pair is the image volume
masks = loaded_data[patient][1]   # entry 1 of a pair is the mask volume
plot_model_data(images, masks, index)

In [None]:
# Show slice 90 of the mask volume selected above.
plt.imshow(masks[90])

In [None]:
# Show slice 90 of the image volume selected above.
plt.imshow(images[90])

# MODEL

In [None]:
#BATCH_SIZE = 64
#BUFFER_SIZE = 1000
#STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE

In [None]:
def conv2d_down(inputs, filters, stride_size, padding='SAME'):
    """
    2-D convolution followed by a leaky ReLU (alpha=0.2).

    inputs: input tensor passed to tf.nn.conv2d.
    filters: convolution kernel variable.
    stride_size: stride passed to tf.nn.conv2d.
    padding: padding mode passed to tf.nn.conv2d. It defaults to 'SAME'
        because model() concatenates decoder tensors with encoder tensors
        of the same level, which requires the convolutions to keep height
        and width. Previously this was read from a global `padding` that
        is not defined in the notebook.
    """
    out = tf.nn.conv2d(inputs, filters, strides=stride_size, padding=padding)
    return tf.nn.leaky_relu(out, alpha=0.2)

def maxpool_down(inputs, pool_size, stride_size):
    """
    Max-pool inputs with window pool_size and stride stride_size, using
    'VALID' padding.
    """
    pooled = tf.nn.max_pool(
        inputs,
        ksize=pool_size,
        strides=stride_size,
        padding='VALID',
    )
    return pooled

def conv2d_up(inputs, filters, stride_size, output_shape, padding='SAME'):
    """
    Transposed 2-D convolution followed by a leaky ReLU (alpha=0.2).

    inputs: input tensor passed to tf.nn.conv2d_transpose.
    filters: kernel variable for the transposed convolution.
    stride_size: stride passed to tf.nn.conv2d_transpose.
    output_shape: shape of the tensor the transposed convolution produces.
    padding: padding mode passed to tf.nn.conv2d_transpose. It defaults to
        'SAME', which with the stride of 1 used by model() keeps height
        and width as the output shapes there require. Previously this was
        read from a global `padding` that is not defined in the notebook.
    """
    out = tf.nn.conv2d_transpose(
        inputs, filters, output_shape=output_shape,
        strides=stride_size, padding=padding)
    return tf.nn.leaky_relu(out, alpha=0.2)

def maxpool_up(inputs, size):
    """
    Upsample inputs by the factor size with nearest-neighbour resizing.

    The new side length is computed from axis 1 of inputs only and is used
    for both output dimensions.
    """
    side = tf.shape(inputs)[1]
    scaled_side = tf.cast(tf.round(side * size), dtype=tf.int32)
    target = [scaled_side, scaled_side]
    return tf.image.resize(inputs, target, method='nearest')

In [None]:
# Shared Glorot-uniform initializer used for every kernel of the model.
initializer = tf.initializers.glorot_uniform()
def get_weight(shape, name):
    """
    Create a trainable tf.Variable called name, of the given shape,
    filled by the shared initializer.
    """
    initial_value = initializer(shape)
    return tf.Variable(initial_value, name=name, trainable=True)

# Kernel shapes for the 19 convolutions of model(), in the order they are used.
# Entries 0-9 feed tf.nn.conv2d (through conv2d_down), whose filter layout is
# [height, width, in_channels, out_channels]. Entries 10-17 feed
# tf.nn.conv2d_transpose (through conv2d_up), whose filter layout is
# [height, width, out_channels, in_channels]. Entry 18 is the final conv2d.
shapes = [
    # encoder level 1
    [ 3 , 3 , 3 , 16 ] , 
    [ 3 , 3 , 16 , 16 ] , 

    # encoder level 2
    [ 3 , 3 , 16 , 32 ] , 
    [ 3 , 3 , 32 , 32 ] ,

    # encoder level 3
    [ 3 , 3 , 32 , 64 ] , 
    [ 3 , 3 , 64 , 64 ] ,

    # encoder level 4
    [ 3 , 3 , 64 , 128 ] , 
    [ 3 , 3 , 128 , 128 ] ,

    # bottleneck
    [ 3 , 3 , 128 , 256 ] , 
    [ 3 , 3 , 256 , 256 ] ,

    # decoder level 4: input is upsampled bottleneck (256) + encoder level 4 (128)
    [ 3 , 3 , 128 , 384 ],
    [ 3 , 3 , 128 , 128 ],

    # decoder level 3: input is upsampled decoder (128) + encoder level 3 (64)
    [ 3 , 3 , 64 , 192 ],
    [ 3 , 3 , 64 , 64 ],

    # decoder level 2: input is upsampled decoder (64) + encoder level 2 (32)
    [ 3 , 3 , 32 , 96 ],
    [ 3 , 3 , 32 , 32 ],

    # decoder level 1: input is upsampled decoder (32) + encoder level 1 (16)
    [ 3 , 3 , 16 , 48 ],
    [ 3 , 3 , 16 , 16 ],

    # final 1x1 convolution down to a single output channel
    [ 1 , 1 , 16 , 1 ],
]

# One trainable kernel per entry of shapes, named weight0, weight1, ...
weights = [
    get_weight(filter_shape, 'weight{}'.format(position))
    for position, filter_shape in enumerate(shapes)
]

In [None]:
def model( x ) :
    """
    U-Net style encoder/decoder built from the module-level `weights`.

    x: input batch. Assumes shape (batch, height, width, 3) with height
       and width divisible by 16: weights[0] has 3 input channels and the
       encoder halves the spatial size four times - TODO confirm against
       the data fed in.

    Returns the sigmoid of the final 1x1 convolution, i.e. one channel
    with values in (0, 1) at the input resolution.

    The decoder output shapes are derived from the tensors themselves
    instead of being hard-coded for 128x128 input, so other input sizes
    (such as the 64x64 volumes prepared in this notebook) are accepted.
    """
    def same_size_shape( tensor , channels ):
        # A stride-1 transposed convolution with 'SAME' padding keeps batch,
        # height and width; only the channel count changes.
        dims = tf.shape( tensor )
        return [ dims[ 0 ] , dims[ 1 ] , dims[ 2 ] , channels ]

    x = tf.cast( x , dtype=tf.float32 )

    # Encoder: two convolutions per level, then 2x max pooling.
    c1 = conv2d_down( x , weights[ 0 ] , stride_size=1 )
    c1 = conv2d_down( c1 , weights[ 1 ] , stride_size=1 )
    p1 = maxpool_down( c1 , pool_size=2 , stride_size=2 )

    c2 = conv2d_down( p1 , weights[ 2 ] , stride_size=1 )
    c2 = conv2d_down( c2 , weights[ 3 ] , stride_size=1 )
    p2 = maxpool_down( c2 , pool_size=2 , stride_size=2 )

    c3 = conv2d_down( p2 , weights[ 4 ] , stride_size=1 )
    c3 = conv2d_down( c3 , weights[ 5 ] , stride_size=1 )
    p3 = maxpool_down( c3 , pool_size=2 , stride_size=2 )

    c4 = conv2d_down( p3 , weights[ 6 ] , stride_size=1 )
    c4 = conv2d_down( c4 , weights[ 7 ] , stride_size=1 )
    p4 = maxpool_down( c4 , pool_size=2 , stride_size=2 )

    # Bottleneck.
    c5 = conv2d_down( p4 , weights[ 8 ] , stride_size=1 )
    c5 = conv2d_down( c5 , weights[ 9 ] , stride_size=1 )

    # Decoder: 2x upsampling, skip connection, two transposed convolutions.
    p5 = maxpool_up( c5 , 2 )
    concat_1 = tf.concat( [ p5 , c4 ] , axis=-1 )
    c6 = conv2d_up( concat_1 , weights[ 10 ] , stride_size=1 , output_shape=same_size_shape( concat_1 , 128 ) )
    c6 = conv2d_up( c6 , weights[ 11 ] , stride_size=1 , output_shape=same_size_shape( c6 , 128 ) )

    p6 = maxpool_up( c6 , 2 )
    concat_2 = tf.concat( [ p6 , c3 ] , axis=-1 )
    c7 = conv2d_up( concat_2 , weights[ 12 ] , stride_size=1 , output_shape=same_size_shape( concat_2 , 64 ) )
    c7 = conv2d_up( c7 , weights[ 13 ] , stride_size=1 , output_shape=same_size_shape( c7 , 64 ) )

    p7 = maxpool_up( c7 , 2 )
    concat_3 = tf.concat( [ p7 , c2 ] , axis=-1 )
    c8 = conv2d_up( concat_3 , weights[ 14 ] , stride_size=1 , output_shape=same_size_shape( concat_3 , 32 ) )
    c8 = conv2d_up( c8 , weights[ 15 ] , stride_size=1 , output_shape=same_size_shape( c8 , 32 ) )

    p8 = maxpool_up( c8 , 2 )
    concat_4 = tf.concat( [ p8 , c1 ] , axis=-1 )
    c9 = conv2d_up( concat_4 , weights[ 16 ] , stride_size=1 , output_shape=same_size_shape( concat_4 , 16 ) )
    c9 = conv2d_up( c9 , weights[ 17 ] , stride_size=1 , output_shape=same_size_shape( c9 , 16 ) )

    # 'SAME' is spelled out here: the notebook never defines the global
    # `padding` this call used to read.
    output = tf.nn.conv2d( c9 , weights[ 18 ] , strides=[ 1 , 1 , 1 , 1 ] , padding='SAME' )
    outputs = tf.nn.sigmoid( output )
    return outputs

In [None]:
def loss(pred, target):
    """
    Binary cross-entropy between the predictions pred and the labels target.
    """
    return tf.losses.binary_crossentropy(y_true=target, y_pred=pred)

# Step size for the Adam optimizer that train() uses to update `weights`.
learning_rate = 0.001
optimizer = tf.optimizers.Adam( learning_rate=learning_rate )

def train(model, inputs, outputs):
    """
    Run one optimisation step: evaluate the loss of model(inputs) against
    outputs, apply its gradients to the module-level `weights` with the
    module-level `optimizer`, and print the mean loss.
    """
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        step_loss = loss(predictions, outputs)
    gradients = tape.gradient(step_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
    print(tf.reduce_mean(step_loss))

In [None]:
num_epochs = 25  #@param {type: "number"}

for e in range( num_epochs ):
    print( 'Epoch {} out of {} {}'.format( e + 1 , num_epochs , '--' * 50 ) )
    for patient in loaded_data:
        # `patient` is an array, so range(patient) raised TypeError;
        # report its length directly.
        print(len(patient))
        # TODO: train on every (image, label) pair of the patient, e.g.
        #     for image, label in zip(patient[0], patient[1]):
        #         train( model , image , label )
        # NOTE(review): the slices still need batch and channel dimensions
        # matching what model() expects before this can be enabled.
        
#    patient = 0
#    index = 90
#    images = loaded_data[patient][0]
#    masks = loaded_data[patient][1]

In [None]:
# Number of patients available for training. The previous scratch
# expression, len(range()), always raised TypeError because range()
# needs at least one argument.
len(loaded_data)