In [None]:
#How to structure data for script

#.
#└── DATA_PATH
#    ├── FOLDER1
#    │   ├── CT_FILE_1.dcm
#    │   ├── CT_FILE_2.dcm
#         ......
#    │   ├── RP_FILE.dcm
#    │   └── RS_FILE.dcm
#    ├── FOLDER2
#    │   ├── CT_FILE_1.dcm
#         ...... etc

In [None]:
import importlib

import numpy as np

import matplotlib.pyplot as plt

import skimage.transform

import skimage.draw

import dicom_process

import helpers

# Reload the project modules so that edits made to them on disk are picked up
# when this cell is re-run, without restarting the kernel.
importlib.reload(dicom_process)
importlib.reload(helpers)

# Default matplotlib figure size used by the plots in this notebook.
plt.rcParams['figure.figsize'] = [6, 6]

# Choose case:

In [None]:
# Vet case
# NOTE(review): absolute, machine-specific path - point this at the local
# copy of the dataset before running.
DATA_PATH = "/home/matthew/proj/masters-project/pymedphys-segmentation/open_datasets/VET_DATASET_CLEAN/"
# Structure masks to be pulled
# (passed to dicom_process.clean_structures in the manual section of this notebook)
# NOTE(review): "Vacbag" and "vacbag" are presumably case variants of the same
# structure across patients - confirm.
STRUCTURE_NAMES = ["patient", "Vacbag", "vacbag", "Couch Foam Half Couch", "Couch Outer Half Couch", "Couch Edge"]

In [None]:
# Prostate case
#DATA_PATH = "/home/matthew/priv/PROSTATE_TEST/"
# Structure masks to be pulled
#STRUCTURE_NAMES = ["patient", "RT HOF", "LT HOF", "BLADDER", "RECTUM", "Couch Foam Half Couch", "Couch Outer Half Couch", "Couch Edge"]

In [None]:
# Resize images
# Target size handed to dicom_process.get_input_data and
# dicom_process.resize_pixel_array.
SIZE = 64, 64
# Number of additional context image slices for each (image, label) input
# ie. input_instance = image +- CONTEXT neighbouring images
# (so one input instance spans 2 * CONTEXT + 1 slices)
CONTEXT = 10

# For viewing:

In [None]:
# Choose a patient folder from data_folders
# NOTE: the automated loading cell truncates data_folders to its first entry,
# so only index 0 is valid there unless that limit is removed.
FOLDER_INDEX = 0
# Choose a z slice index
SLICE_INDEX = 100

# Automated data processing overview:
### NOTE: See # LIMIT TO ONLY SOME FOLDERS TO MAKE EXAMPLE QUICKER (below)

#### [ ] TODO: Have not added cleaning structures (via STRUCTURE_NAMES) to automated process - only manual (below automated)
#### [ ] TODO: Only returns one structure but an easy fix by appending
#### [ ] TODO Account for fact that multiple ROIs per xyz index
#### [ ] TODO: Save data for each case when building training data, as not all of it will fit in memory
#### NOTE: These TODOs are outlined in the manual section (below)



In [None]:
# Load the model input data for every patient folder under DATA_PATH.
data_folders = dicom_process.list_files(DATA_PATH, None)

# LIMIT TO ONLY SOME FOLDERS TO MAKE EXAMPLE QUICKER
data_folders = data_folders[0:1]

# One [images, labels, colors, structures] entry per successfully loaded folder.
loaded_data = []
# Folders that loaded successfully, kept in step with loaded_data.
loaded_folders = []

for index, folder in enumerate(data_folders):
    print("----------------")
    print(f"LOADING: {index+1}/{len(data_folders)}")
    print(f"{folder}")
    try:
        images, labels, colors, structures = dicom_process.get_input_data(folder, SIZE, CONTEXT)
    except IndexError:
        # Skip this folder. Falling through to the append below would raise a
        # NameError on the first folder, or silently store the previous
        # folder's arrays a second time.
        print("WARNING: IndexError")
        continue

    # probably want to get context and then save each (input_instance, label)
    # set in folder, as the total data in all folders in data_folders is too
    # large for memory
    loaded_data.append([images, labels, colors, structures])
    loaded_folders.append(folder)

# Drop skipped folders so that data_folders[i] still describes loaded_data[i];
# later cells index both lists with the same position.
data_folders = loaded_folders

print("================")
print("LOADING COMPLETE")

In [None]:
# Plot the images and labels of the folder selected by FOLDER_INDEX.
print(f"Folder: {data_folders[FOLDER_INDEX]}")
# Each loaded_data entry is [images, labels, colors, structures]; only the
# first two are needed here, the remainder is collected in `rest`.
images, labels, *rest = loaded_data[FOLDER_INDEX]
helpers.plot_model_data(images, labels, index=SLICE_INDEX, slices=9, corners=False)

In [None]:
# So far this only contains one label - it needs reworking so that all labels
# are added (see the TODO list for the automated process).
print(len(labels))
print(labels.shape)

## Let's examine the structure labels in our data

In [None]:
# Print the structures found in every loaded folder.
for index, data in enumerate(loaded_data):
    # Position 3 of each loaded_data entry holds that folder's structures.
    structures = data[3]
    print(data_folders[index])
    helpers.print_structures(structures)
    print("----------------------------")
    

--------------------------------------------
<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br>

# Manual data processing overview

In [None]:
# Load for a single folder in data_folder - FOLDER_INDEX
# (DATA_PATH is listed again here, so this is not limited to the truncated
# data_folders list built in the automated section)

folder = dicom_process.list_files(DATA_PATH, None)[FOLDER_INDEX]

file_names = dicom_process.list_files(folder, ".dcm")

dicom_files = dicom_process.read_dicom_files(file_names)
# Only the first two groups returned by the filter are used in this notebook;
# anything else is collected in `rest`.
dicom_series, dicom_structures, *rest = dicom_process.filter_dicom_files(dicom_files)
dicom_series = dicom_process.add_transfer_syntax(dicom_series)
# Order the slices by ascending z position (third value of ImagePositionPatient).
dicom_series.sort(key=lambda x: float(x.ImagePositionPatient[2]))

## Some basic data to examine

In [None]:
# Inspect the first slice of the sorted series (the one with the lowest z).
helpers.print_dicom_slice_data(dicom_series[0])
# np.uint16 - Unsigned integer (0 to 65535)

In [None]:
# z position (third value of ImagePositionPatient) of every slice, in series order.
z_slice_locations = np.array([dicom.ImagePositionPatient[2] for dicom in dicom_series])
print(z_slice_locations)

In [None]:
# moving slice locations from patient space to pixel space (image space)
# NOTE(review): dividing by SliceThickness assumes the distance between
# adjacent slices equals the slice thickness - confirm for this dataset.
((z_slice_locations - np.min(z_slice_locations)) / dicom_series[0].SliceThickness)

## Images

In [None]:
# Pixel data for the sorted series; later cells index it per slice
# (e.g. images[SLICE_INDEX]).
images = dicom_process.get_pixel_array(dicom_series)

In [None]:
# View single image data
# Plots the slice at SLICE_INDEX, then prints the value range of the whole volume.
helpers.plot_pixel_array(images[SLICE_INDEX])
helpers.print_range(images)

In [None]:
# View single image data - resized
# The volume is resized using SIZE from the configuration cell; the original
# `images` is kept untouched.
images_resized = dicom_process.resize_pixel_array(images, SIZE)
helpers.plot_pixel_array(images_resized[SLICE_INDEX])
helpers.print_range(images_resized)

In [None]:
# View single image data - resized and normalised
# Normalisation is applied to the resized volume; compare the printed range
# with the one printed for images_resized.
images_resized_normal = dicom_process.normalise_pixel_array_volume(images_resized)
helpers.plot_pixel_array(images_resized_normal[SLICE_INDEX])
helpers.print_range(images_resized_normal)

## Structures

In [None]:
# Structures from a folder in data_folder
# Note dicom_structures is returned as a list, like dicom_series;
# to get the first RS dicom file found, use the first index: dicom_structures[0]
structures = dicom_process.read_structures(dicom_structures[0])
helpers.print_structures(structures)

In [None]:
# Clean the structures using the STRUCTURE_NAMES chosen in the configuration
# cell, then print them again for comparison with the list printed before.
structures = dicom_process.clean_structures(structures, STRUCTURE_NAMES)
helpers.print_structures(structures)

In [None]:
# How the structure dicts work: each entry of `structures` is a dict.
# Show the keys available on the first one.
patient = structures[0]
print(patient.keys())

In [None]:
# extracting contour points
patient_xyz = patient['contour_points']
# The first contour is a flat sequence of coordinates; reshape it into one
# (x, y, z) row per point.
z_slice_contour_data = patient_xyz[0]
xyz_points = np.array(z_slice_contour_data).reshape((-1, 3))
print(xyz_points[0:10])

In [None]:
# Build the label masks for the cleaned structures with the library
# implementation in dicom_process.
labels = dicom_process.get_binary_masks(structures, dicom_series, images)

In [None]:
# Plot the manually processed images and labels at SLICE_INDEX.
helpers.plot_model_data(images, labels, SLICE_INDEX)

--------------------
<br><br><br><br><br>
# Below is code in progress


#### [ ] TODO Mask that includes all labels
#### [ ] TODO Account for fact that multiple ROIs per xyz index

In [None]:
def transform_to_array(x, y, dicom_series):
    """
    Transform contour coordinates from patient space to pixel space.

    Parameters
    ----------
    x, y : array-like
        Patient-space x and y coordinates of the points to convert.
    dicom_series : sequence
        Image slices. Only the first slice is read, for its
        ImagePositionPatient, PixelSpacing and ImageOrientationPatient.

    Returns
    -------
    r, c : numpy.ndarray
        Row and column coordinates in pixel space. The values are not rounded.
    """
    first_slice = dicom_series[0]
    translation = first_slice.ImagePositionPatient
    scale = first_slice.PixelSpacing
    orientation = first_slice.ImageOrientationPatient
    x = np.array(x)
    y = np.array(y)

    # DICOM orders PixelSpacing as (row spacing, column spacing): the first
    # value is the distance between adjacent rows, the second the distance
    # between adjacent columns. The two only differ for non-square pixels.
    row_spacing = scale[0]
    column_spacing = scale[1]

    # NOTE Only handles +-1 cosines
    # A more robust method that handles intermediate angles
    # was attempted however the affine matrix was singular
    # See: http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.2.html#sect_C.7.6.2.1.1
    # See: https://dicomiseasy.blogspot.com/2013/06/getting-oriented-using-image-plane.html
    # orientation[4] is the y component of the column direction (moving down
    # the rows); orientation[0] is the x component of the row direction
    # (moving across the columns).
    r = (y - translation[1]) / row_spacing * orientation[4]
    c = (x - translation[0]) / column_spacing * orientation[0]

    return r, c

In [None]:
def get_binary_masks(structures, dicom_series):
    """
    Work-in-progress stub for building binary masks inside the notebook.

    For every slice index of ``dicom_series`` the contour stored at that
    index is looked up on each structure. Nothing is rasterised yet and the
    function always returns None; a structure with no entry at a given slice
    index makes the lookup raise.
    """
    slice_total = len(dicom_series)
    for z_index in range(slice_total):
        for roi in structures:
            # Lookup only - the contour is not used until the steps below
            # are implemented.
            _contour = roi['contour_points'][z_index]
            # TODO: r, c = dicom_process.transform_to_array(x, y, dicom_series)
            # TODO: rr, cc = skimage.draw.polygon(r, c)
    return None

In [None]:
# Run the in-progress notebook-local get_binary_masks; it currently returns None.
get_binary_masks(structures, dicom_series)

In [None]:
# For the second structure, print the z value of the first point (third value
# of the flat coordinate list) of the contour stored at each slice index.
# NOTE(review): indexing contour_points by slice index assumes there is a
# contour entry for every slice - confirm.
for slice_index in range(len(dicom_series)):
    print(slice_index, structures[1]['contour_points'][slice_index][2])

In [None]:
# Rasterise contour 10 of the second structure into a binary mask.
# The contour is a flat x, y, z, x, y, z, ... sequence, hence the stride of 3.
x = structures[1]['contour_points'][10][0::3]
y = structures[1]['contour_points'][10][1::3]
z = structures[1]['contour_points'][10][2::3]
print(z[0])
r, c = transform_to_array(x, y, dicom_series)
# NOTE(review): the 512 x 512 mask size is hard-coded - presumably the size of
# the full-resolution slices; confirm.
img1 = np.zeros((512, 512), dtype=np.uint16)
# Pass the mask shape so polygon pixels falling outside the image are dropped
# instead of raising an IndexError (or wrapping round via negative indices).
rr, cc = skimage.draw.polygon(r, c, shape=img1.shape)
img1[rr, cc] = 1
plt.imshow(img1)

In [None]:
# Rasterise contour 11 of the second structure into a binary mask.
# The contour is a flat x, y, z, x, y, z, ... sequence, hence the stride of 3.
x = structures[1]['contour_points'][11][0::3]
y = structures[1]['contour_points'][11][1::3]
z = structures[1]['contour_points'][11][2::3]
print(z[0])
r, c = transform_to_array(x, y, dicom_series)
# NOTE(review): the 512 x 512 mask size is hard-coded - presumably the size of
# the full-resolution slices; confirm.
img2 = np.zeros((512, 512), dtype=np.uint16)
# Pass the mask shape so polygon pixels falling outside the image are dropped
# instead of raising an IndexError (or wrapping round via negative indices).
rr, cc = skimage.draw.polygon(r, c, shape=img2.shape)
img2[rr, cc] = 1

In [None]:
# Overlay the two rasterised contour masks on a single image slice.
# NOTE(review): slice 85 is hard-coded - presumably the slice whose z position
# matches the contours printed above; compare with the value printed below.
index = 85
print(dicom_series[index].ImagePositionPatient[2])
plt.imshow(images[index], cmap="gray")
plt.contour(img1, colors='red')
plt.contour(img2, colors='blue')

In [None]:
# Use the library implementation: the notebook-local get_binary_masks is still
# a two-argument stub that returns None, so calling it with three arguments
# raises a TypeError and there would be no `.shape` to print.
labels = dicom_process.get_binary_masks(structures, dicom_series, images)
print(labels.shape)
helpers.plot_model_data(images, labels, SLICE_INDEX-10)

--------------------
#### [ ]  TODO Getting context and saving input instances to load later into model

In [None]:
def get_context(pixel_array_volume, index, context=10):
    """
    Return the slice at ``index`` together with its neighbouring slices.

    Parameters
    ----------
    pixel_array_volume : sequence or numpy.ndarray
        Volume sliced along its first axis.
    index : int
        Non-negative position of the central slice.
    context : int, optional
        Number of neighbouring slices wanted on each side of ``index``.

    Returns
    -------
    The window ``pixel_array_volume[index - context : index + context + 1]``,
    truncated at either end of the volume. A full window holds
    ``2 * context + 1`` slices; windows near the ends are shorter, which
    callers can use to detect missing context.
    """
    # Clamp the start: a negative start would count from the end of the
    # volume and give an empty or unrelated window instead of a truncated one.
    start = max(index - context, 0)
    stop = index + context + 1
    return pixel_array_volume[start:stop]

In [None]:
# Intended destination for the saved instances.
# NOTE: not used yet - nothing is written to disk in this cell, and each
# iteration overwrites the previous data_instance.
OUTPUT_PATH = "/home/matthew/proj/masters-project/pymedphys-segmentation/open_datasets/model_data/"
# Use the notebook-wide CONTEXT setting rather than a second hard-coded value,
# so these instances stay consistent with the automated loading.
context = CONTEXT
for index in range(len(images)):
    temp = get_context(images, index, context)
    # skip those that don't have enough padding
    # this is a quick and dirty hack to be fixed
    if temp.shape[0] != 1 + 2 * context:
        continue
    input_instance = temp
    label_instance = labels[index]
    data_instance = [input_instance, label_instance]

In [None]:
# Show the label of the last instance built by the loop in the previous cell.
plt.imshow(label_instance)