In [None]:
import pathlib

In [None]:
# Make any changes to the pymedphys source propagate into this notebook
# automatically, without needing a kernel reset.
# NOTE(review): `reload` is not referenced in any of the cells visible
# here -- confirm whether this import is still needed.
from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

In [None]:
from pymedphys.labs.autosegmentation import pipeline, filtering

In [None]:
# Root data directory. All of the DICOM data goes within a directory
# called 'dicom' in here, organised by 'training', 'validation', and
# 'testing'.
data_path_root = pathlib.Path.home() / '.data/dicom-ct-and-structures'

# Beyond sitting somewhere within one of the three 'training',
# 'validation', or 'testing' directories, the DICOM files need no
# further organisation. Grouping them into per-patient directories is
# allowed, but it is not a requirement.

In [None]:
# names_map is used to standardise the structure names. It is loaded
# from 'name_mappings.json', which is given as a relative path
# (presumably resolved against the notebook's working directory --
# TODO confirm).
names_map = filtering.load_names_mapping('name_mappings.json')

In [None]:
# Masks are created for these structures, in exactly this order.
structures_to_learn = [
    'lens_left',
    'lens_right',
    'eye_left',
    'eye_right',
    'patient',
]

# Rules used to filter the slices that end up in the training,
# validation, and testing data. Note that the first entry refers to the
# very same list object as `structures_to_learn`, not a copy of it.
filters = {
    "study_set_must_have_all_of": structures_to_learn,
    "slice_at_least_one_of": [
        'lens_left',
        'lens_right',
        'eye_left',
        'eye_right',
    ],
    "slice_must_have": ['patient'],
    "slice_cannot_have": [],
}

In [None]:
# Build the datasets from the DICOM root directory, the structure name
# mapping, the list of structures to learn, and the slice filters. The
# result is indexed by 'training', 'validation', and 'testing' in the
# cells that follow.
datasets = pipeline.create_datasets(
    data_path_root,
    names_map,
    structures_to_learn,
    filters,
)

In [None]:
# Print the ct_uid of every item yielded by take(15) on the training
# dataset, as a quick check of what ended up in this split.
training_preview = datasets['training'].take(15)
for ct_uid, x_grid, y_grid, input_array, output_array in training_preview:
    print(ct_uid)

In [None]:
# Print the ct_uid of every item yielded by take(15) on the validation
# dataset, as a quick check of what ended up in this split.
validation_preview = datasets['validation'].take(15)
for ct_uid, x_grid, y_grid, input_array, output_array in validation_preview:
    print(ct_uid)

In [None]:
# Print the ct_uid of every item yielded by take(15) on the testing
# dataset, as a quick check of what ended up in this split.
testing_preview = datasets['testing'].take(15)
for ct_uid, x_grid, y_grid, input_array, output_array in testing_preview:
    print(ct_uid)