In [None]:
import pathlib
import json
import shutil

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

from IPython import display

import pydicom

In [None]:
# Ensures that any changes made to pymedphys are automatically
# propagated into the notebook without needing a kernel reset.
from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

In [None]:
import pymedphys
from pymedphys.labs.autosegmentation import pipeline, filtering, indexing, mask, tfrecord
from pymedphys._data import zenodo

In [None]:
def dataset_from_zenodo_download(
    record_name, ct_uids, structures_to_learn
):
    """Build a ``tf.data.Dataset`` backed by per-CT ``.npz`` files on Zenodo.

    Parameters
    ----------
    record_name
        Zenodo record name, forwarded to ``pymedphys.zenodo_data_paths``.
    ct_uids
        Iterable of CT UIDs. For each one the file ``<ct_uid>.npz`` is
        requested from the record.
    structures_to_learn
        Only its length is used here: it sets the size of the last axis
        in the declared shape of the output array.

    Returns
    -------
    tf.data.Dataset
        Yields ``(ct_uid, x_grid, y_grid, input_array, output_array)``
        tuples with the dtypes and shapes declared in ``parameters``
        below.

    Notes
    -----
    The npz files are looked up inside the generator, so nothing is
    requested until the generator is actually run.
    """
    def generator():
        for ct_uid in ct_uids:
            # The ``f`` prefix must sit outside the quotes so that the
            # UID is interpolated into the file name.
            npz_path = pymedphys.zenodo_data_paths(
                record_name,
                filenames=[f"{ct_uid}.npz"]
            )[0]

            # Use the archive as a context manager so that its file
            # handle is closed once the arrays have been read out.
            with np.load(npz_path) as data:
                x_grid = data["x_grid"]
                y_grid = data["y_grid"]
                input_array = data["input_array"]
                output_array = data["output_array"]

            # Append a trailing axis of length one to match the declared
            # [512, 512, 1] input shape.
            input_array = input_array[:, :, None]

            yield ct_uid, x_grid, y_grid, input_array, output_array

    # (output types, output shapes), in the same order as the yielded tuple.
    parameters = (
        (tf.string, tf.float64, tf.float64, tf.int32, tf.float64),
        (
            tf.TensorShape(()),
            tf.TensorShape([512]),
            tf.TensorShape([512]),
            tf.TensorShape([512, 512, 1]),
            tf.TensorShape([512, 512, len(structures_to_learn)]),
        ),
    )

    dataset = tf.data.Dataset.from_generator(generator, *parameters)

    return dataset

In [None]:
def datasets_from_zenodo_download(
    record_name
):
    """Create one dataset per training type for a Zenodo record.

    Two configuration files are fetched from the record and parsed as
    JSON: the CT UIDs grouped by training type, and the list of
    structures to learn. Each group of UIDs is then handed to
    ``dataset_from_zenodo_download``.

    NOTE(review): the requested file names end in ``.zip`` yet the
    returned paths are read with ``json.load``; presumably
    ``pymedphys.zenodo_data_paths`` returns the extracted JSON files.
    TODO confirm.

    Parameters
    ----------
    record_name
        Zenodo record name, forwarded to ``pymedphys.zenodo_data_paths``.

    Returns
    -------
    dict
        Maps each training type to the dataset built from its CT UIDs.
    """
    config_file_paths = pymedphys.zenodo_data_paths(
        record_name,
        filenames=['ct_uids_by_training_type.zip', 'structures_to_learn.zip'],
    )

    # Parsed JSON content, keyed by the stem of the file it came from.
    loaded = {}
    for config_path in config_file_paths:
        with open(config_path) as handle:
            loaded[config_path.stem] = json.load(handle)

    uids_per_type = loaded['ct_uids_by_training_type']
    structures = loaded['structures_to_learn']

    return {
        training_type: dataset_from_zenodo_download(
            record_name, ct_uids, structures
        )
        for training_type, ct_uids in uids_per_type.items()
    }

In [None]:
# Zenodo record queried for the configuration files and the
# "<ct_uid>.npz" files.
record_name = "auto-segmentation-eye-lens-patient-npz"
# Dict with one dataset per training type listed in the record's
# configuration.
datasets = datasets_from_zenodo_download(record_name)

In [None]:
# Display the training type -> dataset mapping.
datasets

In [None]:



# NOTE(review): everything below `urls` repeats the first half of
# datasets_from_zenodo_download at notebook level; the names defined
# here are used by later cells.

# Indexed by "<uid>.npz" in a later cell; presumably maps file names to
# their download URLs -- TODO confirm.
urls = zenodo.get_zenodo_file_urls(record_name)
filenames_to_download = ['ct_uids_by_training_type.zip', 'structures_to_learn.zip']

configuration_paths = pymedphys.zenodo_data_paths(
    record_name, 
    filenames=filenames_to_download)

# Parsed JSON content, keyed by the stem of the file it was read from.
configurations = {}
for path in configuration_paths:
    with open(path) as f:
        configurations[path.stem] = json.load(f)
        
ct_uids_by_training_type = configurations['ct_uids_by_training_type']
structures_to_learn = configurations['structures_to_learn']

In [None]:
# For every CT UID in the 'training' group, look up the `urls` entry of
# its "<uid>.npz" file.
download_urls = {
    uid: urls[f"{uid}.npz"] for uid in ct_uids_by_training_type['training']
}

download_urls
    

In [None]:
# ct_uids_by_training_type

In [None]:
# ct_uids_by_training_type

In [None]:
# Short key -> file name of each configuration file to download.
configuration_files = {
    'training_types': 'ct_uids_by_training_type.zip',
    'structures_to_learn': 'structures_to_learn.zip',
}

# Just the file names, in the insertion order of the dict above.
filenames_to_download = list(configuration_files.values())
filenames_to_download

In [None]:
# Fetch the paths of the configuration files listed in
# `filenames_to_download`.
# NOTE(review): the record name is repeated as a literal here; it is the
# same string that an earlier cell assigns to `record_name`.
paths = pymedphys.zenodo_data_paths(
    'auto-segmentation-eye-lens-patient-npz', 
    filenames=filenames_to_download)

In [None]:
# Pair each configuration key with the downloaded path whose stem
# matches the stem of the requested file name.
configuration_paths = {}

for key, filename in configuration_files.items():
    wanted_stem = pathlib.Path(filename).stem
    matches = [path for path in paths if path.stem == wanted_stem]
    # The first match is taken; an IndexError is raised if there is none.
    configuration_paths[key] = matches[0]

configuration_paths

In [None]:
# Display the file name of the first returned path.
paths[0].name

In [None]:
# Copy the files of the "auto-segmentation" record into the validation,
# testing and training directories, plus the name mappings file.
# NOTE(review): `validation_directory`, `testing_directory`,
# `training_directory` and `name_mappings_path` are not defined in any
# cell visible here; on a fresh kernel this cell raises NameError unless
# they are defined elsewhere.
dicom_paths = pymedphys.zenodo_data_paths("auto-segmentation")

for path in dicom_paths:
    if path.suffix == '.dcm':
        # The name of the parent directory is parsed as an integer and
        # decides which split the file belongs to.
        dataset_id = path.parent.name
        # Keep only "<parent directory>/<file name>" for the destination.
        parent_and_file = path.parts[-2::]

        # Ids below 4 -> validation, 4 to 11 -> testing, 12 and up -> training.
        if int(dataset_id) < 4:
            new_path = validation_directory.joinpath(*parent_and_file)
        elif int(dataset_id) < 12:
            new_path = testing_directory.joinpath(*parent_and_file)
        else:
            new_path = training_directory.joinpath(*parent_and_file)

    elif path.name == 'name_mappings.json':
        new_path = name_mappings_path
        
    else:
        # Anything other than a .dcm file or the mappings file is an error.
        raise ValueError(f"Unexpected file found. {path}.")
        
    # Files that already exist at the destination are not copied again.
    if not new_path.exists():
        new_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(path, new_path)

In [None]:
# The following names_map is used to standardise the structure names.
# NOTE(review): `name_mappings_path` is not defined in any cell visible
# here -- confirm where it is defined.
names_map = filtering.load_names_mapping(name_mappings_path)

In [None]:
# Masks are created for these structures, in exactly this order.
structures_to_learn = [
    'lens_left',
    'lens_right',
    'eye_left',
    'eye_right',
    'patient',
]

# Criteria used to filter the slices that go into training, validation,
# and testing.
filters = {
    "study_set_must_have_all_of": structures_to_learn,
    # Every structure to learn except 'patient'.
    "slice_at_least_one_of": [
        name for name in structures_to_learn if name != 'patient'
    ],
    "slice_must_have": ['patient'],
    "slice_cannot_have": [],
}

In [None]:
# Unpack the four values returned by the UID cache for `data_path_root`.
# NOTE(review): `data_path_root` is not defined in any cell visible
# here -- confirm where it is defined.
(
    ct_image_paths,
    structure_set_paths,
    ct_uid_to_structure_uid,
    structure_uid_to_ct_uids,
) = indexing.get_uid_cache(data_path_root)

In [None]:
# Unpack the two structure-name lookups returned for the structure set
# paths; `names_map` is passed in as well.
(
    structure_names_by_ct_uid,
    structure_names_by_structure_set_uid,
) = indexing.get_cached_structure_names_by_uids(
    data_path_root, structure_set_paths, names_map
)

In [None]:
# Build the datasets from the files under `data_path_root`.
# NOTE(review): this rebinds `datasets`, which an earlier cell assigned
# from datasets_from_zenodo_download(record_name).
datasets = pipeline.create_datasets(
    data_path_root, names_map, structures_to_learn, filters)

In [None]:
# Create all npz files and build ct_uid to training type map
# NOTE(review): nothing in this cell writes npz files itself; presumably
# iterating the datasets triggers their creation -- TODO confirm.

ct_uid_to_training_type = {}
ct_uids_by_training_type = {}

for dataset_type, dataset in datasets.items():
    for ct_uid, x_grid, y_grid, input_array, output_array in dataset:
        # Each UID is converted to a plain Python string.
        ct_uid = ct_uid.numpy().decode()
        ct_uid_to_training_type[ct_uid] = dataset_type
        # setdefault creates the list on first sight of a dataset type
        # AND appends to it, so the first UID of each type is recorded
        # too.
        ct_uids_by_training_type.setdefault(dataset_type, []).append(ct_uid)


ct_uids_by_training_type

In [None]:
# Write the list of structures to learn to
# "structures_to_learn.json" under `data_path_root`.
structures_to_learn_path = data_path_root.joinpath("structures_to_learn.json")
with open(structures_to_learn_path, "w") as f:
    json.dump(structures_to_learn, f)

In [None]:
# Write the training type -> CT UIDs mapping to
# "ct_uids_by_training_type.json" under `data_path_root`.
ct_uids_by_training_type_path = data_path_root.joinpath("ct_uids_by_training_type.json")

with open(ct_uids_by_training_type_path, "w") as f:
    json.dump(ct_uids_by_training_type, f)