Create an AMOS dataset.

AMOS contains both CT and MRI scans, so there are several options: some tasks use only MRI, others use only CT, and others use all of the data.

In [1]:
# standard stuff 
import os 
import sys
import nibabel as nib
import numpy as np
import shutil
import random
from collections import OrderedDict
import json

# bespoke stuff
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from util import management as mana
from util import preprocess as pre
from util import constants as con

##### Set task specifics
task = con.TASK_700
# Modality filters: set at most one of these to True; leave both False to use CT and MRI together.
only_MRI = False
only_CT = False
if only_MRI and only_CT:
    # The two filters contradict each other; without this check only_MRI would silently win.
    raise ValueError("only_MRI and only_CT are mutually exclusive; set at most one of them to True")
# Forwarded to pre.create_images below -- presumably the fraction of cases held out for testing (TODO confirm).
test_perc = 0.15
# Label id -> organ name.
# NOTE(review): "arota" looks like a misspelling of "aorta"; it is kept unchanged because it may
# mirror the label names shipped with the dataset -- confirm before changing.
organ_labels =  {"0": "background", "1": "spleen", "2": "right kidney", "3": "left kidney", "4": "gall bladder",
          "5": "esophagus", "6": "liver", "7": "stomach", "8": "arota", "9": "postcava", "10": "pancreas", 
           "11": "right adrenal gland", "12": "left adrenal gland", "13": "duodenum", "14": "bladder", "15": "prostate/uterus"}
# Same mapping, used for dataset.json; derived from organ_labels so the two can never drift apart.
organ_labels_json = dict(organ_labels)
# Channel id -> modality name, matching the active filter.
if only_MRI:
    modality = {"0": "MRI"}
elif only_CT:
    modality = {"0": "CT"}
else:
    modality = {"0": "CT", "1": "MRI"}

Set paths and define some useful functions.

In [2]:
# Set paths
TASK_NAME = f'Task{task}' 
# Root folder of the project data. The default is the original author's location; set the
# AMOS_BASE_DIR environment variable to run the notebook on another machine without editing it.
BASE_DIR = os.environ.get("AMOS_BASE_DIR", "C:/Users/ikke_/OneDrive/Documenten/Thesis")
# Source data (AMOS22) and the nnU-Net raw-data tree the task is written into.
AMOS_DIR = f'{BASE_DIR}/Data/amos/AMOS22'
DATA_DIR = f"{BASE_DIR}/Data/nnUNet_raw_data_base"
TASK_DIR = f"{DATA_DIR}/nnUNet_raw_data/{TASK_NAME}"
# Train/test image and label folders inside the task folder.
TRAIN_DATA_DIR = f"{TASK_DIR}/imagesTr"
TRAIN_LABEL_DIR = f"{TASK_DIR}/labelsTr"
TEST_DATA_DIR = f"{TASK_DIR}/imagesTs"
TEST_LABEL_DIR = f"{TASK_DIR}/labelsTs"

def is_mri(file_name):
    '''Return True when the file name marks the scan as MRI.

    A scan is treated as MRI when its base name contains "_05" or "_06"
    (presumably the AMOS case-id ranges that hold the MRI scans -- TODO confirm).

    Only the last path component is inspected, so a directory whose name happens
    to contain one of these markers does not turn every file below it into "MRI".

    file_name: a bare file name or a full path to a scan/label file.
    '''
    base_name = os.path.basename(file_name)
    return "_05" in base_name or "_06" in base_name

def get_filename_modality(file_name):
    '''Return the four-digit modality suffix for `file_name`.

    "0001" is returned only when both modalities are in use (neither only_MRI
    nor only_CT is set) and the file is an MRI scan; every other case gets "0000".
    '''
    both_modalities = not (only_MRI or only_CT)
    return "0001" if both_modalities and is_mri(file_name) else "0000"
        
def get_all_files(path):
    '''List the entries of `path`, each joined with `path`, filtered by modality.

    With only_MRI set, only entries recognised by is_mri are returned; with
    only_CT set, only the remaining entries; otherwise every entry is returned.
    '''
    candidates = [os.path.join(path, entry) for entry in os.listdir(path)]

    # keep just the requested modality, if a filter is active
    if only_MRI:
        return list(filter(is_mri, candidates))
    if only_CT:
        return [candidate for candidate in candidates if not is_mri(candidate)]

    return candidates

First, rename the label files to the desired naming scheme.

In [4]:
# Rename every file in TRAIN_LABEL_DIR to 'panc_<part>', where <part> is the second
# '_'-separated piece of its current name.
if os.path.isdir(TRAIN_LABEL_DIR):
    for label_name in os.listdir(TRAIN_LABEL_DIR):
        name_parts = label_name.split('_')
        if len(name_parts) < 2:
            # No '_' in the name, so there is no second part to keep; leave the file untouched
            # instead of failing with an IndexError.
            continue
        renamed = 'panc_' + name_parts[1]
        if renamed == label_name:
            # Already in the desired form (e.g. when this cell is re-run).
            continue
        os.rename(os.path.join(TRAIN_LABEL_DIR, label_name), os.path.join(TRAIN_LABEL_DIR, renamed))
else:
    # On a fresh run the label folder may not have been created yet.
    print(f"Label directory {TRAIN_LABEL_DIR} does not exist yet; nothing to rename.")

Then we create the nnU-Net task from the AMOS training data.

In [5]:
# Step 1: make sure the four task folders exist.
pre.create_all_folders(TRAIN_DATA_DIR, TRAIN_LABEL_DIR, TEST_DATA_DIR, TEST_LABEL_DIR)

# Step 2: process the AMOS training images; the helper's return value is reused for the labels below.
# NOTE(review): presumably this is where the train/test split (test_perc) is drawn -- confirm in util.preprocess.
test_filenames = pre.create_images(
    f'{AMOS_DIR}/imagesTr',
    TRAIN_DATA_DIR,
    TEST_DATA_DIR,
    test_perc,
    get_all_files=get_all_files,
    get_filename_modality=get_filename_modality,
)

# Step 3: process the matching labels, passing along the test file names and the label ids.
pre.create_labels(
    test_filenames,
    f'{AMOS_DIR}/labelsTr',
    TRAIN_LABEL_DIR,
    TEST_LABEL_DIR,
    organ_labels.keys(),
    get_all_files=get_all_files,
)

# Step 4: write the dataset description for the task.
pre.generate_dataset_json(
    True,
    "AMOS",
    TASK_DIR,
    task,
    modality,
    organ_labels_json,
    TRAIN_LABEL_DIR,
    TEST_LABEL_DIR,
)
