This notebook verifies that the cropping (performed in cropDataSet.ipynb) worked correctly by checking that it did not remove any part of the pancreas.

In [1]:
import os
import sys
import numpy as np
import pandas as pd
# NOTE(review): star import; `regionprops` used further down is presumably
# provided by this line - confirm before narrowing it to an explicit import.
from skimage.measure import *
# Put the directory two levels above the working directory on sys.path.
# This has to happen before the `util` imports below; presumably that is
# where the project-local `util` package lives - TODO confirm.
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from util import data
from util import management as mana
from util import constants as con

# Original task; the cropped task, config and modality are looked up from it
# in the next cell.
task = con.TASK_501

Set some general variables that we'll need.

In [2]:
# Settings derived from the original task via the lookup tables in `con`.
thesis_path = con.THESIS_PATH
newTask = con.CROP_TASK_MAPPING[task]
config = con.CONFIG_MAPPING[task]
modality = con.MODALITY_MAPPING[task]

# Original task: raw data folders ...
data_path = f"{thesis_path}/Data/nnUNet_raw_data_base/nnUNet_raw_data/Task{task}"
img_tr_path, img_ts_path = f"{data_path}/imagesTr", f"{data_path}/imagesTs"
label_tr_path, label_ts_path = f"{data_path}/labelsTr", f"{data_path}/labelsTs"
# ... and the folders holding the inference output for both splits.
segmentation_tr_path, segmentation_ts_path = (
    f"{thesis_path}/Results/{task}/{config}/inference/{task}/imagesTr",
    f"{thesis_path}/Results/{task}/{config}/inference/{task}/imagesTs",
)

# New task: same folder layout under the new task's directory.
new_data_path = f"{thesis_path}/Data/nnUNet_raw_data_base/nnUNet_raw_data/Task{newTask}"
new_img_tr_path, new_img_ts_path = f"{new_data_path}/imagesTr", f"{new_data_path}/imagesTs"
new_label_tr_path, new_label_ts_path = f"{new_data_path}/labelsTr", f"{new_data_path}/labelsTs"

# Make sure the new-task folders exist, parent folder first.
for new_dir in (
    new_data_path,
    new_img_tr_path,
    new_img_ts_path,
    new_label_tr_path,
    new_label_ts_path,
):
    mana.create_maybe_dir(new_dir)

Define functions to compare labels

In [3]:
def is_label_equal(l1, l2):
    """Assert that two label arrays carry the same labelled content in aggregate.

    Three checks are made, in this order:
      1. the sum over all elements is identical,
      2. ``regionprops`` reports the same number of regions for both arrays,
      3. the regions, paired in the order ``regionprops`` returns them, have
         identical areas.

    The checks do not compare positions, so the two arrays may differ in shape.

    Parameters
    ----------
    l1, l2 : array-like
        Label arrays to compare; each is also passed to ``regionprops``.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If any of the three checks fails; the message names the failing check
        and the two differing values.
    """
    # Sum all label values with np.sum, which accumulates small integer dtypes
    # in a wide integer type. The builtin sum() nested three times added the
    # slices in the array's own dtype, so narrow integer labels (e.g. uint8)
    # could wrap around and make two different masks look identical; it also
    # only worked for exactly three-dimensional input.
    suml1 = np.sum(l1)
    suml2 = np.sum(l2)
    assert suml1 == suml2, f"Error because sum of labels not same: {suml1} {suml2} "

    # get image regions
    regions1 = regionprops(l1)
    regions2 = regionprops(l2)

    # see if they have same amount of regions
    assert len(regions1) == len(regions2), f"Error because length not same: {len(regions1)} {len(regions2)}"

    # compare the regions pairwise by area
    for r1, r2 in zip(regions1, regions2):
        a1 = r1.area
        a2 = r2.area
        assert a1 == a2, f"Error because area is not the same: {a1} {a2}"


Load in data and check all labels.

In [4]:
# Verify both splits: the test set first, then the training set.
for tstr in [con.TEST, con.TRAIN]:

    # Select the original and the cropped folders for this split.
    # The train branch is required: without it the second pass silently
    # reuses the test-set paths, so the training labels are never checked.
    if tstr == con.TEST:
        img_path = img_ts_path
        new_img_path = new_img_ts_path
        lab_path = label_ts_path
        new_lab_path = new_label_ts_path
    else:
        img_path = img_tr_path
        new_img_path = new_img_tr_path
        lab_path = label_tr_path
        new_lab_path = new_label_tr_path

    # Load the file names and labels of the original and the cropped data set
    # for this split; the remaining return values are not needed here.
    fnames, labs, _, _, _ = data.get_all_data_dictionary(img_path, lab_path, modality)
    new_fnames, new_labs, _, _, _ = data.get_all_data_dictionary(new_img_path, new_lab_path, modality)

    # Both data sets must contain exactly the same cases.
    assert fnames == new_fnames, "ERROR. Filenames are not the same!"
    for f in fnames:
        try:
            is_label_equal(labs[f], new_labs[f])
        except AssertionError as err:
            # Re-raise with the split and file name so the failing case can be
            # identified; the exception type stays AssertionError.
            raise AssertionError(f"{tstr} / {f}: {err}") from err