# Create nnUNet data and dataset json
---


# Libraries

In [8]:
from savenifti import convert_2d_image_to_nifti
from polygon import convert_polygons_to_annotations, get_tissue_union_from_mask
from filepath import convert_path, GenerateJson
import os
from wholeslidedata.annotation.wholeslideannotation import WholeSlideAnnotation
from wholeslidedata.image.wholeslideimage import WholeSlideImage
#from wholeslidedata.accessories.asap.parser import AsapAnnotationParser
from matplotlib import pyplot as plt
from wholeslidedata.samplers.patchlabelsampler import SegmentationPatchLabelSampler
from shapely.geometry import Point
import numpy as np
import pandas as pd
import SimpleITK as sitk
from wholeslidedata.annotation.utils import cv2_polygonize
from shapely import geometry
from wholeslidedata.labels import Label
from wholeslidedata.annotation.structures import Annotation
import yaml
import random


In [3]:
# !pip3 install git+https://github.com/DIAGNijmegen/pathology-whole-slide-data@main
# !pip3 install --upgrade opencv-python-headless
# # # sudo apt update && sudo apt -y upgrade && sudo apt-get install -y openslide-tools
# restart kernel

# Data import

In [5]:
# Single-letter OS code handed to convert_path: "w" when os.name is "nt", otherwise "l".
current_os = "w" if os.name == "nt" else "l"
other_os = "l" if current_os == "w" else "w"

camelyon_path = convert_path('directory_to_yaml_file/data_camelyon17.yaml', current_os)

# Keep the parsed configuration under its own name. Binding the result to
# `yaml` would replace the imported module with a dict, so re-running this
# cell (or any later yaml.safe_load call) would raise AttributeError.
with open(camelyon_path, 'r') as f:
    camelyon_config = yaml.safe_load(f)

# Values read from the configuration file.
root = camelyon_config['path']['root']
camelyon = camelyon_config['path']['camelyon17']
train_data = camelyon_config['data']['training']


### Whole Slide Data

### Colormap ASAP and labels

In [6]:
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
# import pylab as plt

# Colour lookup table: every row holds four integer components in the 0-255
# range (presumably R, G, B, A copied from the ASAP viewer palette, going by
# the section heading - TODO confirm). The first row is all zeros.
colormap = [ [ 0, 0, 0, 0 ], [ 0, 224, 249, 255 ], [ 0, 249, 50, 255 ], [ 174, 249, 0, 255 ], [ 249, 100, 0, 255 ], [ 249, 0, 125, 255 ], [ 149, 0, 249, 255 ], [ 0, 0, 206, 255 ], [ 0, 185, 206, 255 ], [ 0, 206, 41, 255 ], [ 143, 206, 0, 255 ], [ 206, 82, 0, 255 ], [ 206, 0, 103, 255 ], [ 124, 0, 206, 255 ], [ 0, 0, 162, 255 ], [ 0, 145, 162, 255 ], [ 0, 162, 32, 255 ], [ 114, 162, 0, 255 ], [ 162, 65, 0, 255 ], [ 162, 0, 81, 255 ], [ 97, 0, 162, 255 ], [ 0, 0, 119, 255 ], [ 0, 107, 119, 255 ], [ 0, 119, 23, 255 ], [ 83, 119, 0, 255 ], [ 119, 47, 0, 255 ], [ 119, 0, 59, 255 ], [ 71, 0, 119, 255 ], [ 100, 100, 249, 255 ], [ 100, 234, 249, 255 ], [ 100, 249, 129, 255 ], [ 204, 249, 100, 255 ], [ 249, 159, 100, 255 ], [ 249, 100, 174, 255 ], [ 189, 100, 249, 255 ], [ 82, 82, 206, 255 ], [ 82, 193, 206, 255 ], [ 82, 206, 107, 255 ], [ 168, 206, 82, 255 ], [ 206, 131, 82, 255 ], [ 206, 82, 143, 255 ], [ 156, 82, 206, 255 ], [ 65, 65, 162, 255 ], [ 65, 152, 162, 255 ], [ 65, 162, 84, 255 ], [ 132, 162, 65, 255 ], [ 162, 104, 65, 255 ], [ 162, 65, 114, 255 ], [ 123, 65, 162, 255 ], [ 47, 47, 119, 255 ], [ 47, 112, 119, 255 ], [ 47, 119, 61, 255 ], [ 97, 119, 47, 255 ], [ 119, 76, 47, 255 ], [ 119, 47, 83, 255 ], [ 90, 47, 119, 255 ], [ 174, 174, 249, 255 ], [ 174, 242, 249, 255 ], [ 174, 249, 189, 255 ], [ 227, 249, 174, 255 ], [ 249, 204, 174, 255 ], [ 249, 174, 212, 255 ], [ 219, 174, 249, 255 ], [ 143, 143, 206, 255 ], [ 143, 199, 206, 255 ], [ 143, 206, 156, 255 ], [ 187, 206, 143, 255 ], [ 206, 168, 143, 255 ], [ 206, 143, 175, 255 ], [ 181, 143, 206, 255 ], [ 114, 114, 162, 255 ], [ 114, 157, 162, 255 ], [ 114, 162, 123, 255 ], [ 147, 162, 114, 255 ], [ 162, 132, 114, 255 ], [ 162, 114, 137, 255 ], [ 142, 114, 162, 255 ], [ 83, 83, 119, 255 ], [ 83, 115, 119, 255 ], [ 83, 119, 90, 255 ], [ 108, 119, 83, 255 ], [ 119, 97, 83, 255 ], [ 119, 83, 101, 255 ], [ 104, 83, 119, 255 ], [ 224, 224, 249, 255 ], [ 224, 247, 249, 255 ], [ 224, 249, 229, 255 ], [ 242, 249, 224, 
255 ], [ 249, 234, 224, 255 ], [ 249, 224, 237, 255 ], [ 239, 224, 249, 255 ], [ 185, 185, 206, 255 ], [ 185, 204, 206, 255 ], [ 185, 206, 189, 255 ], [ 199, 206, 185, 255 ], [ 206, 193, 185, 255 ], [ 206, 185, 195, 255 ], [ 197, 185, 206, 255 ], [ 145, 145, 162, 255 ], [ 145, 160, 162, 255 ], [ 145, 162, 149, 255 ], [ 157, 162, 145, 255 ], [ 162, 152, 145, 255 ], [ 162, 145, 153, 255 ], [ 155, 145, 162, 255 ], [ 107, 107, 119, 255 ], [ 107, 118, 119, 255 ], [ 107, 119, 109, 255 ], [ 115, 119, 107, 255 ], [ 119, 112, 107, 255 ], [ 119, 107, 113, 255 ], [ 114, 107, 119, 255 ], [ 0, 0, 249, 255 ], [ 0, 224, 249, 255 ], [ 0, 249, 50, 255 ], [ 174, 249, 0, 255 ], [ 249, 100, 0, 255 ], [ 249, 0, 125, 255 ], [ 149, 0, 249, 255 ], [ 0, 0, 206, 255 ], [ 0, 185, 206, 255 ], [ 0, 206, 41, 255 ], [ 143, 206, 0, 255 ], [ 206, 82, 0, 255 ], [ 206, 0, 103, 255 ], [ 124, 0, 206, 255 ], [ 0, 0, 162, 255 ], [ 0, 145, 162, 255 ], [ 0, 162, 32, 255 ], [ 114, 162, 0, 255 ], [ 162, 65, 0, 255 ], [ 162, 0, 81, 255 ], [ 97, 0, 162, 255 ], [ 0, 0, 119, 255 ], [ 0, 107, 119, 255 ], [ 0, 119, 23, 255 ], [ 83, 119, 0, 255 ], [ 119, 47, 0, 255 ], [ 119, 0, 59, 255 ], [ 71, 0, 119, 255 ], [ 100, 100, 249, 255 ], [ 100, 234, 249, 255 ], [ 100, 249, 129, 255 ], [ 204, 249, 100, 255 ], [ 249, 159, 100, 255 ], [ 249, 100, 174, 255 ], [ 189, 100, 249, 255 ], [ 82, 82, 206, 255 ], [ 82, 193, 206, 255 ], [ 82, 206, 107, 255 ], [ 168, 206, 82, 255 ], [ 206, 131, 82, 255 ], [ 206, 82, 143, 255 ], [ 156, 82, 206, 255 ], [ 65, 65, 162, 255 ], [ 65, 152, 162, 255 ], [ 65, 162, 84, 255 ], [ 132, 162, 65, 255 ], [ 162, 104, 65, 255 ], [ 162, 65, 114, 255 ], [ 123, 65, 162, 255 ], [ 47, 47, 119, 255 ], [ 47, 112, 119, 255 ], [ 47, 119, 61, 255 ], [ 97, 119, 47, 255 ], [ 119, 76, 47, 255 ], [ 119, 47, 83, 255 ], [ 90, 47, 119, 255 ], [ 174, 174, 249, 255 ], [ 174, 242, 249, 255 ], [ 174, 249, 189, 255 ], [ 227, 249, 174, 255 ], [ 249, 204, 174, 255 ], [ 249, 174, 212, 255 ], [ 219, 174, 249, 255 ], [ 143, 
143, 206, 255 ], [ 143, 199, 206, 255 ], [ 143, 206, 156, 255 ], [ 187, 206, 143, 255 ], [ 206, 168, 143, 255 ], [ 206, 143, 175, 255 ], [ 181, 143, 206, 255 ], [ 114, 114, 162, 255 ], [ 114, 157, 162, 255 ], [ 114, 162, 123, 255 ], [ 147, 162, 114, 255 ], [ 162, 132, 114, 255 ], [ 162, 114, 137, 255 ], [ 142, 114, 162, 255 ], [ 83, 83, 119, 255 ], [ 83, 115, 119, 255 ], [ 83, 119, 90, 255 ], [ 108, 119, 83, 255 ], [ 119, 97, 83, 255 ], [ 119, 83, 101, 255 ], [ 104, 83, 119, 255 ], [ 224, 224, 249, 255 ], [ 224, 247, 249, 255 ], [ 224, 249, 229, 255 ], [ 242, 249, 224, 255 ], [ 249, 234, 224, 255 ], [ 249, 224, 237, 255 ], [ 239, 224, 249, 255 ], [ 185, 185, 206, 255 ], [ 185, 204, 206, 255 ], [ 185, 206, 189, 255 ], [ 199, 206, 185, 255 ], [ 206, 193, 185, 255 ], [ 206, 185, 195, 255 ], [ 197, 185, 206, 255 ], [ 145, 145, 162, 255 ], [ 145, 160, 162, 255 ], [ 145, 162, 149, 255 ], [ 157, 162, 145, 255 ], [ 162, 152, 145, 255 ], [ 162, 145, 153, 255 ], [ 155, 145, 162, 255 ], [ 107, 107, 119, 255 ], [ 107, 118, 119, 255 ], [ 107, 119, 109, 255 ], [ 115, 119, 107, 255 ], [ 119, 112, 107, 255 ], [ 119, 107, 113, 255 ], [ 114, 107, 119, 255 ], [ 0, 0, 249, 255 ], [ 0, 224, 249, 255 ], [ 0, 249, 50, 255 ], [ 174, 249, 0, 255 ], [ 249, 100, 0, 255 ], [ 249, 0, 125, 255 ], [ 149, 0, 249, 255 ], [ 0, 0, 206, 255 ], [ 0, 185, 206, 255 ], [ 0, 206, 41, 255 ], [ 143, 206, 0, 255 ], [ 206, 82, 0, 255 ], [ 206, 0, 103, 255 ], [ 124, 0, 206, 255 ], [ 0, 0, 162, 255 ], [ 0, 145, 162, 255 ], [ 0, 162, 32, 255 ], [ 114, 162, 0, 255 ], [ 162, 65, 0, 255 ], [ 162, 0, 81, 255 ], [ 97, 0, 162, 255 ], [ 0, 0, 119, 255 ], [ 0, 107, 119, 255 ], [ 0, 119, 23, 255 ], [ 83, 119, 0, 255 ], [ 119, 47, 0, 255 ], [ 119, 0, 59, 255 ], [ 71, 0, 119, 255 ], [ 100, 100, 249, 255 ], [ 100, 234, 249, 255 ], [ 100, 249, 129, 255 ], [ 0, 249, 50, 255 ] ]
# Rescale the integer colour rows to floats by dividing by 255, then build a
# matplotlib colormap from the resulting colour list.
colormap = np.asarray(colormap) / 255.0
cmap = LinearSegmentedColormap.from_list('my_cmap', colors=colormap)

# Class names; a label's index is its position in this list.
label_names = ['Background', 'Benign', 'Tumor']
n_labels = len(label_names)
label_index = [*range(n_labels)]

# Keyword arguments collected for plotting label images with the colormap above.
label_plot_args = dict(cmap=cmap, vmin=0, vmax=255, interpolation="none")



# Check images and annotations

In [7]:
# Mapping from class name to integer label value.
labels = dict(zip(('Background', 'Benign', 'Tumor'), range(3)))


In [2]:
# Single-letter OS code handed to convert_path: "w" when os.name is "nt", otherwise "l".
current_os = "w" if os.name == "nt" else "l"
other_os = "l" if current_os == "w" else "w"

nnUNet_base = r"root_folder" # directory to folder
nnUNet_data_root = convert_path(os.path.join(nnUNet_base, "nnUNet_raw_data"), current_os)
# The former `sorted(os.listdir(nnUNet_data_root))` statement was removed: its
# result was discarded, and as a mid-cell expression the notebook never
# displayed it.

task_name = 'Tasknumber_task_name'
task_root = convert_path(os.path.join(nnUNet_data_root, task_name), current_os)
print("TASK:\t\t", task_name)

# NOTE(review): despite the `_linux` suffix, the path is converted for `current_os`.
nnUNet_base_linux = convert_path(nnUNet_base, to=current_os)
print("nnUNet ROOT:\t", nnUNet_base_linux)

image_folder = convert_path(os.path.join(task_root, "imagesTr"), current_os)
label_folder = convert_path(os.path.join(task_root, "labelsTr"), current_os)

# exist_ok=True replaces the separate isdir() check, so there is no window
# between checking and creating, and the cell can be re-run safely.
os.makedirs(image_folder, exist_ok=True)
os.makedirs(label_folder, exist_ok=True)

TASK:		 Task028_camelyon_uncertainty
nnUNet ROOT:	 B:\projects\pathology-mrifuse


In [3]:
from savenifti import convert_2d_image_to_nifti
from polygon import convert_polygons_to_annotations, get_tissue_union_from_mask
from filepath import convert_path, GenerateJson
import os
from wholeslidedata.annotation.wholeslideannotation import WholeSlideAnnotation
from wholeslidedata.image.wholeslideimage import WholeSlideImage
#from wholeslidedata.accessories.asap.parser import AsapAnnotationParser
from matplotlib import pyplot as plt
from wholeslidedata.samplers.patchlabelsampler import SegmentationPatchLabelSampler
from shapely.geometry import Point
import numpy as np
import pandas as pd
import SimpleITK as sitk
from wholeslidedata.annotation.utils import cv2_polygonize
from shapely import geometry
from wholeslidedata.labels import Label
from wholeslidedata.annotation.structures import Annotation
import yaml
import random
# Single-letter OS code handed to convert_path: "w" when os.name is "nt", otherwise "l".
current_os = "w" if os.name == "nt" else "l"
other_os = "l" if current_os == "w" else "w"

camelyon_path = convert_path('/yaml/data_camelyon17.yaml', current_os)

# Keep the parsed configuration under its own name. Binding the result to
# `yaml` would replace the imported module with a dict and break every later
# yaml.safe_load call in the kernel.
with open(camelyon_path, 'r') as f:
    camelyon_config = yaml.safe_load(f)

# Values read from the configuration file.
root = camelyon_config['path']['root']
camelyon = camelyon_config['path']['camelyon17']
train_data = camelyon_config['data']['training']

# Mapping from class name to integer label value.
labels={'Background':0,
        'Benign': 1, 
        'Tumor': 2}

# current_os / other_os are already set above; the second, identical pair of
# assignments that used to sit here was redundant and has been dropped.
nnUNet_base = r"root"
nnUNet_data_root = convert_path(os.path.join(nnUNet_base, "nnUNet_raw_data"), current_os)
# NOTE(review): despite the `_linux` suffix, the path is converted for `current_os`.
nnUNet_base_linux = convert_path(nnUNet_base, to=current_os)
print("nnUNet ROOT:\t", nnUNet_base_linux)


# NOTE(review): `task_num` and `sample` are not assigned in any cell visible
# here; they must be defined before this loop runs on a fresh kernel.
# The base path does not depend on the loop variables, so convert it once.
nnUNet_base_linux = convert_path(nnUNet_base, to=current_os)

for i, sample_per_slide in zip([task_num], [sample]):
    # Zero-pad the task number to three digits. The previous hard-coded
    # "Task0{i}" only produced a three-digit id for two-digit numbers
    # (130 became "Task0130", 5 became "Task05"), which disagrees with the
    # "Task130_..." name used for the TRAIN dataset and with the
    # task_name[:7] slicing used when building inference output folders.
    task_name = f'Task{int(i):03d}_camelyon_with_uncertainty_{sample_per_slide}'
    task_root = convert_path(os.path.join(nnUNet_data_root, task_name), current_os)
    print("TASK:\t\t", task_name)
    print("nnUNet ROOT:\t", nnUNet_base_linux)

    # Training image / label folder paths inside the task folder.
    image_folder = convert_path(os.path.join(task_root, "imagesTr"), current_os)
    label_folder = convert_path(os.path.join(task_root, "labelsTr"), current_os)

    dataset_path = task_root
    GenerateJson(dataset_path)

nnUNet ROOT:	 B:\projects\pathology-mrifuse
TASK:		 Task031_camelyon_with_uncertainty_40
nnUNet ROOT:	 B:\projects\pathology-mrifuse


## Loss on distributions

### Calculate the mean MSE losses over percentages for the individual folds, and their mean

In [None]:
# Switch compared against 'wsi' / 'roi' by the loss-printing cells that follow.
wsi_or_roi_based = "roi"

In [None]:
# WSI mode: report the validation distribution losses, then their log variant.
if wsi_or_roi_based == 'wsi':
    for loss_table in (val_distribution_losses, val_distribution_losses_log):
        print_best_dist_loss(loss_table)

In [None]:
# ROI mode: report the training distribution losses, then their log variant.
if wsi_or_roi_based == 'roi':
    for loss_table in (train_distribution_losses, train_distribution_losses_log):
        print_best_dist_loss(loss_table)

In [None]:
# ROI mode: report the validation distribution losses, then their log variant.
if wsi_or_roi_based == 'roi':
    for loss_table in (val_distribution_losses, val_distribution_losses_log):
        print_best_dist_loss(loss_table)

# Create nnUNet dataset (TRAIN)

Change Task name here for a new project:

In [9]:
import os

# Name of the task folder to create; edit this line for a new project.
task_name = 'Task130_camelyon_uncertainty_130'
task_root = convert_path(os.path.join(nnUNet_data_root, task_name), current_os)
print("TASK:\t\t", task_name)

nnUNet_base_linux = convert_path(nnUNet_base, to=current_os)
print("nnUNet ROOT:\t", nnUNet_base_linux)

# Resolve the training image and label folders inside the task folder.
image_folder, label_folder = (
    convert_path(os.path.join(task_root, sub_dir), current_os)
    for sub_dir in ("imagesTr", "labelsTr")
)

# Create both folders; folders that already exist are left alone.
os.makedirs(image_folder, exist_ok=True)
os.makedirs(label_folder, exist_ok=True)

# Hand the task folder to GenerateJson.
dataset_path = task_root
GenerateJson(dataset_path)

TASK:		 Task130_camelyon_uncertainty_130
nnUNet ROOT:	 /data/pathology/projects/pathology-mrifuse


In [23]:
# Recompute the base path via convert_path for the current OS code.
# NOTE(review): despite the `_linux` suffix, the target here is `current_os`.
nnUNet_base_linux = convert_path(nnUNet_base, to=current_os)


# Bare expression as the last statement so the notebook displays dataset_path.
dataset_path

### Make dataset json

# Run nnUNet TRAIN job

In [29]:
# Echo the active task name, then the base path converted with to="linux".
print(f"TASK:\t\t {task_name!s}")
nnUNet_base_linux = convert_path(nnUNet_base, to="linux")
print(f"nnUNet ROOT:\t {nnUNet_base_linux!s}")

TASK:		 Task028_camelyon_uncertainty
nnUNet ROOT:	 /data/pathology/projects/pathology-mrifuse


## Set docker, trainer, and experiment planner

### Current docker:
'doduo1.umcn.nl/pathology_lung_til/nnunet:9.0-midl2023'

### how I made the docker
on cluster 

clone diag nnunet wrapper (into project folder)

check out my feature branch (derived from Max de Grauw's feature branch), which allows changing the plans and the experiment planner

take the Dockerfile from that branch (it uses the cloned repo's wrapper script, `wrapper.py` — the exact file name may differ)

copy your own specific docker commands below it

build and push it (change name in build.sh file)

voila

In [7]:
# --- Docker image -----------------------------------------------------------
# Alternative (inactive):
#   'doduo1.umcn.nl/pathology_lung_til/nnunet:8.7-pathology_DA-latest'
docker = "doduo1.umcn.nl/pathology_lung_til/nnunet:9.4-midl2023"

# --- Trainer ----------------------------------------------------------------
# Alternatives (inactive):
#   'nnUNetTrainerV2'  (default)
#   'nnUNetTrainerV2_BN_pathology_DA_hed005'
#   'nnUNetTrainerV2_BN_pathology_DA_hed005_hsv010'
trainer_name = "nnUNetTrainerV2_BN"

# --- Experiment planner -----------------------------------------------------
# Alternatives (inactive):
#   'ExperimentPlanner2D_v21_RGB_z_score_default'  (compatibility wrapper of the default)
#   'ExperimentPlanner2D_v21_RGB_z_score_default_bs2_ps1024'
#   'ExperimentPlanner2D_v21_RGB_scaleTo_0_1'
#   'ExperimentPlanner2D_v21_RGB_scaleTo_0_1_bs2_ps1024'
#   'ExperimentPlanner2D_v21_RGB_scaleTo_0_1_bs32_ps256'
experiment_planner = "ExperimentPlanner2D_v21_RGB_scaleTo_0_1_bs8_ps512"

# --- Experiment planner identifier ------------------------------------------
# Alternatives (inactive), listed in the same order as the planners above:
#   'nnUNet_RGB_z_score_default'  (compatibility wrapper of the default)
#   'nnUNet_RGB_z_score_default_bs2_ps1024'
#   'nnUNet_RGB_scaleTo_0_1'
#   'nnUNet_RGB_scaleTo_0_1_bs2_ps1024'
#   'nnUNet_RGB_scaleTo_0_1_bs32_ps256'
experiment_planner_identifier = "nnUNet_RGB_scaleTo_0_1_bs8_ps512"

## Manually set patch size

check and set to true

## Manually set data splits

### Copy output below for ENSEMBLE

In [None]:
# Print one cluster-submission command per fold, plus one for "all".
# NOTE(review): `priority`, `results_folder_linux` and `test_image_folder_linux`
# are not assigned in the cells visible here; they must exist before this runs.
names = ['FOLD_0', 'FOLD_1', 'FOLD_2', 'FOLD_3', 'FOLD_4', "ALL"]
folds = [0, 1, 2, 3, 4, "all"]
for (name, fold) in zip(names, folds):
    print(name, '\n')

    # Insert "_<name>" after the first seven characters of the task name.
    output_folder_single_model_linux = (
        f"{nnUNet_base_linux}/inference_results/{task_name[:7]}_{name}{task_name[7:]}"
    )
    # Optional, currently disabled: create the output folder up front.
    #     if makedirs:
    #         os.mkdir(convert_path(output_folder_single_model_linux, to="windows")) # change this if you are on linux

    # Assemble the command from its fragments, separated by single spaces.
    inference_command = " ".join([
        "./c-submit --require-cpus=4 --require-mem=28g --gpu-count=1",
        f"--require-gpu-mem=11g --priority={priority} joeyspronck 9974 72",
        "doduo1.umcn.nl/nnunet/sol nnunet predict",
        f"{task_name}",
        f"--results {results_folder_linux}",
        f"--input {test_image_folder_linux}",
        f"--output {output_folder_single_model_linux}",
        "--checkpoint model_best",
        "--trainer nnUNetTrainerV2_Loss_CE_checkpoints",
        "--network 2d",
        "--store_probability_maps",
        f"--folds {fold}",
    ])
    print(inference_command, '\n\n\n')