# nnU-Net

## Setup imports

In [1]:
import os
import json
import random
import shutil
import tempfile
from monai.config import print_config
from monai.apps import download_and_extract

print_config()

MONAI version: 1.3.3rc1+2.g7a8680e8
Numpy version: 2.0.0
Pytorch version: 2.3.1
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 7a8680e84457cb374859639ab6a078313da85926
MONAI __file__: /home/<username>/miniforge3/envs/nnunet/lib/python3.11/site-packages/monai/__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 5.2.1
scikit-image version: 0.24.0
scipy version: 1.14.0
Pillow version: 10.4.0
Tensorboard version: NOT INSTALLED or UNKNOWN VERSION.
gdown version: NOT INSTALLED or UNKNOWN VERSION.
TorchVision version: 0.18.1
tqdm version: 4.66.4
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 6.0.0
pandas version: 2.2.2
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.
clearml ver

## Datalist Generator

### Setup paths to your data

In [2]:
directory = os.environ.get("MONAI_DATA_DIRECTORY")
if directory is not None:
    os.makedirs(directory, exist_ok=True)
root_dir = tempfile.mkdtemp() if directory is None else directory
print(root_dir)

/home/mark/GitHub/Project-MONAI-tutorials/monai_data_dir


### Download sample MSD Dataset

In [3]:
msd_task = "Task09_Spleen"
resource = "https://msd-for-monai.s3-us-west-2.amazonaws.com/" + msd_task + ".tar"

compressed_file = os.path.join(root_dir, msd_task + ".tar")
dataroot = os.path.join(root_dir, msd_task)

if not os.path.exists(dataroot):
    download_and_extract(resource, compressed_file, root_dir)

### MSD dataset structure follows the following convention:

In [4]:
test_dir = os.path.join(dataroot, "imagesTs/")
train_dir = os.path.join(dataroot, "imagesTr/")
label_dir = os.path.join(dataroot, "labelsTr/")

### Construct skeleton JSON to populate with your own data

In [5]:
datalist_json = {"testing": [], "training": []}

### Populate JSON with test data

In [6]:
datalist_json["testing"] = [
    {"image": "./imagesTs/" + file} for file in os.listdir(test_dir) if (".nii.gz" in file) and ("._" not in file)
]

### Visualise testing data

In [7]:
datalist_json["testing"][:10]

[{'image': './imagesTs/spleen_55.nii.gz'},
 {'image': './imagesTs/spleen_7.nii.gz'},
 {'image': './imagesTs/spleen_39.nii.gz'},
 {'image': './imagesTs/spleen_36.nii.gz'},
 {'image': './imagesTs/spleen_43.nii.gz'},
 {'image': './imagesTs/spleen_50.nii.gz'},
 {'image': './imagesTs/spleen_11.nii.gz'},
 {'image': './imagesTs/spleen_42.nii.gz'},
 {'image': './imagesTs/spleen_57.nii.gz'},
 {'image': './imagesTs/spleen_23.nii.gz'}]

### Populate with training images and labels in your directory

In [8]:
datalist_json["training"] = [
    {"image": "./imagesTr/" + file, "label": "./labelsTr/" + file, "fold": 0}
    for file in os.listdir(train_dir)
    if (".nii.gz" in file) and ("._" not in file)
]  # Initialize as single fold

### Visualise training data

In [9]:
datalist_json["training"][:10]

[{'image': './imagesTr/spleen_20.nii.gz',
  'label': './labelsTr/spleen_20.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_12.nii.gz',
  'label': './labelsTr/spleen_12.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_59.nii.gz',
  'label': './labelsTr/spleen_59.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_8.nii.gz',
  'label': './labelsTr/spleen_8.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_3.nii.gz',
  'label': './labelsTr/spleen_3.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_26.nii.gz',
  'label': './labelsTr/spleen_26.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_21.nii.gz',
  'label': './labelsTr/spleen_21.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_13.nii.gz',
  'label': './labelsTr/spleen_13.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_56.nii.gz',
  'label': './labelsTr/spleen_56.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_52.nii.gz',
  'label': './labelsTr/spleen_52.nii.gz',
  'fold': 0}]

### Randomise training data


In [10]:
random.seed(42)
random.shuffle(datalist_json["training"])
datalist_json["training"][:10]

[{'image': './imagesTr/spleen_52.nii.gz',
  'label': './labelsTr/spleen_52.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_8.nii.gz',
  'label': './labelsTr/spleen_8.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_46.nii.gz',
  'label': './labelsTr/spleen_46.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_19.nii.gz',
  'label': './labelsTr/spleen_19.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_10.nii.gz',
  'label': './labelsTr/spleen_10.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_29.nii.gz',
  'label': './labelsTr/spleen_29.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_45.nii.gz',
  'label': './labelsTr/spleen_45.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_14.nii.gz',
  'label': './labelsTr/spleen_14.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_3.nii.gz',
  'label': './labelsTr/spleen_3.nii.gz',
  'fold': 0},
 {'image': './imagesTr/spleen_38.nii.gz',
  'label': './labelsTr/spleen_38.nii.gz',
  'fold': 0}]

### Split training data into N random folds

In [11]:
num_folds = 5
fold_size = len(datalist_json["training"]) // num_folds
for i in range(num_folds):
    for j in range(fold_size):
        datalist_json["training"][i * fold_size + j]["fold"] = i

### Visualise final training data with all randomised folds

In [12]:
datalist_json["training"][-15:]

[{'image': './imagesTr/spleen_60.nii.gz',
  'label': './labelsTr/spleen_60.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_41.nii.gz',
  'label': './labelsTr/spleen_41.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_59.nii.gz',
  'label': './labelsTr/spleen_59.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_20.nii.gz',
  'label': './labelsTr/spleen_20.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_53.nii.gz',
  'label': './labelsTr/spleen_53.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_32.nii.gz',
  'label': './labelsTr/spleen_32.nii.gz',
  'fold': 3},
 {'image': './imagesTr/spleen_26.nii.gz',
  'label': './labelsTr/spleen_26.nii.gz',
  'fold': 4},
 {'image': './imagesTr/spleen_21.nii.gz',
  'label': './labelsTr/spleen_21.nii.gz',
  'fold': 4},
 {'image': './imagesTr/spleen_56.nii.gz',
  'label': './labelsTr/spleen_56.nii.gz',
  'fold': 4},
 {'image': './imagesTr/spleen_22.nii.gz',
  'label': './labelsTr/spleen_22.nii.gz',
  'fold': 4},
 {'image': './images

### Save JSON to file

In [13]:
datalist_file = "msd_" + msd_task.lower() + "_folds.json"
with open(datalist_file, "w", encoding="utf-8") as f:
    json.dump(datalist_json, f, ensure_ascii=False, indent=4)
print(f"Datalist is saved to {datalist_file}")

Datalist is saved to msd_task09_spleen_folds.json


## Run with Minimal Input using `nnUNetV2Runner`

---
After creating the data list, the user can create a simple "input.yaml" file (shown below) as the minimum input for **nnUNetV2Runner**.

```yaml
modality: CT
datalist: "./msd_task09_spleen_folds.json"
dataroot: "/workspace/data/Task09_Spleen"
```

Note: For multi-modal inputs, please check the **Frequently Asked Questions** section

Users can also set values of directory variables as options in "input.yaml" if any directory needs to be specified.

```yaml
dataset_name_or_id: 1 # task-specific integer index (optional)
nnunet_preprocessed: "./work_dir/nnUNet_preprocessed" # directory for storing pre-processed data (optional)
nnunet_raw: "./work_dir/nnUNet_raw_data_base" # directory for storing formated raw data (optional)
nnunet_results: "./work_dir/nnUNet_trained_models" # diretory for storing trained model checkpoints (optional)
```
---


Once the minimum input information is provided, the user can use the following commands to start the process of the entire nnU-Net pipeline automatically (from model training to model ensemble).

```bash
python -m monai.apps.nnunet nnUNetV2Runner run --input_config='./input.yaml'
```

For experiment and debugging purposes, users may want to set the number of epochs of training in the nnU-Net pipeline.
Our integration offers an optional argument `trainer_class_name` to specify the number of epochs as below:

```bash
python -m monai.apps.nnunet nnUNetV2Runner run --input_config='./input.yaml' --trainer_class_name nnUNetTrainer_1epoch
```

The supported `trainer_class_name` are:
- nnUNetTrainer (default)
- nnUNetTrainer_1epoch
- nnUNetTrainer_5epochs
- nnUNetTrainer_10epochs
- nnUNetTrainer_20epochs
- nnUNetTrainer_50epochs
- nnUNetTrainer_100epochs
- nnUNetTrainer_250epochs
- nnUNetTrainer_2000epochs
- nnUNetTrainer_4000epochs
- nnUNetTrainer_8000epochs

#### Convert dataset

In [20]:
!python -m monai.apps.nnunet nnUNetV2Runner convert_dataset --input_config "./input.yaml"

2024-07-22 21:31:40,449 - INFO - num_input_channels: 1
2024-07-22 21:32:33,382 - INFO - num_foreground_classes: 1
2024-07-22 21:32:33,382 - INFO - converting data section: training...
100%|███████████████████████████████████████████| 41/41 [04:04<00:00,  5.95s/it]
2024-07-22 21:36:37,533 - INFO - converting data section: testing...
100%|███████████████████████████████████████████| 20/20 [01:17<00:00,  3.87s/it]


#### Plan and process experiment using V2 plans

In [21]:
!python -m monai.apps.nnunet nnUNetV2Runner plan_and_process -pl nnUNetPlannerResEncM --input_config "./input.yaml"

2024-07-22 21:40:46,837 - INFO - Fingerprint extraction...
Dataset009_Task09_Spleen
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer
100%|███████████████████████████████████████████| 41/41 [00:05<00:00,  6.92it/s]
2024-07-22 21:40:55,182 - INFO - Experiment planning...
Attempting to find 3d_lowres config. 
Current spacing: [1.60001004 0.81675806 0.81675806]. 
Current patch size: (np.int64(64), np.int64(192), np.int64(192)). 
Current median shape: [187.         497.08737864 497.08737864]
Attempting to find 3d_lowres config. 
Current spacing: [1.64801034 0.8412608  0.8412608 ]. 
Current patch size: (np.int64(64), np.int64(192), np.int64(192)). 
Current median shape: [181.55339806 482.60910548 482.60910548]
Attempting to find 3d_lowres config. 
Current spacing: [1.69745065 0.86649862 0.86649862]. 
Current patch size: (np.int64(64), np.int64(192), np.int64(192)). 
Current median shape: [176.26543501 468.55252959 468.55252959]
Attempting to find 3d_lowre

#### Train all 20 models using all available GPU(s), specifying 1 epoch

In [24]:
!python -m monai.apps.nnunet nnUNetV2Runner train --input_config "./input.yaml" --trainer_class_name nnUNetTrainer_1epoch

2024-07-22 21:54:04,235 - INFO - number of GPUs is 1, device ids are (0,)

############################
INFO: You are using the old nnU-Net default plans. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

2024-07-22 21:54:06.727874: do_dummy_2d_data_aug: False
2024-07-22 21:54:06.728213: Using splits from existing split file: ./work_dir/nnUNet_preprocessed/Dataset009_Task09_Spleen/splits_final.json
2024-07-22 21:54:06.72836

## Run nnU-Net modules using ```nnUNetV2Runner```


```nnUNetV2Runner``` offers the one-stop API to execute the pipeline, as well as the APIs to access the underlying components of nnU-Net V2. Below is the command for different components.

```bash
## [component] convert dataset
python -m monai.apps.nnunet nnUNetV2Runner convert_dataset --input_config "./input.yaml"

## [component] experiment planning and data pre-processing
python -m monai.apps.nnunet nnUNetV2Runner plan_and_process --input_config "./input.yaml"

## [component] use all available GPU(s) to train all 20 models
python -m monai.apps.nnunet nnUNetV2Runner train --input_config "./input.yaml"

## [component] use all available GPU(s) to train a single model
python -m monai.apps.nnunet nnUNetV2Runner train_single_model --input_config "./input.yaml" \
    --config "3d_fullres" \
    --fold 0

## [component] distributed training of 20 models utilizing specified GPU devices 0 and 1
python -m monai.apps.nnunet nnUNetV2Runner train --input_config "./input.yaml" --gpu_id_for_all 0,1

## [component] find best configuration
python -m monai.apps.nnunet nnUNetV2Runner find_best_configuration --input_config "./input.yaml"

## [component] predict, ensemble, and postprocessing
python -m monai.apps.nnunet nnUNetV2Runner predict_ensemble_postprocessing --input_config "./input.yaml"

## [component] predict only
python -m monai.apps.nnunet nnUNetV2Runner predict_ensemble_postprocessing --input_config "./input.yaml" \
	--run_ensemble false --run_postprocessing false

## [component] ensemble only
python -m monai.apps.nnunet nnUNetV2Runner predict_ensemble_postprocessing --input_config "./input.yaml" \
	--run_predict false --run_postprocessing false

## [component] post-processing only
python -m monai.apps.nnunet nnUNetV2Runner predict_ensemble_postprocessing --input_config "./input.yaml" \
	--run_predict false --run_ensemble false
```



For utilizing PyTorch DDP in multi-GPU training, the subsequent command is offered to facilitate the training of a singlular model on a specific fold:

```bash
## [component] multi-gpu training for a single model
python -m monai.apps.nnunet nnUNetV2Runner train_single_model --input_config "./input.yaml" \
    --config "3d_fullres" \
    --fold 0 \
    --gpu_id 0,1
```


We offer an alternative API for constructing datasets from [MSD challenge](http://medicaldecathlon.com/) to meet requirements of nnU-Net, as reference in the provided [link](https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md#how-to-use-decathlon-datasets).

```bash
## [component] converting msd datasets
python -m monai.apps.nnunet nnUNetV2Runner convert_msd_dataset --input_config "./input.yaml" --data_dir "/workspace/data/Task09_Spleen"
```