## 環境設置

In [None]:
# Mount Google Drive at /content/drive so later cells can read the dataset
# from it and write results back to it.
# force_remount=True is passed so that re-running this cell remounts the
# drive even if a mount already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install nnunetv2 -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/206.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m206.3/206.3 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 MB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[2K   [9

In [None]:
import os

# Export the three nnU-Net folder locations as environment variables
# (presumably read by the nnUNetv2_* commands run later in this notebook).
# Raw and preprocessed data point at the local /content disk; results point
# at a Google Drive folder.
os.environ.update({
    'nnUNet_raw': '/content/nnUNet_raw',
    'nnUNet_preprocessed': '/content/nnUNet_preprocessed',
    'nnUNet_results': '/content/drive/MyDrive/poster/NNUNET/nnUNet_results',
})

## 用程式碼上傳raw

In [None]:
import os
import shutil
import time
import gc
from tqdm import tqdm

def copy_single_file(src_file, dest_file):
    """Copy one file to ``dest_file``, creating missing parent directories.

    The copy is done with ``shutil.copy2``. Any exception is reported on
    stdout and turned into a ``False`` return value instead of being raised,
    so a bulk copy can continue with the remaining files.

    Args:
        src_file: Path of the file to copy.
        dest_file: Destination path for the copy.

    Returns:
        True if the copy succeeded, False if an exception occurred.
    """
    try:
        # A destination without a directory component has an empty dirname,
        # and os.makedirs('') raises FileNotFoundError, so only create the
        # parent directory when there actually is one.
        parent_dir = os.path.dirname(dest_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        shutil.copy2(src_file, dest_file)
        return True
    except Exception as e:
        # Deliberately broad: this helper is best-effort and must not abort
        # the surrounding batch copy; the caller decides what to do with False.
        print(f"Error copying {src_file} to {dest_file}: {str(e)}")
        return False

def get_all_files(source_dir):
    """Return the paths of all files under ``source_dir``, in a stable order.

    The tree is walked top-down. Within each directory the file names are
    sorted, and sub-directories are visited in sorted order, so two calls on
    an unchanged tree return the same list. Callers that index into the
    result (for example to resume an interrupted copy) depend on this.

    Args:
        source_dir: Directory to scan recursively.

    Returns:
        A list of file paths, each prefixed with ``source_dir``. The list is
        empty if the directory contains no files or cannot be walked.
    """
    all_files = []
    for root, dirs, files in os.walk(source_dir):
        # Sorting in place makes os.walk descend into sub-directories in
        # sorted order instead of the arbitrary order the OS reports.
        dirs.sort()
        for file_name in sorted(files):
            all_files.append(os.path.join(root, file_name))
    return all_files

def robust_copy_directory(source_dir, dest_dir, start_idx=0, batch_size=5, sleep_time=2):
    """
    Copy every file under source_dir to dest_dir in small, resumable batches.

    The directory structure below source_dir is reproduced below dest_dir.
    Files are processed in the order returned by get_all_files; after each
    batch the function pauses and prints the call needed to resume from that
    point. Resuming by index only works if get_all_files returns the files in
    the same order on every call.

    Individual copy failures do not stop the run; they are collected and
    reported in the final summary.

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        start_idx: Index to start copying from (useful for resuming)
        batch_size: Number of files to copy before pausing
        sleep_time: Time to sleep between batches in seconds

    Raises:
        ValueError: If batch_size is smaller than 1 or start_idx is negative.
    """
    # Validate up front: a non-positive batch_size would either crash inside
    # range() or skip the loop and still report success, and a negative
    # start_idx would index the file list from the end.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if start_idx < 0:
        raise ValueError(f"start_idx must not be negative, got {start_idx}")

    # Create destination directory if it doesn't exist
    os.makedirs(dest_dir, exist_ok=True)

    # Get list of all files
    all_files = get_all_files(source_dir)
    total_files = len(all_files)

    if total_files == 0:
        print(f"No files found in {source_dir}; nothing to copy")
        return

    if start_idx >= total_files:
        print(f"Start index {start_idx} is greater than or equal to total files {total_files}; nothing to copy")
        return

    print(f"Found {total_files} files to copy, starting from index {start_idx}")

    attempted = 0
    failed_files = []  # relative paths of files whose copy returned False

    # Process files in small batches
    for i in range(start_idx, total_files, batch_size):
        batch_end = min(i + batch_size, total_files)
        print(f"\nProcessing batch {i//batch_size + 1}: files {i+1}-{batch_end} of {total_files}")

        # Process each file in the current batch
        for j in range(i, batch_end):
            src_file = all_files[j]
            # Calculate relative path to maintain directory structure
            rel_path = os.path.relpath(src_file, source_dir)
            dest_file = os.path.join(dest_dir, rel_path)

            print(f"Copying file {j+1}/{total_files}: {rel_path}")
            attempted += 1
            if not copy_single_file(src_file, dest_file):
                failed_files.append(rel_path)

            # Force garbage collection after each file
            gc.collect()

        # After each batch, print progress and pause
        print(f"Completed {batch_end}/{total_files} files ({(batch_end/total_files)*100:.1f}%)")

        if batch_end < total_files:
            print(f"Pausing for {sleep_time} seconds before next batch...")
            time.sleep(sleep_time)

            # More aggressive garbage collection roughly every 50 files:
            # true for the first batch boundary at or after a multiple of 50.
            if (batch_end % 50) < batch_size:
                print("Performing extensive garbage collection...")
                for _ in range(3):
                    gc.collect()
                time.sleep(5)

            # Print resume information
            print(f"""
If this process freezes, you can resume by running:
robust_copy_directory("{source_dir}", "{dest_dir}", start_idx={batch_end}, batch_size={batch_size})
""")

    # Report what actually happened rather than assuming every file in the
    # tree was copied by this call.
    copied = attempted - len(failed_files)
    if failed_files:
        print(f"\nCopy finished with errors: copied {copied} of {attempted} files from {source_dir} to {dest_dir}")
        print(f"WARNING: {len(failed_files)} files failed to copy:")
        for rel_path in failed_files[:10]:
            print(f"  - {rel_path}")
        if len(failed_files) > 10:
            print(f"  ... and {len(failed_files) - 10} more")
    else:
        print(f"\nCopy complete! Copied {copied} files from {source_dir} to {dest_dir}")

def verify_copy(source_dir, dest_dir):
    """Check that every file under source_dir also exists under dest_dir.

    Files are matched by their path relative to the respective root. A file
    counts as copied correctly only if it is present in dest_dir and has the
    same size as the source file; file contents are not compared. Extra files
    that exist only in dest_dir are ignored.

    The result is printed; nothing is returned.

    Args:
        source_dir: Root of the original directory tree.
        dest_dir: Root of the copied directory tree.
    """
    # Convert to relative paths for comparison
    source_rel_paths = {os.path.relpath(f, source_dir) for f in get_all_files(source_dir)}
    dest_rel_paths = {os.path.relpath(f, dest_dir) for f in get_all_files(dest_dir)}

    # Sorted so the report is the same on every run.
    missing_files = sorted(source_rel_paths - dest_rel_paths)

    # A file that exists on both sides can still be a truncated copy, so
    # compare sizes as an inexpensive integrity check.
    size_mismatches = []
    for rel_path in sorted(source_rel_paths & dest_rel_paths):
        try:
            same_size = (os.path.getsize(os.path.join(source_dir, rel_path))
                         == os.path.getsize(os.path.join(dest_dir, rel_path)))
        except OSError:
            # A file that cannot be stat'ed cannot be confirmed as copied.
            same_size = False
        if not same_size:
            size_mismatches.append(rel_path)

    print(f"Source files: {len(source_rel_paths)}")
    print(f"Destination files: {len(dest_rel_paths)}")

    if missing_files:
        print(f"WARNING: {len(missing_files)} files were not copied!")
        if len(missing_files) < 10:
            print("Missing files:")
        else:
            print("First 10 missing files:")
        for rel_path in missing_files[:10]:
            print(f"  - {rel_path}")

    if size_mismatches:
        print(f"WARNING: {len(size_mismatches)} files differ in size between source and destination!")
        if len(size_mismatches) < 10:
            print("Files with size mismatch:")
        else:
            print("First 10 files with size mismatch:")
        for rel_path in size_mismatches[:10]:
            print(f"  - {rel_path}")

    if not missing_files and not size_mismatches:
        print("All files were copied successfully!")

In [None]:
# Stage the raw dataset: the source is the copy kept on Google Drive, the
# destination is the folder under /content/nnUNet_raw.
source_directory = '/content/drive/MyDrive/poster/NNUNET/nnUNet_raw/Dataset003_aocr2024_partial'
destination_directory = '/content/nnUNet_raw/Dataset003_aocr2024_partial'

robust_copy_directory(source_dir=source_directory, dest_dir=destination_directory)

# Once the copy has finished, compare the two directory trees.
verify_copy(source_dir=source_directory, dest_dir=destination_directory)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing batch 20: files 96-100 of 2001
Copying file 96/2001: imagesTr/PARTIAL_0095_0000.nii.gz
Copying file 97/2001: imagesTr/PARTIAL_0096_0000.nii.gz
Copying file 98/2001: imagesTr/PARTIAL_0097_0000.nii.gz
Copying file 99/2001: imagesTr/PARTIAL_0098_0000.nii.gz
Copying file 100/2001: imagesTr/PARTIAL_0099_0000.nii.gz
Completed 100/2001 files (5.0%)
Pausing for 2 seconds before next batch...
Performing extensive garbage collection...

If this process freezes, you can resume by running:
robust_copy_directory("/content/drive/MyDrive/poster/NNUNET/nnUNet_raw/Dataset003_aocr2024_partial", "/content/nnUNet_raw/Dataset003_aocr2024_partial", start_idx=100, batch_size=5)


Processing batch 21: files 101-105 of 2001
Copying file 101/2001: imagesTr/PARTIAL_0100_0000.nii.gz
Copying file 102/2001: imagesTr/PARTIAL_0101_0000.nii.gz
Copying file 103/2001: imagesTr/PARTIAL_0102_0000.nii.gz
Copying file 104/2001: imagesTr/PARTIAL_0103

In [None]:
import os
import json

def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          dataset_name: str = None,
                          overwrite_image_reader_writer: str = None,
                          ):
    """
    Write a dataset.json file into output_folder.

    The generated file looks like this:
    {
        "name": "nnunet-how-to",
        "channel_names": {
            "0": "CT"
        },
        "labels": {
            "background": 0,
            "AA": 1
        },
        "numTraining": 10,
        "file_ending": ".nii.gz",
        "overwrite_image_reader_writer": "SimpleITKIO"
    }

    The input dictionaries are not modified; normalized copies are written.

    Args:
        output_folder: Folder in which dataset.json is created. It is created
            if it does not exist yet.
        channel_names: Mapping of channel index to channel name. Keys are
            converted to strings.
        labels: Mapping of label name to label value. Values are converted to
            int; a tuple or list value is converted element by element.
        num_training_cases: Written as "numTraining".
        file_ending: Written as "file_ending".
        dataset_name: Written as "name"; an empty string is written if None.
        overwrite_image_reader_writer: Written as
            "overwrite_image_reader_writer"; the key is omitted if None.
    """
    # channel names need strings as keys; build a new dict so the caller's
    # dictionary is left untouched
    channel_names_out = {str(key): value for key, value in channel_names.items()}

    # labels need ints as values; again work on a copy
    labels_out = {}
    for label_name, value in labels.items():
        if isinstance(value, (tuple, list)):
            labels_out[label_name] = [int(i) for i in value]
        else:
            labels_out[label_name] = int(value)

    dataset_json = {
        'name': '' if dataset_name is None else dataset_name,
        'channel_names': channel_names_out,
        'labels': labels_out,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # Only emit the override when one was requested: an empty string is not
    # the name of a reader/writer class, and the key is optional.
    if overwrite_image_reader_writer is not None:
        dataset_json['overwrite_image_reader_writer'] = overwrite_image_reader_writer

    # os.makedirs('') raises, so skip it when writing to the current directory.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    save_path = os.path.join(output_folder, 'dataset.json')
    with open(save_path, 'w') as f:
        json.dump(dataset_json, f, indent=4)

    print(f'dataset.json saved to {save_path}')

In [None]:

# Write dataset.json into the dataset folder under /content/nnUNet_raw.
json_dir_path = '/content/nnUNet_raw/Dataset003_aocr2024_partial'
generate_dataset_json(
    output_folder=json_dir_path,
    channel_names={0: 'CT'},
    labels={"background": 0, "AA": 1, "HA": 2},
    num_training_cases=998,
    file_ending='.nii.gz',
    overwrite_image_reader_writer='SimpleITKIO',
)

dataset.json saved to /content/nnUNet_raw/Dataset003_aocr2024_partial/dataset.json


## plan and preprocess

`nnUNetv2_plan_and_preprocess` is equivalent to running `nnUNetv2_extract_fingerprint`, `nnUNetv2_plan_experiment`, and `nnUNetv2_preprocess` in sequence. The cells below run these three steps one at a time.

In [None]:
# !nnUNetv2_plan_and_preprocess -h

In [None]:
# Step 1 of 3: extract the dataset fingerprint for dataset ID 3.
# --verify_dataset_integrity additionally runs the dataset integrity check
# before the fingerprint is computed.
!nnUNetv2_extract_fingerprint -d 3 --verify_dataset_integrity

Dataset003_aocr2024_partial
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> reader/writer

####################
verify_dataset_integrity Done. 
If you didn't see any error messages then your dataset is most likely OK!
####################

Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> reader/writer
100% 998/998 [01:49<00:00,  9.08it/s]


In [None]:
# Step 2 of 3: plan the experiment for dataset ID 3 using the planner class
# given with -pl (nnUNetPlannerResEncL).
# NOTE(review): later cells refer to plans named nnUNetResEncUNetLPlans,
# presumably the plans file produced by this planner; confirm.
!nnUNetv2_plan_experiment -d 3 -pl nnUNetPlannerResEncL

Dropping 3d_lowres config because the image size difference to 3d_fullres is too small. 3d_fullres: [ 50. 512. 512.], 3d_lowres: [50, 512, 512]
2D U-Net configuration:
{'data_identifier': 'nnUNetPlans_2d', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 35, 'patch_size': (np.int64(512), np.int64(512)), 'median_image_size_in_voxels': array([512., 512.]), 'spacing': array([0.68359375, 0.68359375]), 'normalization_schemes': ['CTNormalization'], 'use_mask_for_norm': [False], 'resampling_fn_data': 'resample_data_or_seg_to_shape', 'resampling_fn_seg': 'resample_data_or_seg_to_shape', 'resampling_fn_data_kwargs': {'is_seg': False, 'order': 3, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_seg_kwargs': {'is_seg': True, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_probabilities': 'resample_data_or_seg_to_shape', 'resampling_fn_probabilities_kwargs': {'is_seg': False, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'architecture': {'network_class_na

In [None]:
# Print the command-line help for nnUNetv2_preprocess (reference only).
!nnUNetv2_preprocess -h

usage: nnUNetv2_preprocess [-h] [-d D [D ...]] [-plans_name PLANS_NAME] [-c C [C ...]]
                           [-np NP [NP ...]] [--verbose]

options:
  -h, --help            show this help message and exit
  -d D [D ...]          [REQUIRED] List of dataset IDs. Example: 2 4 5. This will run fingerprint
                        extraction, experiment planning and preprocessing for these datasets. Can
                        of course also be just one dataset
  -plans_name PLANS_NAME
                        [OPTIONAL] You can use this to specify a custom plans file that you may
                        have generated
  -c C [C ...]          [OPTIONAL] Configurations for which the preprocessing should be run.
                        Default: 2d 3d_fullres 3d_lowres. 3d_cascade_fullres does not need to be
                        specified because it uses the data from 3d_fullres. Configurations that do
                        not exist for some dataset will be skipped.
  -np NP [NP ...] 

In [None]:
# Step 3 of 3: preprocess dataset ID 3 with the plans named
# nnUNetResEncUNetLPlans, restricted to the 3d_fullres configuration (-c).
!nnUNetv2_preprocess -d 3 \
                     -plans_name nnUNetResEncUNetLPlans \
                     -c 3d_fullres

Preprocessing dataset Dataset003_aocr2024_partial
Configuration: 3d_fullres...
100% 998/998 [31:25<00:00,  1.89s/it]


## train

In [None]:
# Print the command-line help for nnUNetv2_train (reference only).
!nnUNetv2_train -h

usage: nnUNetv2_train [-h] [-tr TR] [-p P] [-pretrained_weights PRETRAINED_WEIGHTS]
                      [-num_gpus NUM_GPUS] [--npz] [--c] [--val] [--val_best]
                      [--disable_checkpointing] [-device DEVICE]
                      dataset_name_or_id configuration fold

positional arguments:
  dataset_name_or_id    Dataset name or ID to train with
  configuration         Configuration that should be trained
  fold                  Fold of the 5-fold cross-validation. Should be an int between 0 and 4.

options:
  -h, --help            show this help message and exit
  -tr TR                [OPTIONAL] Use this flag to specify a custom trainer. Default:
                        nnUNetTrainer
  -p P                  [OPTIONAL] Use this flag to specify a custom plans identifier. Default:
                        nnUNetPlans
  -pretrained_weights PRETRAINED_WEIGHTS
                        [OPTIONAL] path to nnU-Net checkpoint file to be used as pretrained model.
              

In [None]:
# Train dataset ID 3, configuration 3d_fullres, fold 3 (positional arguments:
# dataset, configuration, fold) with the plans named nnUNetResEncUNetLPlans.
# NOTE(review): --npz and --c are listed in the usage line, but their
# descriptions are cut off in the help output captured in this notebook;
# confirm their meaning against the nnU-Net documentation.
# NOTE(review): later cells use folds 0, 1 and 3, so this cell was presumably
# re-run with the fold argument changed; confirm.
!nnUNetv2_train 3 3d_fullres 3 \
                -p nnUNetResEncUNetLPlans \
                --npz \
                --c

Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

2025-03-18 02:43:44.648379: Using torch.compile...
2025-03-18 02:44:03.387612: do_dummy_2d_data_aug: True
2025-03-18 02:44:03.394195: Creating new 5-fold cross-validation split...
2025-03-18 02:44:03.403931: Desired fold for training: 3
2025-03-18 02:44:03.407138: This split has 799 training and 199 validation cases.
using pin_memory on device 0
using pin_memory on device 0

This is the configuration used by this training:
Configuration name: 3d_fullres
 {'data_identifier': 'nnUNetPlans_3d_fullres', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 2, 'patch_siz

## find best configuration

In [None]:
# Print the command-line help for nnUNetv2_find_best_configuration
# (reference only).
!nnUNetv2_find_best_configuration -h

usage: nnUNetv2_find_best_configuration [-h] [-p P [P ...]] [-c C [C ...]] [-tr TR [TR ...]]
                                        [-np NP] [-f F [F ...]] [--disable_ensembling]
                                        [--no_overwrite]
                                        dataset_name_or_id

positional arguments:
  dataset_name_or_id    Dataset Name or id

options:
  -h, --help            show this help message and exit
  -p P [P ...]          List of plan identifiers. Default: nnUNetPlans
  -c C [C ...]          List of configurations. Default: ['2d', '3d_fullres', '3d_lowres',
                        '3d_cascade_fullres']
  -tr TR [TR ...]       List of trainers. Default: nnUNetTrainer
  -np NP                Number of processes to use for ensembling, postprocessing etc
  -f F [F ...]          Folds to use. Default: 0 1 2 3 4
  --disable_ensembling  Set this flag to disable ensembling
  --no_overwrite        If set we will not overwrite already ensembled files etc. May speed up
 

In [None]:
# Evaluate the trained model for dataset ID 3 and determine postprocessing.
# Only the nnUNetResEncUNetLPlans plans and the 3d_fullres configuration are
# considered, and -f limits the evaluation to folds 0, 1 and 3.
!nnUNetv2_find_best_configuration 3 \
    -p nnUNetResEncUNetLPlans \
    -c 3d_fullres \
    -f 0 1 3


***All results:***
nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres: 0.30610593668230773

*Best*: nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres: 0.30610593668230773

***Determining postprocessing for best model/ensemble***
Removing all but the largest foreground region did not improve results!
Removing all but the largest component for 1 did not improve results! Dice before: 0.5057 after: 0.50516
Removing all but the largest component for 2 did not improve results! Dice before: 0.10651 after: 0.10433

***Run inference like this:***

nnUNetv2_predict -d Dataset003_aocr2024_partial -i INPUT_FOLDER -o OUTPUT_FOLDER -f  0 1 3 -tr nnUNetTrainer -c 3d_fullres -p nnUNetResEncUNetLPlans

***Once inference is completed, run postprocessing like this:***

nnUNetv2_apply_postprocessing -i OUTPUT_FOLDER -o OUTPUT_FOLDER_PP -pp_pkl_file /content/drive/MyDrive/poster/NNUNET/nnUNet_results/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/crossval_results_folds_0_1_3/

## predict

In [None]:
# Copy the inference input folder from Google Drive into /content, giving
# /content/input_partial, which the prediction cell reads from.
!cp -r '/content/drive/MyDrive/poster/NNUNET/predict/input_partial' '/content'

In [None]:
# Run inference on the cases in /content/input_partial using folds 0, 1 and 3
# of the 3d_fullres / nnUNetResEncUNetLPlans model; predictions are written
# to the Google Drive folder given with -o. The arguments follow the command
# printed by nnUNetv2_find_best_configuration.
!nnUNetv2_predict -d Dataset003_aocr2024_partial \
    -i "/content/input_partial" \
    -o "/content/drive/MyDrive/poster/NNUNET/predict/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/fold_0_1_3/output" \
    -f  0 1 3 \
    -tr nnUNetTrainer \
    -c 3d_fullres \
    -p nnUNetResEncUNetLPlans


#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

There are 200 cases in the source folder
I am process 0 out of 1 (max process ID is 0, we start counting with 0!)
There are 200 cases that I would like to predict

Predicting PARTIAL_0001:
perform_everything_on_device: True
100% 12/12 [00:10<00:00,  1.11it/s]
100% 12/12 [00:08<00:00,  1.36it/s]
100% 12/12 [00:08<00:00,  1.36it/s]
sending off prediction to background worker for resampling and export
done with PARTIAL_0001

Predicting PARTIAL_0002:
perform_everything_on_device: True
100% 12/12 [00:08<00:00,  1.36it/s]
100% 12/12 [00:08<00:00,  1.36it/s]
100% 12/12 [00:08<00:00,  1.36it/s]
sen

## postprocess

In [None]:
# Apply the postprocessing stored in postprocessing.pkl (from the
# crossval_results_folds_0_1_3 folder) to the predictions in .../output and
# write the result to .../output_pp. plans.json is taken from the same
# crossval results folder.
# NOTE(review): -np 8 presumably sets the number of worker processes; confirm.
!nnUNetv2_apply_postprocessing \
    -i '/content/drive/MyDrive/poster/NNUNET/predict/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/fold_0_1_3/output' \
    -o '/content/drive/MyDrive/poster/NNUNET/predict/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/fold_0_1_3/output_pp' \
    -pp_pkl_file /content/drive/MyDrive/poster/NNUNET/nnUNet_results/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/crossval_results_folds_0_1_3/postprocessing.pkl \
    -np 8 \
    -plans_json /content/drive/MyDrive/poster/NNUNET/nnUNet_results/Dataset003_aocr2024_partial/nnUNetTrainer__nnUNetResEncUNetLPlans__3d_fullres/crossval_results_folds_0_1_3/plans.json