# Choroid Plexus Segmentation Training via Auto3DSeg

In [2]:
import os
import json
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import glob
import random
import platform
from monai.apps.auto3dseg import AutoRunner
from monai.config import print_config
import importlib
from dataclasses import asdict
from loguru import logger
import sys

from reload_recursive import reload_recursive

import mri_data
import monai_training

print_config()

MONAI version: 1.4.0
Numpy version: 1.26.4
Pytorch version: 2.5.1+cu124
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 46a5272196a6c2590ca2589029eed8e4d56ff008
MONAI __file__: /home/<username>/.virtualenvs/monai/lib/python3.12/site-packages/monai/__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: 5.4.0
Nibabel version: 5.3.2
scikit-image version: 0.24.0
scipy version: 1.14.1
Pillow version: 11.0.0
Tensorboard version: 2.18.0
gdown version: 5.2.0
TorchVision version: 0.20.1+cu124
tqdm version: 4.66.5
lmdb version: 1.5.1
psutil version: 6.1.0
pandas version: 2.2.3
einops version: 0.8.0
transformers version: 4.46.1
mlflow version: 2.17.0
pynrrd version: 1.0.0
clearml version: 1.16.5

For details about installing the optional dependencies, please visit:
    https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies



In [3]:
# Reload the two local packages before importing names from them.
# NOTE(review): presumably reload_recursive re-imports each package's submodules so
# that source edits are picked up without restarting the kernel — confirm against
# the reload_recursive helper.
reload_recursive(monai_training)
reload_recursive(mri_data)

# These imports come after the reload calls above.
from mri_data.file_manager import scan_3Tpioneer_bids, filter_first_ses
from monai_training.preprocess import DataSetProcesser
from monai_training import training, preprocess

In [4]:
# Drop loguru's existing handlers, then log to stderr at DEBUG level.
# The value echoed under this cell is whatever logger.add returns.
logger.remove()
logger.add(sys.stderr, level="DEBUG")

1

In [5]:
# True: reuse the datalist.json already saved in the work directory.
# False: rebuild the dataset and datalist from the scans under dataroot
# (the cells guarded by `if not load_data:` below).
load_data = True

In [6]:
# The shared data drive is mounted at a different location depending on the machine.
hostname = platform.node()
drive_root = Path("/media/smbshare") if hostname == "rhinocampus" else Path("/mnt/h")

# Local project layout.
projects_root = Path("/home/srs-9/Projects")
msmri_home = projects_root.joinpath("ms_mri")
training_work_dirs = msmri_home.joinpath("training_work_dirs")

# Inputs and outputs of this particular training run.
dataroot = drive_root.joinpath("3Tpioneer_bids")
work_dir_name = "choroid_resegment2"
work_dir = training_work_dirs.joinpath(work_dir_name)
modalities = ["flair", "t1"]


## Prep the database

Get the data and labels organized

In [7]:
if load_data:
    # Reuse the datalist that an earlier run wrote into the work directory.
    datalist_file = os.path.join(work_dir, "datalist.json")
    with open(datalist_file, 'r') as datalist_fh:
        datalist = json.loads(datalist_fh.read())

    # Rebuild the dataset object from the same file.
    dataset = preprocess.parse_datalist(datalist_file, dataroot)

[32m2024-10-31 18:43:15.438[0m | [1mINFO    [0m | [36mmonai_training.preprocess[0m:[36mparse_datalist[0m:[36m282[0m - [1mLoading /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/datalist.json[0m
[32m2024-10-31 18:43:15.439[0m | [1mINFO    [0m | [36mmonai_training.preprocess[0m:[36mparse_datalist[0m:[36m283[0m - [1m/home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/datalist.json exists: True[0m


In [8]:
if not load_data:
    # Build the dataset from scratch by scanning dataroot with scan_3Tpioneer_bids.
    # NOTE(review): filter_first_ses presumably keeps only each subject's first
    # session — confirm in mri_data.file_manager.
    dataset_proc = DataSetProcesser.new_dataset(dataroot, scan_3Tpioneer_bids, filters=[filter_first_ses])
    # Prepare the image inputs for the configured modalities.
    dataset_proc.prepare_images(modalities)
    # NOTE(review): suffix_list looks like a set of candidate label-file suffixes
    # (the saved datalist references "choroid_t1_flair-ED" files) — confirm how
    # prepare_labels chooses among them.
    dataset_proc.prepare_labels(["choroid_t1_flair"], suffix_list=["CH", "ED", "DT"])

    dataset = dataset_proc.dataset
    # Sorted in place; the fold assignment in a later cell depends on this order.
    dataset.sort()

In [9]:
if not load_data:
    # Tag every scan with a condition; later cells read it as scan.cond ('tr' / 'ts').
    # NOTE(review): the second argument (0.2) is presumably the held-out fraction —
    # confirm against monai_training.training.assign_conditions.
    dataset = training.assign_conditions(dataset, 0.2)
    # A bare expression nested inside an `if` block is not echoed by the notebook
    # (only a trailing top-level expression is), so print the first scan explicitly.
    print(repr(dataset[0]))

In [10]:
if not load_data:
    # First pass at splitting the scans by condition.
    # NOTE(review): this cell looks stale. `training_data` is not referenced anywhere
    # else in the notebook, and `test_data` is rebuilt from scratch by the later cell
    # that creates `train_data`. It also reads scan.image / scan.label, whereas the
    # other cells use scan.image_path / scan.label_path — confirm those attributes
    # exist, or remove this cell.
    training_data = []
    test_data = []

    for scan in dataset:
        if scan.cond == 'tr':
            # Training entries carry both the image and its label.
            training_data.append({"image": scan.image, "label": scan.label})
        elif scan.cond == 'ts':
            # Test entries carry only the image here.
            test_data.append(scan.image)

## Review

In [11]:
def display_slices(scan, channel=0):
    """Show a scan's image and label side by side at the slice with the most label.

    The slice is chosen along the third axis as the one where the label volume
    has the largest summed value.

    Args:
        scan: Object exposing ``image_path`` and ``label_path``, both readable by
            ``nib.load``.
        channel: Index taken along the last axis when the image volume is 4-D
            (assumed to be a trailing channel/modality axis). Ignored for 3-D images.
    """
    image_data = nib.load(scan.image_path).get_fdata()
    label_data = nib.load(scan.label_path).get_fdata()

    # A 4-D image cannot be shown directly by imshow once a single slice is
    # taken, so reduce it to one 3-D channel first.
    if image_data.ndim == 4:
        image_data = image_data[..., channel]

    # Total label value per slice along the third axis.
    slice_sums = np.sum(label_data, axis=(0, 1))
    max_slice_index = int(np.argmax(slice_sums))
    print(f"Max slice: {max_slice_index}")

    image_slice = image_data[:, :, max_slice_index]
    label_slice = label_data[:, :, max_slice_index]

    fig, (ax_image, ax_label) = plt.subplots(1, 2, figsize=(10, 5))

    ax_image.imshow(image_slice, cmap='gray')
    ax_image.set_title(f"Image 1 - Slice {max_slice_index}")
    ax_image.axis('off')

    ax_label.imshow(label_slice, cmap='gray')
    ax_label.set_title(f"Image 2 - Slice {max_slice_index}")
    ax_label.axis('off')

    plt.show()


In [13]:
# display_slices(dataset[20])

In the original code, labels are included in the test data as well. The original code also has a function that checks that each label contains a nonzero number of voxels.

In [14]:
# Open question: the original code attaches labels to the test data as well, whereas
# the BraTS segmentation example did not. The original comprehensions, for reference:
# train_data = [{'image': path + '/flair.nii.gz', 'label': path + '/flair_chp_mask_qced.nii.gz'} for path in train_exams]
# test_data = [{'image': path + '/flair.nii.gz', 'label': path + '/flair_chp_mask_qced.nii.gz'} for path in test_exams]
if not load_data:
    train_data = []
    test_data = []
    # Route each scan to the list matching its condition.
    entries_by_cond = {'tr': train_data, 'ts': test_data}
    for scan in dataset:
        target = entries_by_cond.get(scan.cond)
        # has_label must be *called*: the bare bound method is always truthy, which
        # would let unlabelled scans into the training list.
        if target is None or not scan.has_label():
            continue
        target.append({"image": str(scan.image_path), "label": str(scan.label_path)})

    print(f"Train num total: {len(train_data)}")
    print(f"Test num: {len(test_data)}")

Create and save datalist

In [15]:
if not load_data:
    n_folds = 5
    datalist = {
        "testing": test_data,
        "training": [{"fold": i % n_folds, "image": c["image"], "label": c["label"]} for i,c in enumerate(train_data)]
    }

    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)

    # dataroot_dir = "/mnt/h"
    # if not os.path.isdir(dataroot_dir):
    #     os.makedirs(dataroot_dir)

    datalist_file = os.path.join(work_dir, "datalist.json")
    with open(datalist_file, "w") as f:
        json.dump(datalist, f)

Load datalist

In [16]:
# Inspect the entries stored under the datalist's "testing" key.
datalist['testing']

[{'image': '/media/smbshare/3Tpioneer_bids/sub-ms1080/ses-20180416/flair.t1.nii.gz',
  'label': '/media/smbshare/3Tpioneer_bids/sub-ms1080/ses-20180416/choroid_t1_flair-ED.nii.gz'},
 {'image': '/media/smbshare/3Tpioneer_bids/sub-ms1188/ses-20200720/flair.t1.nii.gz',
  'label': '/media/smbshare/3Tpioneer_bids/sub-ms1188/ses-20200720/choroid_t1_flair-ED.nii.gz'},
 {'image': '/media/smbshare/3Tpioneer_bids/sub-ms1234/ses-20180214/flair.t1.nii.gz',
  'label': '/media/smbshare/3Tpioneer_bids/sub-ms1234/ses-20180214/choroid_t1_flair-ED.nii.gz'},
 {'image': '/media/smbshare/3Tpioneer_bids/sub-ms1259/ses-20200803/flair.t1.nii.gz',
  'label': '/media/smbshare/3Tpioneer_bids/sub-ms1259/ses-20200803/choroid_t1_flair-ED.nii.gz'},
 {'image': '/media/smbshare/3Tpioneer_bids/sub-ms2126/ses-20181224/flair.t1.nii.gz',
  'label': '/media/smbshare/3Tpioneer_bids/sub-ms2126/ses-20181224/choroid_t1_flair-ED.nii.gz'},
 {'image': '/media/smbshare/3Tpioneer_bids/sub-ms2164/ses-20200113/flair.t1.nii.gz',
  'la

In [17]:
# Describe the task for Auto3DSeg: modality, datalist file and data root.
runner_input = {
    "modality": "MRI",
    "datalist": str(datalist_file),
    "dataroot": str(dataroot),
}

# Only the SwinUNETR algorithm is requested; everything is written under work_dir.
runner = AutoRunner(work_dir=work_dir, algos=["swinunetr"], input=runner_input)

2024-10-31 18:43:24,221 - INFO - AutoRunner using work directory /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2
2024-10-31 18:43:24,225 - INFO - Found num_fold 5 based on the input datalist /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/datalist.json.
2024-10-31 18:43:24,225 - INFO - Setting num_fold 5 based on the input datalist /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/datalist.json.
2024-10-31 18:43:24,237 - INFO - Using user defined command running prefix , will override other settings


In [18]:
max_epochs = 100

# Training overrides handed to the runner through set_training_params.
# NOTE(review): the exact meaning of each key is defined by the Auto3DSeg algorithm
# templates — confirm they are honoured by the "swinunetr" template.
train_param = {
    "num_epochs_per_validation": 1,
    #"num_images_per_batch": 2,
    "num_epochs": max_epochs,
    "num_warmup_epochs": 1,
}
# As the cell's last expression, this call's return value is echoed in the output.
runner.set_training_params(train_param)

<monai.apps.auto3dseg.auto_runner.AutoRunner at 0x7f059291e120>

In [None]:
# NOTE(review): in the recorded run below, data analysis, algorithm generation and
# algorithm training were all skipped and only ensembling ran — presumably because
# earlier results are cached in work_dir. Confirm the cache is intended before
# expecting the training parameters set above to take effect.
runner.run()

2024-10-31 18:43:24,251 - INFO - Skipping data analysis...
2024-10-31 18:43:24,252 - INFO - Skipping algorithm generation...
2024-10-31 18:43:24,253 - INFO - Skipping algorithm training...
2024-10-31 18:43:24,253 - INFO - Ensembling using single GPU!
2024-10-31 18:43:24,254 - INFO - The output_dir is not specified. /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/ensemble_output will be used to save ensemble predictions.
2024-10-31 18:43:24,295 - INFO - Auto3Dseg picked the following networks to ensemble:
2024-10-31 18:43:24,295 - INFO - swinunetr_0
2024-10-31 18:43:24,295 - INFO - swinunetr_1
2024-10-31 18:43:24,296 - INFO - swinunetr_2
2024-10-31 18:43:24,296 - INFO - swinunetr_3
2024-10-31 18:43:24,296 - INFO - swinunetr_4
2024-10-31 18:43:24,296 - INFO - Auto3Dseg ensemble prediction outputs will be saved in /home/srs-9/Projects/ms_mri/training_work_dirs/choroid_resegment2/ensemble_output.


Ensembling (rank 0)...:   0%|          | 0/8 [00:00<?, ?it/s]monai.networks.nets.swin_unetr SwinUNETR.__init__:img_size: Argument `img_size` has been deprecated since version 1.3. It will be removed in version 1.5. The img_size argument is not required anymore and checks on the input size are run during forward().
You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `we

In [None]:
# NOTE(review): scratch cell. The path is hard-coded to a different mount
# (/mnt/t/Data/...) than the drive_root/dataroot configured at the top of the
# notebook, and `img` is not used afterwards in the visible cells — confirm
# whether this cell is still needed.
scan_path = "/mnt/t/Data/3Tpioneer_bids/sub-ms1001/ses-20170215/proc/lesion_index.t3m20-mni_reg.nii.gz"
img = nib.load(scan_path)
