### Setup Imports

In [1]:
import os
import nibabel as nib
import numpy as np
import json

from monai.apps.auto3dseg import DataAnalyzer, export_bundle_algo_history, import_bundle_algo_history
from monai.utils.enums import AlgoKeys
from monai.auto3dseg import algo_to_pickle
from monai.config import print_config
from monai.bundle.config_parser import ConfigParser

ModuleNotFoundError: No module named 'monai'

In [2]:
!pip list

Package                  Version
------------------------ ----------------
absl-py                  2.1.0
alembic                  1.13.1
anyio                    4.2.0
asttokens                2.4.0
attrs                    23.2.0
audioread                3.0.1
backcall                 0.2.0
bcrypt                   4.0.1
beautifulsoup4           4.12.3
blinker                  1.7.0
cachetools               5.3.0
certifi                  2023.7.22
cffi                     1.16.0
charset-normalizer       3.3.0
click                    8.1.7
cloudpickle              3.0.0
comm                     0.1.4
contourpy                1.2.0
cryptography             3.4.8
cycler                   0.12.1
databricks-cli           0.18.0
dbus-python              1.2.18
debugpy                  1.8.0
decorator                5.1.1
dicomweb-client          0.59.1
diskcache                5.6.3
distlib                  0.3.6
distro                   1.7.0
distro-info              1.1build1
docker    

### Run the DataAnalyzer on datasets

In [2]:

json_path = "/var/data/student_home/user/project_dir/monai_segmentation/monai_training/JSON_dir/train.json"

datalist = [] # your list with json objects (dicts)

with open(json_path) as json_file:
   datalist = json.load(json_file)


In [4]:
datalist

{'description': 'Dataset for Choroid Plexus segmentation',
 'labels': {'0': 'background', '1': 'Choroid Plexus'},
 'modality': {'0': 'MR'},
 'name': 'MRI Dataset - Choroid Plexus Segmentation',
 'numTraining': 29,
 'tensorImageSize': '3D',
 'training': [{'fold': 0,
   'image': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/26_ChP.nii.gz',
   'label': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/labels/final/26_ChP.nii.gz'},
  {'fold': 0,
   'image': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/22_ChP.nii.gz',
   'label': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/labels/final/22_ChP.nii.gz'},
  {'fold': 0,
   'image': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/19_ChP.nii.gz',
   'label': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/labels/final/19_ChP.nii.gz'},
  {'fold': 0,
   'image': '/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch/3_ChP.nii.gz',


In [3]:
work_dir = os.path.join("/var/data/student_home/user/project_dir/monai_segmentation/monai_training", 'working_dir_training_from_scratch') 
work_dir = os.path.join("/var/data/student_home/user/project_dir/monai_segmentation/monai_training", 'working_dir_training_from_scratch_0327') 

dataroot = "/var/data/MONAI_Choroid_Plexus/dataset_monai_train_from_scratch"

# create working directory
if not os.path.isdir(work_dir):
    os.makedirs(work_dir)
if not os.path.isdir(dataroot):
    os.makedirs(dataroot)

da_output_yaml = os.path.join(work_dir, "datastats.yaml")


In [4]:

#analyser = DataAnalyzer(datalist, dataroot, output_path=da_output_yaml)
analyser = DataAnalyzer(json_path, dataroot, output_path=da_output_yaml)
   
analyser.get_all_case_stats()




2024-03-27 22:52:52,728 - INFO - Found 1 GPUs for data analyzing!


100%|██████████| 29/29 [00:22<00:00,  1.30it/s]

2024-03-27 22:53:15,248 - INFO - Writing data stats to /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch_0327/datastats.yaml.
2024-03-27 22:53:15,263 - INFO - Writing by-case data stats to /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch_0327/datastats_by_case.yaml, this may take a while.





{stats_summary: {image_stats: {shape: {'max': [257, 384, 384],
    'mean': [205.58620689655172, 290.2068965517241, 290.2068965517241],
    'median': [185.0, 256.0, 256.0],
    'min': [180, 240, 240],
    'stdev': [34.54983131989295, 63.26811002030186, 63.26811002030186],
    'percentile': [[180, 240, 240],
     [180, 240, 240],
     [257, 384, 384],
     [257, 384, 384]],
    'percentile_00_5': [180, 240, 240],
    'percentile_10_0': [180, 240, 240],
    'percentile_90_0': [257, 384, 384],
    'percentile_99_5': [257, 384, 384]},
   channels: {'max': 1,
    'mean': 1.0,
    'median': 1.0,
    'min': 1,
    'stdev': 0.0,
    'percentile': [1, 1, 1, 1],
    'percentile_00_5': 1,
    'percentile_10_0': 1,
    'percentile_90_0': 1,
    'percentile_99_5': 1},
   cropped_shape: {'max': [257, 364, 383],
    'mean': [205.58620689655172, 283.7241379310345, 283.48275862068965],
    'median': [185.0, 256.0, 256.0],
    'min': [180, 240, 240],
    'stdev': [34.54983131989295, 53.73876565945357, 53

In [9]:
data_src = {
    "modality": "MRI",
    "datalist": json_path,#datalist, # give path to json file, it is not necessary to already read the json file
    "dataroot": dataroot,
}

data_src_cfg = os.path.join(work_dir, "data_src_cfg.yaml")
ConfigParser.export_config_file(data_src, data_src_cfg)

## Generate Bundle: 

In [1]:
from monai.apps.auto3dseg import BundleAlgo

In [12]:
# Generate a new BundleAlgo object
data_stats_yaml = da_output_yaml
algo = BundleAlgo(template_path="/var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/algorithm_templates")
algo.set_data_stats(data_stats_yaml)
# algo.set_data_src("../data_src.json")
algo.export_to_disk(".", algo_name="mynet")

2024-02-26 15:10:25,924 - INFO - Generated:./mynet


In [None]:
# Generate a new BundleAlgo object
data_stats_yaml = da_output_yaml
algo = BundleAlgo(template_path="/var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/algorithm_templates")
algo.set_data_stats(data_stats_yaml)
# algo.set_data_src("../data_src.json")
algo.export_to_disk(".", algo_name="mynet")

## But maybe the Bundle class does not exist, so a new Bundle needs to be generated: 

In [6]:
from monai.apps.auto3dseg import BundleGen
import os

work_dir = "/var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch"
data_output_yaml = os.path.join(work_dir, "datastats.yaml")
data_src_cfg = os.path.join(work_dir, "data_src_cfg.yaml")
#data_src_cfg = "./task.yaml"

bundle_generator = BundleGen(
    algo_path=work_dir,
    data_stats_filename=data_output_yaml,
    templates_path_or_url=None,
    data_src_cfg_name=data_src_cfg,
)

bundle_generator.generate(work_dir, num_fold=5) # creates 5 folders for each algroithm

'''
Folder structure of the working directory:
working_dir_training_from_scratch
    ├── data_src_cfg.yaml
    ├── datastats.yaml
    ├── bundle
    │   ├── algo1
    │   │   ├── fold0
    │   │   │   ├── task.yaml
    │   │   │   └── train
    │   │   ├── fold1
    │   │   │   ├── task.yaml
    │   │   │   └── train
    │   │   ├── fold2
    │   │   │   ├── task.yaml
    │   │   │   └── train
    │   │   ├── fold3
    │   │   │   ├── task.yaml
    │   │   │   └── train
    │   │   └── fold4
    │   │       ├── task.yaml
    │   │       └── train
    │   ├── algo2
    │   │   ├── ...
    │   │   .
    │   │   .
    │   └── algo3
    │       ├── ...
    └── algorithm_templates
        ├── algo1
        │   ├── configs
        │   │   ├── hyperparameters_search.yaml
        │   │   ├── hyperparameters.yaml
        │   │   ├── network_search.yaml
        │   │   ├── network.yaml
        │   │   ├── transforms_infer.yaml
        │   │   ├── transforms_train.yaml
        │   │   └── transforms_validate.yaml
        │   ├── docs
        │       └──  README.md
        │   └── scripts
        │       ├── algo.py
        │       ├── arch_code.pth  
        │       ├── infer.py  
        │       ├── search.py    
        │       ├── train.py
        │       ├── validate.py
        │       ├── infer.py   

    
'''

algo_templates.tar.gz: 296kB [00:00, 390kB/s]                             

2024-02-20 12:13:28,562 - INFO - Downloaded: /tmp/tmp7wkndqe8/algo_templates.tar.gz
2024-02-20 12:13:28,563 - INFO - Expected md5 is None, skip md5 check for file /tmp/tmp7wkndqe8/algo_templates.tar.gz.
2024-02-20 12:13:28,565 - INFO - Writing into directory: /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch.





2024-02-20 12:13:29,189 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/segresnet2d_0
2024-02-20 12:13:29,525 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/segresnet2d_1
2024-02-20 12:13:30,108 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/segresnet2d_2
2024-02-20 12:13:30,469 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/segresnet2d_3
2024-02-20 12:13:30,834 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/segresnet2d_4
2024-02-20 12:13:31,193 - INFO - /var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/dints_0
2024-02-20 12:13:31,725 - INFO - /var/data/student_home/user/project_dir/mon

## Getting and saving the algorithm generation history to the local drive
If the users continue to train the algorithms on local system, The history of the algorithm generation can be fetched via get_history method of the BundleGen object. There also are scenarios that users need to stop the Python process after the algo_gen. For example, the users may need to transfer the files to a remote cluster to start the training. Auto3DSeg offers a utility function export_bundle_algo_history to dump the history to hard drive and recall it by import_bundle_algo_history.

If the files are copied to a remote system, please ensure the algorithm templates are also copied there. Some functions require the path to instantiate the algorithm class properly.

In [8]:
history = bundle_generator.get_history()
export_bundle_algo_history(history)  # save the Algo objects

In [2]:
max_epochs = 100
max_epochs = max(max_epochs, 2)

train_param = {
    #"CUDA_VISIBLE_DEVICES": [0],  # use only 1 gpu
    "num_epochs_per_validation": 2 * max_epochs, # how to chose that?
    "num_images_per_batch": 2,
    "num_epochs": max_epochs,
    "num_warmup_iterations": 2 * max_epochs, # how to chose that?
}

print(train_param)

{'num_epochs_per_validation': 200, 'num_images_per_batch': 2, 'num_epochs': 100, 'num_warmup_iterations': 200}


In [3]:
work_dir = "/var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch"
history = import_bundle_algo_history(work_dir, only_trained=False)
for algo_dict in history:
    print(algo_dict)
    algo = algo_dict[AlgoKeys.ALGO]
    print(algo.template_path)
    algo.train(train_param)  # can use default params by `algo.train()`
    acc = algo.get_score()
    algo_to_pickle(algo, template_path=algo.template_path, best_metric=acc)

{identifier: 'dints_0', algo_instance: <dints.scripts.algo.DintsAlgo object at 0x7fd1c15fc3d0>, best_metric: None, is_trained: False}
/var/data/student_home/user/project_dir/monai_segmentation/monai_training/working_dir_training_from_scratch/algorithm_templates
ARRIVED HERE:  {'searching#num_epochs_per_validation': 200, 'searching#num_images_per_batch': 2, 'searching#num_epochs': 100, 'searching#num_warmup_iterations': 200}


AttributeError: 'DintsAlgo' object has no attribute 'device_setting'

## Training # 

In [None]:
class Trainer:

    # Class to train a Neural Network from scratch

    # initialization
    def __init__(self, work_dir: str=".", dataroot: str = ".", json_file: str = ".", output_dir=None):

        self.workdir=work_dir
        self.dataroot=dataroot
        self.JSON_file=json_file
        if output_dir is None:
            self.output_dir=os.path.join(self.workdir, 'working_directory_finetuning')
        elif  isinstance(output_dir, str):
            self.output_dir=output_dir
    
    def finetuning_run(self):

        dataroot = self.dataroot
        work_dir = self.output_dir

        # create working directory
        if not os.path.isdir(work_dir):
            os.makedirs(work_dir)

        algorithm_path=os.path.join(self.workdir, 'DNN_models', 'algorithm_templates')

        da_output_yaml = os.path.join(work_dir, "datastats.yaml")
        data_src_cfg = os.path.join(work_dir, "data_src_cfg.yaml")

        if not os.path.isdir(dataroot):
            os.makedirs(dataroot)

        if not os.path.isdir(work_dir):
            os.makedirs(work_dir)


        # write to a json file
        datalist = self.JSON_file

        # 1. Analyze Dataset

        da = DataAnalyzer(datalist, dataroot, output_path=da_output_yaml)
        da.get_all_case_stats()

        data_src = {
            "modality": "MRI",
            "datalist": datalist,
            "dataroot": dataroot,
        }

        ConfigParser.export_config_file(data_src, data_src_cfg)
