Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Bug fix: deployed models and training code use different versions of hi-ml #606

Merged
merged 3 commits into from
Dec 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[flake8]
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401, E402
max-line-length = 160
max-complexity = 25
exclude = fastMRI/ test_outputs/ hi-ml/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ gets uploaded to AzureML, by skipping all test folders.
- ([#605](https://github.com/microsoft/InnerEye-DeepLearning/pull/605)) Make build jobs deterministic for regression testing.

### Fixed
- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
- ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
- ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
- ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.
Expand Down
16 changes: 12 additions & 4 deletions InnerEye/Common/fixed_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import os
import sys
from pathlib import Path
from typing import Optional
from typing import List, Optional, Tuple

from InnerEye.Common.type_annotations import PathOrString

Expand Down Expand Up @@ -93,6 +93,15 @@ def get_environment_yaml_file() -> Path:
return env


def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
"""
Returns the paths relative to the repository root where the submodules for hi-ml and hi-ml-azure are expected.
It returns a list with a tuple (folder name, expected subfolder in that folder)
"""
return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
(Path("hi-ml") / "hi-ml" / "src", "health_ml")]


def add_submodules_to_path() -> None:
"""
This function adds all submodules that the code uses to sys.path and to the environment variables. This is
Expand All @@ -104,9 +113,8 @@ def add_submodules_to_path() -> None:
"""
innereye_root = repository_root_directory()
folders_to_add = [(innereye_root, "InnerEye"),
(innereye_root / "fastMRI", "fastmri"),
(innereye_root / "hi-ml" / "hi-ml-azure" / "src", "health_azure"),
(innereye_root / "hi-ml" / "hi-ml" / "src", "health_ml")]
(innereye_root / "fastMRI", "fastmri")]
folders_to_add.extend([(innereye_root / p, folder) for p, folder in get_hi_ml_submodule_relative_paths()])
for (folder, subfolder_that_must_exist) in folders_to_add:
if (folder / subfolder_that_must_exist).is_dir():
folder_str = str(folder)
Expand Down
12 changes: 9 additions & 3 deletions InnerEye/ML/run_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,10 +697,11 @@ def copy_child_paths_to_folder(self,
:param python_environment: The Python environment that is used in the present AzureML run.
"""

def copy_folder(source_folder: Path, destination_folder: str = "") -> None:
def copy_folder(source_folder: Path, destination_folder: Optional[Path] = None) -> None:
logging.info(f"Copying folder for registration: {source_folder}")
destination_folder = destination_folder or source_folder.name
shutil.copytree(str(source_folder), str(model_folder / destination_folder),
full_destination = model_folder / source_folder.name if destination_folder is None \
else model_folder / destination_folder
shutil.copytree(str(source_folder), str(full_destination),
ignore=shutil.ignore_patterns('*.pyc'))

def copy_file(source: Path, destination_file: str) -> None:
Expand Down Expand Up @@ -743,6 +744,11 @@ def copy_file(source: Path, destination_file: str) -> None:
# we can identify it by going up the folder structure off a known file (repository_root does exactly that)
repository_root = fixed_paths.repository_root_directory()
copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
# If hi-ml is used as a submodule, copy that too
for relative_path, _ in fixed_paths.get_hi_ml_submodule_relative_paths():
full_submodule_path = repository_root / relative_path
if full_submodule_path.is_dir():
copy_folder(full_submodule_path, relative_path)
# Extra code directory is expected to be relative to the project root folder.
if self.azure_config.extra_code_directory:
extra_code_folder = self.project_root / self.azure_config.extra_code_directory
Expand Down
8 changes: 3 additions & 5 deletions InnerEye/ML/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@
from pathlib import Path
from typing import Optional, Tuple

# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
# individual warnings only.
# flake8: noqa

# Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
# the working directory is not correctly picked up in sys.path
print(f"Starting InnerEye runner at {sys.argv[0]}")
Expand All @@ -24,6 +20,8 @@
sys.path.insert(0, innereye_root_str)
from InnerEye.Common import fixed_paths

# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()

from azureml._base_sdk_common import user_agent
Expand All @@ -47,7 +45,7 @@
is_offline_run_context)
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
disable_logging_to_file, is_linux, logging_to_stdout)
is_linux, logging_to_stdout)
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.deep_learning_config import DeepLearningConfig
Expand Down
15 changes: 3 additions & 12 deletions InnerEye/Scripts/submit_for_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
import param
import requests
from azureml.core import Model, ScriptRunConfig
from health_azure import create_run_configuration, submit_run

from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common.common_util import logging_to_stdout
from InnerEye.Common.fixed_paths import DEFAULT_DATA_FOLDER, DEFAULT_RESULT_IMAGE_NAME, DEFAULT_RESULT_ZIP_DICOM_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, ENVIRONMENT_YAML_FILE_NAME, PYTHON_ENVIRONMENT_NAME, \
DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, PYTHON_ENVIRONMENT_NAME, \
RUN_SCORING_SCRIPT, SCORE_SCRIPT, SETTINGS_YAML_FILE, repository_root_directory
from InnerEye.Common.generic_parsing import GenericConfig
from health_azure import create_run_configuration, submit_run


class SubmitForInferenceConfig(GenericConfig):
Expand Down Expand Up @@ -149,17 +149,8 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
logging.info(f"Building inference run submission in {source_directory_path}")
image_folder = source_directory_path / DEFAULT_DATA_FOLDER
image = copy_image_file(args.image_file, image_folder, args.use_dicom)
model_sas_urls = model.get_sas_urls()
# Identifies all the files with basename "environment.yml" in the model and downloads them.
# These downloads should go into a temp folder that will most likely not be included in the model itself,
# because the AzureML run will later download the model into the same folder structure, and the file names might
# clash.
temp_folder = source_directory_path / "temp_for_scoring"
conda_files = download_files_from_model(model_sas_urls, ENVIRONMENT_YAML_FILE_NAME, dir_path=temp_folder)
if len(conda_files) != 1:
raise ValueError("Exactly 1 Conda environment definition must exist in the model.")
# Retrieve the name of the Python environment that the training run used. This environment should have been
# registered. If no such environment exists, it will be re-create from the Conda files provided.
# registered at training.
python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
if not python_environment_name:
raise ValueError(f"The model did not contain tag {PYTHON_ENVIRONMENT_NAME} for the AzureML environment to use.")
Expand Down
8 changes: 4 additions & 4 deletions Tests/AfterTraining/test_after_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@
from InnerEye.Scripts import submit_for_inference
from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_nifti_shape

FALLBACK_SINGLE_RUN = "refs_pull_593_merge_1637188926_7ba554ba"
FALLBACK_ENSEMBLE_RUN = "refs_pull_545_merge:HD_caea82ae-9603-48ba-8280-7d2bc6272411"
FALLBACK_2NODE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538178_9f3023b2"
FALLBACK_SINGLE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867172_17ba8dc5"
FALLBACK_ENSEMBLE_RUN = "refs_pull_606_merge:HD_b8a6ad93-8c19-45de-8ea1-f87fce92c3bd"
FALLBACK_2NODE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867224_8d8072fe"
FALLBACK_CV_GLAUCOMA = "refs_pull_545_merge:HD_72ecc647-07c3-4353-a538-620346114ebd"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538216_3eb92f09"
FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867108_789991ac"


def get_most_recent_run_id(fallback_run_id_for_local_execution: str = FALLBACK_SINGLE_RUN) -> str:
Expand Down
5 changes: 5 additions & 0 deletions TestsOutsidePackage/test_register_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
project_root = Path(__file__).parent.parent
ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
model_folder = test_output_dirs.root_dir / "final"
hi_ml_submodules = [p for p, _ in fixed_paths.get_hi_ml_submodule_relative_paths()]
has_submodule = any(folder.is_dir() for folder in hi_ml_submodules)
ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
expected_files = [
fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
Expand All @@ -61,6 +63,9 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
"InnerEye/Common/fixed_paths.py",
"InnerEye/Common/common_util.py",
]
if has_submodule:
expected_files.extend(["hi-ml/hi-ml/src/health_ml/__init__.py",
"hi-ml/hi-ml-azure/src/health_azure/__init__.py"])
for r in checkpoints_relative:
expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
for expected_file in expected_files:
Expand Down
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ dependencies:
- gitpython==3.1.7
- gputil==1.4.0
- h5py==2.10.0
- hi-ml==0.1.11
- hi-ml-azure==0.1.11
- InnerEye-DICOM-RT==1.0.1
- joblib==0.16.0
- jupyter==1.0.0
Expand Down Expand Up @@ -57,6 +55,7 @@ dependencies:
- pytorch-lightning==1.3.8
- rich==5.1.1
- rpdb==0.1.6
- ruamel.yaml==0.16.12
- runstats==1.8.0
- scikit-image==0.17.2
- scikit-learn==0.23.2
Expand Down
17 changes: 11 additions & 6 deletions score.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
from collections import defaultdict
import logging
import os
import sys
import zipfile
from collections import defaultdict
from pathlib import Path
from typing import List, Optional, Tuple
import zipfile

import numpy as np
import param
from azureml.core import Run
from InnerEye_DICOM_RT.nifti_to_dicom_rt_converter import rtconvert
from azureml.core import Run

from InnerEye.Azure.azure_util import is_offline_run_context
from InnerEye.Common import fixed_paths

# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
# in a submodule
fixed_paths.add_submodules_to_path()

from InnerEye.Azure.azure_util import is_offline_run_context
from InnerEye.Common.fixed_paths import DEFAULT_RESULT_ZIP_DICOM_NAME
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.Common.type_annotations import TupleFloat3, TupleInt3
Expand All @@ -27,8 +32,8 @@
from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
from InnerEye.ML.utils.config_loader import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
load_dicom_series_and_save
from InnerEye.ML.utils.io_util import ImageWithHeader, load_dicom_series_and_save, load_nifti_image, \
reverse_tuple_float3, store_as_ubyte_nifti


class ScorePipelineConfig(GenericConfig):
Expand Down