microsoft · ant0nsc · Dec 7, 2021 · Dec 7, 2021 · Dec 7, 2021 · Dec 7, 2021
diff --git a/.flake8 b/.flake8
@@ -1,5 +1,5 @@
 [flake8]
-ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
+ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401, E402
 max-line-length = 160
 max-complexity = 25
 exclude = fastMRI/ test_outputs/ hi-ml/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -63,6 +63,7 @@ gets uploaded to AzureML, by skipping all test folders.
 - ([#605](https://github.com/microsoft/InnerEye-DeepLearning/pull/605)) Make build jobs deterministic for regression testing.
 
 ### Fixed
+- ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
 - ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
 - ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
 - ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.

diff --git a/InnerEye/Common/fixed_paths.py b/InnerEye/Common/fixed_paths.py
@@ -6,7 +6,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Tuple
 
 from InnerEye.Common.type_annotations import PathOrString
 
@@ -93,6 +93,15 @@ def get_environment_yaml_file() -> Path:
     return env
 
 
+def get_hi_ml_submodule_relative_paths() -> List[Tuple[Path, str]]:
+    """
+    Returns the paths relative to the repository root where the submodules for hi-ml and hi-ml-azure are expected.
+    It returns a list with a tuple (folder name, expected subfolder in that folder)
+    """
+    return [(Path("hi-ml") / "hi-ml-azure" / "src", "health_azure"),
+            (Path("hi-ml") / "hi-ml" / "src", "health_ml")]
+
+
 def add_submodules_to_path() -> None:
     """
     This function adds all submodules that the code uses to sys.path and to the environment variables. This is
@@ -104,9 +113,8 @@ def add_submodules_to_path() -> None:
     """
     innereye_root = repository_root_directory()
     folders_to_add = [(innereye_root, "InnerEye"),
-                      (innereye_root / "fastMRI", "fastmri"),
-                      (innereye_root / "hi-ml" / "hi-ml-azure" / "src", "health_azure"),
-                      (innereye_root / "hi-ml" / "hi-ml" / "src", "health_ml")]
+                      (innereye_root / "fastMRI", "fastmri")]
+    folders_to_add.extend([(innereye_root / p, folder) for p, folder in get_hi_ml_submodule_relative_paths()])
     for (folder, subfolder_that_must_exist) in folders_to_add:
         if (folder / subfolder_that_must_exist).is_dir():
             folder_str = str(folder)

diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
@@ -697,10 +697,11 @@ def copy_child_paths_to_folder(self,
         :param python_environment: The Python environment that is used in the present AzureML run.
         """
 
-        def copy_folder(source_folder: Path, destination_folder: str = "") -> None:
+        def copy_folder(source_folder: Path, destination_folder: Optional[Path] = None) -> None:
             logging.info(f"Copying folder for registration: {source_folder}")
-            destination_folder = destination_folder or source_folder.name
-            shutil.copytree(str(source_folder), str(model_folder / destination_folder),
+            full_destination = model_folder / source_folder.name if destination_folder is None \
+                else model_folder / destination_folder
+            shutil.copytree(str(source_folder), str(full_destination),
                             ignore=shutil.ignore_patterns('*.pyc'))
 
         def copy_file(source: Path, destination_file: str) -> None:
@@ -743,6 +744,11 @@ def copy_file(source: Path, destination_file: str) -> None:
         # we can identify it by going up the folder structure off a known file (repository_root does exactly that)
         repository_root = fixed_paths.repository_root_directory()
         copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
+        # If hi-ml is used as a submodule, copy that too
+        for relative_path, _ in fixed_paths.get_hi_ml_submodule_relative_paths():
+            full_submodule_path = repository_root / relative_path
+            if full_submodule_path.is_dir():
+                copy_folder(full_submodule_path, relative_path)
         # Extra code directory is expected to be relative to the project root folder.
         if self.azure_config.extra_code_directory:
             extra_code_folder = self.project_root / self.azure_config.extra_code_directory

diff --git a/InnerEye/ML/runner.py b/InnerEye/ML/runner.py
@@ -9,10 +9,6 @@
 from pathlib import Path
 from typing import Optional, Tuple
 
-# Suppress all errors here because the imports after code cause loads of warnings. We can't specifically suppress
-# individual warnings only.
-# flake8: noqa
-
 # Workaround for an issue with how AzureML and Pytorch Lightning interact: When spawning additional processes for DDP,
 # the working directory is not correctly picked up in sys.path
 print(f"Starting InnerEye runner at {sys.argv[0]}")
@@ -24,6 +20,8 @@
         sys.path.insert(0, innereye_root_str)
 from InnerEye.Common import fixed_paths
 
+# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
+# in a submodule
 fixed_paths.add_submodules_to_path()
 
 from azureml._base_sdk_common import user_agent
@@ -47,7 +45,7 @@
                                        is_offline_run_context)
 from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
 from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
-                                         disable_logging_to_file, is_linux, logging_to_stdout)
+                                         is_linux, logging_to_stdout)
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.ML.common import DATASET_CSV_FILE_NAME
 from InnerEye.ML.deep_learning_config import DeepLearningConfig

diff --git a/InnerEye/Scripts/submit_for_inference.py b/InnerEye/Scripts/submit_for_inference.py
@@ -12,14 +12,14 @@
 import param
 import requests
 from azureml.core import Model, ScriptRunConfig
+from health_azure import create_run_configuration, submit_run
 
 from InnerEye.Azure.azure_config import AzureConfig
 from InnerEye.Common.common_util import logging_to_stdout
 from InnerEye.Common.fixed_paths import DEFAULT_DATA_FOLDER, DEFAULT_RESULT_IMAGE_NAME, DEFAULT_RESULT_ZIP_DICOM_NAME, \
-    DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, ENVIRONMENT_YAML_FILE_NAME, PYTHON_ENVIRONMENT_NAME, \
+    DEFAULT_TEST_IMAGE_NAME, DEFAULT_TEST_ZIP_NAME, PYTHON_ENVIRONMENT_NAME, \
     RUN_SCORING_SCRIPT, SCORE_SCRIPT, SETTINGS_YAML_FILE, repository_root_directory
 from InnerEye.Common.generic_parsing import GenericConfig
-from health_azure import create_run_configuration, submit_run
 
 
 class SubmitForInferenceConfig(GenericConfig):
@@ -149,17 +149,8 @@ def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConf
     logging.info(f"Building inference run submission in {source_directory_path}")
     image_folder = source_directory_path / DEFAULT_DATA_FOLDER
     image = copy_image_file(args.image_file, image_folder, args.use_dicom)
-    model_sas_urls = model.get_sas_urls()
-    # Identifies all the files with basename "environment.yml" in the model and downloads them.
-    # These downloads should go into a temp folder that will most likely not be included in the model itself,
-    # because the AzureML run will later download the model into the same folder structure, and the file names might
-    # clash.
-    temp_folder = source_directory_path / "temp_for_scoring"
-    conda_files = download_files_from_model(model_sas_urls, ENVIRONMENT_YAML_FILE_NAME, dir_path=temp_folder)
-    if len(conda_files) != 1:
-        raise ValueError("Exactly 1 Conda environment definition must exist in the model.")
     # Retrieve the name of the Python environment that the training run used. This environment should have been
-    # registered. If no such environment exists, it will be re-create from the Conda files provided.
+    # registered at training.
     python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
     if not python_environment_name:
         raise ValueError(f"The model did not contain tag {PYTHON_ENVIRONMENT_NAME} for the AzureML environment to use.")

diff --git a/Tests/AfterTraining/test_after_training.py b/Tests/AfterTraining/test_after_training.py
@@ -50,11 +50,11 @@
 from InnerEye.Scripts import submit_for_inference
 from Tests.ML.util import assert_nifti_content, get_default_azure_config, get_default_workspace, get_nifti_shape
 
-FALLBACK_SINGLE_RUN = "refs_pull_593_merge_1637188926_7ba554ba"
-FALLBACK_ENSEMBLE_RUN = "refs_pull_545_merge:HD_caea82ae-9603-48ba-8280-7d2bc6272411"
-FALLBACK_2NODE_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538178_9f3023b2"
+FALLBACK_SINGLE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867172_17ba8dc5"
+FALLBACK_ENSEMBLE_RUN = "refs_pull_606_merge:HD_b8a6ad93-8c19-45de-8ea1-f87fce92c3bd"
+FALLBACK_2NODE_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867224_8d8072fe"
 FALLBACK_CV_GLAUCOMA = "refs_pull_545_merge:HD_72ecc647-07c3-4353-a538-620346114ebd"
-FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_545_merge:refs_pull_545_merge_1626538216_3eb92f09"
+FALLBACK_HELLO_CONTAINER_RUN = "refs_pull_606_merge:refs_pull_606_merge_1638867108_789991ac"
 
 
 def get_most_recent_run_id(fallback_run_id_for_local_execution: str = FALLBACK_SINGLE_RUN) -> str:

diff --git a/TestsOutsidePackage/test_register_model.py b/TestsOutsidePackage/test_register_model.py
@@ -52,6 +52,8 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
     project_root = Path(__file__).parent.parent
     ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
     model_folder = test_output_dirs.root_dir / "final"
+    hi_ml_submodules = [p for p, _ in fixed_paths.get_hi_ml_submodule_relative_paths()]
+    has_submodule = any(folder.is_dir() for folder in hi_ml_submodules)
     ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
     expected_files = [
         fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
@@ -61,6 +63,9 @@ def test_copy_child_paths_to_folder(is_ensemble: bool,
         "InnerEye/Common/fixed_paths.py",
         "InnerEye/Common/common_util.py",
     ]
+    if has_submodule:
+        expected_files.extend(["hi-ml/hi-ml/src/health_ml/__init__.py",
+                               "hi-ml/hi-ml-azure/src/health_azure/__init__.py"])
     for r in checkpoints_relative:
         expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
     for expected_file in expected_files:

diff --git a/environment.yml b/environment.yml
@@ -26,8 +26,6 @@ dependencies:
       - gitpython==3.1.7
       - gputil==1.4.0
       - h5py==2.10.0
-      - hi-ml==0.1.11
-      - hi-ml-azure==0.1.11
       - InnerEye-DICOM-RT==1.0.1
       - joblib==0.16.0
       - jupyter==1.0.0
@@ -57,6 +55,7 @@ dependencies:
       - pytorch-lightning==1.3.8
       - rich==5.1.1
       - rpdb==0.1.6
+      - ruamel.yaml==0.16.12
       - runstats==1.8.0
       - scikit-image==0.17.2
       - scikit-learn==0.23.2

diff --git a/score.py b/score.py
@@ -2,21 +2,26 @@
 #  Copyright (c) Microsoft Corporation. All rights reserved.
 #  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 #  ------------------------------------------------------------------------------------------
-from collections import defaultdict
 import logging
 import os
 import sys
+import zipfile
+from collections import defaultdict
 from pathlib import Path
 from typing import List, Optional, Tuple
-import zipfile
 
 import numpy as np
 import param
-from azureml.core import Run
 from InnerEye_DICOM_RT.nifti_to_dicom_rt_converter import rtconvert
+from azureml.core import Run
 
-from InnerEye.Azure.azure_util import is_offline_run_context
 from InnerEye.Common import fixed_paths
+
+# This must be added before all other imports because they might rely on hi-ml already, and that can optionally live
+# in a submodule
+fixed_paths.add_submodules_to_path()
+
+from InnerEye.Azure.azure_util import is_offline_run_context
 from InnerEye.Common.fixed_paths import DEFAULT_RESULT_ZIP_DICOM_NAME
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.Common.type_annotations import TupleFloat3, TupleInt3
@@ -27,8 +32,8 @@
 from InnerEye.ML.pipelines.ensemble import EnsemblePipeline
 from InnerEye.ML.pipelines.inference import FullImageInferencePipelineBase, InferencePipeline
 from InnerEye.ML.utils.config_loader import ModelConfigLoader
-from InnerEye.ML.utils.io_util import ImageWithHeader, load_nifti_image, reverse_tuple_float3, store_as_ubyte_nifti, \
-    load_dicom_series_and_save
+from InnerEye.ML.utils.io_util import ImageWithHeader, load_dicom_series_and_save, load_nifti_image, \
+    reverse_tuple_float3, store_as_ubyte_nifti
 
 
 class ScorePipelineConfig(GenericConfig):