ENH: Add step for GPU tests on the histo folder #273

Merged (27 commits, Apr 4, 2022)
10 changes: 10 additions & 0 deletions .amlignore
@@ -3,6 +3,8 @@
/azure-pipelines
/docs
/.idea
outputs/
__pycache__/
.pytest_cache
.mypy_cache
logs
@@ -14,3 +16,11 @@ temp_environment-*
.config
.vscode
node_modules

# Temp folders created from SSL tests
cifar-10-python.tar.gz
cifar-10-batches-py/
None/
hi-ml-histopathology/testSSL/test_outputs
test_outputs/
lightning_logs/
8 changes: 8 additions & 0 deletions .github/workflows/build-test-pr.yml
@@ -24,6 +24,7 @@ env:
HIML_COV_XML_ARTIFACT_SUFFIX: '-code_coverage_xml'

jobs:

flake8:
runs-on: ubuntu-18.04
steps:
@@ -258,6 +259,13 @@ jobs:
make pip_local
make pytest_and_coverage

- name: Run GPU tests
run: |
branch_prefix="refs/heads/"
full_branch_name=$GITHUB_REF
branch_name_without_prefix=${full_branch_name#$branch_prefix}
python hi-ml/run_pytest.py --mark=gpu --cluster=pr-gpu --conda_env=${{ matrix.folder }}/environment.yml --folder=${{ matrix.folder }} --experiment="$branch_name_without_prefix"

- name: Upload coverage artifacts
uses: ./.github/actions/upload_coverage_artifacts
if: ${{ matrix.packageName == '*.whl' }}
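The heart of this PR is the "Run GPU tests" step above: it strips the "refs/heads/" prefix from $GITHUB_REF and hands the bare branch name to run_pytest.py as the AzureML experiment name. A minimal Python sketch of the same logic, assuming matrix.folder resolves to hi-ml-histopathology (str.removeprefix needs Python 3.9+):

import os
import subprocess

# On branch builds, GITHUB_REF looks like "refs/heads/my-feature-branch".
full_branch_name = os.environ["GITHUB_REF"]
branch_name = full_branch_name.removeprefix("refs/heads/")

# Equivalent of the workflow step for matrix.folder = "hi-ml-histopathology".
subprocess.run(
    [
        "python", "hi-ml/run_pytest.py",
        "--mark=gpu",
        "--cluster=pr-gpu",
        "--conda_env=hi-ml-histopathology/environment.yml",
        "--folder=hi-ml-histopathology",
        f"--experiment={branch_name}",
    ],
    check=True,
)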
1 change: 1 addition & 0 deletions .gitignore
@@ -165,3 +165,4 @@ cifar-10-batches-py/
None/
hi-ml-histopathology/testSSL/test_outputs
test_outputs/
lightning_logs/
13 changes: 13 additions & 0 deletions .vscode/launch.json
@@ -46,6 +46,19 @@
],
"console": "integratedTerminal"
},
{
"name": "Python: Run pytest for GPU tests only",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/hi-ml/run_pytest.py",
"cwd": "${workspaceFolder}",
"args": [
"--mark=gpu",
"--cluster=pr-gpu",
"--conda_env=${workspaceFolder}/hi-ml-histopathology/environment.yml",
],
"console": "integratedTerminal"
},
{
"name": "Python: Run DeepSMILECrck in AzureML",
"type": "python",
1 change: 1 addition & 0 deletions hi-ml-azure/pytest.ini
@@ -8,3 +8,4 @@ markers =
fast: Tests that should run very fast, and can act as smoke tests to see if something goes terribly wrong.
slow: Tests that are slow to run and not crucial to the build.
timeout: Tests will terminate and fail if not completed within this length of time.
gpu: Tests that require at least 1 GPU to be present
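For context, a test opts into this new marker with the standard pytest decorator; the --mark=gpu flag passed to run_pytest.py presumably maps onto pytest's -m filter, selecting exactly these tests. A minimal sketch (the torch-based test body is illustrative, not part of this PR):

import pytest
import torch


@pytest.mark.gpu
def test_tensor_math_on_gpu() -> None:
    # Only meaningful on an agent with at least one CUDA device,
    # e.g. the pr-gpu cluster targeted by the new workflow step.
    assert torch.cuda.is_available()
    x = torch.ones(2, 2, device="cuda:0")
    assert x.sum().item() == 4.0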
5 changes: 3 additions & 2 deletions hi-ml-azure/src/health_azure/himl.py
@@ -447,16 +447,17 @@ def submit_to_azure_if_needed(  # type: ignore
)

if snapshot_root_directory is None:
logging.info(f"No snapshot root directory given. Uploading all files in the current directory {Path.cwd()}")
print(f"No snapshot root directory given. Uploading all files in the current directory {Path.cwd()}")
snapshot_root_directory = Path.cwd()

workspace = get_workspace(aml_workspace, workspace_config_path)
print(f"Loaded AzureML workspace {workspace.name}")

if conda_environment_file is None:
conda_environment_file = find_file_in_parent_to_pythonpath(CONDA_ENVIRONMENT_FILE)
print(f"Using the Conda environment from this file: {conda_environment_file}")
conda_environment_file = _str_to_path(conda_environment_file)

logging.info(f"Loaded AzureML workspace {workspace.name}")
run_config = create_run_configuration(
workspace=workspace,
compute_cluster_name=compute_cluster_name,
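The two fallbacks in this hunk mean a caller may omit both the snapshot root and the Conda file. A hedged usage sketch (the import path follows the module this hunk edits; the cluster name is hypothetical):

from health_azure.himl import submit_to_azure_if_needed

# With snapshot_root_directory=None, the current working directory is uploaded;
# with conda_environment_file=None, a default environment file is searched for
# via find_file_in_parent_to_pythonpath.
run_info = submit_to_azure_if_needed(
    compute_cluster_name="pr-gpu",  # hypothetical cluster name
    snapshot_root_directory=None,
    conda_environment_file=None,
)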
63 changes: 60 additions & 3 deletions hi-ml-azure/src/health_azure/utils.py
@@ -5,6 +5,7 @@
"""
Utility functions for interacting with AzureML runs
"""
from contextlib import contextmanager
import hashlib
import json
import logging
@@ -27,7 +28,7 @@
from enum import Enum
from itertools import islice
from pathlib import Path
from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union
from typing import Any, Callable, DefaultDict, Dict, Generator, List, Optional, Set, Tuple, Type, TypeVar, Union

import conda_merge
import pandas as pd
@@ -175,15 +176,29 @@ def set_fields_and_validate(config: param.Parameterized, fields_to_set: Dict[str
config.validate()


def create_argparser(config: param.Parameterized) -> ArgumentParser:
def create_argparser(
config: param.Parameterized,
usage: Optional[str] = None,
description: Optional[str] = None,
epilog: Optional[str] = None
) -> ArgumentParser:
"""
Creates an ArgumentParser with all fields of the given config that are overridable.

:param config: The config whose parameters should be used to populate the argument parser
:param usage: Brief information about correct usage that is printed if the script is started with "--help". If not
provided, this is auto-generated from the complete set of arguments.
:param description: A description of the program that is printed if the script is started with "--help"
:param epilog: Text that is printed after the argument details if the script is started with "--help"
:return: ArgumentParser
"""
assert isinstance(config, param.Parameterized)
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser = ArgumentParser(
formatter_class=ArgumentDefaultsHelpFormatter,
usage=usage,
description=description,
epilog=epilog
)
_add_overrideable_config_args_to_parser(config, parser)
return parser

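A quick sketch of the extended signature in use (the config class and help strings are made up for illustration; the import matches the module this hunk edits):

import param

from health_azure.utils import create_argparser


class TrainingConfig(param.Parameterized):
    learning_rate: float = param.Number(default=1e-3, doc="Optimizer learning rate")


parser = create_argparser(
    TrainingConfig(),
    usage="python train.py [--learning_rate LR]",
    description="Trains a toy model.",
    epilog="All arguments fall back to their defaults when omitted.",
)
args = parser.parse_args(["--learning_rate", "0.01"])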
@@ -1968,3 +1983,45 @@ def workspace(self) -> Workspace:
if self._workspace is None:
self._workspace = aml_workspace_for_unittests()
return self._workspace


@contextmanager
def check_config_json(script_folder: Path, shared_config_json: Path) -> Generator:
"""
Ensure that a workspace config.json file exists in the folder where we expect a test script. The file is either
copied from the location given in shared_config_json (the case when executing a test on a dev machine), or
created from environment variables (the case in builds on the GitHub agents).

:param script_folder: This is the folder in which the config.json file should be created
:param shared_config_json: Path to a shared config.json file
"""
target_config_json = script_folder / WORKSPACE_CONFIG_JSON
target_config_exists = target_config_json.is_file()
if target_config_exists:
pass
elif shared_config_json.exists():
# This will execute on local dev machines
logging.info(f"Copying {shared_config_json} to folder {script_folder}")
shutil.copy(shared_config_json, target_config_json)
else:
# This will execute on github agents
logging.info(f"Creating {str(target_config_json)} from environment variables.")
subscription_id = os.getenv(ENV_SUBSCRIPTION_ID, "")
resource_group = os.getenv(ENV_RESOURCE_GROUP, "")
workspace_name = os.getenv(ENV_WORKSPACE_NAME, "")
if subscription_id and resource_group and workspace_name:
with open(str(target_config_json), 'w', encoding="utf-8") as file:
config = {
"subscription_id": subscription_id,
"resource_group": resource_group,
"workspace_name": workspace_name
}
json.dump(config, file)
else:
raise ValueError("Either a shared config.json must be present, or all 3 environment variables for "
"workspace creation must exist.")
try:
yield
finally:
if not target_config_exists:
target_config_json.unlink()
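Usage mirrors the updated call sites in test_himl.py below; a minimal sketch with hypothetical paths:

from pathlib import Path

from health_azure.utils import check_config_json

script_folder = Path("/tmp/test_scripts")           # hypothetical test folder
shared_config_json = Path.home() / "config.json"    # hypothetical shared file

with check_config_json(script_folder, shared_config_json=shared_config_json):
    # Inside the block, script_folder / "config.json" exists: it was already
    # present, was copied from shared_config_json, or was created from the
    # subscription/resource group/workspace environment variables.
    ...
# On exit, the file is removed again unless it existed beforehand.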
52 changes: 47 additions & 5 deletions hi-ml-azure/testazure/testazure/test_azure_util.py
@@ -32,7 +32,7 @@

import health_azure.utils as util
from health_azure.himl import AML_IGNORE_FILE, append_to_amlignore
from health_azure.utils import PackageDependency
from health_azure.utils import PackageDependency, create_argparser
from testazure.test_himl import RunTarget, render_and_run_test_script
from testazure.utils_testazure import (DEFAULT_IGNORE_FOLDERS, DEFAULT_WORKSPACE, MockRun, change_working_directory,
repository_root)
@@ -579,7 +579,7 @@ def test_merge_conda_with_pip_requirements(random_folder: Path) -> None:


@pytest.mark.fast
def test_merge_conda_failure_cases(random_folder: Path, caplog: CaptureFixture) -> None:
def test_merge_conda_failure_cases(random_folder: Path, caplog: LogCaptureFixture) -> None:
"""Tests various failure cases of merging, such as empty conda files, no specified channels etc"""
merged_file = random_folder / "merged.yml"
env1 = _generate_conda_env_str(channels=["defaults", "pytorch"],
@@ -991,7 +991,7 @@ def _mock_env_get(workspace: Workspace, name: str = "", version: Optional[str] =


def test_set_environment_variables_for_multi_node(
caplog: CaptureFixture,
caplog: LogCaptureFixture,
capsys: CaptureFixture,
) -> None:
with caplog.at_level(logging.INFO): # type: ignore
@@ -1009,7 +1009,7 @@ def test_set_environment_variables_for_multi_node(
},
clear=True):
util.set_environment_variables_for_multi_node()
out, _ = capsys.readouterr()
out = capsys.readouterr().out
assert "Distributed training: MASTER_ADDR = here, MASTER_PORT = there, NODE_RANK = everywhere" in out

with mock.patch.dict(
@@ -1021,7 +1021,7 @@ def test_set_environment_variables_for_multi_node(
},
clear=True):
util.set_environment_variables_for_multi_node()
out, _ = capsys.readouterr()
out = capsys.readouterr().out
assert "Distributed training: MASTER_ADDR = here, MASTER_PORT = 6105, NODE_RANK = everywhere" in out


@@ -1870,6 +1870,48 @@ def test_parsing_bools(parameterized_config_and_parser: Tuple[ParamClass, Argume
expected_value)


def test_argparse_usage(capsys: CaptureFixture) -> None:
"""Test if the auto-generated argument parser prints out defaults and usage information.
"""
class SimpleClass(param.Parameterized):
name: str = param.String(default="name_default", doc="Name description")
config = SimpleClass()
parser = create_argparser(config, usage="my_usage", description="my_description", epilog="my_epilog")
arguments = ["", "--help"]
with pytest.raises(SystemExit):
with patch.object(sys, "argv", arguments):
parser.parse_args()
stdout: str = capsys.readouterr().out # type: ignore
assert "Name description" in stdout
assert "default: " in stdout
assert "optional arguments:" in stdout
assert "--name NAME" in stdout
assert "usage: my_usage" in stdout
assert "my_description" in stdout
assert "my_epilog" in stdout


def test_argparse_usage_empty(capsys: CaptureFixture) -> None:
"""Test if the auto-generated argument parser prints out defaults and auto-generated usage information.
"""
class SimpleClass(param.Parameterized):
name: str = param.String(default="name_default", doc="Name description")
config = SimpleClass()
parser = create_argparser(config)
arguments = ["", "--help"]
with pytest.raises(SystemExit):
with patch.object(sys, "argv", arguments):
parser.parse_args()
stdout: str = capsys.readouterr().out # type: ignore
assert "usage: " in stdout
# Check if the auto-generated usage text is present
assert "[-h] [--name NAME]" in stdout
assert "optional arguments:" in stdout
assert "--name NAME" in stdout
assert "Name description" in stdout
assert "default: " in stdout


@pytest.mark.fast
def test_apply_overrides(parameterized_config_and_parser: Tuple[ParamClass, ArgumentParser]) -> None:
"""
36 changes: 27 additions & 9 deletions hi-ml-azure/testazure/testazure/test_himl.py
@@ -30,11 +30,29 @@
from azureml.train.hyperdrive import HyperDriveConfig

import health_azure.himl as himl
from health_azure.datasets import (DatasetConfig, _input_dataset_key, _output_dataset_key, get_datastore)
from health_azure.utils import (DEFAULT_ENVIRONMENT_VARIABLES, ENVIRONMENT_VERSION, EXPERIMENT_RUN_SEPARATOR,
WORKSPACE_CONFIG_JSON, get_most_recent_run, get_workspace, is_running_in_azure_ml)
from health_azure.datasets import (
DatasetConfig,
_input_dataset_key,
_output_dataset_key,
get_datastore
)
from health_azure.utils import (
DEFAULT_ENVIRONMENT_VARIABLES,
ENVIRONMENT_VERSION,
EXPERIMENT_RUN_SEPARATOR,
WORKSPACE_CONFIG_JSON,
check_config_json,
get_most_recent_run,
get_workspace,
is_running_in_azure_ml
)
from testazure.test_data.make_tests import render_environment_yaml, render_test_script
from testazure.utils_testazure import DEFAULT_DATASTORE, change_working_directory, check_config_json, repository_root
from testazure.utils_testazure import (
DEFAULT_DATASTORE,
change_working_directory,
get_shared_config_json,
repository_root
)

INEXPENSIVE_TESTING_CLUSTER_NAME = "lite-testing-ds2"
EXPECTED_QUEUED = "This command will be run in AzureML:"
@@ -703,7 +721,7 @@ def spawn() -> Tuple[int, List[str]]:
if suppress_config_creation:
code, stdout = spawn()
else:
with check_config_json(path):
with check_config_json(path, shared_config_json=get_shared_config_json()):
code, stdout = spawn()
captured = "\n".join(stdout)
print(f"Script console output:\n{captured}")
@@ -715,7 +733,7 @@ def spawn() -> Tuple[int, List[str]]:
return captured
else:
assert EXPECTED_QUEUED in captured
with check_config_json(path):
with check_config_json(path, shared_config_json=get_shared_config_json()):
workspace = get_workspace(aml_workspace=None, workspace_config_path=path / WORKSPACE_CONFIG_JSON)

run = get_most_recent_run(run_recovery_file=path / himl.RUN_RECOVERY_FILE,
@@ -857,7 +875,7 @@ def test_invoking_hello_world_env_var(run_target: RunTarget, tmp_path: Path) ->
@pytest.mark.timeout(300)
def test_mounting_dataset(tmp_path: Path) -> None:
logging.info("creating config.json")
with check_config_json(tmp_path):
with check_config_json(tmp_path, shared_config_json=get_shared_config_json()):
logging.info("get_workspace")
workspace = get_workspace(aml_workspace=None,
workspace_config_path=tmp_path / WORKSPACE_CONFIG_JSON)
@@ -882,7 +900,7 @@ def test_mounting_dataset(tmp_path: Path) -> None:
@pytest.mark.timeout(60)
def test_downloading_dataset(tmp_path: Path) -> None:
logging.info("creating config.json")
with check_config_json(tmp_path):
with check_config_json(tmp_path, shared_config_json=get_shared_config_json()):
logging.info("get_workspace")
workspace = get_workspace(aml_workspace=None,
workspace_config_path=tmp_path / WORKSPACE_CONFIG_JSON)
@@ -981,7 +999,7 @@ def test_invoking_hello_world_datasets(run_target: RunTarget,
for i in range(0, output_count)]

# Get default datastore
with check_config_json(tmp_path):
with check_config_json(tmp_path, shared_config_json=get_shared_config_json()):
workspace = get_workspace(aml_workspace=None,
workspace_config_path=tmp_path / WORKSPACE_CONFIG_JSON)
datastore: AzureBlobDatastore = get_datastore(workspace=workspace,