Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ant0nsc committed Sep 2, 2022
1 parent 467fc6c commit 4ec3f22
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 9 deletions.
3 changes: 1 addition & 2 deletions docs/source/runner.md
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,7 @@ There are two additional flags that can be used to control the logging behaviour
- The `--tag` flag sets the display name for the AzureML run. You can use that to give your run a memorable name,
and later easily find it in the AzureML UI.

Using these command line arguments, the following command will log to the experiment `my_experiment`, in a run that is showing
as `my_first_run`:
The following command will log to the experiment `my_experiment`, in a run that is labelled `my_first_run` in the UI:

```bash
himl-runner --model=HelloWorld --log_from_vm --experiment=my_experiment --tag=my_first_run
Expand Down
5 changes: 3 additions & 2 deletions hi-ml/src/health_ml/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ def create_lightning_trainer(container: LightningContainer,
:param container: The container with model and data.
:param resume_from_checkpoint: If provided, training resumes from this checkpoint point.
:param num_nodes: The number of nodes to use in distributed training.
:param azureml_run_for_logging: An optional AzureML Run object to which all metrics should be logged. If None and
the present code is running in AzureML, the current run is used.
:param azureml_run_for_logging: An optional AzureML Run object to which all metrics should be logged. Use this
argument to log to AzureML when the training is happening outside of AzureML. If `azureml_run_for_logging` is
None and the present code is running in AzureML, the current run is used.
:return: A tuple [Trainer object, diagnostic logger]
"""
logging.debug(f"resume_from_checkpoint: {resume_from_checkpoint}")
Expand Down
3 changes: 2 additions & 1 deletion hi-ml/src/health_ml/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def finalize(self, status: str) -> None:
if self.enable_logging_outside_azure_ml and not self.is_running_in_azure_ml and self.run is not None:
if self.has_user_provided_run:
# The logger uses a run that was provided by the user: Flush it, but do not complete it.
# The user should complete the run after finishing the experiment.
# The user should complete the run after finishing the experiment. This is important when running
# training outside of AzureML, so that training and inference metrics can be written to the same run.
self.run.flush()
else:
# Run.complete should only be called if we created an AzureML run here in the constructor.
Expand Down
8 changes: 5 additions & 3 deletions hi-ml/testhiml/testhiml/test_run_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from typing import Generator
from unittest.mock import DEFAULT, MagicMock, Mock, patch

from azureml._restclient.constants import RunStatus

from health_ml.configs.hello_world import HelloWorld # type: ignore
from health_ml.experiment_config import ExperimentConfig
from health_ml.lightning_container import LightningContainer
Expand Down Expand Up @@ -355,9 +357,9 @@ def test_log_on_vm(log_from_vm: bool) -> None:
metrics = logger.run.get_metrics()
assert "test_mse" in metrics
assert "loss" in metrics
# The run must have been correctly marked as completed. However, I did not find a way to check this.
# When we get here, the run is still returning "Running" as the status, even though it is completed in the UI.
# assert logger.run.status == RunStatus.COMPLETED
# The run must have been correctly marked as completed.
logger.run.wait_for_completion()
assert logger.run.status == RunStatus.COMPLETED
else:
assert logger.run is None

Expand Down
3 changes: 2 additions & 1 deletion hi-ml/testhiml/testhiml/utils/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,10 @@ def test_azureml_logger_finalize() -> None:
logger = AzureMLLogger(enable_logging_outside_azure_ml=True, run=run_mock)
assert logger.run is not None
assert logger.has_user_provided_run
logger.finalize("nothing")
run_mock.flush = MagicMock()
run_mock.complete = MagicMock()
# When providing a run explicitly, the finalize method should not call the run's complete method. Completing
# the run is the responsibility of the user.
logger.finalize(status="nothing")
run_mock.flush.assert_called_once()
run_mock.complete.assert_not_called()
Expand Down

0 comments on commit 4ec3f22

Please sign in to comment.