diff --git a/docs/source/runner.md b/docs/source/runner.md index a1b34a26d..bb54c6499 100644 --- a/docs/source/runner.md +++ b/docs/source/runner.md @@ -34,9 +34,10 @@ for example, `--model health_cpath.PandaImageNetMIL` is effectively telling the To train in AzureML, use the flag `--cluster` to specify the name of the cluster in your Workspace that you want to submit the job to. So the whole command would look like: -``` +```bash himl-runner --model=HelloWorld --cluster=my_cluster_name ``` + You can also specify `--num_nodes` if you wish to distribute the model training. When starting the runner, you need to do that from a directory that contains all the code that your experiment needs: @@ -47,7 +48,7 @@ AzureML needs to know which Python/Conda environment it should use. For that, th that contains a Conda environment definition. This file needs to be present either in the current working directory or one of its parents. To specify a Conda environment that is located elsewhere, you can use -```shell +```bash himl-runner --model=HelloWorld --cluster=my_cluster_name --conda_env=/my/folder/to/special_environment.yml ``` @@ -216,9 +217,9 @@ and returns a tuple containing the Optimizer and LRScheduler objects You can use the hi-ml-runner in inference mode only by switching the `--run_inference_only` flag on and specifying the model weights by setting `--src_checkpoint` argument that supports three types of checkpoints: -* A local path where the checkpoint is stored `--src_checkpoint=local/path/to/my_checkpoint/model.ckpt` -* A remote URL from where to download the weights `--src_checkpoint=https://my_checkpoint_url.com/model.ckpt` -* An AzureML run id where checkpoints are saved in `outputs/checkpoints`. For this specific use case, you can experiment +- A local path where the checkpoint is stored `--src_checkpoint=local/path/to/my_checkpoint/model.ckpt` +- A remote URL from where to download the weights `--src_checkpoint=https://my_checkpoint_url.com/model.ckpt` +- An AzureML run id where checkpoints are saved in `outputs/checkpoints`. For this specific use case, you can experiment with different checkpoints by setting `--src_checkpoint` according to the format `:`. If no custom path is provided (e.g., `--src_checkpoint=AzureML_run_id:best.ckpt`), we assume the checkpoints to be saved in the default @@ -228,7 +229,7 @@ the model weights by setting `--src_checkpoint` argument that supports three typ Running the following command line will run inference using `MyContainer` model with weights from the checkpoint saved in the AzureMl run `MyContainer_XXXX_yyyy` at the best validation loss epoch `/outputs/checkpoints/best_val_loss.ckpt`. -``` +```bash himl-runner --model=Mycontainer --run_inference_only --src_checkpoint=MyContainer_XXXX_yyyy:best_val_loss.ckpt ``` @@ -238,13 +239,43 @@ Analogously, one can resume training by setting `--src_checkpoint` to either con The pytorch lightning trainer will initialize the lightning module from the given checkpoint corresponding to the best validation loss epoch as set in the following comandline. -``` +```bash himl-runner --model=Mycontainer --cluster=my_cluster_name --src_checkpoint=MyContainer_XXXX_yyyy:best_val_loss.ckpt ``` Warning: When resuming training, one should make sure to set `container.max_epochs` greater than the last epoch of the specified checkpoint. 
 A misconfiguration exception will be raised otherwise:
-```
+```text
 pytorch_lightning.utilities.exceptions.MisconfigurationException: You restored a checkpoint with current_epoch=19, but you have set Trainer(max_epochs=4).
 ```
+
+## Logging to AzureML when running outside AzureML
+
+The runner offers the ability to log metrics to AzureML, even if the present training is not running
+inside of AzureML. This adds an additional level of traceability for runs on GPU VMs, where there is otherwise
+no record of any past training.
+
+You can trigger this behaviour by specifying the `--log_from_vm` flag. For the `HelloWorld` model, this
+will look like:
+
+```bash
+himl-runner --model=HelloWorld --log_from_vm
+```
+
+For logging to work, you need to have a `config.json` file in the current working directory (or one of its
+parent folders) that specifies the AzureML workspace itself. When starting the runner, you will be asked
+to authenticate to AzureML.
+
+There are two additional flags that can be used to control the logging behaviour:
+
+- The `--experiment` flag sets which AzureML experiment to log to. By default, the experiment name will be
+  the name of the model class (`HelloWorld` in the above example).
+- The `--tag` flag sets the display name for the AzureML run. You can use that to give your run a memorable name,
+  and later easily find it in the AzureML UI.
+
+The following command will log to the experiment `my_experiment`, in a run that is labelled `my_first_run` in the UI:
+
+```bash
+himl-runner --model=HelloWorld --log_from_vm --experiment=my_experiment --tag=my_first_run
+```
diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py
index a2e77d9a8..2875e067b 100644
--- a/hi-ml-azure/src/health_azure/utils.py
+++ b/hi-ml-azure/src/health_azure/utils.py
@@ -1898,7 +1898,7 @@ def create_aml_run_object(
     exp = Experiment(workspace=actual_workspace, name=experiment_name)
     if snapshot_directory is None or snapshot_directory == "":
         snapshot_directory = tempfile.mkdtemp()
-    return exp.start_logging(name=run_name, snapshot_directory=str(snapshot_directory))  # type: ignore
+    return exp.start_logging(display_name=run_name, snapshot_directory=str(snapshot_directory))  # type: ignore


 def aml_workspace_for_unittests() -> Workspace:
diff --git a/hi-ml-azure/testazure/testazure/test_azure_util.py b/hi-ml-azure/testazure/testazure/test_azure_util.py
index 4021eadb0..ab38b92c4 100644
--- a/hi-ml-azure/testazure/testazure/test_azure_util.py
+++ b/hi-ml-azure/testazure/testazure/test_azure_util.py
@@ -2112,7 +2112,7 @@ def test_create_run() -> None:
         run = util.create_aml_run_object(experiment_name=experiment_name, run_name=run_name,
                                          workspace=DEFAULT_WORKSPACE.workspace)
         assert run is not None
-        assert run.name == run_name
+        assert run.display_name == run_name
         assert run.experiment.name == experiment_name
         metric_name = "mymetric"
         metric_value = 1.234
diff --git a/hi-ml/src/health_ml/configs/hello_world.py b/hi-ml/src/health_ml/configs/hello_world.py
index 9a6c57e9c..9b5823873 100644
--- a/hi-ml/src/health_ml/configs/hello_world.py
+++ b/hi-ml/src/health_ml/configs/hello_world.py
@@ -230,6 +230,7 @@ def on_test_epoch_end(self) -> None:
         average_mse = torch.mean(torch.stack(self.test_mse))
         Path("test_mse.txt").write_text(str(average_mse.item()))
         Path("test_mae.txt").write_text(str(self.test_mae.compute().item()))
+        self.log("test_mse", average_mse, on_epoch=True, on_step=False)


 class HelloWorld(LightningContainer):
diff --git a/hi-ml/src/health_ml/deep_learning_config.py b/hi-ml/src/health_ml/deep_learning_config.py
index 8247e09f0..3d93665d8 100644
--- a/hi-ml/src/health_ml/deep_learning_config.py
+++ b/hi-ml/src/health_ml/deep_learning_config.py
@@ -173,6 +173,13 @@ class WorkflowParams(param.Parameterized):
     run_inference_only: bool = param.Boolean(False, doc="If True, run only inference and skip training after loading"
                                                         "model weights from the specified checkpoint in "
                                                         "`src_checkpoint` flag. If False, run training and inference.")
+    tag: str = param.String(doc="A string that will be used as the display name of the run in AzureML.")
+    experiment: str = param.String(default="", doc="The name of the AzureML experiment to use for this run. If not "
+                                                   "provided, the name of the model class will be used.")
+    log_from_vm: bool = param.Boolean(False, doc="If True, a training run outside AzureML will still log its "
+                                                 "metrics to AzureML. Both intermediate validation metrics and final test results "
+                                                 "will be recorded. You need to have an AzureML workspace config.json file "
+                                                 "and will be asked for interactive authentication.")

     CROSSVAL_INDEX_ARG_NAME = "crossval_index"
     CROSSVAL_COUNT_ARG_NAME = "crossval_count"
diff --git a/hi-ml/src/health_ml/experiment_config.py b/hi-ml/src/health_ml/experiment_config.py
index ab305248e..fb8511603 100644
--- a/hi-ml/src/health_ml/experiment_config.py
+++ b/hi-ml/src/health_ml/experiment_config.py
@@ -11,7 +11,6 @@ class ExperimentConfig(param.Parameterized):
                                              "job in AzureML.")
     model: str = param.String(doc="The fully qualified name of the model to train/test -e.g."
                                   "mymodule.configs.MyConfig.")
-    tag: str = param.String(doc="A string that will be used as the display name of the run in AzureML.")
     mount_in_azureml: bool = param.Boolean(False,
                                            doc="If False (default), consume datasets in AzureML by downloading at "
                                                "job start. If True, datasets in AzureML are mounted (read on demand "
diff --git a/hi-ml/src/health_ml/lightning_container.py b/hi-ml/src/health_ml/lightning_container.py
index dbda2ef86..e40f4e123 100644
--- a/hi-ml/src/health_ml/lightning_container.py
+++ b/hi-ml/src/health_ml/lightning_container.py
@@ -214,6 +214,12 @@ def has_custom_test_step(self) -> bool:
         """
         return type(self.model).test_step != LightningModule.test_step

+    @property
+    def effective_experiment_name(self) -> str:
+        """Returns the name of the AzureML experiment that should be used.
This is taken from the commandline + argument `experiment`, falling back to the model class name if not set.""" + return self.experiment or self.model_name + class LightningModuleWithOptimizer(LightningModule): """ diff --git a/hi-ml/src/health_ml/model_trainer.py b/hi-ml/src/health_ml/model_trainer.py index 28380f19a..bdece9090 100644 --- a/hi-ml/src/health_ml/model_trainer.py +++ b/hi-ml/src/health_ml/model_trainer.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import Any, List, Optional, Tuple, TypeVar +from azureml.core import Run from pytorch_lightning import Callback, Trainer from pytorch_lightning.callbacks import GPUStatsMonitor, ModelCheckpoint from pytorch_lightning.loggers import TensorBoardLogger @@ -53,7 +54,8 @@ def get_pl_profiler(pl_profiler: Optional[str], outputs_folder: Path) -> Optiona def create_lightning_trainer(container: LightningContainer, resume_from_checkpoint: Optional[Path] = None, num_nodes: int = 1, - multiple_trainloader_mode: str = "max_size_cycle") -> \ + multiple_trainloader_mode: str = "max_size_cycle", + azureml_run_for_logging: Optional[Run] = None) -> \ Tuple[Trainer, StoringLogger]: """ Creates a Pytorch Lightning Trainer object for the given model configuration. It creates checkpoint handlers @@ -63,6 +65,9 @@ def create_lightning_trainer(container: LightningContainer, :param container: The container with model and data. :param resume_from_checkpoint: If provided, training resumes from this checkpoint point. :param num_nodes: The number of nodes to use in distributed training. + :param azureml_run_for_logging: An optional AzureML Run object to which all metrics should be logged. Use this + argument to log to AzureML when the training is happening outside of AzureML. If `azureml_run_for_logging` is + None and the present code is running in AzureML, the current run is used. :return: A tuple [Trainer object, diagnostic logger] """ logging.debug(f"resume_from_checkpoint: {resume_from_checkpoint}") @@ -86,7 +91,9 @@ def create_lightning_trainer(container: LightningContainer, message += "s per node with DDP" logging.info(f"Using {message}") tensorboard_logger = TensorBoardLogger(save_dir=str(container.logs_folder), name="Lightning", version="") - loggers = [tensorboard_logger, AzureMLLogger(False)] + azureml_logger = AzureMLLogger(enable_logging_outside_azure_ml=container.log_from_vm, + run=azureml_run_for_logging) + loggers = [tensorboard_logger, azureml_logger] storing_logger = StoringLogger() loggers.append(storing_logger) # Use 32bit precision when running on CPU. Otherwise, make it depend on use_mixed_precision flag. 
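The new `azureml_run_for_logging` argument above is what lets training and inference share one AzureML run when executing outside AzureML. As a rough sketch of the intended usage pattern (not part of this diff; the experiment name is a placeholder, and a workspace `config.json` must be discoverable for authentication):

```python
from pytorch_lightning import Trainer

from health_azure.utils import create_aml_run_object
from health_ml.utils.logging import AzureMLLogger

# Create one AzureML run up front so that training and inference metrics land in the same run.
# "my_experiment" is a placeholder experiment name.
run = create_aml_run_object(experiment_name="my_experiment")
logger = AzureMLLogger(enable_logging_outside_azure_ml=True, run=run)
trainer = Trainer(max_epochs=1, logger=[logger])
# ... trainer.fit(...) and trainer.test(...) would both log to `run` ...
# The logger only flushes a run it did not create itself, so the caller completes it explicitly.
run.complete()
```

This mirrors what `MLRunner` does in the changes below: the run is created once in `init_training`, passed to both trainer instances, and completed in a `finally` block after inference.
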
diff --git a/hi-ml/src/health_ml/run_ml.py b/hi-ml/src/health_ml/run_ml.py index acd2fe498..01a92a70d 100644 --- a/hi-ml/src/health_ml/run_ml.py +++ b/hi-ml/src/health_ml/run_ml.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Dict, List, Optional +from azureml.core import Run from pytorch_lightning import Trainer, seed_everything from health_azure import AzureRunInfo @@ -18,7 +19,7 @@ is_running_in_azure_ml, PARENT_RUN_CONTEXT, RUN_CONTEXT, aggregate_hyperdrive_metrics, get_metrics_for_childless_run, ENV_GLOBAL_RANK, ENV_LOCAL_RANK, ENV_NODE_RANK, - is_local_rank_zero, is_global_rank_zero,) + is_local_rank_zero, is_global_rank_zero, create_aml_run_object) from health_ml.experiment_config import ExperimentConfig from health_ml.lightning_container import LightningContainer @@ -78,6 +79,7 @@ def __init__(self, project_root=self.project_root, run_context=RUN_CONTEXT) self.trainer: Optional[Trainer] = None + self.azureml_run_for_logging: Optional[Run] = None def set_run_tags_from_parent(self) -> None: """ @@ -176,9 +178,20 @@ def init_training(self) -> None: # Set random seeds just before training. Ensure that dataloader workers are also seeded correctly. seed_everything(self.container.get_effective_random_seed(), workers=True) - # get the container's datamodule + # Get the container's datamodule self.data_module = self.container.get_data_module() + # Create an AzureML run for logging if running outside AzureML. This run will be used for metrics logging + # during both training and inference. We can't rely on the automatically generated run inside the AzureMLLogger + # class because two of those logger objects will be created, so training and inference metrics would be logged + # in different runs. + if self.container.log_from_vm: + run = create_aml_run_object(experiment_name=self.container.effective_experiment_name) + # Display name should already be set when creating the Run object, but in some scenarios this + # does not happen. Hence, set it again. + run.display_name = self.container.tag if self.container.tag else None + self.azureml_run_for_logging = run + if not self.container.run_inference_only: checkpoint_path_for_recovery = self.checkpoint_handler.get_recovery_or_checkpoint_path_train() @@ -191,7 +204,8 @@ def init_training(self) -> None: container=self.container, resume_from_checkpoint=checkpoint_path_for_recovery, num_nodes=self.container.num_nodes, - multiple_trainloader_mode=self.get_multiple_trainloader_mode()) + multiple_trainloader_mode=self.get_multiple_trainloader_mode(), + azureml_run_for_logging=self.azureml_run_for_logging) rank_info = ", ".join( f"{env}: {os.getenv(env)}" for env in [ENV_GLOBAL_RANK, ENV_LOCAL_RANK, ENV_NODE_RANK] @@ -285,7 +299,10 @@ def run_inference(self) -> None: self.checkpoint_handler.get_checkpoint_to_test() if self.container.src_checkpoint else None ) trainer, _ = create_lightning_trainer( - self.container, resume_from_checkpoint=checkpoint_path, num_nodes=1 + container=self.container, + resume_from_checkpoint=checkpoint_path, + num_nodes=1, + azureml_run_for_logging=self.azureml_run_for_logging ) # Change to the outputs folder so that the model can write to current working directory, and still @@ -339,29 +356,37 @@ def run(self) -> None: Driver function to run a ML experiment """ self.setup() - self.init_training() - if not self.container.run_inference_only: - # Backup the environment variables in case we need to run a second training in the unit tests. 
- old_environ = dict(os.environ) - - # do training - with logging_section("Model training"): - self.run_training() - - # load model checkpoint for custom inference or additional validation step - if self.container.has_custom_test_step() or self.container.run_extra_val_epoch: - self.load_model_checkpoint() - - # Run extra validation epoch if enabled - if self.container.run_extra_val_epoch: - with logging_section("Model Validation to save plots on validation set"): - self.run_validation() - - # Kill all processes besides rank 0 - self.after_ddp_cleanup(old_environ) - - # Run inference on a single device - with logging_section("Model inference"): - self.run_inference() - - self.run_regression_test() + try: + self.init_training() + if not self.container.run_inference_only: + # Backup the environment variables in case we need to run a second training in the unit tests. + old_environ = dict(os.environ) + + # do training + with logging_section("Model training"): + self.run_training() + + # load model checkpoint for custom inference or additional validation step + if self.container.has_custom_test_step() or self.container.run_extra_val_epoch: + self.load_model_checkpoint() + + # Run extra validation epoch if enabled + if self.container.run_extra_val_epoch: + with logging_section("Model Validation to save plots on validation set"): + self.run_validation() + + # Kill all processes besides rank 0 + self.after_ddp_cleanup(old_environ) + + # Run inference on a single device + with logging_section("Model inference"): + self.run_inference() + + self.run_regression_test() + + finally: + if self.azureml_run_for_logging is not None: + try: + self.azureml_run_for_logging.complete() + except Exception as ex: + logging.error("Failed to complete AzureML run: %s", ex) diff --git a/hi-ml/src/health_ml/runner.py b/hi-ml/src/health_ml/runner.py index 999aa02b6..230b7057f 100644 --- a/hi-ml/src/health_ml/runner.py +++ b/hi-ml/src/health_ml/runner.py @@ -170,7 +170,7 @@ def additional_run_tags(self, script_params: List[str]) -> Dict[str, str]: """ return { "commandline_args": " ".join(script_params), - "tag": self.experiment_config.tag + "tag": self.lightning_container.tag } def run(self) -> Tuple[LightningContainer, AzureRunInfo]: @@ -207,8 +207,8 @@ def after_submission_hook(azure_run: Run) -> None: """ # Set the default display name to what was provided as the "tag". 
This will affect single runs # and Hyperdrive parent runs - if self.experiment_config.tag: - azure_run.display_name = self.experiment_config.tag + if self.lightning_container.tag: + azure_run.display_name = self.lightning_container.tag root_folder = self.project_root entry_script = Path(sys.argv[0]).resolve() @@ -257,7 +257,7 @@ def after_submission_hook(azure_run: Run) -> None: compute_cluster_name=self.experiment_config.cluster, environment_variables=environment_variables, default_datastore=default_datastore, - experiment_name=self.lightning_container.model_name, # create_experiment_name(), + experiment_name=self.lightning_container.effective_experiment_name, input_datasets=input_datasets, # type: ignore num_nodes=self.experiment_config.num_nodes, wait_for_completion=self.experiment_config.wait_for_completion, @@ -270,12 +270,12 @@ def after_submission_hook(azure_run: Run) -> None: after_submission=after_submission_hook, tags=self.additional_run_tags(script_params) ) - if self.experiment_config.tag and azure_run_info.run: + if self.lightning_container.tag and azure_run_info.run: if self.lightning_container.is_crossvalidation_enabled: # This code is only reached inside Azure. Set display name again - this will now affect # Hypdrive child runs (for other jobs, this has already been done after submission) cv_index = self.lightning_container.crossval_index - full_display_name = f"{self.experiment_config.tag} {cv_index}" + full_display_name = f"{self.lightning_container.tag} {cv_index}" azure_run_info.run.display_name = full_display_name else: diff --git a/hi-ml/src/health_ml/utils/logging.py b/hi-ml/src/health_ml/utils/logging.py index 0b98e3a94..5790efbf0 100644 --- a/hi-ml/src/health_ml/utils/logging.py +++ b/hi-ml/src/health_ml/utils/logging.py @@ -40,6 +40,7 @@ class AzureMLLogger(LightningLoggerBase): def __init__(self, enable_logging_outside_azure_ml: Optional[bool] = False, experiment_name: str = "azureml_logger", + run: Optional[Run] = None, run_name: Optional[str] = None, workspace: Optional[Workspace] = None, workspace_config_path: Optional[Path] = None, @@ -47,38 +48,48 @@ def __init__(self, ) -> None: """ :param enable_logging_outside_azure_ml: If True, the AzureML logger will write metrics to AzureML even if - executed outside of an AzureML run (for example, when working on a separate virtual machine). If False, - the logger will only write metrics to AzureML if the code is actually running inside of AzureML. Default False, - do not log outside of AzureML. + executed outside of an AzureML run (for example, when working on a separate virtual machine). If False, + the logger will only write metrics to AzureML if the code is actually running inside of AzureML. Default + False, do not log outside of AzureML. :param experiment_name: The AzureML experiment that should hold the run when executed outside of AzureML. + :param run: The AzureML run to log to when the ``enable_logging_outside_azure_ml`` flag is True. If None, + a new run will be created. When finished, the run should be completed by calling ``run.complete()``. The + logger itself only calls ``run.flush()`` in its ``finalize()`` method. :param run_name: An optional name for the run (this will be used as the display name in the AzureML UI). This - argument only matters when running outside of AzureML. + argument only matters when running outside of AzureML. :param workspace: If provided, use this workspace to create the run in. :param workspace_config_path: Use this path to read workspace configuration json file. 
If not provided, - use the workspace specified by the `config.json` file in the current working directory or its parents. + use the workspace specified by the `config.json` file in the current working directory or its parents. :param snapshot_directory: The folder that should be included as the code snapshot. By default, no snapshot - is created. Set this to the folder that contains all the code your experiment uses. You can use a file - .amlignore to skip specific files or folders, akin to .gitignore.. + is created. Set this to the folder that contains all the code your experiment uses. You can use a file + .amlignore to skip specific files or folders, akin to .gitignore.. """ super().__init__() self.is_running_in_azure_ml = is_running_in_azure_ml() self.run: Optional[Run] = None - self.has_custom_run = False + self.has_user_provided_run = False + self.enable_logging_outside_azure_ml = enable_logging_outside_azure_ml if self.is_running_in_azure_ml: self.run = RUN_CONTEXT elif enable_logging_outside_azure_ml: - try: - self.run = create_aml_run_object(experiment_name=experiment_name, - run_name=run_name, - workspace=workspace, - workspace_config_path=workspace_config_path, - snapshot_directory=snapshot_directory) - print(f"Writing metrics to run {self.run.id} in experiment {self.run.experiment.name}.") - print(f"To check progress, visit this URL: {self.run.get_portal_url()}") - self.has_custom_run = True - except Exception: - logging.error("Unable to create an AzureML run to store the results.") - raise + if run is not None: + self.run = run + self.has_user_provided_run = True + else: + try: + self.run = create_aml_run_object(experiment_name=experiment_name, + run_name=run_name, + workspace=workspace, + workspace_config_path=workspace_config_path, + snapshot_directory=snapshot_directory) + # Display name should already be set when creating the run object, but this does not happen. + # In unit tests, the run has the expected display name, but not here. Hence, set it again. + self.run.display_name = run_name + except Exception as ex: + logging.error(f"Unable to create an AzureML run to store the results because of {ex}.") + raise + print(f"Writing metrics to run {self.run.id} in experiment {self.run.experiment.name}.") + print(f"To check progress, visit this URL: {self.run.get_portal_url()}") else: print("AzureMLLogger will not write any logs because it is running outside AzureML, and the " "'enable_logging_outside_azure_ml' flag is set to False") @@ -129,9 +140,15 @@ def version(self) -> int: return 0 def finalize(self, status: str) -> None: - if self.run is not None and self.has_custom_run: - # Run.complete should only be called if we created an AzureML run here in the constructor. - self.run.complete() + if self.enable_logging_outside_azure_ml and not self.is_running_in_azure_ml and self.run is not None: + if self.has_user_provided_run: + # The logger uses a run that was provided by the user: Flush it, but do not complete it. + # The user should complete the run after finishing the experiment. This is important when running + # training outside of AzureML, so that training and inference metrics can be written to the same run. + self.run.flush() + else: + # Run.complete should only be called if we created an AzureML run here in the constructor. 
+ self.run.complete() def _preprocess_hyperparams(self, params: Any) -> Dict[str, str]: """ diff --git a/hi-ml/testhiml/testhiml/test_run_ml.py b/hi-ml/testhiml/testhiml/test_run_ml.py index 51900cf91..fa4ed8d3a 100644 --- a/hi-ml/testhiml/testhiml/test_run_ml.py +++ b/hi-ml/testhiml/testhiml/test_run_ml.py @@ -9,12 +9,15 @@ from typing import Generator from unittest.mock import DEFAULT, MagicMock, Mock, patch +from azureml._restclient.constants import RunStatus + from health_ml.configs.hello_world import HelloWorld # type: ignore from health_ml.experiment_config import ExperimentConfig from health_ml.lightning_container import LightningContainer from health_ml.run_ml import MLRunner from health_ml.utils.common_utils import is_gpu_available from health_azure.utils import is_global_rank_zero +from health_ml.utils.logging import AzureMLLogger from testazure.utils_testazure import DEFAULT_WORKSPACE from testhiml.utils.fixed_paths_for_tests import mock_run_id @@ -314,4 +317,62 @@ def test_runner_end_to_end() -> None: runner.setup() runner.init_training() runner.run_training() - assert True + + +@pytest.mark.parametrize("log_from_vm", [True, False]) +def test_log_on_vm(log_from_vm: bool) -> None: + """Test if the AzureML logger is called when the experiment is run outside AzureML.""" + experiment_config = ExperimentConfig(model="HelloWorld") + container = HelloWorld() + container.max_epochs = 1 + # Mimic an experiment name given on the command line. + experiment_name = "unittest" + container.experiment = experiment_name + # The tag is used to identify the run, similar to the behaviour when submitting a run to AzureML. + tag = f"test_log_on_vm [{log_from_vm}]" + container.tag = tag + container.log_from_vm = log_from_vm + runner = MLRunner(experiment_config=experiment_config, container=container) + # When logging to AzureML, need to provide the unit test AML workspace. + # When not logging to AzureML, no workspace (and no authentication) should be needed. + if log_from_vm: + with patch("health_azure.utils.get_workspace", return_value=DEFAULT_WORKSPACE.workspace): + runner.run() + else: + runner.run() + # The PL trainer object is created in the init_training method. + # Check that the AzureML logger is set up correctly. + assert runner.trainer is not None + assert runner.trainer.loggers is not None + assert len(runner.trainer.loggers) > 1 + logger = runner.trainer.loggers[1] + assert isinstance(logger, AzureMLLogger) + if log_from_vm: + assert logger.run is not None + # Check that all user supplied data (experiment and display name) are respected. + assert logger.run.experiment is not None + assert logger.run.experiment.name == experiment_name + assert logger.run.display_name == tag + # Both trainig and inference metrics must be logged in the same Run object. + metrics = logger.run.get_metrics() + assert "test_mse" in metrics + assert "loss" in metrics + # The run must have been correctly marked as completed. 
+ logger.run.wait_for_completion() + assert logger.run.status == RunStatus.COMPLETED + else: + assert logger.run is None + + +def test_experiment_name() -> None: + """Test that the experiment name is set correctly, choosing either the experiment name given on the commandline + or the model name""" + container = HelloWorld() + # No experiment name given on the commandline: use the model name + model_name = "some_model" + container._model_name = model_name + assert container.effective_experiment_name == model_name + # Experiment name given on the commandline: use the experiment name + experiment_name = "unittest" + container.experiment = experiment_name + assert container.effective_experiment_name == experiment_name diff --git a/hi-ml/testhiml/testhiml/utils/test_logging.py b/hi-ml/testhiml/testhiml/utils/test_logging.py index 21a95f819..e321c8ea3 100644 --- a/hi-ml/testhiml/testhiml/utils/test_logging.py +++ b/hi-ml/testhiml/testhiml/utils/test_logging.py @@ -143,7 +143,7 @@ def test_azureml_logger() -> None: logger = create_mock_logger() # On all build agents, this should not be detected as an AzureML run. assert not logger.is_running_in_azure_ml - assert logger.has_custom_run + assert not logger.has_user_provided_run logger.log_metrics({"foo": 1.0}) assert logger.run is not None logger.run.log.assert_called_once_with("foo", 1.0, step=None) @@ -271,7 +271,8 @@ def test_azureml_logger_init1() -> None: with mock.patch("health_ml.utils.logging.RUN_CONTEXT", "foo"): logger = AzureMLLogger(enable_logging_outside_azure_ml=True) assert logger.is_running_in_azure_ml - assert not logger.has_custom_run + assert logger.enable_logging_outside_azure_ml + assert not logger.has_user_provided_run assert logger.run == "foo" # We should be able to call finalize without any effect (logger.run == "foo", which has no # "Complete" method). When running in AzureML, the logger should not @@ -300,7 +301,7 @@ def test_azureml_logger_actual_run() -> None: assert logger.run != RUN_CONTEXT assert isinstance(logger.run, Run) assert logger.run.experiment.name == "azureml_logger" - assert logger.has_custom_run + assert not logger.has_user_provided_run expected_metrics = {"foo": 1.0, "bar": 2.0} logger.log_metrics(expected_metrics) logger.run.flush() @@ -328,13 +329,36 @@ def test_azureml_logger_init4() -> None: snapshot_directory="snapshot", workspace="workspace", # type: ignore workspace_config_path=Path("config_path")) - assert logger.has_custom_run + assert not logger.has_user_provided_run assert logger.run == run_mock mock_create.assert_called_once_with(experiment_name="exp", run_name="run", snapshot_directory="snapshot", workspace="workspace", workspace_config_path=Path("config_path")) + # The run created in the constructor is under the control of the AzureML logger, and should be completed. + # Check that the finalize method calls the run's complete method, but not the run's flush method. + run_mock.flush = MagicMock() + run_mock.complete = MagicMock() + logger.finalize(status="nothing") + run_mock.flush.assert_not_called() + run_mock.complete.assert_called_once() + + +def test_azureml_logger_finalize() -> None: + """Test if the finalize method correctly updates the run status. 
It should only operate on runs that are + outside of AzureML.""" + run_mock = MagicMock() + logger = AzureMLLogger(enable_logging_outside_azure_ml=True, run=run_mock) + assert logger.run is not None + assert logger.has_user_provided_run + run_mock.flush = MagicMock() + run_mock.complete = MagicMock() + # When providing a run explicitly, the finalize method should not call the run's complete method. Completing + # the run is the responsibility of the user. + logger.finalize(status="nothing") + run_mock.flush.assert_called_once() + run_mock.complete.assert_not_called() def test_progress_bar_enable() -> None:
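
The two new tests above pin down the ownership contract that `finalize` now implements: a run the logger created itself is completed, while a run supplied by the caller is only flushed and must be completed by that caller. A condensed, mock-based restatement of that contract (a sketch, assuming `create_aml_run_object` is patched at its import site in `health_ml.utils.logging`):

```python
from unittest.mock import MagicMock, patch

from health_ml.utils.logging import AzureMLLogger

# Caller-supplied run: finalize() flushes it, completion stays with the caller.
provided_run = MagicMock()
logger = AzureMLLogger(enable_logging_outside_azure_ml=True, run=provided_run)
logger.finalize(status="FINISHED")
provided_run.flush.assert_called_once()
provided_run.complete.assert_not_called()

# Logger-created run: finalize() completes it, because nothing else owns it.
created_run = MagicMock()
with patch("health_ml.utils.logging.create_aml_run_object", return_value=created_run):
    logger = AzureMLLogger(enable_logging_outside_azure_ml=True)
logger.finalize(status="FINISHED")
created_run.complete.assert_called_once()
```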