Introduce time-based batch metrics logging and change XGBoost to use it #3619

Merged

Commits (33)

35b624b
xgboost log on every iteration with timing
andrewnitu Oct 27, 2020
d8fff96
get avg time
andrewnitu Oct 27, 2020
a3fef38
fix
andrewnitu Oct 27, 2020
1eb5da3
batch send all at the end of training
andrewnitu Oct 27, 2020
7eb0d4c
stash
andrewnitu Oct 29, 2020
9b1a782
rename promise to future
andrewnitu Oct 29, 2020
c937852
remove batch_log_interval
andrewnitu Oct 29, 2020
cf22cb8
make should_purge have no side effects
andrewnitu Oct 29, 2020
13ee723
do not assume step anymore
andrewnitu Oct 31, 2020
d74d638
add test case
andrewnitu Oct 31, 2020
05aac0e
stash
andrewnitu Nov 2, 2020
d99f981
autofmt
andrewnitu Nov 2, 2020
8c59ab8
linting
andrewnitu Nov 2, 2020
2e9cbdf
some cleanup and gather batch log time on initial iteration
andrewnitu Nov 2, 2020
9d15742
more cleanup
andrewnitu Nov 2, 2020
ee7614e
reimport time
andrewnitu Nov 2, 2020
055971e
revert changes to xgboost example
andrewnitu Nov 2, 2020
be1db98
add chunking test and clean up tests
andrewnitu Nov 2, 2020
c5a11ae
refactor chunking test
andrewnitu Nov 2, 2020
a36dcf5
revert adding __eq__ method to metric entity
andrewnitu Nov 2, 2020
6aab196
remove commented-out code
andrewnitu Nov 2, 2020
e36549e
fix xgboost autolog tests
andrewnitu Nov 2, 2020
0ec68db
remove unused import
andrewnitu Nov 2, 2020
1cb6057
remove unused import
andrewnitu Nov 3, 2020
b106659
code review
andrewnitu Nov 4, 2020
fd61abd
fix line length
andrewnitu Nov 4, 2020
1304367
change to total log batch time instead of average
andrewnitu Nov 4, 2020
c7b41ce
make test go through two cycles of batch logging
andrewnitu Nov 5, 2020
3c18c8b
code review
andrewnitu Nov 5, 2020
1d0c8aa
some code review
andrewnitu Nov 6, 2020
f665e4f
code review
andrewnitu Nov 9, 2020
bd71788
remove extra param from xgboost example
andrewnitu Nov 10, 2020
26dc835
nit fix
andrewnitu Nov 11, 2020
100 changes: 100 additions & 0 deletions mlflow/utils/autologging_utils.py
@@ -1,9 +1,14 @@
import inspect
import functools
import warnings
import time
import contextlib

import mlflow
from mlflow.utils import gorilla
from mlflow.entities import Metric
from mlflow.tracking.client import MlflowClient
from mlflow.utils.validation import MAX_METRICS_PER_BATCH


INPUT_EXAMPLE_SAMPLE_ROWS = 5
@@ -187,3 +192,98 @@ def resolve_input_example_and_signature(
logger.warning(model_signature_user_msg)

return input_example if log_input_example else None, model_signature


# wrapper functions so that time.time() can be mocked easily in the tests
def time_wrapper_for_log():
return time.time()


def time_wrapper_for_current():
return time.time()


def time_wrapper_for_timestamp():

Collaborator:

Instead of wrapping time.time() for mocking, can we just manipulate the total_log_batch_time and total_training_time properties of BatchMetricsHandler in our test cases?

If we're concerned about the measurement of total_training_time and total_log_batch_time, we can always construct another test case that performs sleeps to simulate training / logging and then verifies that total_training_time / total_log_batch_time exceed expected thresholds.

Collaborator Author:

I tried to test this, but for some reason (at least in pytest) sleep doesn't increase the system clock, even though the test is obviously taking longer, so the sleep is running.

return time.time()
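
To make the alternative discussed above concrete, here is a minimal sketch of manipulating the timing fields directly in a test instead of mocking a clock; the class and field names come from this diff, and the import path is an assumption:

from mlflow.utils.autologging_utils import BatchMetricsHandler  # path assumed from this diff

# Pretend 10 seconds of training have elapsed against a 1-second average
# log_batch cost; with the 10x threshold in _should_purge, this triggers a purge.
handler = BatchMetricsHandler()
handler.total_training_time = 10.0
handler.total_log_batch_time = 1.0
handler.num_log_batch = 1
assert handler._should_purge()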


# we pass the batch_metrics_handler through so that the callback can access it
def _timed_log_batch(batch_metrics_handler, run_id, metrics):

Collaborator:

Can we move this into BatchMetricsHandler? Seems to make sense given that the method refers to an instance of batch_metrics_handler.

start = time_wrapper_for_log()
metrics_slices = [
metrics[i * MAX_METRICS_PER_BATCH : (i + 1) * MAX_METRICS_PER_BATCH]
for i in range((len(metrics) + MAX_METRICS_PER_BATCH - 1) // MAX_METRICS_PER_BATCH)
]

Collaborator:

I think using the step parameter for range() will simplify things here, e.g.:

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

(Credits to https://stackoverflow.com/a/312464/11952869)

for metrics_slice in metrics_slices:
MlflowClient().log_batch(run_id=run_id, metrics=metrics_slice)
end = time_wrapper_for_log()
batch_metrics_handler.total_log_batch_time += end - start
batch_metrics_handler.num_log_batch += 1
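
A minimal sketch of how the two suggestions above might combine, moving the timed logging onto the class and chunking with range()'s step parameter; this is illustrative only, not necessarily the shape the PR ends up with:

import time

from mlflow.tracking.client import MlflowClient
from mlflow.utils.validation import MAX_METRICS_PER_BATCH


class BatchMetricsLogger:  # reviewer-suggested name
    # ... __init__ and the other methods as in this diff ...

    def _timed_log_batch(self, run_id, metrics):
        # Time the whole chunked upload so the training-to-logging ratio stays accurate.
        start = time.time()
        for i in range(0, len(metrics), MAX_METRICS_PER_BATCH):
            chunk = metrics[i : i + MAX_METRICS_PER_BATCH]
            MlflowClient().log_batch(run_id=run_id, metrics=chunk)
        self.total_log_batch_time += time.time() - start
        self.num_log_batch += 1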


class BatchMetricsHandler: # BatchMetricsLogger maybe?

Collaborator:

+1. Let's call this BatchMetricsLogger.

def __init__(self):
# data is an array of tuples of the form (timestamp, metrics at timestamp)
self.data = {}

Collaborator:

Seems like a dictionary to me! (Though I think it should be a list of Metric objects - see comment below)

self.total_training_time = 0
self.total_log_batch_time = 0
self.num_log_batch = 0
self.previous_training_timestamp = None

def _purge(self):
run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id

Collaborator:

run_id should be a parameter of BatchMetricsHandler or of record_metrics. I can't think of a case where we'd want BatchMetricsHandler to implicitly start a new run.

For simplicity, I think it makes sense to tie BatchMetricsLogger to a single run_id via a constructor parameter. If you can think of any existing or near-term use cases where adding it to record_metrics would be necessary, please let me know!

Collaborator Author:

Sure, that makes sense. I just needed the run_id, and that was my first thought for grabbing it. :D
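
A minimal sketch of what the constructor might look like with run_id pinned at construction time, as suggested above (illustrative only):

class BatchMetricsLogger:
    def __init__(self, run_id):
        # Tie the logger to one run so _purge never has to look a run up
        # (or implicitly start one) through the fluent API.
        self.run_id = run_id
        self.data = []  # see the later thread about storing Metric objects directly
        self.total_training_time = 0
        self.total_log_batch_time = 0
        self.num_log_batch = 0
        self.previous_training_timestamp = None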

final_metrics = []

for step, metrics_at_step in self.data.items():
for entry in metrics_at_step:
timestamp = entry[0]
metrics_at_timestamp = entry[1]

for key, value in metrics_at_timestamp.items():
final_metrics.append(Metric(key, value, timestamp, step))

_timed_log_batch(self, run_id=run_id, metrics=final_metrics)

self.data = {}

def _should_purge(self):
if self.num_log_batch == 0:
return True

# we give some extra time in case of network slowdown
log_batch_time_fudge_factor = 10

Collaborator:

Fudge factor seems like the wrong term here. This is the desired ratio of training time to batch logging time. Perhaps target_training_to_logging_time_ratio?

if (
self.total_training_time
>= self.total_log_batch_time / self.num_log_batch * log_batch_time_fudge_factor
):
return True

return False
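
With the suggested rename, the check above might read roughly as follows (a sketch, not the merged code):

def _should_purge(self):
    target_training_to_logging_time_ratio = 10  # reviewer-suggested name
    if self.num_log_batch == 0:
        return True
    average_log_batch_time = self.total_log_batch_time / self.num_log_batch
    return (
        self.total_training_time
        >= average_log_batch_time * target_training_to_logging_time_ratio
    )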

# metrics is a dict representing the set of metrics collected during one iteration

Collaborator:

Can we convert this comment into a docstring?
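
For example, the comment could become a docstring along these lines (wording is only a suggestion):

def record_metrics(self, metrics, step):
    """
    Submit a set of metrics to be logged in a batch at some later point.

    :param metrics: dict of metric key/value pairs collected during one iteration.
    :param step: the training step that these metrics correspond to.
    """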

def record_metrics(self, metrics, step):
current_timestamp = time_wrapper_for_current()
if self.previous_training_timestamp is None:
self.previous_training_timestamp = current_timestamp

training_time = current_timestamp - self.previous_training_timestamp

self.total_training_time += training_time

if step in self.data:
self.data[step].append([int(time_wrapper_for_timestamp() * 1000), metrics])

Collaborator:

Can time_wrapper_for_timestamp() and these other timing functions give us times in millis so we don't have to convert them?

Collaborator (@dbczumar, Nov 3, 2020):

On second thought, I'm not sure we need these timer wrappers. See #3619 (comment)

else:
self.data[step] = [[int(time_wrapper_for_timestamp() * 1000), metrics]]

Collaborator:

Can we construct Metric objects here and just append metric objects to a list, rather than keeping track of things by step? Seems like we ultimately collapse everything into a list at purge time anyway. If we want to maintain a sorted order based on step, timestamp, etc, we can use the sorted function within the purge routine.

Collaborator Author:

Yeah, I don't see why not. It seems kind of wasteful to group them by step and then ungroup them again, as I'm doing now.


if self._should_purge():
self.total_training_time = 0
self._purge()

self.previous_training_timestamp = current_timestamp
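
As a rough sketch of the simplification discussed above, record_metrics could build Metric entities up front and keep them in a flat list (this assumes self.data is a list and reuses the time and Metric imports at the top of this module):

def record_metrics(self, metrics, step):
    current_timestamp = time.time()
    if self.previous_training_timestamp is None:
        self.previous_training_timestamp = current_timestamp
    self.total_training_time += current_timestamp - self.previous_training_timestamp

    timestamp_ms = int(time.time() * 1000)  # Metric timestamps are in milliseconds
    self.data.extend(
        Metric(key, value, timestamp_ms, step) for key, value in metrics.items()
    )

    if self._should_purge():
        self.total_training_time = 0
        self._purge()

    self.previous_training_timestamp = current_timestamp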


@contextlib.contextmanager
def with_batch_metrics_handler():

Collaborator:

Can we add docs here?
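
One possible wording for such a docstring, to sit directly under the def above (a suggestion only):

    """
    Context manager that yields a BatchMetricsHandler for recording metrics
    during training and flushes anything still buffered with a final _purge()
    when the block exits.
    """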

batch_metrics_handler = BatchMetricsHandler()
yield batch_metrics_handler
batch_metrics_handler._purge()
14 changes: 8 additions & 6 deletions mlflow/xgboost.py
@@ -45,6 +45,7 @@
resolve_input_example_and_signature,
_InputExampleInfo,
ENSURE_AUTOLOGGING_ENABLED_TEXT,
with_batch_metrics_handler,
)
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS

@@ -340,6 +341,9 @@ def record_eval_results(eval_results):
"""

def callback(env):
batch_metrics_handler.record_metrics(

Collaborator:

Can we add batch_metrics_handler as an argument to record_eval_results and thread it through to this callback to ensure that we're not accidentally referencing some state left over from a previous BatchMetricsHandler, for example?

dict(env.evaluation_result_list), env.iteration
)
eval_results.append(dict(env.evaluation_result_list))

return callback
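
A minimal sketch of threading the handler through explicitly, as suggested in the thread above (names follow this diff; not necessarily the merged shape):

def record_eval_results(eval_results, batch_metrics_handler):
    """Create a callback that records evaluation results via the given handler."""

    def callback(env):
        batch_metrics_handler.record_metrics(
            dict(env.evaluation_result_list), env.iteration
        )
        eval_results.append(dict(env.evaluation_result_list))

    return callback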
@@ -423,12 +427,10 @@ def log_feature_importance_plot(features, importance, importance_type):
else:
kwargs["callbacks"] = [callback]

# training model
model = original(*args, **kwargs)

# logging metrics on each iteration.
for idx, metrics in enumerate(eval_results):
try_mlflow_log(mlflow.log_metrics, metrics, step=idx)
# logging metrics on each iteration
with with_batch_metrics_handler() as batch_metrics_handler:
# training model
model = original(*args, **kwargs)

# If early_stopping_rounds is present, logging metrics at the best iteration
# as extra metrics with the max step + 1.
94 changes: 94 additions & 0 deletions tests/utils/test_autologging_utils.py
@@ -1,14 +1,19 @@
import inspect
import pytest
import itertools
from unittest.mock import Mock, call
from unittest import mock


import mlflow
from mlflow.utils import gorilla
from mlflow.tracking.client import MlflowClient
from mlflow.utils.autologging_utils import (
get_unspecified_default_args,
log_fn_args_as_params,
wrap_patch,
resolve_input_example_and_signature,
with_batch_metrics_handler,
)

# Example function signature we are testing on
@@ -263,3 +268,92 @@ def modifies(_):

assert x["data"] == 0
logger.warning.assert_not_called()


def test_batch_metrics_handler_logs_all_metrics(start_run): # pylint: disable=unused-argument
with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock:

Collaborator:

Do we need to mock log_batch? Ideally, it would be nice to test that the metrics are actually logged by leaving this unmocked and querying the run data after the iterative calls to record_metrics() complete.

Collaborator Author:

Do we want to do this for every test? Or is it sufficient to do it for one test, so we know it works end to end, and save time by keeping the mock for the rest of the tests?

Collaborator Author:

Actually, it seems this really only applies to one of the tests. The rest care about the intermediate state while the BatchMetricsLogger is logging, not about the final outcome, so it makes more sense to use mocks there.

with with_batch_metrics_handler() as batch_metrics_handler:
for i in range(100):
batch_metrics_handler.record_metrics({"x": i}, i)

# collect the args of all the logging calls
recorded_metrics = []
for call in log_batch_mock.call_args_list:
_, kwargs = call
metrics_arr = kwargs["metrics"]
for metric in metrics_arr:
recorded_metrics.append({metric._key: metric._value})

desired_metrics = [{"x": i} for i in range(100)]

assert recorded_metrics == desired_metrics
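
Following the thread above, an end-to-end variant without the log_batch mock might look roughly like this; a sketch that assumes a working local tracking setup and reuses the imports already at the top of this test file:

def test_batch_metrics_handler_logs_to_run_unmocked():
    with mlflow.start_run() as run:
        with with_batch_metrics_handler() as batch_metrics_handler:
            for i in range(10):
                batch_metrics_handler.record_metrics({"x": i}, step=i)

    # Query the run after the fact instead of inspecting mock call args.
    history = MlflowClient().get_metric_history(run.info.run_id, "x")
    assert sorted(m.value for m in history) == list(range(10))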


def test_batch_metrics_handler_runs_training_and_logging_in_correct_ratio(
start_run,
): # pylint: disable=unused-argument
with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock, mock.patch(
"mlflow.utils.autologging_utils.time_wrapper_for_log"
) as log_time_mock, mock.patch(
"mlflow.utils.autologging_utils.time_wrapper_for_current"
) as current_time_mock, mock.patch(
"mlflow.utils.autologging_utils.time_wrapper_for_timestamp"
) as timestamp_time_mock:
current_time_mock.side_effect = [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
] # training occurs every second
log_time_mock.side_effect = itertools.cycle(
[100, 101]
) # logging takes 1 second, numbers don't matter here
timestamp_time_mock.return_value = 9999 # this doesn't matter

Collaborator:

If this number doesn't matter, do we need to mock it?


with with_batch_metrics_handler() as batch_metrics_handler:
batch_metrics_handler.record_metrics({"x": 1}, step=0) # data doesn't matter
# first metrics should be skipped to record a previous timestamp and batch log time

Collaborator:

skipped seems to imply that we're dropping metrics or not logging them; I think we mean that we're logging them immediately (i.e. "skipping waiting")

log_batch_mock.assert_called_once()

log_batch_mock.reset_mock() # resets the 'calls' of this mock

# the above 'training' took 1 second. So with fudge factor of 10x,

Collaborator:

As above, I don't think that fudge factor is accurate terminology for this use case

# 10 more 'training' should happen before the metrics are sent.
for _ in range(9):
batch_metrics_handler.record_metrics({"x": 1}, step=0)
log_batch_mock.assert_not_called()

batch_metrics_handler.record_metrics({"x": 1}, step=0)
log_batch_mock.assert_called_once()


def test_batch_metrics_handler_chunks_metrics_when_batch_logging(
start_run,
): # pylint: disable=unused-argument
with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock, mock.patch(
"mlflow.utils.autologging_utils.time_wrapper_for_timestamp"
) as timestamp_time_mock:
timestamp_time_mock.return_value = 0

Collaborator:

Is this timestamp mock necessary? If not, it seems that we can get rid of time_wrapper_for_timestamp and inline time.time() instead.


with with_batch_metrics_handler() as batch_metrics_handler:
batch_metrics_handler.record_metrics({hex(x): x for x in range(5000)}, step=0)
run_id = mlflow.active_run().info.run_id

for call_idx, call in enumerate(log_batch_mock.call_args_list):
_, kwargs = call

assert kwargs["run_id"] == run_id
assert len(kwargs["metrics"]) == 1000
for metric_idx, metric in enumerate(kwargs["metrics"]):
assert metric.key == hex(call_idx * 1000 + metric_idx)
assert metric.timestamp == 0
assert metric.value == call_idx * 1000 + metric_idx
assert metric.step == 0