mlrun · gtopper · May 13, 2024 · May 13, 2024 · May 13, 2024 · May 13, 2024
diff --git a/mlrun/common/schemas/model_monitoring/constants.py b/mlrun/common/schemas/model_monitoring/constants.py
@@ -188,6 +188,7 @@ class SchedulingKeys:
 class FileTargetKind:
     ENDPOINTS = "endpoints"
     EVENTS = "events"
+    PREDICTIONS = "predictions"
     STREAM = "stream"
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"

diff --git a/mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py b/mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
@@ -99,6 +99,23 @@ def _init_tables_path(self):
             monitoring_application_path + mm_constants.MonitoringTSDBTables.METRICS
         )
 
+        monitoring_predictions_full_path = (
+            mlrun.mlconf.get_model_monitoring_file_target_path(
+                project=self.project,
+                kind=mm_constants.FileTargetKind.PREDICTIONS,
+            )
+        )
+        (
+            _,
+            _,
+            monitoring_predictions_path,
+        ) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
+            monitoring_predictions_full_path
+        )
+        self.tables[mm_constants.FileTargetKind.PREDICTIONS] = (
+            monitoring_predictions_path
+        )
+
     def create_tsdb_application_tables(self):
         """
         Create the application tables using the TSDB connector. At the moment we support 2 types of application tables:
@@ -134,6 +151,27 @@ def apply_monitoring_stream_steps(
         - custom_metrics (user-defined metrics)
         """
 
+        # Write latency per prediction, labeled by endpoint ID only
+        graph.add_step(
+            "storey.TSDBTarget",
+            name="tsdb_predictions",
+            after="MapFeatureNames",
+            path=f"{self.container}/{self.tables[mm_constants.FileTargetKind.PREDICTIONS]}",
+            rate="1/s",
+            time_col=mm_constants.EventFieldType.TIMESTAMP,
+            container=self.container,
+            v3io_frames=self.v3io_framesd,
+            columns=["latency"],
+            index_cols=[
+                mm_constants.EventFieldType.ENDPOINT_ID,
+            ],
+            aggr="count,avg",
+            aggr_granularity="1m",
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+            key=mm_constants.EventFieldType.ENDPOINT_ID,
+        )
+
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
         # stats and details about the events