Fix error message for missing "targets" column (#10723)

Signed-off-by: Ann Zhang <ann.zhang@databricks.com>
mlflow · Dec 24, 2023 · e2de758 · e2de758
1 parent b929a3e
commit e2de758
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 9 deletions.
diff --git a/mlflow/models/evaluation/default_evaluator.py b/mlflow/models/evaluation/default_evaluator.py
@@ -1467,6 +1467,23 @@ def _compute_builtin_metrics(self):
                 )
             )
 
+    def _get_error_message_missing_columns(self, metric_name, param_names):
+        error_message_parts = [f"Metric '{metric_name}' requires the following:"]
+
+        special_params = ["targets", "predictions"]
+        for param in special_params:
+            if param in param_names:
+                error_message_parts.append(f"  - the '{param}' parameter needs to be specified")
+
+        remaining_params = [param for param in param_names if param not in special_params]
+
+        if remaining_params:
+            error_message_parts.append(
+                f"  - missing columns {remaining_params} need to be defined or mapped"
+            )
+
+        return "\n".join(error_message_parts)
+
     def _check_args(self, metrics, eval_df):
         failed_metrics = []
         # collect all failures for getting metric arguments
@@ -1494,7 +1511,7 @@ def _check_args(self, metrics, eval_df):
                     input_columns.append("targets")
 
             error_messages = [
-                f"Metric '{metric_name}' requires the columns {param_names}"
+                self._get_error_message_missing_columns(metric_name, param_names)
                 for metric_name, param_names in failed_metrics
             ]
             joined_error_message = "\n".join(error_messages)
@@ -1504,8 +1521,10 @@ def _check_args(self, metrics, eval_df):
             Below are the existing column names for the input/output data:
             Input Columns: {input_columns}
             Output Columns: {output_columns}
-            To resolve this issue, you may want to map the missing column to an existing column
-            using the following configuration:
+
+            To resolve this issue, you may need to specify any required parameters, or if you are
+            missing columns, you may want to map them to an existing column using the following
+            configuration:
             evaluator_config={{'col_mapping': {{<missing column name>: <existing column name>}}}}"""
             stripped_message = "\n".join(l.lstrip() for l in full_message.splitlines())
             raise MlflowException(stripped_message)

diff --git a/tests/evaluate/test_default_evaluator.py b/tests/evaluate/test_default_evaluator.py
@@ -2190,13 +2190,17 @@ def dummy_fn2(param_3, param_4, builtin_metrics):
 
     error_message = (
         r"Error: Metric calculation failed for the following metrics:\n"
-        r"Metric 'metric_1' requires the columns \['param_1', 'param_2'\]\n"
-        r"Metric 'metric_2' requires the columns \['param_3', 'builtin_metrics'\]\n\n"
+        r"Metric 'metric_1' requires the following:\n"
+        r"- the 'targets' parameter needs to be specified\n"
+        r"- missing columns \['param_1', 'param_2'\] need to be defined or mapped\n"
+        r"Metric 'metric_2' requires the following:\n"
+        r"- missing columns \['param_3', 'builtin_metrics'\] need to be defined or mapped\n\n"
         r"Below are the existing column names for the input/output data:\n"
         r"Input Columns: \['question', 'answer'\]\n"
-        r"Output Columns: \['predictions'\]\n"
-        r"To resolve this issue, you may want to map the missing column to an existing column\n"
-        r"using the following configuration:\n"
+        r"Output Columns: \['predictions'\]\n\n"
+        r"To resolve this issue, you may need to specify any required parameters, or if you are\n"
+        r"missing columns, you may want to map them to an existing column using the following\n"
+        r"configuration:\n"
         r"evaluator_config=\{'col_mapping': \{<missing column name>: <existing column name>\}\}"
     )
 
@@ -2208,7 +2212,6 @@ def dummy_fn2(param_3, param_4, builtin_metrics):
             mlflow.evaluate(
                 model_info.model_uri,
                 data,
-                targets="answer",
                 evaluators="default",
                 model_type="question-answering",
                 extra_metrics=[metric_1, metric_2],