mlflow · harupy · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024
diff --git a/mlflow/metrics/genai/genai_metric.py b/mlflow/metrics/genai/genai_metric.py
@@ -28,10 +28,16 @@
 
 
 def _format_args_string(grading_context_columns: Optional[List[str]], eval_values, indx) -> str:
+    import pandas as pd
+
     args_dict = {}
     for arg in grading_context_columns:
         if arg in eval_values:
-            args_dict[arg] = eval_values[arg][indx]
+            args_dict[arg] = (
+                eval_values[arg].iloc[indx]
+                if isinstance(eval_values[arg], pd.Series)
+                else eval_values[arg][indx]
+            )
         else:
             raise MlflowException(
                 f"{arg} does not exist in the eval function {list(eval_values.keys())}."

diff --git a/tests/metrics/genai/test_genai_metrics.py b/tests/metrics/genai/test_genai_metrics.py
@@ -769,6 +769,14 @@ def test_faithfulness_metric():
             examples=[mlflow_example],
         )
 
+    faithfulness_metric.eval_fn(
+        # Series whose index labels differ (0, 1, 2), so rows must be read by position
+        pd.Series([mlflow_prediction], index=[0]),
+        {},
+        pd.Series([input], index=[1]),
+        pd.Series([mlflow_ground_truth], index=[2]),
+    )
+
 
 def test_answer_correctness_metric():
     answer_correctness_metric = answer_correctness()