From 5abd79691a4d1cab59cdfac73dc6b6c21d0113c8 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Wed, 20 May 2026 16:47:38 +0000
Subject: [PATCH] feat(cli): add flag to disable position-bias correction for compare evals

Co-authored-by: Blaine Kasten <blainekasten@gmail.com>
---
 src/together/lib/cli/api/evals/create.py     |  8 +++++
 src/together/lib/cli/utils/_help_examples.py |  3 +-
 tests/cli/test_evals.py                      | 38 ++++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/src/together/lib/cli/api/evals/create.py b/src/together/lib/cli/api/evals/create.py
index 400397744..345794160 100644
--- a/src/together/lib/cli/api/evals/create.py
+++ b/src/together/lib/cli/api/evals/create.py
@@ -77,6 +77,13 @@ async def create(
     pass_threshold: Annotated[
         Optional[float], Parameter(help="Threshold for passing (required for score type)")
     ] = None,
+    disable_position_bias_correction: Annotated[
+        bool,
+        Parameter(
+            negative=(),
+            help="For compare evals, run only the original-order judge pass without position-bias correction",
+        ),
+    ] = False,
     model_a_field: Annotated[
         Optional[str],
         Parameter(
@@ -274,6 +281,7 @@ async def create(
             parameters=ParametersEvaluationCompareParameters(
                 input_data_file_path=training_file,
                 judge=judge_config,
+                disable_position_bias_correction=disable_position_bias_correction,
                 model_a=cast(ParametersEvaluationCompareParametersModelAEvaluationModelRequest, model_a_final),
                 model_b=cast(ParametersEvaluationCompareParametersModelBEvaluationModelRequest, model_b_final),
             ),
diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py
index 817858769..2bbe64ea8 100644
--- a/src/together/lib/cli/utils/_help_examples.py
+++ b/src/together/lib/cli/utils/_help_examples.py
@@ -246,7 +246,8 @@
     --model-b deepseek-ai/DeepSeek-V3.1 \\
     --model-b-source serverless \\
     --model-b-system-template "You are a concise assistant." \\
-    --model-b-input-template $'Answer the following:\\n\\n{{prompt}}'[/primary]
+    --model-b-input-template $'Answer the following:\\n\\n{{prompt}}' \\
+    --disable-position-bias-correction[/primary]
 """
 
 ## Beta clusters API commands
diff --git a/tests/cli/test_evals.py b/tests/cli/test_evals.py
index 319a95a61..0bc318f17 100644
--- a/tests/cli/test_evals.py
+++ b/tests/cli/test_evals.py
@@ -59,3 +59,41 @@ def test_status(self, respx_mock: MockRouter, cli_runner: CliRunner) -> None:
         result = cli_runner.invoke(["evals", "status", "eval-wf-1"])
         assert result.exit_code == 0
         assert "Status: completed" in result.output
+
+
+class TestEvalsCreate:  # CLI tests for the `evals create` command
+    @pytest.mark.respx(base_url=base_url)
+    def test_compare_passes_disable_position_bias_correction(
+        self, respx_mock: MockRouter, cli_runner: CliRunner
+    ) -> None:
+        route = respx_mock.post("/evaluation").mock(  # stub the POST; keep the route to inspect the call
+            return_value=httpx.Response(200, json={"workflow_id": "eval-wf-1", "status": "pending"})
+        )
+
+        result = cli_runner.invoke(
+            [
+                "evals",
+                "create",
+                "--type",
+                "compare",
+                "--judge-model",
+                "Qwen/Qwen3.5-9B",
+                "--judge-model-source",
+                "serverless",
+                "--judge-system-template",
+                "Choose the better response.",
+                "--input-data-file-path",
+                "file-123",
+                "--model-a-field",
+                "response_a",
+                "--model-b-field",
+                "response_b",
+                "--disable-position-bias-correction",  # flag under test, passed bare with no value
+            ]
+        )
+
+        assert result.exit_code == 0
+        req = cast(Call, route.calls[0]).request  # first request recorded by the mocked route
+        payload = json.loads(req.content)
+        assert payload["type"] == "compare"
+        assert payload["parameters"]["disable_position_bias_correction"] is True  # sent as JSON true