togethercomputer · blainekasten · May 20, 2026 · May 20, 2026
diff --git a/src/together/lib/cli/api/evals/create.py b/src/together/lib/cli/api/evals/create.py
@@ -77,6 +77,13 @@ async def create(
     pass_threshold: Annotated[
         Optional[float], Parameter(help="Threshold for passing (required for score type)")
     ] = None,
+    disable_position_bias_correction: Annotated[
+        bool,
+        Parameter(
+            negative=(),
+            help="For compare evals, run only the original-order judge pass without position-bias correction",
+        ),
+    ] = False,
     model_a_field: Annotated[
         Optional[str],
         Parameter(
@@ -274,6 +281,7 @@ async def create(
             parameters=ParametersEvaluationCompareParameters(
                 input_data_file_path=training_file,
                 judge=judge_config,
+                disable_position_bias_correction=disable_position_bias_correction,
                 model_a=cast(ParametersEvaluationCompareParametersModelAEvaluationModelRequest, model_a_final),
                 model_b=cast(ParametersEvaluationCompareParametersModelBEvaluationModelRequest, model_b_final),
             ),

diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py
@@ -246,7 +246,8 @@
     --model-b deepseek-ai/DeepSeek-V3.1 \\
     --model-b-source serverless \\
     --model-b-system-template "You are a concise assistant." \\
-    --model-b-input-template $'Answer the following:\\n\\n{{prompt}}'[/primary]
+    --model-b-input-template $'Answer the following:\\n\\n{{prompt}}' \\
+    --disable-position-bias-correction[/primary]
 """
 
 ## Beta clusters API commands

diff --git a/tests/cli/test_evals.py b/tests/cli/test_evals.py
@@ -59,3 +59,41 @@ def test_status(self, respx_mock: MockRouter, cli_runner: CliRunner) -> None:
         result = cli_runner.invoke(["evals", "status", "eval-wf-1"])
         assert result.exit_code == 0
         assert "Status: completed" in result.output
+
+
+class TestEvalsCreate:  # CLI tests for the `evals create` command
+    @pytest.mark.respx(base_url=base_url)
+    def test_compare_passes_disable_position_bias_correction(
+        self, respx_mock: MockRouter, cli_runner: CliRunner
+    ) -> None:
+        route = respx_mock.post("/evaluation").mock(  # stub POST /evaluation so no real request is sent
+            return_value=httpx.Response(200, json={"workflow_id": "eval-wf-1", "status": "pending"})
+        )
+
+        result = cli_runner.invoke(  # run a compare-type eval with the new flag set
+            [
+                "evals",
+                "create",
+                "--type",
+                "compare",
+                "--judge-model",
+                "Qwen/Qwen3.5-9B",
+                "--judge-model-source",
+                "serverless",
+                "--judge-system-template",
+                "Choose the better response.",
+                "--input-data-file-path",
+                "file-123",
+                "--model-a-field",
+                "response_a",
+                "--model-b-field",
+                "response_b",
+                "--disable-position-bias-correction",  # bare boolean flag, no value supplied
+            ]
+        )
+
+        assert result.exit_code == 0
+        req = cast(Call, route.calls[0]).request  # first request recorded by the mocked route
+        payload = json.loads(req.content)
+        assert payload["type"] == "compare"
+        assert payload["parameters"]["disable_position_bias_correction"] is True  # flag must reach the request body as JSON true