Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions vero/src/vero/harbor/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ class HarborConfig:
aggregate_attempts: str = "best"
extra_args: list[str] = field(default_factory=list) # passthrough harbor run flags

def __post_init__(self) -> None:
    """Reject any ``aggregate_attempts`` value other than 'best' or 'mean'.

    Raises:
        ValueError: if ``aggregate_attempts`` is not exactly ``"best"`` or
            ``"mean"``; the message includes the repr of the bad value.
    """
    # Per the original rationale: only the exact string "mean" activates
    # de-noising, so a typo ("Mean", "avg") must fail loudly here rather than
    # silently run best-of-k with inflated scores.
    mode = self.aggregate_attempts
    if mode in ("best", "mean"):
        return
    raise ValueError(
        f"aggregate_attempts must be 'best' or 'mean', got {mode!r}"
    )

@property
def is_registry(self) -> bool:
"""Local if the source resolves to an existing path; otherwise a registry ref."""
Expand Down
21 changes: 21 additions & 0 deletions vero/tests/test_harbor_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,24 @@ def test_default_best_unchanged(self, tmp_path):
0, "t0", _params(),
)
assert r.score == 1.0


class TestAggregateAttemptsValidation:
    """Construction-time validation of ``HarborConfig.aggregate_attempts``.

    A mistyped value must raise immediately: only the exact string 'mean'
    activates de-noising, so 'Mean'/'avg' would otherwise silently run
    inflated best-of-k.
    """

    def test_invalid_value_raises(self):
        # Wrong capitalisation counts as a typo and must be rejected.
        bad_kwargs = dict(
            task_source="org/ds",
            agent_import_path="p:m",
            aggregate_attempts="Mean",
        )
        with pytest.raises(ValueError, match="aggregate_attempts"):
            HarborConfig(**bad_kwargs)

    def test_valid_values_accepted(self):
        # Both supported modes construct cleanly and are stored unchanged.
        for mode in ("best", "mean"):
            built = HarborConfig(
                aggregate_attempts=mode,
                task_source="org/ds",
                agent_import_path="p:m",
            )
            assert built.aggregate_attempts == mode