From 99e81e21dfe95dd756b035bf690cd6f8e4f40397 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Fri, 5 Jun 2026 16:52:50 -0600
Subject: [PATCH] =?UTF-8?q?chore(release):=200.81.0=20=E2=80=94=20aggregat?=
 =?UTF-8?q?eJudgeVerdicts=20+=20token-recall=20checker=20+=20ErrorCluster?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lockstep version bump (npm + pyproject + python __version__ fallback) for the
eval-campaign scaffold prep primitives merged in #223 + #224.
---
 CHANGELOG.md                                  | 14 ++++++++++++++
 clients/python/pyproject.toml                 |  2 +-
 clients/python/src/agent_eval_rpc/__init__.py |  2 +-
 package.json                                  |  2 +-
 4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04b398dc..7c6f5f52 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,20 @@ All notable changes to `@tangle-network/agent-eval` and its sibling `agent-eval-
 
 ---
 
+## [0.81.0] — 2026-06-05 — eval-campaign scaffold prep primitives
+
+### Added
+
+- **`aggregateJudgeVerdicts<D>` (root).** Generic judge-ensemble reducer: fan out N uncorrelated judges, average each rubric dimension over the surviving judges, report the inter-rater disagreement spread, and sum cost. Replaces the same reduction hand-rolled in legal (`aggregateEnsemble`), creative (`production-loop/judges.ts`), and tax (`judge-ensemble.ts`). Fail-loud: a failed judge (`perDimension: null`) is recorded in `failedJudges` and never folded into a zero; if every judge fails, it throws; a failed judge's cost is still summed. Composite reuses `weightedComposite`.
+- **`createTokenRecallChecker` (root).** The deterministic, no-LLM `CorrectnessChecker` — sibling of `createLlmCorrectnessChecker`. A produced item fulfils a requirement when its content is substantive and recalls ≥ `minRecall` of the requirement title's significant tokens. The default completion gate for apps/tests without an LLM judge.
+- **`ErrorCluster` (root + `/analyst`).** The failure-cluster element type is now a named export, so consumers import it instead of deriving `DatasetOverview['error_clusters'][number]`.
+
+### Fixed
+
+- **Lint drift + non-executable pre-commit hook.** `.husky/pre-commit` was tracked with mode `100644` (not executable), so the hook silently no-op'd and unformatted code reached `main`; the hook is now marked executable and the drifted files were reformatted.
+
+---
+
 ## [0.72.3] — 2026-06-01 — workflow trace hardening and driver backtests
 
 ### Added
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 38a7fbe3..0961232a 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "agent-eval-rpc"
-version = "0.80.0"
+version = "0.81.0"
 description = "Python RPC client for @tangle-network/agent-eval — judge content against rubrics over HTTP or stdio RPC. Eval logic runs in the Node runtime; this package is a thin wire client."
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/clients/python/src/agent_eval_rpc/__init__.py b/clients/python/src/agent_eval_rpc/__init__.py
index 3a0f166d..6c490740 100644
--- a/clients/python/src/agent_eval_rpc/__init__.py
+++ b/clients/python/src/agent_eval_rpc/__init__.py
@@ -58,7 +58,7 @@
 try:
     __version__ = version("agent-eval-rpc")
 except PackageNotFoundError:
-    __version__ = "0.80.0"
+    __version__ = "0.81.0"
 
 __all__ = [
     "Client",
diff --git a/package.json b/package.json
index 2315ccc0..d1961078 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-eval",
-  "version": "0.80.0",
+  "version": "0.81.0",
   "description": "Evaluate and improve AI agents from runs, traces, judges, and feedback. Compare candidates, cluster failures, measure lift, and gate releases.",
   "homepage": "https://github.com/tangle-network/agent-eval#readme",
   "repository": {