Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion packages/cli/src/commands/code-pack.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ import { sha256Hex } from "@opencodehub/core-types";
import type { PackManifest } from "@opencodehub/pack";
import type { IGraphStore } from "@opencodehub/storage";
import {
ATTESTATION_FILENAME,
DEFAULT_BUDGET_TOKENS,
DEFAULT_ENGINE,
DEFAULT_TOKENIZER_ID,
explainContextBom,
formatContextSummary,
runCodePack,
SONNET5_TOKENIZER_ID,
writeContextAttestation,
} from "./code-pack.js";

function makeFakeManifest(overrides: Partial<PackManifest> = {}): PackManifest {
Expand Down Expand Up @@ -64,6 +67,17 @@ test("DEFAULT_TOKENIZER_ID matches the spec pin", () => {
assert.equal(DEFAULT_TOKENIZER_ID, "openai:o200k_base@tiktoken-0.8.0");
});

test("SONNET5_TOKENIZER_ID is the anthropic-prefixed Sonnet-5 lane", () => {
  // Pin the full identifier first so any change to name or pin is caught.
  const expectedLane = "anthropic:claude-sonnet-5@2026-06-30";
  assert.equal(SONNET5_TOKENIZER_ID, expectedLane);

  // Then guard the vendor prefix on its own. The prefix is what makes the
  // pack's resolveDeterminism downgrade the lane to best_effort (see pack
  // index.test.ts E2E-B2), so an accidental prefix change must fail loudly
  // with a message that explains why it matters.
  const usesAnthropicPrefix = SONNET5_TOKENIZER_ID.startsWith("anthropic:");
  assert.ok(
    usesAnthropicPrefix,
    "Sonnet-5 lane must use the anthropic: vendor prefix to inherit best_effort determinism",
  );
});

test("runCodePack defaults to engine=pack and dispatches to generatePack", async () => {
const repoPath = await mkdtemp(join(tmpdir(), "codehub-codepack-default-"));
try {
Expand Down Expand Up @@ -338,7 +352,7 @@ test("explainContextBom summarizes a context-bom.json on disk", async () => {
try {
const doc = {
bomFormat: "CycloneDX",
specVersion: "1.6",
specVersion: "1.7",
version: 1,
components: [
{
Expand Down Expand Up @@ -383,3 +397,45 @@ test("explainContextBom throws a clear error when context-bom.json is absent", a
await rm(dir, { recursive: true, force: true });
}
});

test("writeContextAttestation writes a parseable in-toto Statement to the pack dir", async () => {
  const packDir = await mkdtemp(join(tmpdir(), "codehub-prove-"));
  try {
    const manifest = makeFakeManifest({ packHash: "9".repeat(64) });
    const writtenPath = await writeContextAttestation(packDir, manifest);

    // The attestation lands at the documented filename inside the pack dir.
    const expectedPath = join(packDir, ATTESTATION_FILENAME);
    assert.equal(writtenPath, expectedPath);

    // The file must round-trip through JSON.parse.
    const statement = JSON.parse(await readFile(writtenPath, "utf8"));

    // Envelope: exact in-toto Statement v1 type plus our predicate type.
    assert.equal(statement._type, "https://in-toto.io/Statement/v1");
    assert.equal(statement.predicateType, "https://opencodehub.dev/attestation/context/v0.1");

    // Subject: exactly one entry whose sha256 digest is the manifest packHash.
    assert.equal(statement.subject.length, 1);
    assert.equal(statement.subject[0].digest.sha256, manifest.packHash);

    // Predicate: mirrors the manifest's context provenance, one BOM item per file.
    const predicate = statement.predicate;
    assert.equal(predicate.packHash, manifest.packHash);
    assert.equal(predicate.contextBomHash, manifest.contextBomHash);
    assert.equal(predicate.bomItems.length, manifest.files.length);
  } finally {
    await rm(packDir, { recursive: true, force: true });
  }
});

test("writeContextAttestation is byte-deterministic across two emissions", async () => {
  const firstDir = await mkdtemp(join(tmpdir(), "codehub-prove-det-1-"));
  const secondDir = await mkdtemp(join(tmpdir(), "codehub-prove-det-2-"));
  const packDirs = [firstDir, secondDir];
  try {
    // One manifest, emitted twice into independent directories.
    const manifest = makeFakeManifest();
    for (const packDir of packDirs) {
      await writeContextAttestation(packDir, manifest);
    }

    // Read both emissions back (in emission order) and compare them verbatim.
    const emissions: string[] = [];
    for (const packDir of packDirs) {
      emissions.push(await readFile(join(packDir, ATTESTATION_FILENAME), "utf8"));
    }
    const [first, second] = emissions;
    assert.equal(first, second);
  } finally {
    for (const packDir of packDirs) {
      await rm(packDir, { recursive: true, force: true });
    }
  }
});
78 changes: 76 additions & 2 deletions packages/cli/src/commands/code-pack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,20 @@

import { createHash } from "node:crypto";
import { existsSync, statSync } from "node:fs";
import { mkdir, mkdtemp, readFile, rename, rm } from "node:fs/promises";
import { mkdir, mkdtemp, readFile, rename, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
import type { FileNode, GraphNode, RepoNode } from "@opencodehub/core-types";
import { sha256Hex } from "@opencodehub/core-types";
import { parse as ingestionParse } from "@opencodehub/ingestion";
import { generatePack, type PackManifest } from "@opencodehub/pack";
import {
buildContextAttestation,
type CacheChannel,
DEFAULT_CACHE_CHANNEL,
generatePack,
type PackManifest,
serializeAttestation,
} from "@opencodehub/pack";
import { type IGraphStore, openStore, resolveGraphPath, type Store } from "@opencodehub/storage";
import { runPack } from "./pack.js";

Expand All @@ -48,6 +55,27 @@ export const DEFAULT_BUDGET_TOKENS = 100_000;
/**
 * Tokenizer identifier used when `--tokenizer` is omitted.
 * Written in the `<vendor>:<name>@<pin>` form.
 */
export const DEFAULT_TOKENIZER_ID = "openai:o200k_base@tiktoken-0.8.0";

/**
 * Tokenizer-provenance lane for Claude Sonnet 5 (launched 2026-06-30).
 *
 * Why the lane exists: Sonnet 5 ships a new tokenizer that inflates the same
 * source bytes by ~30-35% vs prior Claude tokenizers. A budget authored for
 * the default `openai:o200k_base` lane therefore under-provisions when the
 * *consuming* agent is Sonnet 5 — the pack's budgetTokens→chunkSize map is
 * 1:1, so the same budget silently produces oversized chunks under the
 * heavier tokenizer.
 *
 * What it is NOT: this constant is provenance metadata ONLY. It records which
 * tokenizer a pack was authored against so a variance probe (Finding 0001 v2)
 * can attribute results to a lane. It does NOT change the bytes→token math —
 * there is no runtime Sonnet-5 encoder.
 *
 * The `anthropic:` vendor prefix is load-bearing: `@opencodehub/pack`'s
 * `resolveDeterminism` downgrades any `anthropic:`-prefixed lane from
 * `strict` to `best_effort`, which is the correct class for a pack whose
 * byte-identity guarantee is relaxed by a Claude tokenizer. Do not change the
 * prefix without revisiting that behavior.
 *
 * Format follows the `<vendor>:<name>@<pin>` convention (see PackManifest).
 */
export const SONNET5_TOKENIZER_ID = "anthropic:claude-sonnet-5@2026-06-30";

/**
 * Engine used when `--engine` is omitted: `"pack"`, the new
 * `@opencodehub/pack` BOM. The annotation keeps the constant typed as the
 * full `"pack" | "repomix"` union rather than the narrower literal `"pack"`.
 */
export const DEFAULT_ENGINE: "pack" | "repomix" = "pack";

Expand All @@ -62,6 +90,13 @@ export interface CodePackArgs {
readonly outDir?: string;
/** Engine: "pack" (default) or "repomix" (legacy opt-in). */
readonly engine?: "pack" | "repomix";
/**
* Delivery channel for channel-aware cache-prefix enforcement (Move 4).
* Recorded on the pack options and threaded into the agent-facing assembly.
* Kept OUT of the manifest/packHash preimage, so the default (`auto`) leaves
* pack output byte-identical to pre-Move-4. Defaults to `auto`.
*/
readonly cacheChannel?: CacheChannel;
/**
* Test seam — inject a custom `generatePack` so unit tests don't need
* to load native storage bindings. Production callers leave this
Expand Down Expand Up @@ -105,6 +140,11 @@ export interface CodePackResult {
* directory; consumers should walk `outDir`).
*/
readonly repomixOutputPath?: string;
/**
* Absolute path of the in-toto context attestation, present only when the
* `--prove` flag emitted one (pack engine only). Undefined otherwise.
*/
readonly attestationPath?: string;
}

export async function runCodePack(args: CodePackArgs = {}): Promise<CodePackResult> {
Expand All @@ -120,6 +160,7 @@ export async function runCodePack(args: CodePackArgs = {}): Promise<CodePackResu
async function runPackEngine(repoPath: string, args: CodePackArgs): Promise<CodePackResult> {
const budget = args.budget ?? DEFAULT_BUDGET_TOKENS;
const tokenizer = args.tokenizer ?? DEFAULT_TOKENIZER_ID;
const cacheChannel = args.cacheChannel ?? DEFAULT_CACHE_CHANNEL;
const generate = args._generatePack ?? generatePack;

// Production: open a read-only graph store; tests inject `_store` to
Expand Down Expand Up @@ -175,6 +216,9 @@ async function runPackEngine(repoPath: string, args: CodePackArgs): Promise<Code
outDir: stagingDir,
budgetTokens: budget,
tokenizerId: tokenizer,
// Recorded on the pack options; deliberately not part of the manifest
// preimage (Move 4), so `auto` keeps packHash byte-identical to today.
cacheChannel,
},
composedStore !== undefined
? { store: composedStore, ...provenance }
Expand Down Expand Up @@ -358,6 +402,36 @@ export function statSizeOrZero(path: string): number {
}
}

/**
 * Basename of the in-toto context attestation emitted by `--prove`.
 * The file is written inside the pack directory alongside the BOM bodies;
 * callers join this name onto the pack's output directory to locate it.
 */
export const ATTESTATION_FILENAME = "attestation.intoto.json";

/**
 * Write the in-toto context attestation for a finished pack (spec: Move 3 /
 * `--prove`).
 *
 * The Statement is built from the manifest alone: its subject is the pack's
 * `packHash` and its predicate carries the context provenance plus the BOM
 * item list. The canonical JSON form is written to
 * `<outDir>/attestation.intoto.json`.
 *
 * Because the Statement is a pure function of the manifest (no clock, UUID,
 * or run-id), emitting again for the same pack produces byte-identical
 * output. The file is the UNSIGNED statement; signing (cosign keyless) is
 * left to CI, which can layer a DSSE envelope over these bytes.
 *
 * @param outDir - Pack directory the attestation file is written into.
 * @param manifest - Manifest of the finished pack the Statement is built from.
 * @returns The path of the written attestation (`outDir` joined with
 *   {@link ATTESTATION_FILENAME}) so the caller can surface it.
 */
export async function writeContextAttestation(
  outDir: string,
  manifest: PackManifest,
): Promise<string> {
  const canonicalJson = serializeAttestation(buildContextAttestation(manifest));
  const attestationPath = join(outDir, ATTESTATION_FILENAME);
  // Encode explicitly so the bytes on disk are exactly the UTF-8 form of the
  // canonical JSON string.
  const payload = new TextEncoder().encode(canonicalJson);
  await writeFile(attestationPath, payload);
  return attestationPath;
}

/** Summary of a pack's context read-receipt, derived from context-bom.json. */
export interface ContextSummary {
/** Number of source files recorded in the receipt. */
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/commands/replay.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ describe("loadPack (real on-disk)", () => {
// context-bom.json — CycloneDX with an opencodehub:byteRanges property.
const contextBom = JSON.stringify({
bomFormat: "CycloneDX",
specVersion: "1.6",
specVersion: "1.7",
components: [
{
type: "file",
Expand Down
76 changes: 76 additions & 0 deletions packages/cli/src/commands/variance-probe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { tmpdir } from "node:os";
import { join } from "node:path";
import { after, before, describe, it } from "node:test";
import type { AgentRunner, Harness, RunOutcome, RunRequest } from "@opencodehub/eval";
import { DEFAULT_TOKENIZER_ID, SONNET5_TOKENIZER_ID } from "./code-pack.js";
import { assemblePackContext, runVarianceProbe } from "./variance-probe.js";

/** A fake runner: stable answer with-pack, distinct answer per run without. */
Expand Down Expand Up @@ -108,6 +109,47 @@ describe("runVarianceProbe (seamed)", () => {
assert.equal(withPackPrompts, 2, "both with-pack runs saw the assembled context");
});

it("threads --pack-tokenizer into the assemble call and onto the report", async () => {
  // Captures the tokenizer lane handed to the assemble seam.
  let capturedLane: string | undefined;

  const probeReport = await runVarianceProbe({
    taskFile,
    runs: 1,
    harness: "claude",
    packTokenizer: SONNET5_TOKENIZER_ID,
    _assemblePackContext: async (_repo, tokenizer) => {
      capturedLane = tokenizer;
      return "PACK";
    },
    _runnerFor: (harness) => new FakeRunner(harness),
  });

  // The requested lane must reach the pack-assembly step...
  assert.equal(
    capturedLane,
    SONNET5_TOKENIZER_ID,
    "the with-pack arm packs under the requested lane",
  );
  // ...and must be recorded on the report for attribution.
  assert.equal(
    probeReport.packTokenizerId,
    SONNET5_TOKENIZER_ID,
    "the report attributes the result to the tokenizer lane (Finding 0001 v2)",
  );
});

it("falls back to the default tokenizer lane when --pack-tokenizer is absent", async () => {
  // Captures the tokenizer lane handed to the assemble seam.
  let capturedLane: string | undefined;

  // No `packTokenizer` is supplied, so the probe has to pick the lane itself.
  const probeReport = await runVarianceProbe({
    taskFile,
    runs: 1,
    harness: "claude",
    _assemblePackContext: async (_repo, tokenizer) => {
      capturedLane = tokenizer;
      return "PACK";
    },
    _runnerFor: (harness) => new FakeRunner(harness),
  });

  assert.equal(capturedLane, DEFAULT_TOKENIZER_ID, "default lane unchanged when flag omitted");
  assert.equal(probeReport.packTokenizerId, DEFAULT_TOKENIZER_ID);
});

it("builds a per-harness runner for each agent in the default set (Bug-2 routing)", async () => {
// With no --harness pin, the probe visits both agents; the default factory
// maps args.models[harness] to each. We assert the factory is invoked once
Expand Down Expand Up @@ -158,4 +200,38 @@ describe("assemblePackContext", () => {
// sorted: readme.md before skeleton.jsonl
assert.ok(ctx.indexOf("### readme.md") < ctx.indexOf("### skeleton.jsonl"));
});

it("Move 4: the auto default is byte-identical to the no-channel call (no marker)", async () => {
  // Baseline: no channel argument at all.
  const withoutChannel = await assemblePackContext(packDir);
  // Explicit `auto` must behave exactly like the baseline.
  const withAuto = await assemblePackContext(packDir, "auto");

  assert.equal(withAuto, withoutChannel, "auto must not perturb the default output");

  const hasMarker = withAuto.includes("opencodehub:cachePoint");
  assert.ok(!hasMarker, "auto emits no cache marker");
});

it("Move 4: an automatic channel emits no marker", async () => {
  const anthropicCtx = await assemblePackContext(packDir, "anthropic");

  const hasMarker = anthropicCtx.includes("opencodehub:cachePoint");
  assert.ok(!hasMarker, "anthropic caches automatically");

  // The output must also match the no-channel call verbatim.
  const markerFreeDefault = await assemblePackContext(packDir);
  assert.equal(anthropicCtx, markerFreeDefault, "identical to the marker-free default");
});

it("Move 4: bedrock inserts one cache-breakpoint sentinel at the prefix boundary", async () => {
  const bedrockCtx = await assemblePackContext(packDir, "bedrock");
  const sentinel =
    '<!-- opencodehub:cachePoint channel=bedrock {"cachePoint":{"type":"default"}} -->';

  assert.ok(bedrockCtx.includes(sentinel), "bedrock sentinel present");

  // Splitting on the sentinel yields (occurrences + 1) pieces.
  const occurrences = bedrockCtx.split(sentinel).length - 1;
  assert.equal(occurrences, 1, "exactly one marker");

  // skeleton.jsonl is the sole stable-prefix file present, so the boundary
  // sits immediately after it (before the volatile tail would begin).
  const skeletonAt = bedrockCtx.indexOf("### skeleton.jsonl");
  const sentinelAt = bedrockCtx.indexOf(sentinel);
  assert.ok(skeletonAt < sentinelAt, "marker follows the stable skeleton prefix");
});

it("Move 4: same channel twice is byte-identical (deterministic)", async () => {
  // Assemble the same pack twice, back to back, for the same channel.
  const firstPass = await assemblePackContext(packDir, "bedrock");
  const secondPass = await assemblePackContext(packDir, "bedrock");
  assert.equal(firstPass, secondPass);
});
});
Loading
Loading