From 81c65674ec143be3c7c54965928746804da8f996 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 19:30:58 +0200 Subject: [PATCH 01/16] feat(shared): add model pricing table + computeTurnCost Seed rates for Claude (sonnet-4.6, opus-4.6/4.7/4.5, haiku-4.5) and Codex (gpt-5.4, 5.3-codex, spark, mini) in USD per 1M tokens. getPricing() resolves via provider aliases with zero-rate fallback. computeTurnCost() splits input / cached / output / reasoning spend. Prep for session + MTD cost meter. --- packages/shared/package.json | 4 + packages/shared/src/pricing.test.ts | 145 +++++++++++++++++ packages/shared/src/pricing.ts | 243 ++++++++++++++++++++++++++++ 3 files changed, 392 insertions(+) create mode 100644 packages/shared/src/pricing.test.ts create mode 100644 packages/shared/src/pricing.ts diff --git a/packages/shared/package.json b/packages/shared/package.json index 82085dfcaf..84899e5e31 100644 --- a/packages/shared/package.json +++ b/packages/shared/package.json @@ -71,6 +71,10 @@ "./path": { "types": "./src/path.ts", "import": "./src/path.ts" + }, + "./pricing": { + "types": "./src/pricing.ts", + "import": "./src/pricing.ts" } }, "scripts": { diff --git a/packages/shared/src/pricing.test.ts b/packages/shared/src/pricing.test.ts new file mode 100644 index 0000000000..de76bea819 --- /dev/null +++ b/packages/shared/src/pricing.test.ts @@ -0,0 +1,145 @@ +import { describe, expect, it } from "vitest"; + +import { + PRICING_TABLE, + UNKNOWN_MODEL_PRICING, + computeTurnCost, + formatUsd, + getPricing, +} from "./pricing.ts"; + +describe("pricing/getPricing", () => { + it("resolves canonical Claude slug", () => { + const p = getPricing("claude-sonnet-4-6"); + expect(p.provider).toBe("claudeAgent"); + expect(p.inputPerMTok).toBe(3); + expect(p.cachedInputPerMTok).toBe(0.3); + expect(p.outputPerMTok).toBe(15); + }); + + it("resolves Claude short alias via provider", () => { + const p = getPricing("sonnet", "claudeAgent"); + expect(p.provider).toBe("claudeAgent"); 
+ expect(p.inputPerMTok).toBe(3); + }); + + it("resolves Codex canonical slug", () => { + const p = getPricing("gpt-5.4"); + expect(p.provider).toBe("codex"); + expect(p.inputPerMTok).toBe(1.25); + expect(p.outputPerMTok).toBe(10); + }); + + it("resolves Codex spark as mini tier", () => { + const p = getPricing("gpt-5.3-codex-spark"); + expect(p.outputPerMTok).toBe(2); + }); + + it("falls back to zero-rate for unknown model", () => { + const p = getPricing("llama-7b-xyz"); + expect(p).toEqual(UNKNOWN_MODEL_PRICING); + }); + + it("falls back for empty / null model", () => { + expect(getPricing(null)).toEqual(UNKNOWN_MODEL_PRICING); + expect(getPricing("")).toEqual(UNKNOWN_MODEL_PRICING); + expect(getPricing(" ")).toEqual(UNKNOWN_MODEL_PRICING); + }); + + it("defaults reasoningOutput rate to output rate", () => { + for (const pricing of PRICING_TABLE.values()) { + expect(pricing.reasoningOutputPerMTok).toBe(pricing.outputPerMTok); + } + }); +}); + +describe("pricing/computeTurnCost", () => { + it("computes Claude Sonnet turn cost correctly", () => { + const cost = computeTurnCost("claude-sonnet-4-6", { + inputTokens: 10_000, + cachedInputTokens: 100_000, + outputTokens: 2_000, + reasoningOutputTokens: 500, + }); + // 10k * $3/Mtok = $0.03 + expect(cost.inputUsd).toBeCloseTo(0.03, 6); + // 100k * $0.30/Mtok = $0.03 + expect(cost.cachedUsd).toBeCloseTo(0.03, 6); + // 2k * $15/Mtok = $0.03 + expect(cost.outputUsd).toBeCloseTo(0.03, 6); + // 500 * $15/Mtok = $0.0075 + expect(cost.reasoningUsd).toBeCloseTo(0.0075, 6); + expect(cost.totalUsd).toBeCloseTo(0.0975, 6); + }); + + it("computes Codex GPT-5.4 turn cost correctly", () => { + const cost = computeTurnCost("gpt-5.4", { + inputTokens: 1_000_000, + cachedInputTokens: 0, + outputTokens: 100_000, + reasoningOutputTokens: 50_000, + }); + // 1M * $1.25 = $1.25 + expect(cost.inputUsd).toBeCloseTo(1.25, 6); + expect(cost.cachedUsd).toBe(0); + // 100k * $10/Mtok = $1 + expect(cost.outputUsd).toBeCloseTo(1, 6); + // 50k * 
$10/Mtok = $0.5 + expect(cost.reasoningUsd).toBeCloseTo(0.5, 6); + expect(cost.totalUsd).toBeCloseTo(2.75, 6); + }); + + it("returns zero cost for unknown model", () => { + const cost = computeTurnCost("fake-model", { + inputTokens: 10_000, + outputTokens: 10_000, + }); + expect(cost.totalUsd).toBe(0); + }); + + it("ignores negative / non-finite deltas", () => { + const cost = computeTurnCost("claude-sonnet-4-6", { + inputTokens: -100, + outputTokens: Number.NaN, + cachedInputTokens: Number.POSITIVE_INFINITY, + reasoningOutputTokens: 0, + }); + expect(cost.totalUsd).toBe(0); + }); + + it("handles missing fields", () => { + const cost = computeTurnCost("claude-sonnet-4-6", { outputTokens: 1_000 }); + expect(cost.outputUsd).toBeCloseTo(0.015, 6); + expect(cost.inputUsd).toBe(0); + expect(cost.cachedUsd).toBe(0); + expect(cost.reasoningUsd).toBe(0); + expect(cost.totalUsd).toBeCloseTo(0.015, 6); + }); +}); + +describe("pricing/formatUsd", () => { + it("formats zero + invalid", () => { + expect(formatUsd(0)).toBe("$0.00"); + expect(formatUsd(null)).toBe("$0.00"); + expect(formatUsd(Number.NaN)).toBe("$0.00"); + expect(formatUsd(-1)).toBe("$0.00"); + }); + + it("formats sub-cent", () => { + expect(formatUsd(0.002)).toBe("<$0.01"); + }); + + it("formats cents with 3 digits trimmed", () => { + expect(formatUsd(0.125)).toBe("$0.125"); + expect(formatUsd(0.12)).toBe("$0.12"); + }); + + it("formats 2-digit dollars", () => { + expect(formatUsd(1.234)).toBe("$1.23"); + expect(formatUsd(12.5)).toBe("$12.50"); + }); + + it("formats large dollars rounded", () => { + expect(formatUsd(1234.56)).toBe("$1,235"); + }); +}); diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts new file mode 100644 index 0000000000..3ab9685348 --- /dev/null +++ b/packages/shared/src/pricing.ts @@ -0,0 +1,243 @@ +import { normalizeModelSlug } from "./model.ts"; +import type { ProviderKind } from "@t3tools/contracts"; + +/** + * USD price per 1,000,000 tokens for each token class. 
+ * + * `cachedInput` is the discounted input price applied when the provider + * serves cached prefix tokens (Anthropic prompt caching / OpenAI cached input). + * `reasoningOutput` defaults to `output` when a model does not bill reasoning + * tokens separately. + */ +export interface ModelPricing { + readonly provider: ProviderKind | "unknown"; + readonly inputPerMTok: number; + readonly cachedInputPerMTok: number; + readonly outputPerMTok: number; + readonly reasoningOutputPerMTok: number; +} + +/** Raw seed rates (USD per 1M tokens). Source: public provider pricing pages. */ +const SEED_PRICING: ReadonlyArray< + readonly [string, Omit & { reasoningOutputPerMTok?: number }] +> = [ + // ── Anthropic / Claude ─────────────────────────────────────────────── + // Extended-thinking tokens are billed as output tokens. + [ + "claude-sonnet-4-6", + { + provider: "claudeAgent", + inputPerMTok: 3, + cachedInputPerMTok: 0.3, + outputPerMTok: 15, + }, + ], + [ + "claude-opus-4-7", + { + provider: "claudeAgent", + inputPerMTok: 15, + cachedInputPerMTok: 1.5, + outputPerMTok: 75, + }, + ], + [ + "claude-opus-4-6", + { + provider: "claudeAgent", + inputPerMTok: 15, + cachedInputPerMTok: 1.5, + outputPerMTok: 75, + }, + ], + [ + "claude-opus-4-5", + { + provider: "claudeAgent", + inputPerMTok: 15, + cachedInputPerMTok: 1.5, + outputPerMTok: 75, + }, + ], + [ + "claude-haiku-4-5", + { + provider: "claudeAgent", + inputPerMTok: 1, + cachedInputPerMTok: 0.1, + outputPerMTok: 5, + }, + ], + // ── OpenAI / Codex ─────────────────────────────────────────────────── + // Codex app routes use GPT-5 family pricing. Reasoning tokens bill as output. 
+ [ + "gpt-5.4", + { + provider: "codex", + inputPerMTok: 1.25, + cachedInputPerMTok: 0.125, + outputPerMTok: 10, + }, + ], + [ + "gpt-5.3-codex", + { + provider: "codex", + inputPerMTok: 1.25, + cachedInputPerMTok: 0.125, + outputPerMTok: 10, + }, + ], + [ + "gpt-5.3-codex-spark", + { + provider: "codex", + inputPerMTok: 0.25, + cachedInputPerMTok: 0.025, + outputPerMTok: 2, + }, + ], + [ + "gpt-5.4-mini", + { + provider: "codex", + inputPerMTok: 0.25, + cachedInputPerMTok: 0.025, + outputPerMTok: 2, + }, + ], +]; + +/** + * Pricing table keyed by canonical model slug. + * Typed as ReadonlyMap so consumers can't mutate rates through the exported type; neither the map nor its entries are frozen at runtime. + */ +export const PRICING_TABLE: ReadonlyMap = (() => { + const map = new Map(); + for (const [slug, raw] of SEED_PRICING) { + map.set(slug, { + ...raw, + reasoningOutputPerMTok: raw.reasoningOutputPerMTok ?? raw.outputPerMTok, + }); + } + return map; +})(); + +/** Zero-cost fallback for unknown models. Keeps total cost honest (no fake rate). */ +export const UNKNOWN_MODEL_PRICING: ModelPricing = { + provider: "unknown", + inputPerMTok: 0, + cachedInputPerMTok: 0, + outputPerMTok: 0, + reasoningOutputPerMTok: 0, +}; + +/** + * Resolve pricing for a model slug. Tries provider-aware alias normalization + * first (so `"sonnet"` → `"claude-sonnet-4-6"`), then direct lookup, then + * each provider's aliases in turn, and finally returns the zero-rate fallback. + */ +export function getPricing( + model: string | null | undefined, + provider?: ProviderKind, +): ModelPricing { + if (typeof model !== "string") { + return UNKNOWN_MODEL_PRICING; + } + const trimmed = model.trim(); + if (!trimmed) { + return UNKNOWN_MODEL_PRICING; + } + // Provider-aware alias normalization. + if (provider) { + const normalized = normalizeModelSlug(trimmed, provider); + if (normalized) { + const direct = PRICING_TABLE.get(normalized); + if (direct) return direct; + } + } + // Direct lookup (raw slug may already be canonical).
+ const direct = PRICING_TABLE.get(trimmed); + if (direct) return direct; + + // Try each provider's aliases as a last resort. + const providers: ProviderKind[] = ["codex", "claudeAgent", "cursor", "opencode"]; + for (const p of providers) { + const normalized = normalizeModelSlug(trimmed, p); + if (normalized) { + const hit = PRICING_TABLE.get(normalized); + if (hit) return hit; + } + } + return UNKNOWN_MODEL_PRICING; +} + +export interface TurnTokenDeltas { + readonly inputTokens: number; + readonly cachedInputTokens: number; + readonly outputTokens: number; + readonly reasoningOutputTokens: number; +} + +export interface TurnCostBreakdown { + readonly inputUsd: number; + readonly cachedUsd: number; + readonly outputUsd: number; + readonly reasoningUsd: number; + readonly totalUsd: number; +} + +export const ZERO_COST: TurnCostBreakdown = { + inputUsd: 0, + cachedUsd: 0, + outputUsd: 0, + reasoningUsd: 0, + totalUsd: 0, +}; + +function finite(value: number | null | undefined): number { + return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0; +} + +/** + * Compute USD cost for one turn's token deltas. + * Anthropic bills cached-input tokens at a reduced rate *instead of* the + * full input rate — so callers pass the non-cached input count in + * `inputTokens` and the cached prefix count in `cachedInputTokens`. 
+ */ +export function computeTurnCost( + model: string | null | undefined, + deltas: Partial, + provider?: ProviderKind, +): TurnCostBreakdown { + const pricing = getPricing(model, provider); + const input = finite(deltas.inputTokens); + const cached = finite(deltas.cachedInputTokens); + const output = finite(deltas.outputTokens); + const reasoning = finite(deltas.reasoningOutputTokens); + + const inputUsd = (input / 1_000_000) * pricing.inputPerMTok; + const cachedUsd = (cached / 1_000_000) * pricing.cachedInputPerMTok; + const outputUsd = (output / 1_000_000) * pricing.outputPerMTok; + const reasoningUsd = (reasoning / 1_000_000) * pricing.reasoningOutputPerMTok; + const totalUsd = inputUsd + cachedUsd + outputUsd + reasoningUsd; + + return { inputUsd, cachedUsd, outputUsd, reasoningUsd, totalUsd }; +} + +/** Format USD amount for UI display. */ +export function formatUsd(value: number | null | undefined): string { + if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) { + return "$0.00"; + } + if (value < 0.01) { + return `<$0.01`; + } + if (value < 1) { + return `$${value.toFixed(3).replace(/0$/, "")}`; + } + if (value < 100) { + return `$${value.toFixed(2)}`; + } + return `$${Math.round(value).toLocaleString("en-US")}`; +} From 589d72ca2b1d88bdee9332e13ba0be67cf00ae08 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 19:32:59 +0200 Subject: [PATCH 02/16] feat(web): add cost store with session + month buckets localStorage-persisted zustand store at t3code:cost-store:v1. Pure reducers accumulate token + USD spend per thread (session) and per YYYY-MM in local tz (month-to-date). sanitize*() guards garbage payloads; selectors expose session/month buckets and avg cost per turn. Tests: 17 pass. 
--- apps/web/src/lib/costStore.test.ts | 310 ++++++++++++++++++++++++++++ apps/web/src/lib/costStore.ts | 318 +++++++++++++++++++++++++++++ 2 files changed, 628 insertions(+) create mode 100644 apps/web/src/lib/costStore.test.ts create mode 100644 apps/web/src/lib/costStore.ts diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts new file mode 100644 index 0000000000..1162f11d12 --- /dev/null +++ b/apps/web/src/lib/costStore.test.ts @@ -0,0 +1,310 @@ +import { beforeEach, describe, expect, it } from "vitest"; + +import { + COST_STORE_STORAGE_KEY, + localMonthKey, + reduceRecordTurnCost, + reduceResetSession, + sanitizePersistedCostState, + selectCostSummary, + useCostStore, + type PersistedCostState, +} from "./costStore"; + +function freshState(): PersistedCostState { + return { version: 1, sessions: {}, months: {} }; +} + +const cost = (total: number) => ({ + inputUsd: 0, + cachedUsd: 0, + outputUsd: 0, + reasoningUsd: 0, + totalUsd: total, +}); + +const deltas = ( + d: Partial<{ + inputTokens: number; + cachedInputTokens: number; + outputTokens: number; + reasoningOutputTokens: number; + }> = {}, +) => ({ + inputTokens: d.inputTokens ?? 0, + cachedInputTokens: d.cachedInputTokens ?? 0, + outputTokens: d.outputTokens ?? 0, + reasoningOutputTokens: d.reasoningOutputTokens ?? 
0, +}); + +describe("localMonthKey", () => { + it("formats YYYY-MM in local tz", () => { + const date = new Date(2026, 3, 7, 12, 0, 0); // April 7 2026 local + expect(localMonthKey(date)).toBe("2026-04"); + }); + + it("pads single-digit months", () => { + const date = new Date(2026, 0, 1, 0, 0, 0); + expect(localMonthKey(date)).toBe("2026-01"); + }); +}); + +describe("reduceRecordTurnCost", () => { + const at = new Date(2026, 3, 21, 10, 0, 0); // April 21 2026 + + it("accumulates into session + month bucket", () => { + let state = freshState(); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }), + breakdown: cost(0.01), + at, + }); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ inputTokens: 500, outputTokens: 200 }), + breakdown: cost(0.005), + at, + }); + + const session = state.sessions["t1"]!; + expect(session.totalUsd).toBeCloseTo(0.015, 6); + expect(session.turnCount).toBe(2); + expect(session.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(1_500); + expect(session.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(700); + expect(session.byModel["claude-sonnet-4-6"]!.turnCount).toBe(2); + + const month = state.months["2026-04"]!; + expect(month.totalUsd).toBeCloseTo(0.015, 6); + expect(month.turnCount).toBe(2); + }); + + it("keeps per-model tallies separate", () => { + let state = freshState(); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at, + }); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "gpt-5.4", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.02), + at, + }); + const session = state.sessions["t1"]!; + expect(Object.keys(session.byModel).sort()).toEqual(["claude-sonnet-4-6", "gpt-5.4"]); + expect(session.totalUsd).toBeCloseTo(0.03, 6); + }); + + it("isolates 
sessions by threadId", () => { + let state = freshState(); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at, + }); + state = reduceRecordTurnCost(state, { + threadId: "t2", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.02), + at, + }); + expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6); + expect(state.sessions["t2"]!.totalUsd).toBeCloseTo(0.02, 6); + // Month aggregates both sessions. + expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.03, 6); + }); + + it("buckets by local month", () => { + let state = freshState(); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at: new Date(2026, 2, 31, 10, 0, 0), // March + }); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.02), + at: new Date(2026, 3, 1, 10, 0, 0), // April + }); + expect(Object.keys(state.months).sort()).toEqual(["2026-03", "2026-04"]); + expect(state.months["2026-03"]!.totalUsd).toBeCloseTo(0.01, 6); + expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.02, 6); + // Session spans both months. 
+ expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.03, 6); + }); + + it("ignores zero-token zero-cost turns", () => { + const before = freshState(); + const after = reduceRecordTurnCost(before, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas(), + breakdown: cost(0), + at, + }); + expect(after).toBe(before); + }); + + it("ignores blank threadId / model", () => { + const before = freshState(); + const a = reduceRecordTurnCost(before, { + threadId: "", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 10 }), + breakdown: cost(0.01), + at, + }); + const b = reduceRecordTurnCost(before, { + threadId: "t1", + model: "", + deltas: deltas({ outputTokens: 10 }), + breakdown: cost(0.01), + at, + }); + expect(a).toBe(before); + expect(b).toBe(before); + }); +}); + +describe("reduceResetSession", () => { + it("removes the session but keeps month", () => { + let state = freshState(); + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at: new Date(2026, 3, 21, 10, 0, 0), + }); + const next = reduceResetSession(state, "t1"); + expect(next.sessions["t1"]).toBeUndefined(); + expect(next.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6); + }); + + it("no-op for unknown threadId", () => { + const state = freshState(); + expect(reduceResetSession(state, "nope")).toBe(state); + }); +}); + +describe("sanitizePersistedCostState", () => { + it("returns initial for garbage", () => { + expect(sanitizePersistedCostState(null).sessions).toEqual({}); + expect(sanitizePersistedCostState("bad").months).toEqual({}); + expect(sanitizePersistedCostState({ version: 99 }).months).toEqual({}); + }); + + it("drops invalid month keys", () => { + const cleaned = sanitizePersistedCostState({ + version: 1, + sessions: {}, + months: { + "2026-04": { totalUsd: 1, turnCount: 1, byModel: {} }, + "bogus": { totalUsd: 99, turnCount: 1, byModel: {} }, + }, + }); + 
expect(Object.keys(cleaned.months)).toEqual(["2026-04"]); + }); + + it("coerces non-finite numbers to zero", () => { + const cleaned = sanitizePersistedCostState({ + version: 1, + sessions: { + t1: { + totalUsd: Number.NaN, + turnCount: -5, + byModel: { + "claude-sonnet-4-6": { + inputTokens: "abc", + outputTokens: 10, + totalUsd: 5, + turnCount: 1, + }, + }, + }, + }, + months: {}, + }); + const s = cleaned.sessions["t1"]!; + expect(s.totalUsd).toBe(0); + expect(s.turnCount).toBe(0); + expect(s.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(0); + expect(s.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(10); + expect(s.byModel["claude-sonnet-4-6"]!.totalUsd).toBe(5); + }); +}); + +describe("selectCostSummary", () => { + it("returns zero summary for empty state", () => { + const summary = selectCostSummary(freshState(), "t1", new Date(2026, 3, 21)); + expect(summary.sessionUsd).toBe(0); + expect(summary.monthUsd).toBe(0); + expect(summary.averagePerTurnUsd).toBeNull(); + expect(summary.monthKey).toBe("2026-04"); + }); + + it("computes average per turn", () => { + let state = freshState(); + for (let i = 0; i < 4; i += 1) { + state = reduceRecordTurnCost(state, { + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at: new Date(2026, 3, 21), + }); + } + const summary = selectCostSummary(state, "t1", new Date(2026, 3, 21)); + expect(summary.sessionUsd).toBeCloseTo(0.04, 6); + expect(summary.averagePerTurnUsd).toBeCloseTo(0.01, 6); + expect(summary.sessionTurnCount).toBe(4); + }); +}); + +describe("useCostStore (zustand)", () => { + beforeEach(() => { + useCostStore.getState().resetAll(); + if (typeof window !== "undefined") { + window.localStorage.removeItem(COST_STORE_STORAGE_KEY); + } + }); + + it("records turn cost via action", () => { + useCostStore.getState().recordTurnCost({ + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }), + breakdown: 
cost(0.01), + at: new Date(2026, 3, 21), + }); + const state = useCostStore.getState(); + expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6); + expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6); + }); + + it("resetSession clears one thread", () => { + useCostStore.getState().recordTurnCost({ + threadId: "t1", + model: "claude-sonnet-4-6", + deltas: deltas({ outputTokens: 100 }), + breakdown: cost(0.01), + at: new Date(2026, 3, 21), + }); + useCostStore.getState().resetSession("t1"); + expect(useCostStore.getState().sessions["t1"]).toBeUndefined(); + }); +}); diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts new file mode 100644 index 0000000000..1cf4fbeb8b --- /dev/null +++ b/apps/web/src/lib/costStore.ts @@ -0,0 +1,318 @@ +import { Debouncer } from "@tanstack/react-pacer"; +import { create } from "zustand"; +import type { TurnCostBreakdown, TurnTokenDeltas } from "@t3tools/shared/pricing"; +import { formatUsd } from "@t3tools/shared/pricing"; + +export const COST_STORE_STORAGE_KEY = "t3code:cost-store:v1"; + +/** Cumulative token counts + USD spend for one model within a bucket. */ +export interface ModelCostEntry { + inputTokens: number; + cachedInputTokens: number; + outputTokens: number; + reasoningOutputTokens: number; + totalUsd: number; + turnCount: number; +} + +export interface CostBucket { + totalUsd: number; + turnCount: number; + byModel: Record; +} + +export interface PersistedCostState { + version: 1; + sessions: Record; + months: Record; +} + +export interface CostStoreState extends PersistedCostState { + recordTurnCost: (input: RecordTurnCostInput) => void; + resetSession: (threadId: string) => void; + resetAll: () => void; + /** Test-only hook: replace state atomically. 
*/ + __replaceState: (next: PersistedCostState) => void; +} + +export interface RecordTurnCostInput { + threadId: string; + model: string; + deltas: TurnTokenDeltas; + breakdown: TurnCostBreakdown; + /** Override "now" for deterministic tests. */ + at?: Date; +} + +const emptyBucket: () => CostBucket = () => ({ totalUsd: 0, turnCount: 0, byModel: {} }); +const emptyModelEntry: () => ModelCostEntry = () => ({ + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningOutputTokens: 0, + totalUsd: 0, + turnCount: 0, +}); + +const initialState: PersistedCostState = { + version: 1, + sessions: {}, + months: {}, +}; + +/** + * Compute `YYYY-MM` key for a Date in the **local** timezone. + * Done via `getFullYear/getMonth` (not toISOString) so the month rolls over + * on the user's clock, not UTC's. + */ +export function localMonthKey(date: Date = new Date()): string { + const year = date.getFullYear().toString().padStart(4, "0"); + const month = (date.getMonth() + 1).toString().padStart(2, "0"); + return `${year}-${month}`; +} + +function addTurnToEntry( + entry: ModelCostEntry, + deltas: TurnTokenDeltas, + breakdown: TurnCostBreakdown, +): ModelCostEntry { + return { + inputTokens: entry.inputTokens + deltas.inputTokens, + cachedInputTokens: entry.cachedInputTokens + deltas.cachedInputTokens, + outputTokens: entry.outputTokens + deltas.outputTokens, + reasoningOutputTokens: entry.reasoningOutputTokens + deltas.reasoningOutputTokens, + totalUsd: entry.totalUsd + breakdown.totalUsd, + turnCount: entry.turnCount + 1, + }; +} + +function addTurnToBucket( + bucket: CostBucket, + model: string, + deltas: TurnTokenDeltas, + breakdown: TurnCostBreakdown, +): CostBucket { + const existing = bucket.byModel[model] ?? 
emptyModelEntry(); + return { + totalUsd: bucket.totalUsd + breakdown.totalUsd, + turnCount: bucket.turnCount + 1, + byModel: { + ...bucket.byModel, + [model]: addTurnToEntry(existing, deltas, breakdown), + }, + }; +} + +/** Pure reducer: record one turn into the given state. */ +export function reduceRecordTurnCost( + state: PersistedCostState, + input: RecordTurnCostInput, +): PersistedCostState { + const { threadId, model, deltas, breakdown } = input; + if (!threadId || !model) { + return state; + } + // Skip no-op turns to keep storage tiny. + const totalTokens = + deltas.inputTokens + + deltas.cachedInputTokens + + deltas.outputTokens + + deltas.reasoningOutputTokens; + if (totalTokens <= 0 && breakdown.totalUsd <= 0) { + return state; + } + const monthKey = localMonthKey(input.at ?? new Date()); + const session = state.sessions[threadId] ?? emptyBucket(); + const month = state.months[monthKey] ?? emptyBucket(); + return { + ...state, + sessions: { + ...state.sessions, + [threadId]: addTurnToBucket(session, model, deltas, breakdown), + }, + months: { + ...state.months, + [monthKey]: addTurnToBucket(month, model, deltas, breakdown), + }, + }; +} + +export function reduceResetSession( + state: PersistedCostState, + threadId: string, +): PersistedCostState { + if (!(threadId in state.sessions)) { + return state; + } + const nextSessions = { ...state.sessions }; + delete nextSessions[threadId]; + return { ...state, sessions: nextSessions }; +} + +function sanitizeNumber(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 ? 
value : 0; +} + +function sanitizeModelEntry(raw: unknown): ModelCostEntry | null { + if (!raw || typeof raw !== "object") { + return null; + } + const r = raw as Record; + return { + inputTokens: sanitizeNumber(r.inputTokens), + cachedInputTokens: sanitizeNumber(r.cachedInputTokens), + outputTokens: sanitizeNumber(r.outputTokens), + reasoningOutputTokens: sanitizeNumber(r.reasoningOutputTokens), + totalUsd: sanitizeNumber(r.totalUsd), + turnCount: sanitizeNumber(r.turnCount), + }; +} + +function sanitizeBucket(raw: unknown): CostBucket | null { + if (!raw || typeof raw !== "object") { + return null; + } + const r = raw as Record; + const byModelRaw = (r.byModel ?? {}) as Record; + const byModel: Record = {}; + if (byModelRaw && typeof byModelRaw === "object") { + for (const [model, entry] of Object.entries(byModelRaw)) { + if (!model) continue; + const cleaned = sanitizeModelEntry(entry); + if (cleaned) byModel[model] = cleaned; + } + } + return { + totalUsd: sanitizeNumber(r.totalUsd), + turnCount: sanitizeNumber(r.turnCount), + byModel, + }; +} + +export function sanitizePersistedCostState(raw: unknown): PersistedCostState { + if (!raw || typeof raw !== "object") { + return initialState; + } + const r = raw as Record; + if (r.version !== 1) { + return initialState; + } + const sessions: Record = {}; + const months: Record = {}; + const sessionsRaw = (r.sessions ?? {}) as Record; + const monthsRaw = (r.months ?? 
{}) as Record; + if (sessionsRaw && typeof sessionsRaw === "object") { + for (const [threadId, bucket] of Object.entries(sessionsRaw)) { + if (!threadId) continue; + const cleaned = sanitizeBucket(bucket); + if (cleaned) sessions[threadId] = cleaned; + } + } + if (monthsRaw && typeof monthsRaw === "object") { + for (const [monthKey, bucket] of Object.entries(monthsRaw)) { + if (!/^\d{4}-\d{2}$/.test(monthKey)) continue; + const cleaned = sanitizeBucket(bucket); + if (cleaned) months[monthKey] = cleaned; + } + } + return { version: 1, sessions, months }; +} + +function readPersistedState(): PersistedCostState { + if (typeof window === "undefined") { + return initialState; + } + try { + const raw = window.localStorage.getItem(COST_STORE_STORAGE_KEY); + if (!raw) return initialState; + return sanitizePersistedCostState(JSON.parse(raw)); + } catch { + return initialState; + } +} + +function persistState(state: PersistedCostState): void { + if (typeof window === "undefined") return; + try { + const { version, sessions, months } = state; + window.localStorage.setItem( + COST_STORE_STORAGE_KEY, + JSON.stringify({ version, sessions, months } satisfies PersistedCostState), + ); + } catch { + // ignore quota / serialization errors + } +} + +const debouncedPersist = new Debouncer(persistState, { wait: 400 }); + +export const useCostStore = create((set) => ({ + ...readPersistedState(), + recordTurnCost: (input) => set((state) => reduceRecordTurnCost(state, input)), + resetSession: (threadId) => set((state) => reduceResetSession(state, threadId)), + resetAll: () => set(() => ({ ...initialState })), + __replaceState: (next) => set(() => ({ ...next })), +})); + +useCostStore.subscribe((state) => { + const { version, sessions, months } = state; + debouncedPersist.maybeExecute({ version, sessions, months }); +}); + +if (typeof window !== "undefined" && typeof window.addEventListener === "function") { + window.addEventListener("beforeunload", () => { + debouncedPersist.flush(); + 
}); +} + +// ── Selectors ──────────────────────────────────────────────────────────── + +export function selectSessionBucket( + state: PersistedCostState, + threadId: string | null | undefined, +): CostBucket { + if (!threadId) return emptyBucket(); + return state.sessions[threadId] ?? emptyBucket(); +} + +export function selectMonthBucket( + state: PersistedCostState, + monthKey: string = localMonthKey(), +): CostBucket { + return state.months[monthKey] ?? emptyBucket(); +} + +export interface CostSummary { + readonly sessionUsd: number; + readonly monthUsd: number; + readonly sessionTurnCount: number; + readonly monthTurnCount: number; + readonly monthKey: string; + readonly session: CostBucket; + readonly month: CostBucket; + readonly averagePerTurnUsd: number | null; +} + +export function selectCostSummary( + state: PersistedCostState, + threadId: string | null | undefined, + now: Date = new Date(), +): CostSummary { + const monthKey = localMonthKey(now); + const session = selectSessionBucket(state, threadId); + const month = selectMonthBucket(state, monthKey); + const averagePerTurnUsd = + session.turnCount > 0 ? session.totalUsd / session.turnCount : null; + return { + sessionUsd: session.totalUsd, + monthUsd: month.totalUsd, + sessionTurnCount: session.turnCount, + monthTurnCount: month.turnCount, + monthKey, + session, + month, + averagePerTurnUsd, + }; +} + +export { formatUsd }; From bf76cc687eaf2112aa7624685b1158f268f8ce88 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 19:36:13 +0200 Subject: [PATCH 03/16] feat(web): wire token-usage events to cost store useCostTracking hook observes activeThread activities and records each new context-window.updated event (with lastXxxTokens deltas) into the cost store. Seeds seen-set on mount / thread switch so historical activity is not retroactively charged to this month. Pure processActivitiesForCost reducer is unit-tested; the hook is a thin ref+effect wrapper. Tests: 9 pass. 
--- apps/web/src/lib/useCostTracking.test.ts | 160 +++++++++++++++++++++++ apps/web/src/lib/useCostTracking.ts | 118 +++++++++++++++++ 2 files changed, 278 insertions(+) create mode 100644 apps/web/src/lib/useCostTracking.test.ts create mode 100644 apps/web/src/lib/useCostTracking.ts diff --git a/apps/web/src/lib/useCostTracking.test.ts b/apps/web/src/lib/useCostTracking.test.ts new file mode 100644 index 0000000000..9590cf820e --- /dev/null +++ b/apps/web/src/lib/useCostTracking.test.ts @@ -0,0 +1,160 @@ +import { describe, expect, it } from "vitest"; +import { EventId, type ModelSelection, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts"; + +import { processActivitiesForCost } from "./useCostTracking"; + +function makeContextWindowActivity( + id: string, + payload: Record, + createdAt = "2026-04-21T10:00:00.000Z", +): OrchestrationThreadActivity { + return { + id: EventId.make(id), + tone: "info", + kind: "context-window.updated", + summary: "Context window updated", + payload, + turnId: TurnId.make("turn-1"), + createdAt, + }; +} + +const sonnet: ModelSelection = { + provider: "claudeAgent", + model: "claude-sonnet-4-6", +}; + +describe("processActivitiesForCost", () => { + it("returns empty records with null threadId", () => { + const result = processActivitiesForCost(null, [], sonnet, null); + expect(result.records).toEqual([]); + expect(result.nextSeen.size).toBe(0); + }); + + it("seeds existing activities without recording on first mount", () => { + const acts = [ + makeContextWindowActivity("evt-a", { lastOutputTokens: 1000 }), + makeContextWindowActivity("evt-b", { lastOutputTokens: 500 }), + ]; + const result = processActivitiesForCost("t1", acts, sonnet, null); + expect(result.records).toEqual([]); + expect(result.nextSeen.size).toBe(2); + }); + + it("records only new activities on subsequent call", () => { + const seed = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-a", { lastOutputTokens: 100 })], + sonnet, + 
null, + ); + const next = processActivitiesForCost( + "t1", + [ + makeContextWindowActivity("evt-a", { lastOutputTokens: 100 }), + makeContextWindowActivity("evt-b", { + lastInputTokens: 1_000, + lastCachedInputTokens: 500, + lastOutputTokens: 200, + }), + ], + sonnet, + seed.nextSeen, + ); + expect(next.records).toHaveLength(1); + const record = next.records[0]!; + expect(record.threadId).toBe("t1"); + expect(record.model).toBe("claude-sonnet-4-6"); + expect(record.deltas.inputTokens).toBe(1_000); + expect(record.deltas.outputTokens).toBe(200); + // 1000*3 + 500*0.3 + 200*15 = 3000+150+3000 = 6150 / 1M = $0.00615 + expect(record.breakdown.totalUsd).toBeCloseTo(0.00615, 6); + }); + + it("skips events without per-turn deltas", () => { + const seed = processActivitiesForCost("t1", [], sonnet, null); + const next = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-1", { usedTokens: 10_000 })], + sonnet, + seed.nextSeen, + ); + expect(next.records).toEqual([]); + expect(next.nextSeen.has("evt-1")).toBe(true); + }); + + it("skips non-context-window activity kinds", () => { + const seed = processActivitiesForCost("t1", [], sonnet, null); + const other: OrchestrationThreadActivity = { + id: EventId.make("evt-tool"), + tone: "info", + kind: "tool.started", + summary: "tool.started", + payload: { lastOutputTokens: 1_000 }, + turnId: TurnId.make("turn-1"), + createdAt: "2026-04-21T10:00:00.000Z", + }; + const next = processActivitiesForCost("t1", [other], sonnet, seed.nextSeen); + expect(next.records).toEqual([]); + expect(next.nextSeen.has("evt-tool")).toBe(true); + }); + + it("skips when model selection missing", () => { + const seed = processActivitiesForCost("t1", [], null, null); + const next = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], + null, + seed.nextSeen, + ); + expect(next.records).toEqual([]); + }); + + it("skips when pricing resolves to zero (unknown model)", () => { + const seed = 
processActivitiesForCost("t1", [], sonnet, null); + const next = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], + { provider: "opencode", model: "some/unknown-model" }, + seed.nextSeen, + ); + expect(next.records).toEqual([]); + expect(next.nextSeen.has("evt-1")).toBe(true); + }); + + it("deduplicates by activity id", () => { + const seed = processActivitiesForCost("t1", [], sonnet, null); + const firstPass = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], + sonnet, + seed.nextSeen, + ); + expect(firstPass.records).toHaveLength(1); + const secondPass = processActivitiesForCost( + "t1", + [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], + sonnet, + firstPass.nextSeen, + ); + expect(secondPass.records).toEqual([]); + }); + + it("uses activity.createdAt as `at` timestamp", () => { + const seed = processActivitiesForCost("t1", [], sonnet, null); + const next = processActivitiesForCost( + "t1", + [ + makeContextWindowActivity( + "evt-1", + { lastOutputTokens: 1_000 }, + "2026-03-15T00:00:00.000Z", + ), + ], + sonnet, + seed.nextSeen, + ); + const record = next.records[0]!; + expect(record.at?.toISOString()).toBe("2026-03-15T00:00:00.000Z"); + }); +}); diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts new file mode 100644 index 0000000000..237f656262 --- /dev/null +++ b/apps/web/src/lib/useCostTracking.ts @@ -0,0 +1,118 @@ +import { useEffect, useRef } from "react"; +import type { ModelSelection, OrchestrationThreadActivity } from "@t3tools/contracts"; +import { + computeTurnCost, + type TurnCostBreakdown, + type TurnTokenDeltas, +} from "@t3tools/shared/pricing"; + +import { useCostStore, type RecordTurnCostInput } from "./costStore"; + +interface SeenRef { + threadId: string | null | undefined; + ids: Set; +} + +function toNonNegative(value: unknown): number { + return typeof value === "number" && 
Number.isFinite(value) && value > 0 ? value : 0; +} + +function extractDeltas(payload: unknown): TurnTokenDeltas | null { + if (!payload || typeof payload !== "object") return null; + const p = payload as Record; + const input = toNonNegative(p.lastInputTokens); + const cached = toNonNegative(p.lastCachedInputTokens); + const output = toNonNegative(p.lastOutputTokens); + const reasoning = toNonNegative(p.lastReasoningOutputTokens); + if (input + cached + output + reasoning <= 0) return null; + return { + inputTokens: input, + cachedInputTokens: cached, + outputTokens: output, + reasoningOutputTokens: reasoning, + }; +} + +export interface ProcessActivitiesResult { + readonly records: ReadonlyArray; + readonly nextSeen: Set; +} + +/** + * Pure: find new `context-window.updated` events that carry per-turn + * token deltas and translate them into cost-store inputs. Returns updated + * "seen" set for caller to persist. + * + * Behaviour: + * - If `prevSeen` is `null`, treat all activities as "already seen" and + * emit no records — used for initial mount / thread switch. + * - Otherwise, only new activity IDs are considered. + */ +export function processActivitiesForCost( + threadId: string | null | undefined, + activities: ReadonlyArray | undefined, + modelSelection: ModelSelection | null | undefined, + prevSeen: Set | null, +): ProcessActivitiesResult { + if (!threadId || !activities || activities.length === 0) { + return { records: [], nextSeen: prevSeen ?? new Set() }; + } + if (prevSeen === null) { + // Initial mount / thread switch: seed seen set with current activity IDs. 
+ return { + records: [], + nextSeen: new Set(activities.map((a) => a.id as string)), + }; + } + const seen = new Set(prevSeen); + const model = modelSelection?.model; + const provider = modelSelection?.provider; + const records: RecordTurnCostInput[] = []; + for (const activity of activities) { + const id = activity.id as string; + if (seen.has(id)) continue; + seen.add(id); + if (activity.kind !== "context-window.updated") continue; + const deltas = extractDeltas(activity.payload); + if (!deltas) continue; + if (!model) continue; + const breakdown: TurnCostBreakdown = computeTurnCost(model, deltas, provider); + if (breakdown.totalUsd <= 0) continue; + records.push({ + threadId, + model, + deltas, + breakdown, + at: activity.createdAt ? new Date(activity.createdAt) : new Date(), + }); + } + return { records, nextSeen: seen }; +} + +/** + * Observe thread activity stream and record cost for each new + * `context-window.updated` event. Seeds on first mount so historical + * activities aren't retroactively charged. + */ +export function useCostTracking( + threadId: string | null | undefined, + activities: ReadonlyArray | undefined, + modelSelection: ModelSelection | null | undefined, +): void { + const recordTurnCost = useCostStore((state) => state.recordTurnCost); + const seenRef = useRef({ threadId: undefined, ids: new Set() }); + + useEffect(() => { + const prev = seenRef.current.threadId === threadId ? 
seenRef.current.ids : null; + const { records, nextSeen } = processActivitiesForCost( + threadId, + activities, + modelSelection, + prev, + ); + seenRef.current = { threadId, ids: nextSeen }; + for (const record of records) { + recordTurnCost(record); + } + }, [threadId, activities, modelSelection, recordTurnCost]); +} From 7769f248beecca3a69d55eb85a788619f98249a1 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 19:39:25 +0200 Subject: [PATCH 04/16] feat(web): add CostMeter + mount in composer toolbar CostMeter mirrors ContextWindowMeter's ring + Popover style. Fill ratio uses VITE_MONTHLY_BUDGET_USD if set, else a compressed log scale. Popover shows session/MTD totals, budget %, turn count, avg cost per turn, and per-model breakdown. Turns destructive color when over budget. useCostSummary zustand hook reads sessions + months slices and recomputes summary; cheap enough to recompute per render since selector is O(models). Composer wires useCostTracking side-effect + passes summary to ComposerFooterPrimaryActions next to ContextWindowMeter. 
--- apps/web/src/components/chat/ChatComposer.tsx | 12 ++ apps/web/src/components/chat/CostMeter.tsx | 156 ++++++++++++++++++ apps/web/src/lib/costStore.ts | 10 ++ 3 files changed, 178 insertions(+) create mode 100644 apps/web/src/components/chat/CostMeter.tsx diff --git a/apps/web/src/components/chat/ChatComposer.tsx b/apps/web/src/components/chat/ChatComposer.tsx index 3d3b081af9..da3184e8ad 100644 --- a/apps/web/src/components/chat/ChatComposer.tsx +++ b/apps/web/src/components/chat/ChatComposer.tsx @@ -77,6 +77,7 @@ import { renderProviderTraitsPicker, } from "./composerProviderRegistry"; import { ContextWindowMeter } from "./ContextWindowMeter"; +import { CostMeter } from "./CostMeter"; import { buildExpandedImagePreview, type ExpandedImagePreview } from "./ExpandedImagePreview"; import { basenameOfPath } from "../../vscode-icons"; import { cn, randomUUID } from "~/lib/utils"; @@ -102,6 +103,8 @@ import type { SessionPhase, Thread } from "../../types"; import type { PendingUserInputDraftAnswer } from "../../pendingUserInput"; import type { PendingApproval, PendingUserInput } from "../../session-logic"; import { deriveLatestContextWindowSnapshot } from "../../lib/contextWindow"; +import { useCostSummary, type CostSummary } from "../../lib/costStore"; +import { useCostTracking } from "../../lib/useCostTracking"; import { formatProviderSkillDisplayName } from "../../providerSkillPresentation"; import { searchProviderSkills } from "../../providerSkillSearch"; @@ -269,6 +272,7 @@ const ComposerFooterModeControls = memo(function ComposerFooterModeControls(prop const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions(props: { compact: boolean; activeContextWindow: ReturnType; + costSummary: CostSummary; isPreparingWorktree: boolean; pendingAction: { questionIndex: number; @@ -290,6 +294,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions( return ( <> {props.activeContextWindow ? 
: null} + {props.isPreparingWorktree ? ( Preparing worktree... ) : null} @@ -639,6 +644,12 @@ export const ChatComposer = memo( [activeThreadActivities], ); + // ------------------------------------------------------------------ + // Cost tracking (session + month-to-date spend) + // ------------------------------------------------------------------ + useCostTracking(activeThreadId, activeThreadActivities, activeThreadModelSelection); + const costSummary = useCostSummary(activeThreadId); + // ------------------------------------------------------------------ // Composer-local state // ------------------------------------------------------------------ @@ -1953,6 +1964,7 @@ export const ChatComposer = memo( }).env + ?.VITE_MONTHLY_BUDGET_USD; + if (!raw) return null; + const parsed = Number.parseFloat(raw); + return Number.isFinite(parsed) && parsed > 0 ? parsed : null; +} + +function formatCompactUsd(value: number): string { + if (value <= 0) return "$0"; + if (value < 1) return `¢${Math.round(value * 100)}`; + if (value < 100) return `$${value.toFixed(value < 10 ? 1 : 0).replace(/\.0$/, "")}`; + if (value < 1_000) return `$${Math.round(value)}`; + return `$${(value / 1_000).toFixed(1).replace(/\.0$/, "")}k`; +} + +function formatPercentage(value: number): string { + if (value < 10) { + return `${value.toFixed(1).replace(/\.0$/, "")}%`; + } + return `${Math.round(value)}%`; +} + +export function CostMeter(props: { summary: CostSummary }) { + const { summary } = props; + const budget = readBudget(); + + // Ring: if budget set, fill by MTD/budget ratio; else fill on a compressed + // log scale of month-to-date spend (bounded 0–100) so it still animates. + const ratio = budget + ? Math.min(100, (summary.monthUsd / budget) * 100) + : summary.monthUsd <= 0 + ? 0 + : Math.min(100, Math.log10(summary.monthUsd + 1) * 25); + + const radius = 9.75; + const circumference = 2 * Math.PI * radius; + const dashOffset = circumference - (ratio / 100) * circumference; + + const overBudget = budget ? 
summary.monthUsd >= budget : false; + + const centerLabel = summary.monthUsd > 0 ? formatCompactUsd(summary.monthUsd) : "$0"; + const ariaLabel = budget + ? `Cost ${formatUsd(summary.monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(ratio)})` + : `Cost ${formatUsd(summary.monthUsd)} this month, ${formatUsd(summary.sessionUsd)} this session`; + + return ( + + + + + + {centerLabel} + + + + } + /> + +
+
+ Cost +
+
+ {formatUsd(summary.sessionUsd)} + session + + {formatUsd(summary.monthUsd)} + MTD +
+ {budget ? ( +
+ Budget: {formatUsd(budget)} ({formatPercentage(ratio)} used) +
+ ) : null} + {summary.sessionTurnCount > 0 && summary.averagePerTurnUsd !== null ? ( +
+ {summary.sessionTurnCount} + {summary.sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "} + {formatUsd(summary.averagePerTurnUsd)}/turn avg +
+ ) : null} + {summary.month.turnCount > 0 ? ( + + ) : null} +
+
+
+ ); +} + +function ModelBreakdown(props: { summary: CostSummary }) { + const entries = Object.entries(props.summary.month.byModel) + .filter(([, entry]) => entry.totalUsd > 0) + .sort((left, right) => right[1].totalUsd - left[1].totalUsd); + if (entries.length === 0) return null; + return ( +
+
+ Models (this month) +
+ {entries.map(([model, entry]) => ( +
+ {model} + + {formatUsd(entry.totalUsd)} · {entry.turnCount} + {entry.turnCount === 1 ? " turn" : " turns"} + +
+ ))} +
+ ); +} diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts index 1cf4fbeb8b..509276b5e2 100644 --- a/apps/web/src/lib/costStore.ts +++ b/apps/web/src/lib/costStore.ts @@ -293,6 +293,16 @@ export interface CostSummary { readonly averagePerTurnUsd: number | null; } +export function useCostSummary( + threadId: string | null | undefined, + now?: Date, +): CostSummary { + const sessions = useCostStore((state) => state.sessions); + const months = useCostStore((state) => state.months); + // Intentionally rebuild on any change to sessions/months — selector is cheap. + return selectCostSummary({ version: 1, sessions, months }, threadId, now); +} + export function selectCostSummary( state: PersistedCostState, threadId: string | null | undefined, From d9225e86dd5921b5fdd53d902d970c0eea62b087 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 20:39:57 +0200 Subject: [PATCH 05/16] feat(server): T3CODE_STATE_SUBDIR + --use-userdata flag Let dev mode point at the installed app's "userdata" state for history continuity, and pave the way for a server-side usage/ JSON store that both dev and prod reuse. - deriveServerPaths accepts optional stateSubdir; env wins over the default (dev/userdata selection via devUrl). - Adds usageDir (/usage) to derived paths + ensures it exists at startup. - dev-runner: new --state-subdir flag + --use-userdata shortcut; forwards to T3CODE_STATE_SUBDIR. Startup logs warn loudly when dev is aimed at userdata. - Tests: dev-runner env matrix (22 pass), cli-config subdir override + usageDir derivation (10 pass). 
--- apps/server/src/cli-config.test.ts | 55 +++++++++++++++++++++++++ apps/server/src/cli.ts | 12 +++++- apps/server/src/config.ts | 12 +++++- scripts/dev-runner.test.ts | 66 ++++++++++++++++++++++++++++++ scripts/dev-runner.ts | 40 +++++++++++++++++- 5 files changed, 182 insertions(+), 3 deletions(-) diff --git a/apps/server/src/cli-config.test.ts b/apps/server/src/cli-config.test.ts index 5adece7302..71c71648c3 100644 --- a/apps/server/src/cli-config.test.ts +++ b/apps/server/src/cli-config.test.ts @@ -525,4 +525,59 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => { }); }), ); + + it.effect("T3CODE_STATE_SUBDIR overrides default dev/userdata selection", () => + Effect.gen(function* () { + const { join } = yield* Path.Path; + const baseDir = join(os.tmpdir(), "t3-cli-config-state-subdir"); + const resolved = yield* resolveServerConfig( + { + mode: Option.some("web"), + port: Option.some(3773), + host: Option.none(), + baseDir: Option.some(baseDir), + cwd: Option.none(), + devUrl: Option.some(new URL("http://127.0.0.1:5173")), + noBrowser: Option.some(true), + bootstrapFd: Option.none(), + autoBootstrapProjectFromCwd: Option.none(), + logWebSocketEvents: Option.none(), + }, + Option.none(), + ).pipe( + Effect.provide( + Layer.mergeAll( + ConfigProvider.layer( + ConfigProvider.fromEnv({ + env: { T3CODE_STATE_SUBDIR: "userdata" }, + }), + ), + NetService.layer, + ), + ), + ); + + // Even though devUrl is set (would normally pick "dev"), env override wins. 
+ assert.equal(resolved.stateDir, join(baseDir, "userdata")); + assert.equal(resolved.dbPath, join(baseDir, "userdata", "state.sqlite")); + assert.equal(resolved.usageDir, join(baseDir, "userdata", "usage")); + }), + ); + + it.effect("deriveServerPaths exposes usageDir under stateDir", () => + Effect.gen(function* () { + const { join } = yield* Path.Path; + const baseDir = join(os.tmpdir(), "t3-derive-paths-usage"); + const prodPaths = yield* deriveServerPaths(baseDir, undefined); + assert.equal(prodPaths.usageDir, join(baseDir, "userdata", "usage")); + const devPaths = yield* deriveServerPaths(baseDir, new URL("http://localhost:5173")); + assert.equal(devPaths.usageDir, join(baseDir, "dev", "usage")); + const overridePaths = yield* deriveServerPaths( + baseDir, + new URL("http://localhost:5173"), + "userdata", + ); + assert.equal(overridePaths.usageDir, join(baseDir, "userdata", "usage")); + }), + ); }); diff --git a/apps/server/src/cli.ts b/apps/server/src/cli.ts index 4fc23a1ded..34648ec51b 100644 --- a/apps/server/src/cli.ts +++ b/apps/server/src/cli.ts @@ -174,6 +174,10 @@ const EnvServerConfig = Config.all({ Config.option, Config.map(Option.getOrUndefined), ), + stateSubdir: Config.string("T3CODE_STATE_SUBDIR").pipe( + Config.option, + Config.map(Option.getOrUndefined), + ), }); interface CliServerFlags { @@ -286,8 +290,14 @@ export const resolveServerConfig = ( const rawCwd = Option.getOrElse(normalizedFlags.cwd, () => process.cwd()); const cwd = path.resolve(yield* expandHomePath(rawCwd.trim())); yield* fs.makeDirectory(cwd, { recursive: true }); - const derivedPaths = yield* deriveServerPaths(baseDir, devUrl); + const derivedPaths = yield* deriveServerPaths(baseDir, devUrl, env.stateSubdir); yield* ensureServerDirectories(derivedPaths); + if (devUrl !== undefined && env.stateSubdir?.trim() === "userdata") { + yield* Effect.logWarning( + "⚠️ T3CODE_STATE_SUBDIR=userdata: dev server is reading/writing the installed app's state directory. 
Quit the installed app first to avoid corruption.", + { stateDir: derivedPaths.stateDir }, + ); + } const persistedObservabilitySettings = yield* loadPersistedObservabilitySettings( derivedPaths.settingsPath, ); diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts index 7840c76115..c885a47554 100644 --- a/apps/server/src/config.ts +++ b/apps/server/src/config.ts @@ -37,6 +37,7 @@ export interface ServerDerivedPaths { readonly environmentIdPath: string; readonly serverRuntimeStatePath: string; readonly secretsDir: string; + readonly usageDir: string; } /** @@ -70,14 +71,21 @@ export interface ServerConfigShape extends ServerDerivedPaths { export const deriveServerPaths = Effect.fn(function* ( baseDir: ServerConfigShape["baseDir"], devUrl: ServerConfigShape["devUrl"], + stateSubdir?: string | undefined, ): Effect.fn.Return { const { join } = yield* Path.Path; - const stateDir = join(baseDir, devUrl !== undefined ? "dev" : "userdata"); + // When T3CODE_STATE_SUBDIR is set, honor it verbatim — lets a dev-mode + // server read/write the installed app's "userdata" store when explicitly + // opted in. Falls back to the default: "dev" in dev mode, "userdata" in + // production. + const subdir = stateSubdir?.trim() || (devUrl !== undefined ? 
"dev" : "userdata"); + const stateDir = join(baseDir, subdir); const dbPath = join(stateDir, "state.sqlite"); const attachmentsDir = join(stateDir, "attachments"); const logsDir = join(stateDir, "logs"); const providerLogsDir = join(logsDir, "provider"); const providerStatusCacheDir = join(baseDir, "caches"); + const usageDir = join(stateDir, "usage"); return { stateDir, dbPath, @@ -96,6 +104,7 @@ export const deriveServerPaths = Effect.fn(function* ( environmentIdPath: join(stateDir, "environment-id"), serverRuntimeStatePath: join(stateDir, "server-runtime.json"), secretsDir: join(stateDir, "secrets"), + usageDir, }; }); @@ -116,6 +125,7 @@ export const ensureServerDirectories = Effect.fn(function* (derivedPaths: Server fs.makeDirectory(derivedPaths.providerStatusCacheDir, { recursive: true }), fs.makeDirectory(path.dirname(derivedPaths.anonymousIdPath), { recursive: true }), fs.makeDirectory(path.dirname(derivedPaths.serverRuntimeStatePath), { recursive: true }), + fs.makeDirectory(derivedPaths.usageDir, { recursive: true }), ], { concurrency: "unbounded" }, ); diff --git a/scripts/dev-runner.test.ts b/scripts/dev-runner.test.ts index ce4865eced..64e593b38f 100644 --- a/scripts/dev-runner.test.ts +++ b/scripts/dev-runner.test.ts @@ -93,6 +93,72 @@ it.layer(NodeServices.layer)("dev-runner", (it) => { assert.equal(env.T3CODE_LOG_WS_EVENTS, "1"); assert.equal(env.T3CODE_HOST, "0.0.0.0"); assert.equal(env.VITE_DEV_SERVER_URL, "http://localhost:7331/"); + // No stateSubdir override by default. 
+ assert.equal(env.T3CODE_STATE_SUBDIR, undefined); + }), + ); + + it.effect("forwards explicit stateSubdir to T3CODE_STATE_SUBDIR", () => + Effect.gen(function* () { + const env = yield* createDevRunnerEnv({ + mode: "dev", + baseEnv: {}, + serverOffset: 0, + webOffset: 0, + t3Home: undefined, + stateSubdir: "userdata", + noBrowser: undefined, + autoBootstrapProjectFromCwd: undefined, + logWebSocketEvents: undefined, + host: undefined, + port: undefined, + devUrl: undefined, + }); + + assert.equal(env.T3CODE_STATE_SUBDIR, "userdata"); + }), + ); + + it.effect("--use-userdata shortcut sets T3CODE_STATE_SUBDIR=userdata", () => + Effect.gen(function* () { + const env = yield* createDevRunnerEnv({ + mode: "dev", + baseEnv: {}, + serverOffset: 0, + webOffset: 0, + t3Home: undefined, + useUserdata: true, + noBrowser: undefined, + autoBootstrapProjectFromCwd: undefined, + logWebSocketEvents: undefined, + host: undefined, + port: undefined, + devUrl: undefined, + }); + + assert.equal(env.T3CODE_STATE_SUBDIR, "userdata"); + }), + ); + + it.effect("explicit stateSubdir overrides --use-userdata", () => + Effect.gen(function* () { + const env = yield* createDevRunnerEnv({ + mode: "dev", + baseEnv: {}, + serverOffset: 0, + webOffset: 0, + t3Home: undefined, + stateSubdir: "custom", + useUserdata: true, + noBrowser: undefined, + autoBootstrapProjectFromCwd: undefined, + logWebSocketEvents: undefined, + host: undefined, + port: undefined, + devUrl: undefined, + }); + + assert.equal(env.T3CODE_STATE_SUBDIR, "custom"); }), ); diff --git a/scripts/dev-runner.ts b/scripts/dev-runner.ts index 1621b60da7..61fcae04d0 100644 --- a/scripts/dev-runner.ts +++ b/scripts/dev-runner.ts @@ -122,6 +122,8 @@ interface CreateDevRunnerEnvInput { readonly serverOffset: number; readonly webOffset: number; readonly t3Home: string | undefined; + readonly stateSubdir?: string | undefined; + readonly useUserdata?: boolean | undefined; readonly noBrowser: boolean | undefined; readonly 
autoBootstrapProjectFromCwd: boolean | undefined; readonly logWebSocketEvents: boolean | undefined; @@ -136,6 +138,8 @@ export function createDevRunnerEnv({ serverOffset, webOffset, t3Home, + stateSubdir, + useUserdata, noBrowser, autoBootstrapProjectFromCwd, logWebSocketEvents, @@ -148,6 +152,8 @@ export function createDevRunnerEnv({ const webPort = BASE_WEB_PORT + webOffset; const resolvedBaseDir = yield* resolveBaseDir(t3Home); const isDesktopMode = mode === "dev:desktop"; + const resolvedStateSubdir = + stateSubdir?.trim() || (useUserdata === true ? "userdata" : undefined); const output: NodeJS.ProcessEnv = { ...baseEnv, @@ -158,6 +164,12 @@ export function createDevRunnerEnv({ T3CODE_HOME: resolvedBaseDir, }; + if (resolvedStateSubdir !== undefined) { + output.T3CODE_STATE_SUBDIR = resolvedStateSubdir; + } else { + delete output.T3CODE_STATE_SUBDIR; + } + if (!isDesktopMode) { output.T3CODE_PORT = String(serverPort); output.VITE_HTTP_URL = `http://localhost:${serverPort}`; @@ -365,6 +377,8 @@ export function resolveModePortOffsets({ interface DevRunnerCliInput { readonly mode: DevMode; readonly t3Home: string | undefined; + readonly stateSubdir?: string | undefined; + readonly useUserdata?: boolean | undefined; readonly noBrowser: boolean | undefined; readonly autoBootstrapProjectFromCwd: boolean | undefined; readonly logWebSocketEvents: boolean | undefined; @@ -409,6 +423,8 @@ export function runDevRunnerWithInput(input: DevRunnerCliInput) { serverOffset, webOffset, t3Home: input.t3Home, + stateSubdir: input.stateSubdir, + useUserdata: input.useUserdata, noBrowser: input.noBrowser, autoBootstrapProjectFromCwd: input.autoBootstrapProjectFromCwd, logWebSocketEvents: input.logWebSocketEvents, @@ -422,10 +438,20 @@ export function runDevRunnerWithInput(input: DevRunnerCliInput) { ? ` selectedOffset(server=${serverOffset},web=${webOffset})` : ""; + const subdirSuffix = env.T3CODE_STATE_SUBDIR + ? 
` stateSubdir=${env.T3CODE_STATE_SUBDIR}` + : ""; + yield* Effect.logInfo( - `[dev-runner] mode=${input.mode} source=${source}${selectionSuffix} serverPort=${String(env.T3CODE_PORT)} webPort=${String(env.PORT)} baseDir=${String(env.T3CODE_HOME)}`, + `[dev-runner] mode=${input.mode} source=${source}${selectionSuffix} serverPort=${String(env.T3CODE_PORT)} webPort=${String(env.PORT)} baseDir=${String(env.T3CODE_HOME)}${subdirSuffix}`, ); + if (env.T3CODE_STATE_SUBDIR === "userdata") { + yield* Effect.logWarning( + "⚠️ dev-runner: --use-userdata is active; server will write to the installed app's state. Quit the installed app first.", + ); + } + if (input.dryRun) { return; } @@ -475,6 +501,18 @@ const devRunnerCli = Command.make("dev-runner", { Flag.withDescription("Base directory for all T3 Code data (equivalent to T3CODE_HOME)."), Flag.withFallbackConfig(optionalStringConfig("T3CODE_HOME")), ), + stateSubdir: Flag.string("state-subdir").pipe( + Flag.withDescription( + "State directory name under T3CODE_HOME (e.g. 'dev' or 'userdata'; equivalent to T3CODE_STATE_SUBDIR).", + ), + Flag.withFallbackConfig(optionalStringConfig("T3CODE_STATE_SUBDIR")), + ), + useUserdata: Flag.boolean("use-userdata").pipe( + Flag.withDescription( + "Shortcut for --state-subdir=userdata; reads/writes the installed app's state. Quit the installed app first.", + ), + Flag.withDefault(false), + ), noBrowser: Flag.boolean("no-browser").pipe( Flag.withDescription("Browser auto-open toggle (equivalent to T3CODE_NO_BROWSER)."), Flag.withFallbackConfig(optionalBooleanConfig("T3CODE_NO_BROWSER")), From 33cb77a399b39e2d5d95a0f73593a0c1b31adc42 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 20:41:05 +0200 Subject: [PATCH 06/16] feat(contracts): cache-creation tokens + model on token-usage payload - Add cacheCreationInputTokens + lastCacheCreationInputTokens to ThreadTokenUsageSnapshot. 
Anthropic charges cache-write at 1.25x input; reporting it separately lets the cost meter bill correctly. - Add optional model field to ThreadTokenUsageUpdatedPayload so the server-side cost tracker can resolve pricing without a lookup against thread state. --- packages/contracts/src/providerRuntime.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/contracts/src/providerRuntime.ts b/packages/contracts/src/providerRuntime.ts index 8387880ea3..e732451a7b 100644 --- a/packages/contracts/src/providerRuntime.ts +++ b/packages/contracts/src/providerRuntime.ts @@ -304,11 +304,17 @@ export const ThreadTokenUsageSnapshot = Schema.Struct({ maxTokens: Schema.optional(PositiveInt), inputTokens: Schema.optional(NonNegativeInt), cachedInputTokens: Schema.optional(NonNegativeInt), + /** + * Tokens written to the provider's prompt cache this turn. Anthropic bills + * cache-write at 1.25× the base input rate; cache-read at 0.1× the base rate. + */ + cacheCreationInputTokens: Schema.optional(NonNegativeInt), outputTokens: Schema.optional(NonNegativeInt), reasoningOutputTokens: Schema.optional(NonNegativeInt), lastUsedTokens: Schema.optional(NonNegativeInt), lastInputTokens: Schema.optional(NonNegativeInt), lastCachedInputTokens: Schema.optional(NonNegativeInt), + lastCacheCreationInputTokens: Schema.optional(NonNegativeInt), lastOutputTokens: Schema.optional(NonNegativeInt), lastReasoningOutputTokens: Schema.optional(NonNegativeInt), toolUses: Schema.optional(NonNegativeInt), @@ -319,6 +325,8 @@ export type ThreadTokenUsageSnapshot = typeof ThreadTokenUsageSnapshot.Type; const ThreadTokenUsageUpdatedPayload = Schema.Struct({ usage: ThreadTokenUsageSnapshot, + /** Resolved model slug for the turn this usage belongs to, if known. 
*/ + model: Schema.optional(TrimmedNonEmptyStringSchema), }); export type ThreadTokenUsageUpdatedPayload = typeof ThreadTokenUsageUpdatedPayload.Type; From 615967d80ed6c65cf866aed61385c575d39fac70 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 20:43:43 +0200 Subject: [PATCH 07/16] feat(shared): 4th pricing tier for cache-creation tokens Anthropic bills cache-writes at 1.25x input; OpenAI has no separate write tier. Model a distinct cacheCreationInputPerMTok rate (with provider-aware defaults) so the cost meter no longer conflates cache hits, cache writes, and fresh input. - ModelPricing gains cacheCreationInputPerMTok; Claude auto-applies the 1.25x multiplier, OpenAI defaults to inputPerMTok. - TurnTokenDeltas + TurnCostBreakdown gain cacheCreation slots; zero for providers that don't distinguish the tier. - computeTurnCost bills each class additively. - Client extractDeltas reads lastCacheCreationInputTokens; helpers + fixtures carry the new field through. - Tests: +2 cases covering Anthropic cache-write premium and the OpenAI default. --- apps/web/src/lib/costStore.test.ts | 3 ++ apps/web/src/lib/useCostTracking.ts | 4 +- packages/shared/src/pricing.test.ts | 23 +++++++++- packages/shared/src/pricing.ts | 67 +++++++++++++++++++++++------ 4 files changed, 81 insertions(+), 16 deletions(-) diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts index 1162f11d12..0602f2ce6a 100644 --- a/apps/web/src/lib/costStore.test.ts +++ b/apps/web/src/lib/costStore.test.ts @@ -18,6 +18,7 @@ function freshState(): PersistedCostState { const cost = (total: number) => ({ inputUsd: 0, cachedUsd: 0, + cacheCreationUsd: 0, outputUsd: 0, reasoningUsd: 0, totalUsd: total, @@ -27,12 +28,14 @@ const deltas = ( d: Partial<{ inputTokens: number; cachedInputTokens: number; + cacheCreationInputTokens: number; outputTokens: number; reasoningOutputTokens: number; }> = {}, ) => ({ inputTokens: d.inputTokens ?? 
0, cachedInputTokens: d.cachedInputTokens ?? 0, + cacheCreationInputTokens: d.cacheCreationInputTokens ?? 0, outputTokens: d.outputTokens ?? 0, reasoningOutputTokens: d.reasoningOutputTokens ?? 0, }); diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts index 237f656262..6757ecc305 100644 --- a/apps/web/src/lib/useCostTracking.ts +++ b/apps/web/src/lib/useCostTracking.ts @@ -22,12 +22,14 @@ function extractDeltas(payload: unknown): TurnTokenDeltas | null { const p = payload as Record; const input = toNonNegative(p.lastInputTokens); const cached = toNonNegative(p.lastCachedInputTokens); + const cacheCreation = toNonNegative(p.lastCacheCreationInputTokens); const output = toNonNegative(p.lastOutputTokens); const reasoning = toNonNegative(p.lastReasoningOutputTokens); - if (input + cached + output + reasoning <= 0) return null; + if (input + cached + cacheCreation + output + reasoning <= 0) return null; return { inputTokens: input, cachedInputTokens: cached, + cacheCreationInputTokens: cacheCreation, outputTokens: output, reasoningOutputTokens: reasoning, }; diff --git a/packages/shared/src/pricing.test.ts b/packages/shared/src/pricing.test.ts index de76bea819..dfa437cd79 100644 --- a/packages/shared/src/pricing.test.ts +++ b/packages/shared/src/pricing.test.ts @@ -15,6 +15,13 @@ describe("pricing/getPricing", () => { expect(p.inputPerMTok).toBe(3); expect(p.cachedInputPerMTok).toBe(0.3); expect(p.outputPerMTok).toBe(15); + // Anthropic cache-write = 1.25× input. 
+ expect(p.cacheCreationInputPerMTok).toBeCloseTo(3 * 1.25, 6); + }); + + it("defaults OpenAI cacheCreation rate to input rate", () => { + const p = getPricing("gpt-5.4"); + expect(p.cacheCreationInputPerMTok).toBe(p.inputPerMTok); }); it("resolves Claude short alias via provider", () => { @@ -58,6 +65,7 @@ describe("pricing/computeTurnCost", () => { const cost = computeTurnCost("claude-sonnet-4-6", { inputTokens: 10_000, cachedInputTokens: 100_000, + cacheCreationInputTokens: 20_000, outputTokens: 2_000, reasoningOutputTokens: 500, }); @@ -65,23 +73,27 @@ describe("pricing/computeTurnCost", () => { expect(cost.inputUsd).toBeCloseTo(0.03, 6); // 100k * $0.30/Mtok = $0.03 expect(cost.cachedUsd).toBeCloseTo(0.03, 6); + // 20k * ($3 * 1.25 = $3.75)/Mtok = $0.075 + expect(cost.cacheCreationUsd).toBeCloseTo(0.075, 6); // 2k * $15/Mtok = $0.03 expect(cost.outputUsd).toBeCloseTo(0.03, 6); // 500 * $15/Mtok = $0.0075 expect(cost.reasoningUsd).toBeCloseTo(0.0075, 6); - expect(cost.totalUsd).toBeCloseTo(0.0975, 6); + expect(cost.totalUsd).toBeCloseTo(0.1725, 6); }); it("computes Codex GPT-5.4 turn cost correctly", () => { const cost = computeTurnCost("gpt-5.4", { inputTokens: 1_000_000, cachedInputTokens: 0, + cacheCreationInputTokens: 0, outputTokens: 100_000, reasoningOutputTokens: 50_000, }); // 1M * $1.25 = $1.25 expect(cost.inputUsd).toBeCloseTo(1.25, 6); expect(cost.cachedUsd).toBe(0); + expect(cost.cacheCreationUsd).toBe(0); // 100k * $10/Mtok = $1 expect(cost.outputUsd).toBeCloseTo(1, 6); // 50k * $10/Mtok = $0.5 @@ -89,6 +101,15 @@ describe("pricing/computeTurnCost", () => { expect(cost.totalUsd).toBeCloseTo(2.75, 6); }); + it("applies Anthropic cache-write premium correctly", () => { + // Pure cache-creation: 1M tokens at 1.25× base rate + const cost = computeTurnCost("claude-sonnet-4-6", { + cacheCreationInputTokens: 1_000_000, + }); + expect(cost.cacheCreationUsd).toBeCloseTo(3 * 1.25, 6); + expect(cost.totalUsd).toBeCloseTo(3.75, 6); + }); + it("returns zero 
cost for unknown model", () => { const cost = computeTurnCost("fake-model", { inputTokens: 10_000, diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts index 3ab9685348..96bcda1505 100644 --- a/packages/shared/src/pricing.ts +++ b/packages/shared/src/pricing.ts @@ -4,25 +4,44 @@ import type { ProviderKind } from "@t3tools/contracts"; /** * USD price per 1,000,000 tokens for each token class. * - * `cachedInput` is the discounted input price applied when the provider - * serves cached prefix tokens (Anthropic prompt caching / OpenAI cached input). - * `reasoningOutput` defaults to `output` when a model does not bill reasoning - * tokens separately. + * - `inputPerMTok` — non-cached prompt tokens. + * - `cachedInputPerMTok` — cache-READ tokens (Anthropic 0.1× / OpenAI cached input). + * - `cacheCreationInputPerMTok` — cache-WRITE premium tier (Anthropic 1.25×). + * Providers without a distinct cache-write tier (OpenAI, etc.) set this equal + * to `inputPerMTok`. + * - `outputPerMTok` — model output tokens. + * - `reasoningOutputPerMTok` — reasoning output. Defaults to `outputPerMTok` + * when a model does not bill reasoning tokens separately. */ export interface ModelPricing { readonly provider: ProviderKind | "unknown"; readonly inputPerMTok: number; readonly cachedInputPerMTok: number; + readonly cacheCreationInputPerMTok: number; readonly outputPerMTok: number; readonly reasoningOutputPerMTok: number; } +/** + * Raw seed rates. We derive the cache-creation + reasoning tiers when not + * specified so the table below stays readable. + */ +type SeedPricing = { + readonly provider: ProviderKind | "unknown"; + readonly inputPerMTok: number; + readonly cachedInputPerMTok: number; + readonly outputPerMTok: number; + readonly cacheCreationInputPerMTok?: number; + readonly reasoningOutputPerMTok?: number; +}; + +const ANTHROPIC_CACHE_WRITE_MULTIPLIER = 1.25; + /** Raw seed rates (USD per 1M tokens). Source: public provider pricing pages. 
*/ -const SEED_PRICING: ReadonlyArray< - readonly [string, Omit & { reasoningOutputPerMTok?: number }] -> = [ +const SEED_PRICING: ReadonlyArray = [ // ── Anthropic / Claude ─────────────────────────────────────────────── - // Extended-thinking tokens are billed as output tokens. + // Cache-read = 0.1× input; cache-write = 1.25× input. + // Extended-thinking tokens bill as output. [ "claude-sonnet-4-6", { @@ -69,7 +88,9 @@ const SEED_PRICING: ReadonlyArray< }, ], // ── OpenAI / Codex ─────────────────────────────────────────────────── - // Codex app routes use GPT-5 family pricing. Reasoning tokens bill as output. + // OpenAI does not bill a separate cache-creation tier — cached-input rate + // applies on hits; misses price at the normal input rate. We therefore + // default cacheCreationInputPerMTok to inputPerMTok below. [ "gpt-5.4", { @@ -115,8 +136,14 @@ const SEED_PRICING: ReadonlyArray< export const PRICING_TABLE: ReadonlyMap = (() => { const map = new Map(); for (const [slug, raw] of SEED_PRICING) { + const cacheCreationInputPerMTok = + raw.cacheCreationInputPerMTok ?? + (raw.provider === "claudeAgent" + ? raw.inputPerMTok * ANTHROPIC_CACHE_WRITE_MULTIPLIER + : raw.inputPerMTok); map.set(slug, { ...raw, + cacheCreationInputPerMTok, reasoningOutputPerMTok: raw.reasoningOutputPerMTok ?? 
raw.outputPerMTok, }); } @@ -128,6 +155,7 @@ export const UNKNOWN_MODEL_PRICING: ModelPricing = { provider: "unknown", inputPerMTok: 0, cachedInputPerMTok: 0, + cacheCreationInputPerMTok: 0, outputPerMTok: 0, reasoningOutputPerMTok: 0, }; @@ -175,6 +203,7 @@ export function getPricing( export interface TurnTokenDeltas { readonly inputTokens: number; readonly cachedInputTokens: number; + readonly cacheCreationInputTokens: number; readonly outputTokens: number; readonly reasoningOutputTokens: number; } @@ -182,6 +211,7 @@ export interface TurnTokenDeltas { export interface TurnCostBreakdown { readonly inputUsd: number; readonly cachedUsd: number; + readonly cacheCreationUsd: number; readonly outputUsd: number; readonly reasoningUsd: number; readonly totalUsd: number; @@ -190,6 +220,7 @@ export interface TurnCostBreakdown { export const ZERO_COST: TurnCostBreakdown = { inputUsd: 0, cachedUsd: 0, + cacheCreationUsd: 0, outputUsd: 0, reasoningUsd: 0, totalUsd: 0, @@ -201,9 +232,15 @@ function finite(value: number | null | undefined): number { /** * Compute USD cost for one turn's token deltas. - * Anthropic bills cached-input tokens at a reduced rate *instead of* the - * full input rate — so callers pass the non-cached input count in - * `inputTokens` and the cached prefix count in `cachedInputTokens`. + * + * Token classes: + * - `inputTokens` — non-cached input. + * - `cachedInputTokens` — cache-READ tokens (discounted). + * - `cacheCreationInputTokens` — cache-WRITE tokens (premium on Anthropic). + * - `outputTokens` — model output. + * - `reasoningOutputTokens` — reasoning output. Defaults to output rate. + * + * Each class is billed *additively*, matching how providers invoice. 
*/ export function computeTurnCost( model: string | null | undefined, @@ -213,16 +250,18 @@ export function computeTurnCost( const pricing = getPricing(model, provider); const input = finite(deltas.inputTokens); const cached = finite(deltas.cachedInputTokens); + const cacheCreation = finite(deltas.cacheCreationInputTokens); const output = finite(deltas.outputTokens); const reasoning = finite(deltas.reasoningOutputTokens); const inputUsd = (input / 1_000_000) * pricing.inputPerMTok; const cachedUsd = (cached / 1_000_000) * pricing.cachedInputPerMTok; + const cacheCreationUsd = (cacheCreation / 1_000_000) * pricing.cacheCreationInputPerMTok; const outputUsd = (output / 1_000_000) * pricing.outputPerMTok; const reasoningUsd = (reasoning / 1_000_000) * pricing.reasoningOutputPerMTok; - const totalUsd = inputUsd + cachedUsd + outputUsd + reasoningUsd; + const totalUsd = inputUsd + cachedUsd + cacheCreationUsd + outputUsd + reasoningUsd; - return { inputUsd, cachedUsd, outputUsd, reasoningUsd, totalUsd }; + return { inputUsd, cachedUsd, cacheCreationUsd, outputUsd, reasoningUsd, totalUsd }; } /** Format USD amount for UI display. */ From 37f692fb6eeae008feb4232af35b8b3a84f0af64 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 20:51:48 +0200 Subject: [PATCH 08/16] fix(server): Claude adapter reports correct per-turn + per-tier token usage The Claude adapter lumped cache_read / cache_creation / fresh input into a single inputTokens field and emitted no per-turn deltas, leaving the cost meter silently $0 for every Claude turn and over-charging cached contexts by ~10x when it did fire. It also clamped usedTokens at maxTokens on cumulative totals, pinning the context ring at 100% once totalProcessedTokens exceeded the window. Changes: - Extract parseClaudeUsageBreakdown: splits SDK usage into four tiers (input / cachedInput / cacheCreationInput / output) with an explicit totalTokens. 
- normalizeClaudeTokenUsage emits all four tiers and drops the min(total, max) cap; callers decide how to render overflow. - Add buildClaudeTurnCompleteUsage: maintains a per-session lastTurnCumulativeUsage accumulator, subtracts from each result.usage to produce lastInputTokens / lastCachedInputTokens / lastCacheCreationInputTokens / lastOutputTokens deltas for the cost tracker. usedTokens prefers the task snapshot (real current context) over the cumulative total. - Context state gains lastTurnCumulativeUsage; initialized at session start, advanced on each turn-complete emission. Tests: - New ClaudeAdapter.usage.test.ts: 10 unit tests cover parseBreakdown semantics, first-turn vs second-turn deltas, clamp behaviour, task-snapshot fallback, and negative-delta guards. - ClaudeAdapter.test.ts updated: three existing cases now assert the split tiers + uncapped usedTokens (what the SDK actually reports). - Full server suite: 894 pass. --- .../src/provider/Layers/ClaudeAdapter.test.ts | 25 +- .../src/provider/Layers/ClaudeAdapter.ts | 265 +++++++++++++----- .../Layers/ClaudeAdapter.usage.test.ts | 173 ++++++++++++ 3 files changed, 391 insertions(+), 72 deletions(-) create mode 100644 apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts index 79c66bdfcf..0846009a49 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts @@ -1595,12 +1595,22 @@ describe("ClaudeAdapterLive", () => { const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated"); assert.equal(usageEvent?.type, "thread.token-usage.updated"); if (usageEvent?.type === "thread.token-usage.updated") { + // First turn: no prior cumulative, so last* deltas equal cumulative + // totals. Cache read/write split correctly; usedTokens = cumulative + // total (no task snapshot in this test). 
assert.deepEqual(usageEvent.payload, { usage: { usedTokens: 24542, lastUsedTokens: 24542, - inputTokens: 23863, + totalProcessedTokens: 24542, + inputTokens: 4, + cachedInputTokens: 21144, + cacheCreationInputTokens: 2715, outputTokens: 679, + lastInputTokens: 4, + lastCachedInputTokens: 21144, + lastCacheCreationInputTokens: 2715, + lastOutputTokens: 679, maxTokens: 200000, }, }); @@ -1611,7 +1621,7 @@ describe("ClaudeAdapterLive", () => { ); }); - it.effect("clamps oversized Claude usage to the reported context window", () => { + it.effect("reports Claude usage uncapped when cumulative exceeds context window", () => { const harness = makeHarness(); return Effect.gen(function* () { const adapter = yield* ClaudeAdapter; @@ -1659,10 +1669,12 @@ describe("ClaudeAdapterLive", () => { const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated"); assert.equal(usageEvent?.type, "thread.token-usage.updated"); if (usageEvent?.type === "thread.token-usage.updated") { + // usedTokens is no longer clamped: the cumulative result total is + // reported as-is. UI clamps for ring display; callers get truth. assert.deepEqual(usageEvent.payload, { usage: { - usedTokens: 200000, - lastUsedTokens: 200000, + usedTokens: 535000, + lastUsedTokens: 535000, totalProcessedTokens: 535000, maxTokens: 200000, }, @@ -1739,10 +1751,13 @@ describe("ClaudeAdapterLive", () => { const finalUsageEvent = usageEvents.at(-1); assert.equal(finalUsageEvent?.type, "thread.token-usage.updated"); if (finalUsageEvent?.type === "thread.token-usage.updated") { + // Task snapshot drives usedTokens (real current-context); result + // cumulative drives totalProcessedTokens. lastUsedTokens reports + // the turn's total (cumulative since there's no prior turn). 
assert.deepEqual(finalUsageEvent.payload, { usage: { usedTokens: 190000, - lastUsedTokens: 190000, + lastUsedTokens: 535000, totalProcessedTokens: 535000, maxTokens: 200000, }, diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index 81980acb9b..11e7a2569b 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -158,6 +158,14 @@ interface ClaudeSessionContext { turnState: ClaudeTurnState | undefined; lastKnownContextWindow: number | undefined; lastKnownTokenUsage: ThreadTokenUsageSnapshot | undefined; + /** + * Cumulative per-class token counts emitted in the prior turn's + * `result.usage`. Claude's SDK reports `result.usage` as a running total + * across every API call in the session, so per-turn cost requires + * subtracting this snapshot from the current cumulative totals. Cleared + * on session start; reset after each emission. + */ + lastTurnCumulativeUsage: ClaudeUsageBreakdown | undefined; lastAssistantUuid: string | undefined; lastThreadStartedId: string | undefined; stopped: boolean; @@ -290,61 +298,197 @@ function maxClaudeContextWindowFromModelUsage( return maxContextWindow; } -function normalizeClaudeTokenUsage( - value: unknown, - contextWindow?: number, -): ThreadTokenUsageSnapshot | undefined { +/** + * Breakdown of a Claude SDK usage record across the four token classes we + * price separately. Pure — no derived totals, no capping. Callers combine + * with prior session state to compute context/ring values or per-turn deltas. + */ +interface ClaudeUsageBreakdown { + readonly inputTokens: number; + readonly cachedInputTokens: number; + readonly cacheCreationInputTokens: number; + readonly outputTokens: number; + /** + * `usage.total_tokens` when the SDK reports it explicitly, otherwise the + * sum of the four classes. Used to drive `usedTokens` when no task + * snapshot is available. 
+ */ + readonly totalTokens: number; + readonly toolUses?: number; + readonly durationMs?: number; +} + +function nonNegativeNumber(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0; +} + +export function parseClaudeUsageBreakdown(value: unknown): ClaudeUsageBreakdown | undefined { if (!value || typeof value !== "object") { return undefined; } - const usage = value as Record; - const inputTokens = - (typeof usage.input_tokens === "number" && Number.isFinite(usage.input_tokens) - ? usage.input_tokens - : 0) + - (typeof usage.cache_creation_input_tokens === "number" && - Number.isFinite(usage.cache_creation_input_tokens) - ? usage.cache_creation_input_tokens - : 0) + - (typeof usage.cache_read_input_tokens === "number" && - Number.isFinite(usage.cache_read_input_tokens) - ? usage.cache_read_input_tokens - : 0); - const outputTokens = - typeof usage.output_tokens === "number" && Number.isFinite(usage.output_tokens) - ? usage.output_tokens - : 0; - const derivedTotalProcessedTokens = inputTokens + outputTokens; - const totalProcessedTokens = - (typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens) + const inputTokens = nonNegativeNumber(usage.input_tokens); + const cachedInputTokens = nonNegativeNumber(usage.cache_read_input_tokens); + const cacheCreationInputTokens = nonNegativeNumber(usage.cache_creation_input_tokens); + const outputTokens = nonNegativeNumber(usage.output_tokens); + const derivedTotal = + inputTokens + cachedInputTokens + cacheCreationInputTokens + outputTokens; + const totalTokens = + typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens) && usage.total_tokens > 0 ? usage.total_tokens - : undefined) ?? (derivedTotalProcessedTokens > 0 ? 
derivedTotalProcessedTokens : undefined); - if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) { + : derivedTotal; + if (totalTokens <= 0) { return undefined; } + const toolUses = + typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses) + ? usage.tool_uses + : undefined; + const durationMs = + typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms) + ? usage.duration_ms + : undefined; + return { + inputTokens, + cachedInputTokens, + cacheCreationInputTokens, + outputTokens, + totalTokens, + ...(toolUses !== undefined ? { toolUses } : {}), + ...(durationMs !== undefined ? { durationMs } : {}), + }; +} +/** + * Normalize a single Claude usage record into a `ThreadTokenUsageSnapshot`. + * + * Used for mid-turn snapshots (task_progress / task_notification) — each such + * event represents one API call's usage, which (for the latest call) matches + * the current context window size. The four token classes are reported + * separately so downstream cost math can apply the correct tier. + * + * No capping: `usedTokens` reflects `total_tokens` (or the derived sum) as + * reported. Callers that want to clamp for ring display should do so in the + * UI layer. + */ +function normalizeClaudeTokenUsage( + value: unknown, + contextWindow?: number, +): ThreadTokenUsageSnapshot | undefined { + const breakdown = parseClaudeUsageBreakdown(value); + if (!breakdown) { + return undefined; + } const maxTokens = typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0 ? contextWindow : undefined; - const usedTokens = - maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens; - return { - usedTokens, - lastUsedTokens: usedTokens, - ...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}), - ...(inputTokens > 0 ? { inputTokens } : {}), - ...(outputTokens > 0 ? { outputTokens } : {}), - ...(maxTokens !== undefined ? 
{ maxTokens } : {}), - ...(typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses) - ? { toolUses: usage.tool_uses } + usedTokens: breakdown.totalTokens, + lastUsedTokens: breakdown.totalTokens, + ...(breakdown.inputTokens > 0 ? { inputTokens: breakdown.inputTokens } : {}), + ...(breakdown.cachedInputTokens > 0 ? { cachedInputTokens: breakdown.cachedInputTokens } : {}), + ...(breakdown.cacheCreationInputTokens > 0 + ? { cacheCreationInputTokens: breakdown.cacheCreationInputTokens } : {}), - ...(typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms) - ? { durationMs: usage.duration_ms } + ...(breakdown.outputTokens > 0 ? { outputTokens: breakdown.outputTokens } : {}), + ...(maxTokens !== undefined ? { maxTokens } : {}), + ...(breakdown.toolUses !== undefined ? { toolUses: breakdown.toolUses } : {}), + ...(breakdown.durationMs !== undefined ? { durationMs: breakdown.durationMs } : {}), + }; +} + +/** + * Build the turn-complete usage snapshot. Combines: + * - Mid-turn task snapshot (current context size) for `usedTokens`. + * - Cumulative session totals from `result.usage` for `totalProcessedTokens` + * and the cumulative per-class counts. + * - Per-turn deltas via subtraction against the prior turn's cumulative — + * this populates `lastInputTokens / lastCachedInputTokens / + * lastCacheCreationInputTokens / lastOutputTokens` for the downstream + * cost meter. + * + * `priorCumulative` is mutated by the caller after emission so the next turn + * sees fresh baseline state. 
+ */ +export interface ClaudeTurnCompleteUsageInput { + readonly resultUsage: unknown; + readonly taskSnapshot: ThreadTokenUsageSnapshot | undefined; + readonly contextWindow?: number | undefined; + readonly priorCumulative?: ClaudeUsageBreakdown | undefined; +} + +export interface ClaudeTurnCompleteUsageResult { + readonly snapshot: ThreadTokenUsageSnapshot | undefined; + readonly nextCumulative: ClaudeUsageBreakdown | undefined; +} + +export function buildClaudeTurnCompleteUsage( + input: ClaudeTurnCompleteUsageInput, +): ClaudeTurnCompleteUsageResult { + const cumulative = parseClaudeUsageBreakdown(input.resultUsage); + const maxTokens = + typeof input.contextWindow === "number" && + Number.isFinite(input.contextWindow) && + input.contextWindow > 0 + ? input.contextWindow + : undefined; + + if (!cumulative) { + // No result.usage — fall back to whatever task snapshot we have, stamped + // with the freshest maxTokens. + if (!input.taskSnapshot) { + return { snapshot: undefined, nextCumulative: input.priorCumulative }; + } + return { + snapshot: { + ...input.taskSnapshot, + ...(maxTokens !== undefined ? { maxTokens } : {}), + }, + nextCumulative: input.priorCumulative, + }; + } + + const prior = input.priorCumulative ?? { + inputTokens: 0, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }; + const deltaInput = Math.max(0, cumulative.inputTokens - prior.inputTokens); + const deltaCached = Math.max(0, cumulative.cachedInputTokens - prior.cachedInputTokens); + const deltaCacheCreation = Math.max( + 0, + cumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens, + ); + const deltaOutput = Math.max(0, cumulative.outputTokens - prior.outputTokens); + const lastTotal = deltaInput + deltaCached + deltaCacheCreation + deltaOutput; + + // usedTokens: prefer the task snapshot (current context size); fall back to + // the cumulative total when no task snapshot was recorded for this turn. 
+ const usedTokens = input.taskSnapshot?.usedTokens ?? cumulative.totalTokens; + + const snapshot: ThreadTokenUsageSnapshot = { + usedTokens, + lastUsedTokens: lastTotal > 0 ? lastTotal : cumulative.totalTokens, + totalProcessedTokens: cumulative.totalTokens, + ...(cumulative.inputTokens > 0 ? { inputTokens: cumulative.inputTokens } : {}), + ...(cumulative.cachedInputTokens > 0 ? { cachedInputTokens: cumulative.cachedInputTokens } : {}), + ...(cumulative.cacheCreationInputTokens > 0 + ? { cacheCreationInputTokens: cumulative.cacheCreationInputTokens } : {}), + ...(cumulative.outputTokens > 0 ? { outputTokens: cumulative.outputTokens } : {}), + ...(deltaInput > 0 ? { lastInputTokens: deltaInput } : {}), + ...(deltaCached > 0 ? { lastCachedInputTokens: deltaCached } : {}), + ...(deltaCacheCreation > 0 ? { lastCacheCreationInputTokens: deltaCacheCreation } : {}), + ...(deltaOutput > 0 ? { lastOutputTokens: deltaOutput } : {}), + ...(maxTokens !== undefined ? { maxTokens } : {}), + ...(cumulative.toolUses !== undefined ? { toolUses: cumulative.toolUses } : {}), + ...(cumulative.durationMs !== undefined ? { durationMs: cumulative.durationMs } : {}), }; + + return { snapshot, nextCumulative: cumulative }; } function asCanonicalTurnId(value: TurnId): TurnId { @@ -1385,34 +1529,20 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( context.lastKnownContextWindow = resultContextWindow; } - // The SDK result.usage contains *accumulated* totals across all API calls - // (input_tokens, cache_read_input_tokens, etc. summed over every request). - // This does NOT represent the current context window size. - // Instead, use the last known context-window-accurate usage from task_progress - // events and treat the accumulated total as totalProcessedTokens. - const accumulatedSnapshot = normalizeClaudeTokenUsage( - result?.usage, - resultContextWindow ?? 
context.lastKnownContextWindow, - ); - const accumulatedTotalProcessedTokens = - accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens; - const lastGoodUsage = context.lastKnownTokenUsage; - const maxTokens = resultContextWindow ?? context.lastKnownContextWindow; - const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage - ? { - ...lastGoodUsage, - ...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0 - ? { maxTokens } - : {}), - ...(typeof accumulatedTotalProcessedTokens === "number" && - Number.isFinite(accumulatedTotalProcessedTokens) && - accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens - ? { - totalProcessedTokens: accumulatedTotalProcessedTokens, - } - : {}), - } - : accumulatedSnapshot; + // `result.usage` reports running totals across every API call in the + // session. We combine it with the freshest per-call task snapshot (for + // `usedTokens` — the real current-context value) and with the prior + // turn's cumulative snapshot (to derive this turn's per-class deltas). + const turnUsage = buildClaudeTurnCompleteUsage({ + resultUsage: result?.usage, + taskSnapshot: context.lastKnownTokenUsage, + contextWindow: resultContextWindow ?? 
context.lastKnownContextWindow, + priorCumulative: context.lastTurnCumulativeUsage, + }); + const usageSnapshot = turnUsage.snapshot; + if (turnUsage.nextCumulative !== undefined) { + context.lastTurnCumulativeUsage = turnUsage.nextCumulative; + } const turnState = context.turnState; if (!turnState) { @@ -2918,6 +3048,7 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( turnState: undefined, lastKnownContextWindow: undefined, lastKnownTokenUsage: undefined, + lastTurnCumulativeUsage: undefined, lastAssistantUuid: resumeState?.resumeSessionAt, lastThreadStartedId: undefined, stopped: false, diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts new file mode 100644 index 0000000000..c651512d3c --- /dev/null +++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts @@ -0,0 +1,173 @@ +import { describe, expect, it } from "vitest"; + +import { + buildClaudeTurnCompleteUsage, + parseClaudeUsageBreakdown, + type ClaudeTurnCompleteUsageResult, +} from "./ClaudeAdapter.ts"; + +describe("parseClaudeUsageBreakdown", () => { + it("splits Anthropic fields into four token tiers", () => { + const b = parseClaudeUsageBreakdown({ + input_tokens: 4, + cache_creation_input_tokens: 2715, + cache_read_input_tokens: 21144, + output_tokens: 679, + }); + expect(b).toEqual({ + inputTokens: 4, + cachedInputTokens: 21144, + cacheCreationInputTokens: 2715, + outputTokens: 679, + totalTokens: 4 + 2715 + 21144 + 679, + }); + }); + + it("prefers explicit total_tokens over the derived sum", () => { + const b = parseClaudeUsageBreakdown({ + total_tokens: 999, + input_tokens: 1, + output_tokens: 2, + }); + expect(b?.totalTokens).toBe(999); + }); + + it("derives total when only total_tokens reported", () => { + const b = parseClaudeUsageBreakdown({ total_tokens: 42 }); + expect(b?.totalTokens).toBe(42); + expect(b?.inputTokens).toBe(0); + }); + + it("returns undefined for empty / malformed 
input", () => { + expect(parseClaudeUsageBreakdown(null)).toBeUndefined(); + expect(parseClaudeUsageBreakdown({})).toBeUndefined(); + expect(parseClaudeUsageBreakdown({ total_tokens: 0 })).toBeUndefined(); + }); +}); + +describe("buildClaudeTurnCompleteUsage", () => { + it("builds first-turn deltas equal to cumulative totals", () => { + const res = buildClaudeTurnCompleteUsage({ + resultUsage: { + input_tokens: 1_000, + cache_read_input_tokens: 5_000, + cache_creation_input_tokens: 2_000, + output_tokens: 500, + }, + taskSnapshot: undefined, + contextWindow: 200_000, + priorCumulative: undefined, + }); + const snap = res.snapshot!; + expect(snap.inputTokens).toBe(1_000); + expect(snap.cachedInputTokens).toBe(5_000); + expect(snap.cacheCreationInputTokens).toBe(2_000); + expect(snap.outputTokens).toBe(500); + expect(snap.lastInputTokens).toBe(1_000); + expect(snap.lastCachedInputTokens).toBe(5_000); + expect(snap.lastCacheCreationInputTokens).toBe(2_000); + expect(snap.lastOutputTokens).toBe(500); + expect(snap.lastUsedTokens).toBe(8_500); + expect(snap.usedTokens).toBe(8_500); + expect(snap.totalProcessedTokens).toBe(8_500); + expect(snap.maxTokens).toBe(200_000); + expect(res.nextCumulative).toBeDefined(); + }); + + it("computes second-turn deltas against the prior cumulative", () => { + const turn1 = buildClaudeTurnCompleteUsage({ + resultUsage: { + input_tokens: 1_000, + cache_read_input_tokens: 5_000, + output_tokens: 500, + }, + taskSnapshot: undefined, + contextWindow: 200_000, + priorCumulative: undefined, + }); + const turn2 = buildClaudeTurnCompleteUsage({ + resultUsage: { + // Cumulative totals have grown — turn 2 added 500 input, 1k cached, + // 300 cache-creation, 200 output. 
+ input_tokens: 1_500, + cache_read_input_tokens: 6_000, + cache_creation_input_tokens: 300, + output_tokens: 700, + }, + taskSnapshot: undefined, + contextWindow: 200_000, + priorCumulative: turn1.nextCumulative, + }); + const s = turn2.snapshot!; + expect(s.inputTokens).toBe(1_500); + expect(s.cachedInputTokens).toBe(6_000); + expect(s.cacheCreationInputTokens).toBe(300); + expect(s.outputTokens).toBe(700); + expect(s.lastInputTokens).toBe(500); + expect(s.lastCachedInputTokens).toBe(1_000); + expect(s.lastCacheCreationInputTokens).toBe(300); + expect(s.lastOutputTokens).toBe(200); + expect(s.lastUsedTokens).toBe(500 + 1_000 + 300 + 200); + }); + + it("does not cap usedTokens to maxTokens", () => { + const res = buildClaudeTurnCompleteUsage({ + resultUsage: { total_tokens: 535_000 }, + taskSnapshot: undefined, + contextWindow: 200_000, + priorCumulative: undefined, + }); + expect(res.snapshot!.usedTokens).toBe(535_000); + expect(res.snapshot!.maxTokens).toBe(200_000); + }); + + it("uses task snapshot usedTokens when available (current context)", () => { + const res = buildClaudeTurnCompleteUsage({ + resultUsage: { total_tokens: 535_000 }, + taskSnapshot: { + usedTokens: 190_000, + lastUsedTokens: 190_000, + }, + contextWindow: 200_000, + priorCumulative: undefined, + }); + expect(res.snapshot!.usedTokens).toBe(190_000); + expect(res.snapshot!.totalProcessedTokens).toBe(535_000); + }); + + it("falls back to task snapshot when result.usage is absent", () => { + const res: ClaudeTurnCompleteUsageResult = buildClaudeTurnCompleteUsage({ + resultUsage: undefined, + taskSnapshot: { usedTokens: 500, lastUsedTokens: 500 }, + contextWindow: 100_000, + priorCumulative: undefined, + }); + expect(res.snapshot?.usedTokens).toBe(500); + expect(res.nextCumulative).toBeUndefined(); + }); + + it("clamps negative deltas to zero when cumulative goes backwards", () => { + const prior = { + inputTokens: 1_000, + cachedInputTokens: 5_000, + cacheCreationInputTokens: 0, + outputTokens: 
500, + totalTokens: 6_500, + }; + // Unexpected: SDK reports lower cumulative (shouldn't happen, but guard + // against it so cost math never goes negative). + const res = buildClaudeTurnCompleteUsage({ + resultUsage: { + input_tokens: 900, + cache_read_input_tokens: 4_000, + output_tokens: 400, + }, + taskSnapshot: undefined, + priorCumulative: prior, + }); + const s = res.snapshot!; + expect(s.lastInputTokens).toBeUndefined(); // delta was 0 + expect(s.lastCachedInputTokens).toBeUndefined(); + expect(s.lastOutputTokens).toBeUndefined(); + }); +}); From f38801193d4ea61becf898e66e17dbad1d5d42e1 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 20:59:39 +0200 Subject: [PATCH 09/16] =?UTF-8?q?feat(server):=20CostTracker=20layer=20?= =?UTF-8?q?=E2=80=94=20JSON-backed=20cost=20ledger?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a server-owned cost ledger that writes three atomic JSON files per recorded turn: - session_<threadId>.json per-thread cumulative - YYYY-MM.json month bucket (local tz) - alltime.json running total since install Works across dev, installed app, and standalone binaries because persistence lives next to the server's existing SQLite state at //usage/. Atomic writes mirror serverSettings: write .tmp, rename into place; errors log and swallow so orchestration never blocks on FS failure. Components: - types.ts: plain-TS interfaces + local-tz month key helper + empty-bucket constructors. - Reducer.ts: pure deriveTurnDeltas / processTurn / isTurnNoOp / sanitizePersistedFile. Prefers lastXxxTokens from the payload (Codex + post-fix Claude); falls back to delta-vs-lastCumulative for older providers. Zero-cost unknown models still record their token usage. - Services/CostTracker.ts: Effect Context.Service API (recordUsage / getSummary / updates stream). - Layers/CostTracker.ts: FS-backed live layer; semaphore-serialized writes; PubSub exposes live updates for WS broadcast. 
- shared/pricing: re-export ProviderKind so server consumers don't reach into contracts for it. Tests: 14 pure reducer cases + 5 live-layer cases (record, idempotent no-op, accumulate, stream emission, zero-summary). All green. --- .../src/cost/Layers/CostTracker.test.ts | 146 ++++++++ apps/server/src/cost/Layers/CostTracker.ts | 199 +++++++++++ apps/server/src/cost/Reducer.test.ts | 311 ++++++++++++++++ apps/server/src/cost/Reducer.ts | 337 ++++++++++++++++++ apps/server/src/cost/Services/CostTracker.ts | 49 +++ apps/server/src/cost/types.ts | 120 +++++++ packages/shared/src/pricing.ts | 2 + 7 files changed, 1164 insertions(+) create mode 100644 apps/server/src/cost/Layers/CostTracker.test.ts create mode 100644 apps/server/src/cost/Layers/CostTracker.ts create mode 100644 apps/server/src/cost/Reducer.test.ts create mode 100644 apps/server/src/cost/Reducer.ts create mode 100644 apps/server/src/cost/Services/CostTracker.ts create mode 100644 apps/server/src/cost/types.ts diff --git a/apps/server/src/cost/Layers/CostTracker.test.ts b/apps/server/src/cost/Layers/CostTracker.test.ts new file mode 100644 index 0000000000..1a2acb4325 --- /dev/null +++ b/apps/server/src/cost/Layers/CostTracker.test.ts @@ -0,0 +1,146 @@ +import * as NodeServices from "@effect/platform-node/NodeServices"; +import { assert, it } from "@effect/vitest"; +import { Effect, Fiber, FileSystem, Layer, Path, Stream } from "effect"; + +import { ServerConfig } from "../../config.ts"; +import { CostTrackerLive } from "./CostTracker.ts"; +import { CostTrackerService } from "../Services/CostTracker.ts"; +import { localMonthKey } from "../types.ts"; + +const SONNET = "claude-sonnet-4-6"; + +const makeLayer = () => { + const configLayer = ServerConfig.layerTest(process.cwd(), { prefix: "t3-cost-" }); + return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer); +}; + +it.layer(NodeServices.layer)("CostTrackerLive", (it) => { + it.effect("records a turn and persists 
session/month/alltime files", () => + Effect.gen(function* () { + const tracker = yield* CostTrackerService; + const config = yield* ServerConfig; + const fs = yield* FileSystem.FileSystem; + const path = yield* Path.Path; + + const at = new Date(2026, 3, 21, 10, 0, 0); + const monthKey = localMonthKey(at); + const summary = yield* tracker.recordUsage({ + threadId: "thread-1", + model: SONNET, + usage: { + inputTokens: 1_000, + cachedInputTokens: 5_000, + outputTokens: 500, + lastInputTokens: 1_000, + lastCachedInputTokens: 5_000, + lastOutputTokens: 500, + }, + at, + }); + + assert.equal(summary.thread?.turnCount, 1); + assert.equal(summary.month.turnCount, 1); + assert.equal(summary.allTime.turnCount, 1); + assert.equal(summary.monthKey, monthKey); + assert.ok(summary.month.totalUsd > 0); + + const sessionPath = path.join(config.usageDir, "session_thread-1.json"); + const monthPath = path.join(config.usageDir, `${monthKey}.json`); + const alltimePath = path.join(config.usageDir, "alltime.json"); + assert.equal(yield* fs.exists(sessionPath), true); + assert.equal(yield* fs.exists(monthPath), true); + assert.equal(yield* fs.exists(alltimePath), true); + + const monthRaw = yield* fs.readFileString(monthPath); + const monthParsed = JSON.parse(monthRaw) as { + readonly kind: string; + readonly bucket: { readonly turnCount: number }; + }; + assert.equal(monthParsed.kind, "month"); + assert.equal(monthParsed.bucket.turnCount, 1); + }).pipe(Effect.provide(makeLayer())), + ); + + it.effect("is idempotent for zero-delta turns", () => + Effect.gen(function* () { + const tracker = yield* CostTrackerService; + const summary = yield* tracker.recordUsage({ + threadId: "thread-1", + model: SONNET, + usage: {}, + at: new Date(2026, 3, 21, 10, 0, 0), + }); + assert.equal(summary.month.turnCount, 0); + assert.equal(summary.allTime.turnCount, 0); + }).pipe(Effect.provide(makeLayer())), + ); + + it.effect("accumulates multiple turns", () => + Effect.gen(function* () { + const tracker 
= yield* CostTrackerService; + const at = new Date(2026, 3, 21, 10, 0, 0); + yield* tracker.recordUsage({ + threadId: "thread-1", + model: SONNET, + usage: { + inputTokens: 1_000, + outputTokens: 500, + lastInputTokens: 1_000, + lastOutputTokens: 500, + }, + at, + }); + const second = yield* tracker.recordUsage({ + threadId: "thread-1", + model: SONNET, + usage: { + inputTokens: 2_000, + outputTokens: 900, + lastInputTokens: 1_000, + lastOutputTokens: 400, + }, + at, + }); + assert.equal(second.thread?.turnCount, 2); + assert.equal(second.month.turnCount, 2); + assert.equal(second.allTime.turnCount, 2); + }).pipe(Effect.provide(makeLayer())), + ); + + it.effect("emits summary on the updates stream after a write", () => + Effect.gen(function* () { + const tracker = yield* CostTrackerService; + const fiber = yield* Effect.forkChild( + Stream.take(tracker.updates, 1).pipe(Stream.runCollect), + ); + yield* tracker.recordUsage({ + threadId: "thread-stream", + model: SONNET, + usage: { + lastInputTokens: 100, + lastOutputTokens: 50, + }, + at: new Date(2026, 3, 21), + }); + const chunk = yield* Fiber.join(fiber); + const events = Array.from(chunk); + assert.equal(events.length, 1); + assert.ok(events[0]!.month.turnCount >= 1); + }).pipe(Effect.provide(makeLayer())), + ); + + it.effect("getSummary returns zero for an unused session/month", () => + Effect.gen(function* () { + const tracker = yield* CostTrackerService; + // Fresh layer per test, but be defensive: pin to a month no other test + // has touched. The thread-level bucket is per-threadId so that's safe. 
+ const summary = yield* tracker.getSummary({ + threadId: "never-seen", + at: new Date(2019, 11, 1), + }); + assert.equal(summary.thread?.turnCount, 0); + assert.equal(summary.month.turnCount, 0); + assert.equal(summary.monthKey, "2019-12"); + }).pipe(Effect.provide(makeLayer())), + ); +}); diff --git a/apps/server/src/cost/Layers/CostTracker.ts b/apps/server/src/cost/Layers/CostTracker.ts new file mode 100644 index 0000000000..5d7c9bb0a2 --- /dev/null +++ b/apps/server/src/cost/Layers/CostTracker.ts @@ -0,0 +1,199 @@ +/** + * CostTrackerLive - JSON-backed cost ledger. + * + * Writes three atomic files per recorded turn: + * - `session_.json` + * - `.json` (local tz) + * - `alltime.json` + * + * Atomic pattern mirrors `serverSettings`: write `.tmp`, rename into place. + * Errors never block orchestration — the caller wraps `recordUsage` in + * `Effect.catchAll(logError)`. + * + * @module CostTrackerLive + */ +import { Data, Effect, FileSystem, Layer, Path, PubSub, Semaphore, Stream } from "effect"; + +class CostFileParseError extends Data.TaggedError("CostFileParseError")<{ + readonly path: string; + readonly cause: unknown; +}> {} + +import { ServerConfig } from "../../config.ts"; +import { CostTrackerService, type CostTrackerShape } from "../Services/CostTracker.ts"; +import { + processTurn, + sanitizePersistedFile, + type ProcessTurnResult, +} from "../Reducer.ts"; +import type { + CostBucket, + CostSummary, + PersistedCostFile, + PersistedCostFileKind, + RecordUsageInput, +} from "../types.ts"; +import { emptyCostBucket, localMonthKey } from "../types.ts"; + +function encodeFile(file: PersistedCostFile): string { + return `${JSON.stringify(file, null, 2)}\n`; +} + +function sessionFilename(threadId: string): string { + // Normalize threadId for a flat filename — threadIds are UUID-like, but + // encodeURIComponent keeps us safe if a provider ever emits special chars. 
+  return `session_${encodeURIComponent(threadId)}.json`;
+}
+
+/** File name of the month bucket for a month key such as `2026-03`. */
+function monthFilename(monthKey: string): string {
+  return `${monthKey}.json`;
+}
+
+/** File name of the single all-time bucket. */
+const ALLTIME_FILENAME = "alltime.json";
+
+const make = Effect.gen(function* () {
+  const { usageDir } = yield* ServerConfig;
+  const fs = yield* FileSystem.FileSystem;
+  const path = yield* Path.Path;
+  // One writer at a time so concurrent turns don't clobber the same file.
+  const writeSemaphore = yield* Semaphore.make(1);
+  // Element type is explicit so the stream built from this PubSub carries
+  // `CostSummary` values rather than `unknown`.
+  const updatesPubSub = yield* PubSub.unbounded<CostSummary>();
+
+  // Ensure the directory exists even if config bootstrap skipped it.
+  yield* fs.makeDirectory(usageDir, { recursive: true }).pipe(Effect.ignore({ log: true }));
+
+  // Absolute path of the ledger file for a (kind, key) pair. `key` is ignored
+  // for "alltime", which always maps to the same file.
+  const filePathFor = (kind: PersistedCostFileKind, key: string): string => {
+    switch (kind) {
+      case "session":
+        return path.join(usageDir, sessionFilename(key));
+      case "month":
+        return path.join(usageDir, monthFilename(key));
+      case "alltime":
+        return path.join(usageDir, ALLTIME_FILENAME);
+    }
+  };
+
+  // Best-effort read: a missing, unreadable, blank, or corrupt file all
+  // resolve to `undefined`, which `sanitizePersistedFile` turns into a fresh
+  // empty bucket. This effect never fails.
+  const readFileIfExists = (absPath: string) =>
+    Effect.gen(function* () {
+      const exists = yield* fs.exists(absPath).pipe(Effect.orElseSucceed(() => false));
+      if (!exists) return undefined;
+      const raw = yield* fs.readFileString(absPath).pipe(Effect.orElseSucceed(() => ""));
+      if (!raw.trim()) return undefined;
+      return yield* Effect.try({
+        try: () => JSON.parse(raw) as unknown,
+        catch: (cause) => new CostFileParseError({ path: absPath, cause }),
+      }).pipe(
+        // Still recover with `undefined`, but say so: the next applied write
+        // replaces this file, so the log is the only trace of what was lost.
+        Effect.tapError((error) =>
+          Effect.logWarning(
+            "cost ledger file is not valid JSON; starting from an empty bucket",
+            error,
+          ),
+        ),
+        Effect.orElseSucceed(() => undefined),
+      );
+    });
+
+  // Load and sanitize one ledger file; always yields a well-formed file.
+  const loadFile = (
+    kind: PersistedCostFileKind,
+    key: string,
+    now: Date,
+  ): Effect.Effect<PersistedCostFile> =>
+    Effect.gen(function* () {
+      const raw = yield* readFileIfExists(filePathFor(kind, key));
+      return sanitizePersistedFile(raw, kind, key, now);
+    });
+
+  const writeFileAtomically = (file: PersistedCostFile) =>
+    Effect.gen(function* () {
+      const target = filePathFor(file.kind, file.key);
+      const tempPath = `${target}.${process.pid}.${Date.now()}.${Math.random()
+        .toString(36)
+        .slice(2, 8)}.tmp`;
+      const encoded = encodeFile(file);
+      // Write and rename share one finalizer, so a failed write (not only a
+      // failed rename) also gets its uniquely-named temp file cleaned up.
+      // The removal itself is best-effort: `force` plus `Effect.ignore`.
+      yield* Effect.gen(function* () {
+        yield* fs.writeFileString(tempPath, encoded);
+        yield* fs.rename(tempPath, target);
+      }).pipe(
+        Effect.ensuring(fs.remove(tempPath, { force: true }).pipe(Effect.ignore({ log: true }))),
+      );
+      // Failures are logged and swallowed so a bad disk never fails the caller.
+    }).pipe(Effect.ignoreCause({ log: true }));
+
+  // Project the three loaded files onto the summary shape; `thread` is `null`
+  // when no session file was requested.
+  const summaryFromFiles = (
+    session: PersistedCostFile | null,
+    month: PersistedCostFile,
+    allTime: PersistedCostFile,
+    monthKey: string,
+  ): CostSummary => ({
+    thread: session?.bucket ?? null,
+    month: month.bucket,
+    allTime: allTime.bucket,
+    monthKey,
+  });
+
+  // Build a version-1 file holding an empty bucket stamped with `now`.
+  const emptyBucketFile = (
+    kind: PersistedCostFileKind,
+    key: string,
+    now: Date,
+  ): PersistedCostFile => ({
+    version: 1,
+    kind,
+    key,
+    bucket: emptyCostBucket(now),
+  });
+
+  // Read-only snapshot. The month bucket is chosen from `input.at` (default:
+  // now); the session file is only loaded when a non-empty `threadId` is given.
+  const getSummary: CostTrackerShape["getSummary"] = (input) =>
+    Effect.gen(function* () {
+      const now = input.at ?? new Date();
+      const monthKey = localMonthKey(now);
+      const [month, allTime, threadFile] = yield* Effect.all(
+        [
+          loadFile("month", monthKey, now),
+          loadFile("alltime", "alltime", now),
+          input.threadId ? loadFile("session", input.threadId, now) : Effect.succeed(null),
+        ],
+        { concurrency: "unbounded" },
+      );
+      return summaryFromFiles(threadFile, month, allTime, monthKey);
+    });
+
+  // The whole read-modify-write cycle runs under the single write permit.
+  const recordUsage: CostTrackerShape["recordUsage"] = (input: RecordUsageInput) =>
+    writeSemaphore.withPermits(1)(
+      Effect.gen(function* () {
+        const now = input.at ??
new Date(); + const monthKey = localMonthKey(now); + const session = yield* loadFile("session", input.threadId, now); + const month = yield* loadFile("month", monthKey, now); + const allTime = yield* loadFile("alltime", "alltime", now); + + const result: ProcessTurnResult = processTurn({ + input, + session, + month, + allTime, + now, + }); + + if (result.applied) { + yield* Effect.all( + [ + writeFileAtomically(result.session), + writeFileAtomically(result.month), + writeFileAtomically(result.allTime), + ], + { concurrency: "unbounded" }, + ); + } + + const summary: CostSummary = { + thread: result.session.bucket, + month: result.month.bucket, + allTime: result.allTime.bucket, + monthKey: result.monthKey, + }; + + if (result.applied) { + yield* PubSub.publish(updatesPubSub, summary).pipe(Effect.asVoid); + } + return summary; + }), + ); + + const shape: CostTrackerShape = { + recordUsage, + getSummary, + updates: Stream.fromPubSub(updatesPubSub), + }; + return shape; +}); + +export const CostTrackerLive = Layer.effect(CostTrackerService, make); diff --git a/apps/server/src/cost/Reducer.test.ts b/apps/server/src/cost/Reducer.test.ts new file mode 100644 index 0000000000..689bc83d0b --- /dev/null +++ b/apps/server/src/cost/Reducer.test.ts @@ -0,0 +1,311 @@ +import { describe, expect, it } from "vitest"; + +import { + deriveTurnDeltas, + isTurnNoOp, + processTurn, + sanitizePersistedFile, +} from "./Reducer.ts"; +import type { CumulativeUsageSnapshot, PersistedCostFile, RecordUsageInput } from "./types.ts"; +import { localMonthKey, zeroCumulativeUsage } from "./types.ts"; + +const SONNET = "claude-sonnet-4-6"; + +describe("deriveTurnDeltas", () => { + it("prefers explicit lastXxx fields", () => { + const { deltas, nextCumulative } = deriveTurnDeltas( + { + inputTokens: 1_000, + cachedInputTokens: 5_000, + cacheCreationInputTokens: 500, + outputTokens: 200, + lastInputTokens: 800, + lastCachedInputTokens: 3_000, + lastCacheCreationInputTokens: 100, + lastOutputTokens: 
50, + }, + { + inputTokens: 200, + cachedInputTokens: 2_000, + cacheCreationInputTokens: 400, + outputTokens: 150, + reasoningOutputTokens: 0, + }, + ); + expect(deltas.inputTokens).toBe(800); + expect(deltas.cachedInputTokens).toBe(3_000); + expect(deltas.cacheCreationInputTokens).toBe(100); + expect(deltas.outputTokens).toBe(50); + // Cumulative reported in payload is used verbatim. + expect(nextCumulative.inputTokens).toBe(1_000); + expect(nextCumulative.cachedInputTokens).toBe(5_000); + }); + + it("subtracts cumulative snapshot when no lastXxx present", () => { + const prior: CumulativeUsageSnapshot = { + inputTokens: 100, + cachedInputTokens: 50, + cacheCreationInputTokens: 0, + outputTokens: 40, + reasoningOutputTokens: 0, + }; + const { deltas, nextCumulative } = deriveTurnDeltas( + { + inputTokens: 250, + cachedInputTokens: 300, + outputTokens: 100, + }, + prior, + ); + expect(deltas.inputTokens).toBe(150); + expect(deltas.cachedInputTokens).toBe(250); + expect(deltas.cacheCreationInputTokens).toBe(0); + expect(deltas.outputTokens).toBe(60); + expect(nextCumulative.inputTokens).toBe(250); + }); + + it("clamps negative deltas to zero", () => { + const prior: CumulativeUsageSnapshot = { + inputTokens: 500, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + outputTokens: 200, + reasoningOutputTokens: 0, + }; + const { deltas } = deriveTurnDeltas( + { inputTokens: 300, outputTokens: 150 }, + prior, + ); + expect(deltas.inputTokens).toBe(0); + expect(deltas.outputTokens).toBe(0); + }); + + it("rolls lastXxx onto prior cumulative when cumulative is absent", () => { + const { nextCumulative } = deriveTurnDeltas( + { lastInputTokens: 400, lastOutputTokens: 200 }, + zeroCumulativeUsage(), + ); + expect(nextCumulative.inputTokens).toBe(400); + expect(nextCumulative.outputTokens).toBe(200); + }); +}); + +describe("processTurn", () => { + const at = new Date(2026, 3, 21, 10, 0, 0); // local April 2026 + const monthKey = localMonthKey(at); + + const baseInput: 
RecordUsageInput = { + threadId: "thread-1", + model: SONNET, + usage: { + inputTokens: 1_000, + cachedInputTokens: 5_000, + cacheCreationInputTokens: 0, + outputTokens: 500, + lastInputTokens: 1_000, + lastCachedInputTokens: 5_000, + lastOutputTokens: 500, + }, + at, + }; + + it("records a new turn across all three buckets", () => { + const res = processTurn({ input: baseInput, session: undefined, month: undefined, allTime: undefined }); + expect(res.applied).toBe(true); + expect(res.monthKey).toBe(monthKey); + // 1000*$3 + 5000*$0.3 + 500*$15 = $3 + $1.5 + $7.5 = $12 per 1M → /1M = $0.012 + // 1k*3/1M + 5k*0.3/1M + 500*15/1M = 0.003 + 0.0015 + 0.0075 = $0.012 + expect(res.costUsd).toBeCloseTo(0.012, 6); + expect(res.session.bucket.totalUsd).toBeCloseTo(0.012, 6); + expect(res.session.bucket.turnCount).toBe(1); + expect(res.session.bucket.byModel[SONNET]!.inputTokens).toBe(1_000); + expect(res.session.lastCumulative?.inputTokens).toBe(1_000); + expect(res.month.bucket.turnCount).toBe(1); + expect(res.allTime.bucket.turnCount).toBe(1); + }); + + it("accumulates a second turn", () => { + const turn1 = processTurn({ + input: baseInput, + session: undefined, + month: undefined, + allTime: undefined, + }); + const turn2Input: RecordUsageInput = { + ...baseInput, + usage: { + inputTokens: 1_500, + cachedInputTokens: 6_000, + outputTokens: 700, + lastInputTokens: 500, + lastCachedInputTokens: 1_000, + lastOutputTokens: 200, + }, + }; + const res = processTurn({ + input: turn2Input, + session: turn1.session, + month: turn1.month, + allTime: turn1.allTime, + }); + expect(res.applied).toBe(true); + expect(res.session.bucket.turnCount).toBe(2); + // 500*3 + 1000*0.3 + 200*15 = 1500+300+3000 = 4800 / 1M = $0.0048 + expect(res.costUsd).toBeCloseTo(0.0048, 6); + expect(res.session.bucket.totalUsd).toBeCloseTo(0.012 + 0.0048, 6); + }); + + it("is a no-op when no tokens flow (zero deltas)", () => { + const emptyInput: RecordUsageInput = { + threadId: "thread-1", + model: SONNET, 
+ usage: { inputTokens: 0, outputTokens: 0 }, + at, + }; + const res = processTurn({ + input: emptyInput, + session: undefined, + month: undefined, + allTime: undefined, + }); + expect(res.applied).toBe(false); + expect(res.session.bucket.turnCount).toBe(0); + expect(res.costUsd).toBe(0); + }); + + it("buckets by local month", () => { + const marchInput: RecordUsageInput = { + ...baseInput, + at: new Date(2026, 2, 31, 23, 0, 0), // last day of March local + }; + const turn1 = processTurn({ + input: marchInput, + session: undefined, + month: undefined, + allTime: undefined, + }); + expect(turn1.monthKey).toBe("2026-03"); + const aprilInput: RecordUsageInput = { + ...baseInput, + at: new Date(2026, 3, 1, 1, 0, 0), + usage: { + ...baseInput.usage, + inputTokens: 2_000, + cachedInputTokens: 10_000, + outputTokens: 1_000, + lastInputTokens: 1_000, + lastCachedInputTokens: 5_000, + lastOutputTokens: 500, + }, + }; + const turn2 = processTurn({ + input: aprilInput, + session: turn1.session, + // April file is empty — new month means a new month bucket, not last month's. 
+ month: undefined, + allTime: turn1.allTime, + }); + expect(turn2.monthKey).toBe("2026-04"); + expect(turn2.month.bucket.turnCount).toBe(1); + expect(turn2.allTime.bucket.turnCount).toBe(2); + expect(turn2.session.bucket.turnCount).toBe(2); + }); + + it("zero-cost unknown model still records token usage", () => { + const input: RecordUsageInput = { + threadId: "t1", + model: "some-unknown-model", + usage: { + lastInputTokens: 1_000, + lastOutputTokens: 500, + }, + at, + }; + const res = processTurn({ input, session: undefined, month: undefined, allTime: undefined }); + expect(res.applied).toBe(true); + expect(res.costUsd).toBe(0); + expect(res.session.bucket.byModel["some-unknown-model"]!.inputTokens).toBe(1_000); + expect(res.session.bucket.byModel["some-unknown-model"]!.outputTokens).toBe(500); + expect(res.session.bucket.byModel["some-unknown-model"]!.totalUsd).toBe(0); + }); +}); + +describe("isTurnNoOp", () => { + it("detects zero across all tiers", () => { + expect( + isTurnNoOp({ + inputTokens: 0, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + outputTokens: 0, + reasoningOutputTokens: 0, + }), + ).toBe(true); + }); + it("detects non-zero in any tier", () => { + expect( + isTurnNoOp({ + inputTokens: 0, + cachedInputTokens: 1, + cacheCreationInputTokens: 0, + outputTokens: 0, + reasoningOutputTokens: 0, + }), + ).toBe(false); + }); +}); + +describe("sanitizePersistedFile", () => { + it("returns an empty bucket when raw is garbage", () => { + const file = sanitizePersistedFile(null, "session", "thread-1"); + expect(file.bucket.turnCount).toBe(0); + expect(file.kind).toBe("session"); + expect(file.key).toBe("thread-1"); + }); + + it("coerces invalid numeric fields to zero", () => { + const file = sanitizePersistedFile( + { + version: 1, + kind: "session", + key: "t1", + bucket: { + totalUsd: "bad" as unknown as number, + turnCount: -5, + byModel: { + [SONNET]: { + inputTokens: 100, + outputTokens: "bad" as unknown as number, + }, + }, + updatedAt: 
"2026-04-21", + }, + lastCumulative: { + inputTokens: 100, + outputTokens: 50, + }, + }, + "session", + "t1", + ); + expect(file.bucket.totalUsd).toBe(0); + expect(file.bucket.turnCount).toBe(0); + expect(file.bucket.byModel[SONNET]!.outputTokens).toBe(0); + expect(file.lastCumulative?.inputTokens).toBe(100); + }); + + it("drops lastCumulative for non-session files", () => { + const file = sanitizePersistedFile( + { + version: 1, + kind: "month", + key: "2026-04", + bucket: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" }, + lastCumulative: { inputTokens: 1 }, + } as unknown as PersistedCostFile, + "month", + "2026-04", + ); + expect(file.lastCumulative).toBeUndefined(); + }); +}); diff --git a/apps/server/src/cost/Reducer.ts b/apps/server/src/cost/Reducer.ts new file mode 100644 index 0000000000..bb9d4c7cd2 --- /dev/null +++ b/apps/server/src/cost/Reducer.ts @@ -0,0 +1,337 @@ +/** + * Pure cost-tracker reducers. No filesystem, no Effect — just math on plain + * objects so the write-path logic is trivial to unit-test. + */ +import { computeTurnCost, type ProviderKind, type TurnTokenDeltas } from "@t3tools/shared/pricing"; +import type { + CostBucket, + CumulativeUsageSnapshot, + ModelCostEntry, + PersistedCostFile, + PersistedCostFileKind, + RecordUsageInput, + UsageSnapshotLite, +} from "./types.ts"; +import { + emptyCostBucket, + emptyModelCostEntry, + localMonthKey, + zeroCumulativeUsage, +} from "./types.ts"; + +function finiteNonNeg(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : 0; +} + +/** + * Derive the deltas for this turn. Prefers the payload's `lastXxxTokens` + * fields (Codex and post-fix Claude); falls back to subtracting against the + * session file's `lastCumulative` snapshot (older providers / recovered + * sessions). 
+ */ +export function deriveTurnDeltas( + usage: UsageSnapshotLite, + priorCumulative: CumulativeUsageSnapshot | undefined, +): { + readonly deltas: TurnTokenDeltas; + readonly nextCumulative: CumulativeUsageSnapshot; +} { + const hasExplicitLast = + usage.lastInputTokens !== undefined || + usage.lastCachedInputTokens !== undefined || + usage.lastCacheCreationInputTokens !== undefined || + usage.lastOutputTokens !== undefined || + usage.lastReasoningOutputTokens !== undefined; + + const currentCumulative: CumulativeUsageSnapshot = { + inputTokens: finiteNonNeg(usage.inputTokens), + cachedInputTokens: finiteNonNeg(usage.cachedInputTokens), + cacheCreationInputTokens: finiteNonNeg(usage.cacheCreationInputTokens), + outputTokens: finiteNonNeg(usage.outputTokens), + reasoningOutputTokens: finiteNonNeg(usage.reasoningOutputTokens), + }; + + if (hasExplicitLast) { + const deltas: TurnTokenDeltas = { + inputTokens: finiteNonNeg(usage.lastInputTokens), + cachedInputTokens: finiteNonNeg(usage.lastCachedInputTokens), + cacheCreationInputTokens: finiteNonNeg(usage.lastCacheCreationInputTokens), + outputTokens: finiteNonNeg(usage.lastOutputTokens), + reasoningOutputTokens: finiteNonNeg(usage.lastReasoningOutputTokens), + }; + // Next cumulative tracks whatever the payload reports cumulatively. If + // the payload gives lastXxx but not the cumulative totals, roll the + // deltas into the prior cumulative so we still have somewhere to land. + const nextCumulative = + currentCumulative.inputTokens + + currentCumulative.cachedInputTokens + + currentCumulative.cacheCreationInputTokens + + currentCumulative.outputTokens + + currentCumulative.reasoningOutputTokens > + 0 + ? currentCumulative + : addCumulative(priorCumulative ?? zeroCumulativeUsage(), deltas); + return { deltas, nextCumulative }; + } + + const prior = priorCumulative ?? 
zeroCumulativeUsage(); + const deltas: TurnTokenDeltas = { + inputTokens: Math.max(0, currentCumulative.inputTokens - prior.inputTokens), + cachedInputTokens: Math.max(0, currentCumulative.cachedInputTokens - prior.cachedInputTokens), + cacheCreationInputTokens: Math.max( + 0, + currentCumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens, + ), + outputTokens: Math.max(0, currentCumulative.outputTokens - prior.outputTokens), + reasoningOutputTokens: Math.max( + 0, + currentCumulative.reasoningOutputTokens - prior.reasoningOutputTokens, + ), + }; + return { deltas, nextCumulative: currentCumulative }; +} + +function addCumulative( + base: CumulativeUsageSnapshot, + deltas: TurnTokenDeltas, +): CumulativeUsageSnapshot { + return { + inputTokens: base.inputTokens + deltas.inputTokens, + cachedInputTokens: base.cachedInputTokens + deltas.cachedInputTokens, + cacheCreationInputTokens: base.cacheCreationInputTokens + deltas.cacheCreationInputTokens, + outputTokens: base.outputTokens + deltas.outputTokens, + reasoningOutputTokens: base.reasoningOutputTokens + deltas.reasoningOutputTokens, + }; +} + +function addEntry( + entry: ModelCostEntry, + deltas: TurnTokenDeltas, + costUsd: number, +): ModelCostEntry { + return { + inputTokens: entry.inputTokens + deltas.inputTokens, + cachedInputTokens: entry.cachedInputTokens + deltas.cachedInputTokens, + cacheCreationInputTokens: entry.cacheCreationInputTokens + deltas.cacheCreationInputTokens, + outputTokens: entry.outputTokens + deltas.outputTokens, + reasoningOutputTokens: entry.reasoningOutputTokens + deltas.reasoningOutputTokens, + totalUsd: entry.totalUsd + costUsd, + turnCount: entry.turnCount + 1, + }; +} + +export function addTurnToBucket( + bucket: CostBucket, + model: string, + deltas: TurnTokenDeltas, + costUsd: number, + now: Date, +): CostBucket { + const prev = bucket.byModel[model] ?? 
emptyModelCostEntry(); + return { + totalUsd: bucket.totalUsd + costUsd, + turnCount: bucket.turnCount + 1, + byModel: { + ...bucket.byModel, + [model]: addEntry(prev, deltas, costUsd), + }, + updatedAt: now.toISOString(), + }; +} + +/** True when no billable tokens changed — tracker should no-op. */ +export function isTurnNoOp(deltas: TurnTokenDeltas): boolean { + return ( + deltas.inputTokens + + deltas.cachedInputTokens + + deltas.cacheCreationInputTokens + + deltas.outputTokens + + deltas.reasoningOutputTokens <= + 0 + ); +} + +export interface ProcessTurnArgs { + readonly input: RecordUsageInput; + readonly session: PersistedCostFile | undefined; + readonly month: PersistedCostFile | undefined; + readonly allTime: PersistedCostFile | undefined; + readonly now?: Date; +} + +export interface ProcessTurnResult { + readonly session: PersistedCostFile; + readonly month: PersistedCostFile; + readonly allTime: PersistedCostFile; + readonly monthKey: string; + readonly deltas: TurnTokenDeltas; + readonly costUsd: number; + readonly applied: boolean; +} + +/** + * Pure reducer: given the current persisted state for the three buckets and + * one runtime usage event, produce the three updated files. Idempotent when + * the turn contributes zero tokens (returns inputs unchanged). + */ +export function processTurn(args: ProcessTurnArgs): ProcessTurnResult { + const now = args.now ?? args.input.at ?? new Date(); + const monthKey = localMonthKey(now); + + const priorSessionBucket = + args.session?.bucket ?? emptyCostBucket(now); + const priorMonthBucket = args.month?.bucket ?? emptyCostBucket(now); + const priorAllTimeBucket = args.allTime?.bucket ?? emptyCostBucket(now); + + const { deltas, nextCumulative } = deriveTurnDeltas( + args.input.usage, + args.session?.lastCumulative, + ); + + if (isTurnNoOp(deltas)) { + return { + session: { + version: 1, + kind: "session", + key: args.input.threadId, + bucket: priorSessionBucket, + ...(args.session?.lastCumulative + ? 
{ lastCumulative: args.session.lastCumulative } + : {}), + }, + month: { + version: 1, + kind: "month", + key: args.month?.key ?? monthKey, + bucket: priorMonthBucket, + }, + allTime: { + version: 1, + kind: "alltime", + key: "alltime", + bucket: priorAllTimeBucket, + }, + monthKey, + deltas, + costUsd: 0, + applied: false, + }; + } + + const breakdown = computeTurnCost( + args.input.model, + deltas, + args.input.provider as ProviderKind | undefined, + ); + const costUsd = breakdown.totalUsd; + + const nextSession: PersistedCostFile = { + version: 1, + kind: "session", + key: args.input.threadId, + bucket: addTurnToBucket(priorSessionBucket, args.input.model, deltas, costUsd, now), + lastCumulative: nextCumulative, + }; + const nextMonth: PersistedCostFile = { + version: 1, + kind: "month", + key: monthKey, + bucket: addTurnToBucket(priorMonthBucket, args.input.model, deltas, costUsd, now), + }; + const nextAllTime: PersistedCostFile = { + version: 1, + kind: "alltime", + key: "alltime", + bucket: addTurnToBucket(priorAllTimeBucket, args.input.model, deltas, costUsd, now), + }; + + return { + session: nextSession, + month: nextMonth, + allTime: nextAllTime, + monthKey, + deltas, + costUsd, + applied: true, + }; +} + +// ── Sanitization ──────────────────────────────────────────────────────── + +function sanitizeNumber(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 ? 
value : 0;
+}
+
+/**
+ * Coerce one persisted per-model entry. Returns `null` when `raw` is not an
+ * object; otherwise every field is passed through `sanitizeNumber`, so
+ * missing or invalid values become 0.
+ */
+function sanitizeModelEntry(raw: unknown): ModelCostEntry | null {
+  if (!raw || typeof raw !== "object") return null;
+  const r = raw as Record<string, unknown>;
+  return {
+    inputTokens: sanitizeNumber(r.inputTokens),
+    cachedInputTokens: sanitizeNumber(r.cachedInputTokens),
+    cacheCreationInputTokens: sanitizeNumber(r.cacheCreationInputTokens),
+    outputTokens: sanitizeNumber(r.outputTokens),
+    reasoningOutputTokens: sanitizeNumber(r.reasoningOutputTokens),
+    totalUsd: sanitizeNumber(r.totalUsd),
+    turnCount: sanitizeNumber(r.turnCount),
+  };
+}
+
+/**
+ * Coerce a persisted bucket. A non-object `raw` yields a fresh empty bucket
+ * stamped with `now`; otherwise totals are sanitized, unusable model entries
+ * are dropped, and a non-string `updatedAt` falls back to `now`.
+ */
+function sanitizeBucket(raw: unknown, now: Date): CostBucket {
+  if (!raw || typeof raw !== "object") return emptyCostBucket(now);
+  const r = raw as Record<string, unknown>;
+  // Only a plain object can be a model-name → entry map. An array would
+  // otherwise be enumerated by index and its elements stored under "0", "1", …
+  const byModelRaw =
+    r.byModel && typeof r.byModel === "object" && !Array.isArray(r.byModel)
+      ? (r.byModel as Record<string, unknown>)
+      : {};
+  const byModel: Record<string, ModelCostEntry> = {};
+  for (const [model, entry] of Object.entries(byModelRaw)) {
+    if (!model) continue;
+    const cleaned = sanitizeModelEntry(entry);
+    if (cleaned) byModel[model] = cleaned;
+  }
+  return {
+    totalUsd: sanitizeNumber(r.totalUsd),
+    turnCount: sanitizeNumber(r.turnCount),
+    byModel,
+    updatedAt: typeof r.updatedAt === "string" ? r.updatedAt : now.toISOString(),
+  };
+}
+
+/**
+ * Coerce the persisted cumulative snapshot. Returns `undefined` when `raw` is
+ * not an object; missing or invalid counters become 0.
+ */
+function sanitizeLastCumulative(raw: unknown): CumulativeUsageSnapshot | undefined {
+  if (!raw || typeof raw !== "object") return undefined;
+  const r = raw as Record<string, unknown>;
+  return {
+    inputTokens: sanitizeNumber(r.inputTokens),
+    cachedInputTokens: sanitizeNumber(r.cachedInputTokens),
+    cacheCreationInputTokens: sanitizeNumber(r.cacheCreationInputTokens),
+    outputTokens: sanitizeNumber(r.outputTokens),
+    reasoningOutputTokens: sanitizeNumber(r.reasoningOutputTokens),
+  };
+}
+
+/** Parse a JSON blob into a `PersistedCostFile`, swallowing malformed data.
*/
+export function sanitizePersistedFile(
+  raw: unknown,
+  expectedKind: PersistedCostFileKind,
+  expectedKey: string,
+  now: Date = new Date(),
+): PersistedCostFile {
+  // Anything that is not an object (missing file, `null`, a bare scalar)
+  // becomes a fresh empty file for the expected kind and key.
+  if (!raw || typeof raw !== "object") {
+    return {
+      version: 1,
+      kind: expectedKind,
+      key: expectedKey,
+      bucket: emptyCostBucket(now),
+    };
+  }
+  const r = raw as Record<string, unknown>;
+  // `version` and `kind` always take the expected values; whatever the blob
+  // claims for them is ignored. A non-empty persisted `key` is kept.
+  const key = typeof r.key === "string" && r.key.length > 0 ? r.key : expectedKey;
+  const bucket = sanitizeBucket(r.bucket, now);
+  const lastCumulative = sanitizeLastCumulative(r.lastCumulative);
+  return {
+    version: 1,
+    kind: expectedKind,
+    key,
+    bucket,
+    // The cumulative baseline is only retained on session files.
+    ...(lastCumulative && expectedKind === "session" ? { lastCumulative } : {}),
+  };
+}
diff --git a/apps/server/src/cost/Services/CostTracker.ts b/apps/server/src/cost/Services/CostTracker.ts
new file mode 100644
index 0000000000..e6cb746baa
--- /dev/null
+++ b/apps/server/src/cost/Services/CostTracker.ts
@@ -0,0 +1,49 @@
+/**
+ * CostTrackerService - USD + token ledger for every Claude/Codex turn.
+ *
+ * Backed by plain JSON in the server's usage directory (`ServerConfig.usageDir`):
+ *   - `session_<threadId>.json` — per-thread cumulative.
+ *   - `YYYY-MM.json` — month bucket (local tz).
+ *   - `alltime.json` — running total since install.
+ *
+ * Works in dev, installed-app, and standalone binaries because persistence
+ * lives next to the server's SQLite state. Client reads via a snapshot
+ * endpoint; the tracker also exposes a Stream of post-write summaries so
+ * the web UI can subscribe to live updates.
+ *
+ * @module CostTrackerService
+ */
+import { Context } from "effect";
+import type { Effect, Stream } from "effect";
+
+import type { CostSummary, RecordUsageInput } from "../types.ts";
+
+export interface CostTrackerShape {
+  /**
+   * Record a single turn's usage. Idempotent when deltas sum to zero (e.g.
+   * a redelivered no-op snapshot).
Returns the summary after the write so + * the caller can broadcast without a second read. + */ + readonly recordUsage: (input: RecordUsageInput) => Effect.Effect; + + /** + * Read the current summary for a given thread. `threadId` may be omitted + * to get just month + all-time totals (e.g. the user is between threads). + */ + readonly getSummary: (input: { + readonly threadId?: string | undefined; + readonly at?: Date | undefined; + }) => Effect.Effect; + + /** + * Live stream of summaries emitted after each `recordUsage` write. + * Consumers pair it with `getSummary` for the initial value, then follow + * the stream. + */ + readonly updates: Stream.Stream; +} + +export class CostTrackerService extends Context.Service< + CostTrackerService, + CostTrackerShape +>()("t3/cost/Services/CostTracker/CostTrackerService") {} diff --git a/apps/server/src/cost/types.ts b/apps/server/src/cost/types.ts new file mode 100644 index 0000000000..fd640660f6 --- /dev/null +++ b/apps/server/src/cost/types.ts @@ -0,0 +1,120 @@ +/** + * Shared cost-tracker types. Persisted to disk verbatim under + * `//usage/*.json`. Loose interfaces + a sanitizer pass + * — we're the only writer, so round-tripping through Effect.Schema is + * overkill here. The sanitizer tolerates garbage and returns a fresh empty + * bucket rather than crashing. + */ + +/** Running tallies for a single (model, bucket) pair. 
*/ +export interface ModelCostEntry { + readonly inputTokens: number; + readonly cachedInputTokens: number; + readonly cacheCreationInputTokens: number; + readonly outputTokens: number; + readonly reasoningOutputTokens: number; + readonly totalUsd: number; + readonly turnCount: number; +} + +export const emptyModelCostEntry = (): ModelCostEntry => ({ + inputTokens: 0, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + outputTokens: 0, + reasoningOutputTokens: 0, + totalUsd: 0, + turnCount: 0, +}); + +/** + * A cost bucket — used for per-thread (session), per-month, and all-time + * aggregates. Same shape, different persistence files. + */ +export interface CostBucket { + readonly totalUsd: number; + readonly turnCount: number; + readonly byModel: Record; + readonly updatedAt: string; +} + +export const emptyCostBucket = (now: Date = new Date()): CostBucket => ({ + totalUsd: 0, + turnCount: 0, + byModel: {}, + updatedAt: now.toISOString(), +}); + +export type PersistedCostFileKind = "session" | "month" | "alltime"; + +/** Last cumulative usage snapshot — drives delta math when payload lacks lastXxx. */ +export interface CumulativeUsageSnapshot { + readonly inputTokens: number; + readonly cachedInputTokens: number; + readonly cacheCreationInputTokens: number; + readonly outputTokens: number; + readonly reasoningOutputTokens: number; +} + +export const zeroCumulativeUsage = (): CumulativeUsageSnapshot => ({ + inputTokens: 0, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + outputTokens: 0, + reasoningOutputTokens: 0, +}); + +export interface PersistedCostFile { + readonly version: 1; + readonly kind: PersistedCostFileKind; + readonly key: string; + readonly bucket: CostBucket; + /** + * Session files only. Runtime payloads from Claude/Codex carry cumulative + * totals across the whole thread; we subtract this snapshot to get the + * just-completed turn's deltas. 
+ */ + readonly lastCumulative?: CumulativeUsageSnapshot; +} + +export interface CostSummary { + readonly thread: CostBucket | null; + readonly month: CostBucket; + readonly allTime: CostBucket; + readonly monthKey: string; +} + +export interface RecordUsageInput { + readonly threadId: string; + readonly model: string; + readonly usage: UsageSnapshotLite; + readonly provider?: string | undefined; + readonly at?: Date; +} + +/** + * Minimal shape we need from `ThreadTokenUsageSnapshot`; accepting a plain + * record keeps tests independent of the contracts package. + */ +export interface UsageSnapshotLite { + readonly inputTokens?: number | undefined; + readonly cachedInputTokens?: number | undefined; + readonly cacheCreationInputTokens?: number | undefined; + readonly outputTokens?: number | undefined; + readonly reasoningOutputTokens?: number | undefined; + readonly lastInputTokens?: number | undefined; + readonly lastCachedInputTokens?: number | undefined; + readonly lastCacheCreationInputTokens?: number | undefined; + readonly lastOutputTokens?: number | undefined; + readonly lastReasoningOutputTokens?: number | undefined; +} + +/** + * `YYYY-MM` key for a Date in the user's local timezone. Statusline.sh-style + * monthly bucket: rollover on the user's clock, not UTC. + */ +export function localMonthKey(date: Date = new Date()): string { + const year = date.getFullYear().toString().padStart(4, "0"); + const month = (date.getMonth() + 1).toString().padStart(2, "0"); + return `${year}-${month}`; +} diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts index 96bcda1505..bd4e22ef84 100644 --- a/packages/shared/src/pricing.ts +++ b/packages/shared/src/pricing.ts @@ -1,6 +1,8 @@ import { normalizeModelSlug } from "./model.ts"; import type { ProviderKind } from "@t3tools/contracts"; +export type { ProviderKind }; + /** * USD price per 1,000,000 tokens for each token class. 
* From f41104167fa7cef247784655d28a8f4f733aa771 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 21:10:41 +0200 Subject: [PATCH 10/16] feat(cost): server-owned ledger + client migrates off localStorage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the runtime event stream into the new CostTracker and expose the ledger over HTTP so web + desktop + standalone binaries all share the same authoritative cost data. Server (c11 + c12) - ProviderRuntimeIngestion now calls CostTracker.recordUsage after appending the context-window.updated activity. Errors are logged and swallowed so orchestration is never blocked by FS faults. - Model comes from event.payload.model (set by adapters) with a fallback to thread.modelSelection.model. - CostTrackerLive added to the server composition root + wired into test + integration layers (stub mock for server.test.ts). - New GET /api/cost/summary?threadId=X route returns the freshest session + month + all-time summary. CORS handled via the existing browserApi layer. Client (c13) - Drop zustand + localStorage. The old costStore.ts / useCostTracking.ts (plus their tests) are gone — server is now source of truth. - New lib/costQuery.ts: react-query queryOptions + sanitizer for the HTTP response, plus formatUsd utility. Invalidation helper bumps the cache whenever the active thread receives a new context-window.updated activity, so the ring updates within one render of the server write. - ChatComposer replaces useCostTracking/useCostSummary with a useQuery subscription and a tiny effect that invalidates on new usage activities. Plumbs activeProvider through to the meter. - CostMeter: rebuild around the new {thread, month, allTime} shape. Popover now shows session ⋅ MTD ⋅ all-time and gracefully renders "—" for providers without token-usage telemetry (cursor / opencode) instead of a misleading $0. 
Tests: 913 server pass, 906 web pass (26 old localStorage tests deleted, replaced by server-owned CostTracker coverage from c10). --- .../OrchestrationEngineHarness.integration.ts | 5 +- apps/server/src/cost/http.ts | 50 +++ .../Layers/ProviderRuntimeIngestion.test.ts | 5 +- .../Layers/ProviderRuntimeIngestion.ts | 23 ++ apps/server/src/server.test.ts | 20 ++ apps/server/src/server.ts | 4 + apps/web/src/components/chat/ChatComposer.tsx | 51 ++- apps/web/src/components/chat/CostMeter.tsx | 112 ++++-- apps/web/src/lib/costQuery.ts | 202 +++++++++++ apps/web/src/lib/costStore.test.ts | 313 ----------------- apps/web/src/lib/costStore.ts | 328 ------------------ apps/web/src/lib/useCostTracking.test.ts | 160 --------- apps/web/src/lib/useCostTracking.ts | 120 ------- 13 files changed, 427 insertions(+), 966 deletions(-) create mode 100644 apps/server/src/cost/http.ts create mode 100644 apps/web/src/lib/costQuery.ts delete mode 100644 apps/web/src/lib/costStore.test.ts delete mode 100644 apps/web/src/lib/costStore.ts delete mode 100644 apps/web/src/lib/useCostTracking.test.ts delete mode 100644 apps/web/src/lib/useCostTracking.ts diff --git a/apps/server/integration/OrchestrationEngineHarness.integration.ts b/apps/server/integration/OrchestrationEngineHarness.integration.ts index 6f9f4c6f44..7015aeea9e 100644 --- a/apps/server/integration/OrchestrationEngineHarness.integration.ts +++ b/apps/server/integration/OrchestrationEngineHarness.integration.ts @@ -23,6 +23,7 @@ import { } from "effect"; import { CheckpointStoreLive } from "../src/checkpointing/Layers/CheckpointStore.ts"; +import { CostTrackerLive } from "../src/cost/Layers/CostTracker.ts"; import { CheckpointStore } from "../src/checkpointing/Services/CheckpointStore.ts"; import { GitCoreLive } from "../src/git/Layers/GitCore.ts"; import { GitCore, type GitCoreShape } from "../src/git/Services/GitCore.ts"; @@ -359,13 +360,15 @@ export const makeOrchestrationIntegrationHarness = ( }), ), ); + const configLayer = 
ServerConfig.layerTest(workspaceDir, rootDir); const layer = Layer.empty.pipe( Layer.provideMerge(runtimeServicesLayer), Layer.provideMerge(orchestrationReactorLayer), Layer.provide(persistenceLayer), Layer.provideMerge(RepositoryIdentityResolverLive), Layer.provideMerge(ServerSettingsService.layerTest()), - Layer.provideMerge(ServerConfig.layerTest(workspaceDir, rootDir)), + Layer.provideMerge(CostTrackerLive.pipe(Layer.provide(configLayer))), + Layer.provideMerge(configLayer), Layer.provideMerge(NodeServices.layer), ); diff --git a/apps/server/src/cost/http.ts b/apps/server/src/cost/http.ts new file mode 100644 index 0000000000..a8bea550e6 --- /dev/null +++ b/apps/server/src/cost/http.ts @@ -0,0 +1,50 @@ +/** + * HTTP routes for the CostTracker ledger. + * + * One endpoint for now: `GET /api/cost/summary?threadId=X` returning the + * live CostSummary (session + month + all-time). The client refetches on + * each context-window.updated activity; no WS push needed for v1 since the user + * watching their own session is already on a refresh cadence driven by + * the orchestration event stream.
+ */ +import { Effect } from "effect"; +import { HttpRouter, HttpServerRequest, HttpServerResponse } from "effect/unstable/http"; + +import { ServerAuth } from "../auth/Services/ServerAuth.ts"; +import { respondToAuthError } from "../auth/http.ts"; +import { CostTrackerService } from "./Services/CostTracker.ts"; +import { localMonthKey } from "./types.ts"; + +export const costSummaryRouteLayer = HttpRouter.add( + "GET", + "/api/cost/summary", + Effect.gen(function* () { + const request = yield* HttpServerRequest.HttpServerRequest; + const serverAuth = yield* ServerAuth; + yield* serverAuth.authenticateHttpRequest(request); + + const tracker = yield* CostTrackerService; + + const url = HttpServerRequest.toURL(request); + const threadId = (() => { + if (url._tag === "None") return undefined; + const raw = url.value.searchParams.get("threadId"); + return typeof raw === "string" && raw.length > 0 ? raw : undefined; + })(); + + const summary = yield* tracker.getSummary({ + threadId, + at: new Date(), + }); + + return HttpServerResponse.jsonUnsafe( + { + monthKey: summary.monthKey ?? 
localMonthKey(), + thread: summary.thread, + month: summary.month, + allTime: summary.allTime, + }, + { status: 200 }, + ); + }).pipe(Effect.catchTag("AuthError", respondToAuthError)), +); diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index 577c5050ea..f334ad5ff1 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -33,6 +33,7 @@ import { RepositoryIdentityResolverLive } from "../../project/Layers/RepositoryI import { OrchestrationEngineLive } from "./OrchestrationEngine.ts"; import { OrchestrationProjectionPipelineLive } from "./ProjectionPipeline.ts"; import { OrchestrationProjectionSnapshotQueryLive } from "./ProjectionSnapshotQuery.ts"; +import { CostTrackerLive } from "../../cost/Layers/CostTracker.ts"; import { ProviderRuntimeIngestionLive } from "./ProviderRuntimeIngestion.ts"; import { OrchestrationEngineService, @@ -208,12 +209,14 @@ describe("ProviderRuntimeIngestion", () => { Layer.provide(RepositoryIdentityResolverLive), Layer.provide(SqlitePersistenceMemory), ); + const configLayer = ServerConfig.layerTest(process.cwd(), process.cwd()); const layer = ProviderRuntimeIngestionLive.pipe( Layer.provideMerge(orchestrationLayer), Layer.provideMerge(SqlitePersistenceMemory), Layer.provideMerge(Layer.succeed(ProviderService, provider.service)), Layer.provideMerge(makeTestServerSettingsLayer(options?.serverSettings)), - Layer.provideMerge(ServerConfig.layerTest(process.cwd(), process.cwd())), + Layer.provideMerge(CostTrackerLive.pipe(Layer.provide(configLayer))), + Layer.provideMerge(configLayer), Layer.provideMerge(NodeServices.layer), ); runtime = ManagedRuntime.make(layer); diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts index 7eeeed2d51..053f04669f 
100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts @@ -16,6 +16,7 @@ import { import { Cache, Cause, Duration, Effect, Layer, Option, Stream } from "effect"; import { makeDrainableWorker } from "@t3tools/shared/DrainableWorker"; +import { CostTrackerService } from "../../cost/Services/CostTracker.ts"; import { ProviderService } from "../../provider/Services/ProviderService.ts"; import { ProjectionTurnRepository } from "../../persistence/Services/ProjectionTurns.ts"; import { ProjectionTurnRepositoryLive } from "../../persistence/Layers/ProjectionTurns.ts"; @@ -525,6 +526,7 @@ const make = Effect.gen(function* () { const providerService = yield* ProviderService; const projectionTurnRepository = yield* ProjectionTurnRepository; const serverSettingsService = yield* ServerSettingsService; + const costTracker = yield* CostTrackerService; const turnMessageIdsByTurnKey = yield* Cache.make>({ capacity: TURN_MESSAGE_IDS_BY_TURN_CACHE_CAPACITY, @@ -1519,6 +1521,23 @@ const make = Effect.gen(function* () { createdAt: activity.createdAt, }), ).pipe(Effect.asVoid); + + // Side-channel: feed token usage into the CostTracker so the JSON + // ledger stays in sync with the activity stream. Failures never block + // ingestion — we log and drop. + if (event.type === "thread.token-usage.updated") { + const model = event.payload.model ?? 
thread.modelSelection.model; + const provider = thread.modelSelection.provider; + yield* costTracker + .recordUsage({ + threadId: thread.id, + model, + provider, + usage: event.payload.usage, + at: new Date(event.createdAt), + }) + .pipe(Effect.asVoid, Effect.ignoreCause({ log: true })); + } }); const processDomainEvent = (_event: TurnStartRequestedDomainEvent) => Effect.void; @@ -1570,3 +1589,7 @@ export const ProviderRuntimeIngestionLive = Layer.effect( ProviderRuntimeIngestionService, make, ).pipe(Layer.provide(ProjectionTurnRepositoryLive)); + +// Note: CostTrackerLive must be provided in the composition root (bin.ts or +// server runtime layer). Keeping it out of ProviderRuntimeIngestionLive keeps +// the dep graph explicit and lets tests substitute a stub CostTracker. diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index 47e159d303..e7b05dfc8a 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -51,6 +51,7 @@ import { vi } from "vitest"; import type { ServerConfigShape } from "./config.ts"; import { deriveServerPaths, ServerConfig } from "./config.ts"; import { makeRoutesLayer } from "./server.ts"; +import { CostTrackerService } from "./cost/Services/CostTracker.ts"; import { resolveAttachmentRelativePath } from "./attachmentPaths.ts"; import { CheckpointDiffQuery, @@ -504,6 +505,25 @@ const buildAppUnderTest = (options?: { ); const appLayer = servedRoutesLayer.pipe( + Layer.provide( + Layer.mock(CostTrackerService)({ + recordUsage: () => + Effect.succeed({ + thread: null, + month: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" }, + allTime: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" }, + monthKey: "1970-01", + }), + getSummary: () => + Effect.succeed({ + thread: null, + month: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" }, + allTime: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" }, + monthKey: "1970-01", + }), + updates: Stream.empty, + }), + ), 
Layer.provide( Layer.mock(BrowserTraceCollector)({ record: () => Effect.void, diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index f94bbb34b5..0530dce3b3 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -36,6 +36,7 @@ import { TerminalManagerLive } from "./terminal/Layers/Manager.ts"; import { GitManagerLive } from "./git/Layers/GitManager.ts"; import { KeybindingsLive } from "./keybindings.ts"; import { ServerRuntimeStartup, ServerRuntimeStartupLive } from "./serverRuntimeStartup.ts"; +import { CostTrackerLive } from "./cost/Layers/CostTracker.ts"; import { OrchestrationReactorLive } from "./orchestration/Layers/OrchestrationReactor.ts"; import { RuntimeReceiptBusLive } from "./orchestration/Layers/RuntimeReceiptBus.ts"; import { ProviderRuntimeIngestionLive } from "./orchestration/Layers/ProviderRuntimeIngestion.ts"; @@ -76,6 +77,7 @@ import { orchestrationDispatchRouteLayer, orchestrationSnapshotRouteLayer, } from "./orchestration/http.ts"; +import { costSummaryRouteLayer } from "./cost/http.ts"; import { NetService } from "@t3tools/shared/Net"; const PtyAdapterLive = Layer.unwrap( @@ -133,6 +135,7 @@ const ReactorLayerLive = Layer.empty.pipe( Layer.provideMerge(CheckpointReactorLive), Layer.provideMerge(ThreadDeletionReactorLive), Layer.provideMerge(RuntimeReceiptBusLive), + Layer.provideMerge(CostTrackerLive), ); const CheckpointingLayerLive = Layer.empty.pipe( @@ -263,6 +266,7 @@ export const makeRoutesLayer = Layer.mergeAll( authSessionRouteLayer, authWebSocketTokenRouteLayer, attachmentsRouteLayer, + costSummaryRouteLayer, orchestrationDispatchRouteLayer, orchestrationSnapshotRouteLayer, otlpTracesProxyRouteLayer, diff --git a/apps/web/src/components/chat/ChatComposer.tsx b/apps/web/src/components/chat/ChatComposer.tsx index da3184e8ad..8e5471e5a9 100644 --- a/apps/web/src/components/chat/ChatComposer.tsx +++ b/apps/web/src/components/chat/ChatComposer.tsx @@ -29,7 +29,7 @@ import { useRef, useState, } from 
"react"; -import { useQuery } from "@tanstack/react-query"; +import { useQuery, useQueryClient } from "@tanstack/react-query"; import { useDebouncedValue } from "@tanstack/react-pacer"; import { projectSearchEntriesQueryOptions } from "~/lib/projectReactQuery"; import { @@ -103,8 +103,12 @@ import type { SessionPhase, Thread } from "../../types"; import type { PendingUserInputDraftAnswer } from "../../pendingUserInput"; import type { PendingApproval, PendingUserInput } from "../../session-logic"; import { deriveLatestContextWindowSnapshot } from "../../lib/contextWindow"; -import { useCostSummary, type CostSummary } from "../../lib/costStore"; -import { useCostTracking } from "../../lib/useCostTracking"; +import { + costSummaryQueryOptions, + invalidateCostSummary, + EMPTY_COST_SUMMARY, + type CostSummary, +} from "../../lib/costQuery"; import { formatProviderSkillDisplayName } from "../../providerSkillPresentation"; import { searchProviderSkills } from "../../providerSkillSearch"; @@ -273,6 +277,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions( compact: boolean; activeContextWindow: ReturnType; costSummary: CostSummary; + activeProvider: ProviderKind | null; isPreparingWorktree: boolean; pendingAction: { questionIndex: number; @@ -294,7 +299,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions( return ( <> {props.activeContextWindow ? : null} - + {props.isPreparingWorktree ? ( Preparing worktree... ) : null} @@ -476,7 +481,7 @@ export const ChatComposer = memo( routeThreadRef, draftId, activeThreadId, - activeThreadEnvironmentId: _activeThreadEnvironmentId, + activeThreadEnvironmentId, activeThread, isServerThread: _isServerThread, isLocalDraftThread: _isLocalDraftThread, @@ -646,9 +651,40 @@ export const ChatComposer = memo( // ------------------------------------------------------------------ // Cost tracking (session + month-to-date spend) + // Server owns the ledger (apps/server/src/cost/*). 
Client fetches + // summary + invalidates on each new context-window.updated activity + // so the ring reflects the freshest write. // ------------------------------------------------------------------ - useCostTracking(activeThreadId, activeThreadActivities, activeThreadModelSelection); - const costSummary = useCostSummary(activeThreadId); + const costQueryClient = useQueryClient(); + const costSummaryQuery = useQuery( + costSummaryQueryOptions({ + environmentId: activeThreadEnvironmentId ?? null, + threadId: activeThreadId, + }), + ); + const costSummary: CostSummary = costSummaryQuery.data ?? EMPTY_COST_SUMMARY; + const latestContextWindowActivityId = useMemo(() => { + if (!activeThreadActivities) return null; + for (let index = activeThreadActivities.length - 1; index >= 0; index -= 1) { + const activity = activeThreadActivities[index]; + if (activity?.kind === "context-window.updated") { + return String(activity.id); + } + } + return null; + }, [activeThreadActivities]); + useEffect(() => { + if (!latestContextWindowActivityId || !activeThreadEnvironmentId) return; + void invalidateCostSummary(costQueryClient, { + environmentId: activeThreadEnvironmentId, + threadId: activeThreadId, + }); + }, [ + latestContextWindowActivityId, + activeThreadEnvironmentId, + activeThreadId, + costQueryClient, + ]); // ------------------------------------------------------------------ // Composer-local state @@ -1965,6 +2001,7 @@ export const ChatComposer = memo( compact={isComposerPrimaryActionsCompact} activeContextWindow={activeContextWindow} costSummary={costSummary} + activeProvider={activeThreadModelSelection?.provider ?? selectedProvider ?? 
null} pendingAction={pendingPrimaryAction} isRunning={phase === "running"} showPlanFollowUpPrompt={ diff --git a/apps/web/src/components/chat/CostMeter.tsx b/apps/web/src/components/chat/CostMeter.tsx index 6c52d60762..b7646f6415 100644 --- a/apps/web/src/components/chat/CostMeter.tsx +++ b/apps/web/src/components/chat/CostMeter.tsx @@ -1,5 +1,7 @@ +import type { ProviderKind } from "@t3tools/contracts"; + import { cn } from "~/lib/utils"; -import { formatUsd, type CostSummary } from "~/lib/costStore"; +import { formatUsd, type CostSummary } from "~/lib/costQuery"; import { Popover, PopoverPopup, PopoverTrigger } from "../ui/popover"; function readBudget(): number | null { @@ -25,28 +27,52 @@ function formatPercentage(value: number): string { return `${Math.round(value)}%`; } -export function CostMeter(props: { summary: CostSummary }) { - const { summary } = props; +/** + * Providers whose server adapters don't yet emit token-usage events. We + * surface "—" to avoid a misleading $0. (See c15: full provider-variance + * UI in a follow-up commit.) + */ +const PROVIDERS_WITHOUT_USAGE_TELEMETRY = new Set(["cursor", "opencode"]); + +export function CostMeter(props: { + summary: CostSummary; + activeProvider?: ProviderKind | null | undefined; +}) { + const { summary, activeProvider } = props; const budget = readBudget(); - // Ring: if budget set, fill by MTD/budget ratio; else fill by bucket of - // session-vs-month (bounded 0–100) so it still animates. - const ratio = budget - ? Math.min(100, (summary.monthUsd / budget) * 100) - : summary.monthUsd <= 0 - ? 0 - : Math.min(100, Math.log10(summary.monthUsd + 1) * 25); + const sessionUsd = summary.thread?.totalUsd ?? 0; + const sessionTurnCount = summary.thread?.turnCount ?? 0; + const monthUsd = summary.month.totalUsd; + const averagePerTurnUsd = sessionTurnCount > 0 ? sessionUsd / sessionTurnCount : null; + const providerUnsupported = activeProvider + ? 
PROVIDERS_WITHOUT_USAGE_TELEMETRY.has(activeProvider) + : false; + + const ratio = providerUnsupported + ? 0 + : budget + ? Math.min(100, (monthUsd / budget) * 100) + : monthUsd <= 0 + ? 0 + : Math.min(100, Math.log10(monthUsd + 1) * 25); const radius = 9.75; const circumference = 2 * Math.PI * radius; const dashOffset = circumference - (ratio / 100) * circumference; - const overBudget = budget ? summary.monthUsd >= budget : false; + const overBudget = budget ? monthUsd >= budget : false; + const centerLabel = providerUnsupported + ? "—" + : monthUsd > 0 + ? formatCompactUsd(monthUsd) + : "$0"; - const centerLabel = summary.monthUsd > 0 ? formatCompactUsd(summary.monthUsd) : "$0"; - const ariaLabel = budget - ? `Cost ${formatUsd(summary.monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(ratio)})` - : `Cost ${formatUsd(summary.monthUsd)} this month, ${formatUsd(summary.sessionUsd)} this session`; + const ariaLabel = providerUnsupported + ? `Cost tracking unavailable for ${activeProvider}` + : budget + ? `Cost ${formatUsd(monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(ratio)})` + : `Cost ${formatUsd(monthUsd)} this month, ${formatUsd(sessionUsd)} this session`; return ( @@ -104,28 +130,42 @@ export function CostMeter(props: { summary: CostSummary }) {
Cost
-
- {formatUsd(summary.sessionUsd)} - session - - {formatUsd(summary.monthUsd)} - MTD -
- {budget ? ( -
- Budget: {formatUsd(budget)} ({formatPercentage(ratio)} used) -
- ) : null} - {summary.sessionTurnCount > 0 && summary.averagePerTurnUsd !== null ? ( + {providerUnsupported ? (
- {summary.sessionTurnCount} - {summary.sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "} - {formatUsd(summary.averagePerTurnUsd)}/turn avg + Usage telemetry not available for this provider.
- ) : null} - {summary.month.turnCount > 0 ? ( - - ) : null} + ) : ( + <> +
+ {formatUsd(sessionUsd)} + session + + {formatUsd(monthUsd)} + MTD + + {formatUsd(summary.allTime.totalUsd)} + all-time +
+ {budget ? ( +
+ Budget: {formatUsd(budget)} ({formatPercentage(ratio)} used) +
+ ) : null} + {sessionTurnCount > 0 && averagePerTurnUsd !== null ? ( +
+ {sessionTurnCount} + {sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "} + {formatUsd(averagePerTurnUsd)}/turn avg +
+ ) : null} + {summary.month.turnCount > 0 ? : null} + + )}
@@ -134,7 +174,7 @@ export function CostMeter(props: { summary: CostSummary }) { function ModelBreakdown(props: { summary: CostSummary }) { const entries = Object.entries(props.summary.month.byModel) - .filter(([, entry]) => entry.totalUsd > 0) + .filter(([, entry]) => entry.totalUsd > 0 || entry.turnCount > 0) .sort((left, right) => right[1].totalUsd - left[1].totalUsd); if (entries.length === 0) return null; return ( diff --git a/apps/web/src/lib/costQuery.ts b/apps/web/src/lib/costQuery.ts new file mode 100644 index 0000000000..9c301363f6 --- /dev/null +++ b/apps/web/src/lib/costQuery.ts @@ -0,0 +1,202 @@ +/** + * Cost summary queries. + * + * Reads from the server's `/api/cost/summary` endpoint. Server owns the + * ledger (see apps/server/src/cost/*) so the client is a read-only + * consumer — localStorage is no longer involved. + * + * React Query caches the summary per (environment, thread). The composer + * invalidates this query whenever the active thread receives a new + * `context-window.updated` activity so the ring updates in near-realtime. + */ +import type { EnvironmentId, ThreadId } from "@t3tools/contracts"; +import { + queryOptions, + type QueryClient, + useQueryClient, +} from "@tanstack/react-query"; + +import { resolveEnvironmentHttpUrl } from "../environments/runtime"; + +const COST_SUMMARY_STALE_TIME_MS = 5_000; + +/** Bucket shape mirrors apps/server/src/cost/types.ts. Kept duplicated so + * the client doesn't import server-only modules. 
*/ +export interface ModelCostEntry { + readonly inputTokens: number; + readonly cachedInputTokens: number; + readonly cacheCreationInputTokens: number; + readonly outputTokens: number; + readonly reasoningOutputTokens: number; + readonly totalUsd: number; + readonly turnCount: number; +} + +export interface CostBucket { + readonly totalUsd: number; + readonly turnCount: number; + readonly byModel: Record; + readonly updatedAt: string; +} + +export interface CostSummary { + readonly monthKey: string; + readonly thread: CostBucket | null; + readonly month: CostBucket; + readonly allTime: CostBucket; +} + +export const emptyBucket = (): CostBucket => ({ + totalUsd: 0, + turnCount: 0, + byModel: {}, + updatedAt: "", +}); + +const monthKeyNow = () => { + const now = new Date(); + return `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, "0")}`; +}; + +export const EMPTY_COST_SUMMARY: CostSummary = { + monthKey: monthKeyNow(), + thread: null, + month: emptyBucket(), + allTime: emptyBucket(), +}; + +export const costQueryKeys = { + all: ["cost"] as const, + summary: (environmentId: EnvironmentId | null, threadId: ThreadId | null) => + ["cost", "summary", environmentId ?? null, threadId ?? null] as const, +}; + +async function fetchCostSummary(input: { + readonly environmentId: EnvironmentId; + readonly threadId: ThreadId | null; + readonly signal?: AbortSignal; +}): Promise { + const url = resolveEnvironmentHttpUrl({ + environmentId: input.environmentId, + pathname: "/api/cost/summary", + searchParams: input.threadId ? { threadId: String(input.threadId) } : {}, + }); + const response = await fetch(url, { + method: "GET", + credentials: "include", + ...(input.signal ? 
{ signal: input.signal } : {}), + }); + if (!response.ok) { + throw new Error(`Failed to load cost summary: HTTP ${response.status}`); + } + const raw = (await response.json()) as unknown; + return sanitizeSummary(raw); +} + +function sanitizeSummary(raw: unknown): CostSummary { + if (!raw || typeof raw !== "object") return EMPTY_COST_SUMMARY; + const r = raw as Record; + return { + monthKey: typeof r.monthKey === "string" ? r.monthKey : monthKeyNow(), + thread: sanitizeBucketOrNull(r.thread), + month: sanitizeBucket(r.month), + allTime: sanitizeBucket(r.allTime), + }; +} + +function sanitizeBucket(raw: unknown): CostBucket { + if (!raw || typeof raw !== "object") return emptyBucket(); + const r = raw as Record; + const byModelRaw = (r.byModel ?? {}) as Record; + const byModel: Record = {}; + for (const [model, entry] of Object.entries(byModelRaw)) { + if (!model || !entry || typeof entry !== "object") continue; + byModel[model] = sanitizeEntry(entry); + } + return { + totalUsd: toNonNeg(r.totalUsd), + turnCount: toNonNeg(r.turnCount), + byModel, + updatedAt: typeof r.updatedAt === "string" ? r.updatedAt : "", + }; +} + +function sanitizeBucketOrNull(raw: unknown): CostBucket | null { + if (!raw || typeof raw !== "object") return null; + return sanitizeBucket(raw); +} + +function sanitizeEntry(raw: unknown): ModelCostEntry { + const r = raw as Record; + return { + inputTokens: toNonNeg(r.inputTokens), + cachedInputTokens: toNonNeg(r.cachedInputTokens), + cacheCreationInputTokens: toNonNeg(r.cacheCreationInputTokens), + outputTokens: toNonNeg(r.outputTokens), + reasoningOutputTokens: toNonNeg(r.reasoningOutputTokens), + totalUsd: toNonNeg(r.totalUsd), + turnCount: toNonNeg(r.turnCount), + }; +} + +function toNonNeg(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 ? 
value : 0; +} + +export function costSummaryQueryOptions(input: { + readonly environmentId: EnvironmentId | null; + readonly threadId: ThreadId | null; +}) { + return queryOptions({ + queryKey: costQueryKeys.summary(input.environmentId, input.threadId), + queryFn: ({ signal }) => { + if (!input.environmentId) { + return Promise.resolve(EMPTY_COST_SUMMARY); + } + return fetchCostSummary({ + environmentId: input.environmentId, + threadId: input.threadId, + signal, + }); + }, + enabled: input.environmentId !== null, + staleTime: COST_SUMMARY_STALE_TIME_MS, + placeholderData: EMPTY_COST_SUMMARY, + }); +} + +/** Invalidate the cost query for a specific thread (or all threads if omitted). */ +export function invalidateCostSummary( + queryClient: QueryClient, + input?: { + readonly environmentId?: EnvironmentId | null; + readonly threadId?: ThreadId | null; + }, +) { + if (input?.environmentId !== undefined || input?.threadId !== undefined) { + return queryClient.invalidateQueries({ + queryKey: costQueryKeys.summary(input.environmentId ?? null, input.threadId ?? null), + }); + } + return queryClient.invalidateQueries({ queryKey: costQueryKeys.all }); +} + +/** Convenience hook returning the invalidator for consumers outside React Query's mutation flow. */ +export function useInvalidateCostSummary() { + const queryClient = useQueryClient(); + return (input?: { + readonly environmentId?: EnvironmentId | null; + readonly threadId?: ThreadId | null; + }) => invalidateCostSummary(queryClient, input); +} + +/** Format USD for UI; kept here so the component imports one utility module. 
*/ +export function formatUsd(value: number | null | undefined): string { + if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) { + return "$0.00"; + } + if (value < 0.01) return "<$0.01"; + if (value < 1) return `$${value.toFixed(3).replace(/0$/, "")}`; + if (value < 100) return `$${value.toFixed(2)}`; + return `$${Math.round(value).toLocaleString("en-US")}`; +} diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts deleted file mode 100644 index 0602f2ce6a..0000000000 --- a/apps/web/src/lib/costStore.test.ts +++ /dev/null @@ -1,313 +0,0 @@ -import { beforeEach, describe, expect, it } from "vitest"; - -import { - COST_STORE_STORAGE_KEY, - localMonthKey, - reduceRecordTurnCost, - reduceResetSession, - sanitizePersistedCostState, - selectCostSummary, - useCostStore, - type PersistedCostState, -} from "./costStore"; - -function freshState(): PersistedCostState { - return { version: 1, sessions: {}, months: {} }; -} - -const cost = (total: number) => ({ - inputUsd: 0, - cachedUsd: 0, - cacheCreationUsd: 0, - outputUsd: 0, - reasoningUsd: 0, - totalUsd: total, -}); - -const deltas = ( - d: Partial<{ - inputTokens: number; - cachedInputTokens: number; - cacheCreationInputTokens: number; - outputTokens: number; - reasoningOutputTokens: number; - }> = {}, -) => ({ - inputTokens: d.inputTokens ?? 0, - cachedInputTokens: d.cachedInputTokens ?? 0, - cacheCreationInputTokens: d.cacheCreationInputTokens ?? 0, - outputTokens: d.outputTokens ?? 0, - reasoningOutputTokens: d.reasoningOutputTokens ?? 
0, -}); - -describe("localMonthKey", () => { - it("formats YYYY-MM in local tz", () => { - const date = new Date(2026, 3, 7, 12, 0, 0); // April 7 2026 local - expect(localMonthKey(date)).toBe("2026-04"); - }); - - it("pads single-digit months", () => { - const date = new Date(2026, 0, 1, 0, 0, 0); - expect(localMonthKey(date)).toBe("2026-01"); - }); -}); - -describe("reduceRecordTurnCost", () => { - const at = new Date(2026, 3, 21, 10, 0, 0); // April 21 2026 - - it("accumulates into session + month bucket", () => { - let state = freshState(); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }), - breakdown: cost(0.01), - at, - }); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ inputTokens: 500, outputTokens: 200 }), - breakdown: cost(0.005), - at, - }); - - const session = state.sessions["t1"]!; - expect(session.totalUsd).toBeCloseTo(0.015, 6); - expect(session.turnCount).toBe(2); - expect(session.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(1_500); - expect(session.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(700); - expect(session.byModel["claude-sonnet-4-6"]!.turnCount).toBe(2); - - const month = state.months["2026-04"]!; - expect(month.totalUsd).toBeCloseTo(0.015, 6); - expect(month.turnCount).toBe(2); - }); - - it("keeps per-model tallies separate", () => { - let state = freshState(); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at, - }); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "gpt-5.4", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.02), - at, - }); - const session = state.sessions["t1"]!; - expect(Object.keys(session.byModel).sort()).toEqual(["claude-sonnet-4-6", "gpt-5.4"]); - expect(session.totalUsd).toBeCloseTo(0.03, 6); - }); - - it("isolates 
sessions by threadId", () => { - let state = freshState(); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at, - }); - state = reduceRecordTurnCost(state, { - threadId: "t2", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.02), - at, - }); - expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6); - expect(state.sessions["t2"]!.totalUsd).toBeCloseTo(0.02, 6); - // Month aggregates both sessions. - expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.03, 6); - }); - - it("buckets by local month", () => { - let state = freshState(); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at: new Date(2026, 2, 31, 10, 0, 0), // March - }); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.02), - at: new Date(2026, 3, 1, 10, 0, 0), // April - }); - expect(Object.keys(state.months).sort()).toEqual(["2026-03", "2026-04"]); - expect(state.months["2026-03"]!.totalUsd).toBeCloseTo(0.01, 6); - expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.02, 6); - // Session spans both months. 
- expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.03, 6); - }); - - it("ignores zero-token zero-cost turns", () => { - const before = freshState(); - const after = reduceRecordTurnCost(before, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas(), - breakdown: cost(0), - at, - }); - expect(after).toBe(before); - }); - - it("ignores blank threadId / model", () => { - const before = freshState(); - const a = reduceRecordTurnCost(before, { - threadId: "", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 10 }), - breakdown: cost(0.01), - at, - }); - const b = reduceRecordTurnCost(before, { - threadId: "t1", - model: "", - deltas: deltas({ outputTokens: 10 }), - breakdown: cost(0.01), - at, - }); - expect(a).toBe(before); - expect(b).toBe(before); - }); -}); - -describe("reduceResetSession", () => { - it("removes the session but keeps month", () => { - let state = freshState(); - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at: new Date(2026, 3, 21, 10, 0, 0), - }); - const next = reduceResetSession(state, "t1"); - expect(next.sessions["t1"]).toBeUndefined(); - expect(next.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6); - }); - - it("no-op for unknown threadId", () => { - const state = freshState(); - expect(reduceResetSession(state, "nope")).toBe(state); - }); -}); - -describe("sanitizePersistedCostState", () => { - it("returns initial for garbage", () => { - expect(sanitizePersistedCostState(null).sessions).toEqual({}); - expect(sanitizePersistedCostState("bad").months).toEqual({}); - expect(sanitizePersistedCostState({ version: 99 }).months).toEqual({}); - }); - - it("drops invalid month keys", () => { - const cleaned = sanitizePersistedCostState({ - version: 1, - sessions: {}, - months: { - "2026-04": { totalUsd: 1, turnCount: 1, byModel: {} }, - "bogus": { totalUsd: 99, turnCount: 1, byModel: {} }, - }, - }); - 
expect(Object.keys(cleaned.months)).toEqual(["2026-04"]); - }); - - it("coerces non-finite numbers to zero", () => { - const cleaned = sanitizePersistedCostState({ - version: 1, - sessions: { - t1: { - totalUsd: Number.NaN, - turnCount: -5, - byModel: { - "claude-sonnet-4-6": { - inputTokens: "abc", - outputTokens: 10, - totalUsd: 5, - turnCount: 1, - }, - }, - }, - }, - months: {}, - }); - const s = cleaned.sessions["t1"]!; - expect(s.totalUsd).toBe(0); - expect(s.turnCount).toBe(0); - expect(s.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(0); - expect(s.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(10); - expect(s.byModel["claude-sonnet-4-6"]!.totalUsd).toBe(5); - }); -}); - -describe("selectCostSummary", () => { - it("returns zero summary for empty state", () => { - const summary = selectCostSummary(freshState(), "t1", new Date(2026, 3, 21)); - expect(summary.sessionUsd).toBe(0); - expect(summary.monthUsd).toBe(0); - expect(summary.averagePerTurnUsd).toBeNull(); - expect(summary.monthKey).toBe("2026-04"); - }); - - it("computes average per turn", () => { - let state = freshState(); - for (let i = 0; i < 4; i += 1) { - state = reduceRecordTurnCost(state, { - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at: new Date(2026, 3, 21), - }); - } - const summary = selectCostSummary(state, "t1", new Date(2026, 3, 21)); - expect(summary.sessionUsd).toBeCloseTo(0.04, 6); - expect(summary.averagePerTurnUsd).toBeCloseTo(0.01, 6); - expect(summary.sessionTurnCount).toBe(4); - }); -}); - -describe("useCostStore (zustand)", () => { - beforeEach(() => { - useCostStore.getState().resetAll(); - if (typeof window !== "undefined") { - window.localStorage.removeItem(COST_STORE_STORAGE_KEY); - } - }); - - it("records turn cost via action", () => { - useCostStore.getState().recordTurnCost({ - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }), - breakdown: 
cost(0.01), - at: new Date(2026, 3, 21), - }); - const state = useCostStore.getState(); - expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6); - expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6); - }); - - it("resetSession clears one thread", () => { - useCostStore.getState().recordTurnCost({ - threadId: "t1", - model: "claude-sonnet-4-6", - deltas: deltas({ outputTokens: 100 }), - breakdown: cost(0.01), - at: new Date(2026, 3, 21), - }); - useCostStore.getState().resetSession("t1"); - expect(useCostStore.getState().sessions["t1"]).toBeUndefined(); - }); -}); diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts deleted file mode 100644 index 509276b5e2..0000000000 --- a/apps/web/src/lib/costStore.ts +++ /dev/null @@ -1,328 +0,0 @@ -import { Debouncer } from "@tanstack/react-pacer"; -import { create } from "zustand"; -import type { TurnCostBreakdown, TurnTokenDeltas } from "@t3tools/shared/pricing"; -import { formatUsd } from "@t3tools/shared/pricing"; - -export const COST_STORE_STORAGE_KEY = "t3code:cost-store:v1"; - -/** Cumulative token counts + USD spend for one model within a bucket. */ -export interface ModelCostEntry { - inputTokens: number; - cachedInputTokens: number; - outputTokens: number; - reasoningOutputTokens: number; - totalUsd: number; - turnCount: number; -} - -export interface CostBucket { - totalUsd: number; - turnCount: number; - byModel: Record; -} - -export interface PersistedCostState { - version: 1; - sessions: Record; - months: Record; -} - -export interface CostStoreState extends PersistedCostState { - recordTurnCost: (input: RecordTurnCostInput) => void; - resetSession: (threadId: string) => void; - resetAll: () => void; - /** Test-only hook: replace state atomically. 
*/ - __replaceState: (next: PersistedCostState) => void; -} - -export interface RecordTurnCostInput { - threadId: string; - model: string; - deltas: TurnTokenDeltas; - breakdown: TurnCostBreakdown; - /** Override "now" for deterministic tests. */ - at?: Date; -} - -const emptyBucket: () => CostBucket = () => ({ totalUsd: 0, turnCount: 0, byModel: {} }); -const emptyModelEntry: () => ModelCostEntry = () => ({ - inputTokens: 0, - cachedInputTokens: 0, - outputTokens: 0, - reasoningOutputTokens: 0, - totalUsd: 0, - turnCount: 0, -}); - -const initialState: PersistedCostState = { - version: 1, - sessions: {}, - months: {}, -}; - -/** - * Compute `YYYY-MM` key for a Date in the **local** timezone. - * Done via `getFullYear/getMonth` (not toISOString) so the month rolls over - * on the user's clock, not UTC's. - */ -export function localMonthKey(date: Date = new Date()): string { - const year = date.getFullYear().toString().padStart(4, "0"); - const month = (date.getMonth() + 1).toString().padStart(2, "0"); - return `${year}-${month}`; -} - -function addTurnToEntry( - entry: ModelCostEntry, - deltas: TurnTokenDeltas, - breakdown: TurnCostBreakdown, -): ModelCostEntry { - return { - inputTokens: entry.inputTokens + deltas.inputTokens, - cachedInputTokens: entry.cachedInputTokens + deltas.cachedInputTokens, - outputTokens: entry.outputTokens + deltas.outputTokens, - reasoningOutputTokens: entry.reasoningOutputTokens + deltas.reasoningOutputTokens, - totalUsd: entry.totalUsd + breakdown.totalUsd, - turnCount: entry.turnCount + 1, - }; -} - -function addTurnToBucket( - bucket: CostBucket, - model: string, - deltas: TurnTokenDeltas, - breakdown: TurnCostBreakdown, -): CostBucket { - const existing = bucket.byModel[model] ?? 
emptyModelEntry(); - return { - totalUsd: bucket.totalUsd + breakdown.totalUsd, - turnCount: bucket.turnCount + 1, - byModel: { - ...bucket.byModel, - [model]: addTurnToEntry(existing, deltas, breakdown), - }, - }; -} - -/** Pure reducer: record one turn into the given state. */ -export function reduceRecordTurnCost( - state: PersistedCostState, - input: RecordTurnCostInput, -): PersistedCostState { - const { threadId, model, deltas, breakdown } = input; - if (!threadId || !model) { - return state; - } - // Skip no-op turns to keep storage tiny. - const totalTokens = - deltas.inputTokens + - deltas.cachedInputTokens + - deltas.outputTokens + - deltas.reasoningOutputTokens; - if (totalTokens <= 0 && breakdown.totalUsd <= 0) { - return state; - } - const monthKey = localMonthKey(input.at ?? new Date()); - const session = state.sessions[threadId] ?? emptyBucket(); - const month = state.months[monthKey] ?? emptyBucket(); - return { - ...state, - sessions: { - ...state.sessions, - [threadId]: addTurnToBucket(session, model, deltas, breakdown), - }, - months: { - ...state.months, - [monthKey]: addTurnToBucket(month, model, deltas, breakdown), - }, - }; -} - -export function reduceResetSession( - state: PersistedCostState, - threadId: string, -): PersistedCostState { - if (!(threadId in state.sessions)) { - return state; - } - const nextSessions = { ...state.sessions }; - delete nextSessions[threadId]; - return { ...state, sessions: nextSessions }; -} - -function sanitizeNumber(value: unknown): number { - return typeof value === "number" && Number.isFinite(value) && value >= 0 ? 
value : 0; -} - -function sanitizeModelEntry(raw: unknown): ModelCostEntry | null { - if (!raw || typeof raw !== "object") { - return null; - } - const r = raw as Record; - return { - inputTokens: sanitizeNumber(r.inputTokens), - cachedInputTokens: sanitizeNumber(r.cachedInputTokens), - outputTokens: sanitizeNumber(r.outputTokens), - reasoningOutputTokens: sanitizeNumber(r.reasoningOutputTokens), - totalUsd: sanitizeNumber(r.totalUsd), - turnCount: sanitizeNumber(r.turnCount), - }; -} - -function sanitizeBucket(raw: unknown): CostBucket | null { - if (!raw || typeof raw !== "object") { - return null; - } - const r = raw as Record; - const byModelRaw = (r.byModel ?? {}) as Record; - const byModel: Record = {}; - if (byModelRaw && typeof byModelRaw === "object") { - for (const [model, entry] of Object.entries(byModelRaw)) { - if (!model) continue; - const cleaned = sanitizeModelEntry(entry); - if (cleaned) byModel[model] = cleaned; - } - } - return { - totalUsd: sanitizeNumber(r.totalUsd), - turnCount: sanitizeNumber(r.turnCount), - byModel, - }; -} - -export function sanitizePersistedCostState(raw: unknown): PersistedCostState { - if (!raw || typeof raw !== "object") { - return initialState; - } - const r = raw as Record; - if (r.version !== 1) { - return initialState; - } - const sessions: Record = {}; - const months: Record = {}; - const sessionsRaw = (r.sessions ?? {}) as Record; - const monthsRaw = (r.months ?? 
{}) as Record; - if (sessionsRaw && typeof sessionsRaw === "object") { - for (const [threadId, bucket] of Object.entries(sessionsRaw)) { - if (!threadId) continue; - const cleaned = sanitizeBucket(bucket); - if (cleaned) sessions[threadId] = cleaned; - } - } - if (monthsRaw && typeof monthsRaw === "object") { - for (const [monthKey, bucket] of Object.entries(monthsRaw)) { - if (!/^\d{4}-\d{2}$/.test(monthKey)) continue; - const cleaned = sanitizeBucket(bucket); - if (cleaned) months[monthKey] = cleaned; - } - } - return { version: 1, sessions, months }; -} - -function readPersistedState(): PersistedCostState { - if (typeof window === "undefined") { - return initialState; - } - try { - const raw = window.localStorage.getItem(COST_STORE_STORAGE_KEY); - if (!raw) return initialState; - return sanitizePersistedCostState(JSON.parse(raw)); - } catch { - return initialState; - } -} - -function persistState(state: PersistedCostState): void { - if (typeof window === "undefined") return; - try { - const { version, sessions, months } = state; - window.localStorage.setItem( - COST_STORE_STORAGE_KEY, - JSON.stringify({ version, sessions, months } satisfies PersistedCostState), - ); - } catch { - // ignore quota / serialization errors - } -} - -const debouncedPersist = new Debouncer(persistState, { wait: 400 }); - -export const useCostStore = create((set) => ({ - ...readPersistedState(), - recordTurnCost: (input) => set((state) => reduceRecordTurnCost(state, input)), - resetSession: (threadId) => set((state) => reduceResetSession(state, threadId)), - resetAll: () => set(() => ({ ...initialState })), - __replaceState: (next) => set(() => ({ ...next })), -})); - -useCostStore.subscribe((state) => { - const { version, sessions, months } = state; - debouncedPersist.maybeExecute({ version, sessions, months }); -}); - -if (typeof window !== "undefined" && typeof window.addEventListener === "function") { - window.addEventListener("beforeunload", () => { - debouncedPersist.flush(); - 
}); -} - -// ── Selectors ──────────────────────────────────────────────────────────── - -export function selectSessionBucket( - state: PersistedCostState, - threadId: string | null | undefined, -): CostBucket { - if (!threadId) return emptyBucket(); - return state.sessions[threadId] ?? emptyBucket(); -} - -export function selectMonthBucket( - state: PersistedCostState, - monthKey: string = localMonthKey(), -): CostBucket { - return state.months[monthKey] ?? emptyBucket(); -} - -export interface CostSummary { - readonly sessionUsd: number; - readonly monthUsd: number; - readonly sessionTurnCount: number; - readonly monthTurnCount: number; - readonly monthKey: string; - readonly session: CostBucket; - readonly month: CostBucket; - readonly averagePerTurnUsd: number | null; -} - -export function useCostSummary( - threadId: string | null | undefined, - now?: Date, -): CostSummary { - const sessions = useCostStore((state) => state.sessions); - const months = useCostStore((state) => state.months); - // Intentionally rebuild on any change to sessions/months — selector is cheap. - return selectCostSummary({ version: 1, sessions, months }, threadId, now); -} - -export function selectCostSummary( - state: PersistedCostState, - threadId: string | null | undefined, - now: Date = new Date(), -): CostSummary { - const monthKey = localMonthKey(now); - const session = selectSessionBucket(state, threadId); - const month = selectMonthBucket(state, monthKey); - const averagePerTurnUsd = - session.turnCount > 0 ? 
session.totalUsd / session.turnCount : null; - return { - sessionUsd: session.totalUsd, - monthUsd: month.totalUsd, - sessionTurnCount: session.turnCount, - monthTurnCount: month.turnCount, - monthKey, - session, - month, - averagePerTurnUsd, - }; -} - -export { formatUsd }; diff --git a/apps/web/src/lib/useCostTracking.test.ts b/apps/web/src/lib/useCostTracking.test.ts deleted file mode 100644 index 9590cf820e..0000000000 --- a/apps/web/src/lib/useCostTracking.test.ts +++ /dev/null @@ -1,160 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { EventId, type ModelSelection, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts"; - -import { processActivitiesForCost } from "./useCostTracking"; - -function makeContextWindowActivity( - id: string, - payload: Record, - createdAt = "2026-04-21T10:00:00.000Z", -): OrchestrationThreadActivity { - return { - id: EventId.make(id), - tone: "info", - kind: "context-window.updated", - summary: "Context window updated", - payload, - turnId: TurnId.make("turn-1"), - createdAt, - }; -} - -const sonnet: ModelSelection = { - provider: "claudeAgent", - model: "claude-sonnet-4-6", -}; - -describe("processActivitiesForCost", () => { - it("returns empty records with null threadId", () => { - const result = processActivitiesForCost(null, [], sonnet, null); - expect(result.records).toEqual([]); - expect(result.nextSeen.size).toBe(0); - }); - - it("seeds existing activities without recording on first mount", () => { - const acts = [ - makeContextWindowActivity("evt-a", { lastOutputTokens: 1000 }), - makeContextWindowActivity("evt-b", { lastOutputTokens: 500 }), - ]; - const result = processActivitiesForCost("t1", acts, sonnet, null); - expect(result.records).toEqual([]); - expect(result.nextSeen.size).toBe(2); - }); - - it("records only new activities on subsequent call", () => { - const seed = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-a", { lastOutputTokens: 100 })], - sonnet, - null, 
- ); - const next = processActivitiesForCost( - "t1", - [ - makeContextWindowActivity("evt-a", { lastOutputTokens: 100 }), - makeContextWindowActivity("evt-b", { - lastInputTokens: 1_000, - lastCachedInputTokens: 500, - lastOutputTokens: 200, - }), - ], - sonnet, - seed.nextSeen, - ); - expect(next.records).toHaveLength(1); - const record = next.records[0]!; - expect(record.threadId).toBe("t1"); - expect(record.model).toBe("claude-sonnet-4-6"); - expect(record.deltas.inputTokens).toBe(1_000); - expect(record.deltas.outputTokens).toBe(200); - // 1000*3 + 500*0.3 + 200*15 = 3000+150+3000 = 6150 / 1M = $0.00615 - expect(record.breakdown.totalUsd).toBeCloseTo(0.00615, 6); - }); - - it("skips events without per-turn deltas", () => { - const seed = processActivitiesForCost("t1", [], sonnet, null); - const next = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-1", { usedTokens: 10_000 })], - sonnet, - seed.nextSeen, - ); - expect(next.records).toEqual([]); - expect(next.nextSeen.has("evt-1")).toBe(true); - }); - - it("skips non-context-window activity kinds", () => { - const seed = processActivitiesForCost("t1", [], sonnet, null); - const other: OrchestrationThreadActivity = { - id: EventId.make("evt-tool"), - tone: "info", - kind: "tool.started", - summary: "tool.started", - payload: { lastOutputTokens: 1_000 }, - turnId: TurnId.make("turn-1"), - createdAt: "2026-04-21T10:00:00.000Z", - }; - const next = processActivitiesForCost("t1", [other], sonnet, seed.nextSeen); - expect(next.records).toEqual([]); - expect(next.nextSeen.has("evt-tool")).toBe(true); - }); - - it("skips when model selection missing", () => { - const seed = processActivitiesForCost("t1", [], null, null); - const next = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], - null, - seed.nextSeen, - ); - expect(next.records).toEqual([]); - }); - - it("skips when pricing resolves to zero (unknown model)", () => { - const seed = 
processActivitiesForCost("t1", [], sonnet, null); - const next = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], - { provider: "opencode", model: "some/unknown-model" }, - seed.nextSeen, - ); - expect(next.records).toEqual([]); - expect(next.nextSeen.has("evt-1")).toBe(true); - }); - - it("deduplicates by activity id", () => { - const seed = processActivitiesForCost("t1", [], sonnet, null); - const firstPass = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], - sonnet, - seed.nextSeen, - ); - expect(firstPass.records).toHaveLength(1); - const secondPass = processActivitiesForCost( - "t1", - [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })], - sonnet, - firstPass.nextSeen, - ); - expect(secondPass.records).toEqual([]); - }); - - it("uses activity.createdAt as `at` timestamp", () => { - const seed = processActivitiesForCost("t1", [], sonnet, null); - const next = processActivitiesForCost( - "t1", - [ - makeContextWindowActivity( - "evt-1", - { lastOutputTokens: 1_000 }, - "2026-03-15T00:00:00.000Z", - ), - ], - sonnet, - seed.nextSeen, - ); - const record = next.records[0]!; - expect(record.at?.toISOString()).toBe("2026-03-15T00:00:00.000Z"); - }); -}); diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts deleted file mode 100644 index 6757ecc305..0000000000 --- a/apps/web/src/lib/useCostTracking.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { useEffect, useRef } from "react"; -import type { ModelSelection, OrchestrationThreadActivity } from "@t3tools/contracts"; -import { - computeTurnCost, - type TurnCostBreakdown, - type TurnTokenDeltas, -} from "@t3tools/shared/pricing"; - -import { useCostStore, type RecordTurnCostInput } from "./costStore"; - -interface SeenRef { - threadId: string | null | undefined; - ids: Set; -} - -function toNonNegative(value: unknown): number { - return typeof value === "number" && 
Number.isFinite(value) && value > 0 ? value : 0; -} - -function extractDeltas(payload: unknown): TurnTokenDeltas | null { - if (!payload || typeof payload !== "object") return null; - const p = payload as Record; - const input = toNonNegative(p.lastInputTokens); - const cached = toNonNegative(p.lastCachedInputTokens); - const cacheCreation = toNonNegative(p.lastCacheCreationInputTokens); - const output = toNonNegative(p.lastOutputTokens); - const reasoning = toNonNegative(p.lastReasoningOutputTokens); - if (input + cached + cacheCreation + output + reasoning <= 0) return null; - return { - inputTokens: input, - cachedInputTokens: cached, - cacheCreationInputTokens: cacheCreation, - outputTokens: output, - reasoningOutputTokens: reasoning, - }; -} - -export interface ProcessActivitiesResult { - readonly records: ReadonlyArray; - readonly nextSeen: Set; -} - -/** - * Pure: find new `context-window.updated` events that carry per-turn - * token deltas and translate them into cost-store inputs. Returns updated - * "seen" set for caller to persist. - * - * Behaviour: - * - If `prevSeen` is `null`, treat all activities as "already seen" and - * emit no records — used for initial mount / thread switch. - * - Otherwise, only new activity IDs are considered. - */ -export function processActivitiesForCost( - threadId: string | null | undefined, - activities: ReadonlyArray | undefined, - modelSelection: ModelSelection | null | undefined, - prevSeen: Set | null, -): ProcessActivitiesResult { - if (!threadId || !activities || activities.length === 0) { - return { records: [], nextSeen: prevSeen ?? new Set() }; - } - if (prevSeen === null) { - // Initial mount / thread switch: seed seen set with current activity IDs. 
- return { - records: [], - nextSeen: new Set(activities.map((a) => a.id as string)), - }; - } - const seen = new Set(prevSeen); - const model = modelSelection?.model; - const provider = modelSelection?.provider; - const records: RecordTurnCostInput[] = []; - for (const activity of activities) { - const id = activity.id as string; - if (seen.has(id)) continue; - seen.add(id); - if (activity.kind !== "context-window.updated") continue; - const deltas = extractDeltas(activity.payload); - if (!deltas) continue; - if (!model) continue; - const breakdown: TurnCostBreakdown = computeTurnCost(model, deltas, provider); - if (breakdown.totalUsd <= 0) continue; - records.push({ - threadId, - model, - deltas, - breakdown, - at: activity.createdAt ? new Date(activity.createdAt) : new Date(), - }); - } - return { records, nextSeen: seen }; -} - -/** - * Observe thread activity stream and record cost for each new - * `context-window.updated` event. Seeds on first mount so historical - * activities aren't retroactively charged. - */ -export function useCostTracking( - threadId: string | null | undefined, - activities: ReadonlyArray | undefined, - modelSelection: ModelSelection | null | undefined, -): void { - const recordTurnCost = useCostStore((state) => state.recordTurnCost); - const seenRef = useRef({ threadId: undefined, ids: new Set() }); - - useEffect(() => { - const prev = seenRef.current.threadId === threadId ? 
seenRef.current.ids : null; - const { records, nextSeen } = processActivitiesForCost( - threadId, - activities, - modelSelection, - prev, - ); - seenRef.current = { threadId, ids: nextSeen }; - for (const record of records) { - recordTurnCost(record); - } - }, [threadId, activities, modelSelection, recordTurnCost]); -} From 96768f185b859bee8510b270ad97a317a834f7e7 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 21:53:13 +0200 Subject: [PATCH 11/16] fix(web): stop button stays active after model response completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the final `thread.message-sent` (streaming:false) arrives, the client marks `latestTurn.state` as "completed" but leaves `session.status === "running"` until the separate `thread.session-set` event (emitted server-side on `turn.completed`) arrives. In that gap: - The stop button stays red because visibility is derived from `derivePhase(session)` → `"running"` via `session.status`. - Clicking it dispatches `thread.turn.interrupt`; the server has no active turn so the command is a no-op, and the UI stays stuck until the late `thread.session-set` lands. Fix: - `store.ts` `thread.message-sent` handler: when the final assistant message for the currently active turn arrives and `latestTurn` resolves to "completed", optimistically flip `session.status` / `orchestrationStatus` to "ready" and clear `activeTurnId`. The later server-sent `thread.session-set` overwrites session via `mapSession` and is idempotent over this change. Interrupted and errored turns are excluded (checked via `latestTurn.state === "completed"` and the `activeTurnId === event.turnId` guard). - `ChatView.tsx` `onInterrupt`: defensive guard — if `latestTurn` is already in a terminal state (completed / interrupted / error), skip the dispatch. This closes the small window where a click lands before React re-renders the composer. 
Tests: - Updated the existing replay-batch test: after a final assistant `message-sent` for the active turn, `session.status` is now "ready" and `activeTurnId` is cleared. - Added a test that a mismatched turnId (active turn ≠ streaming:false message turn) does NOT reconcile — the server's session-set remains authoritative. - Added a test that an interrupted turn's final message does NOT reconcile session to "ready". All 908 web tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/web/src/components/ChatView.tsx | 14 +++ apps/web/src/store.test.ts | 123 ++++++++++++++++++++++++++- apps/web/src/store.ts | 28 ++++++ 3 files changed, 164 insertions(+), 1 deletion(-) diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx index 0c76059b6a..552d9fcaf9 100644 --- a/apps/web/src/components/ChatView.tsx +++ b/apps/web/src/components/ChatView.tsx @@ -2667,6 +2667,20 @@ export default function ChatView(props: ChatViewProps) { const onInterrupt = async () => { const api = readEnvironmentApi(environmentId); if (!api || !activeThread) return; + // Defensive: if the latest turn is already in a terminal state the + // server has no active turn to interrupt, so the dispatch would be a + // no-op round-trip. Skip it — the store reconciles session.status in + // the `thread.message-sent` handler, so the stop button should have + // already disappeared; this guard handles the small window where a + // click landed before the React re-render. 
+ const latestTurnState = activeThread.latestTurn?.state; + if ( + latestTurnState === "completed" || + latestTurnState === "interrupted" || + latestTurnState === "error" + ) { + return; + } await api.orchestration.dispatchCommand({ type: "thread.turn.interrupt", commandId: newCommandId(), diff --git a/apps/web/src/store.test.ts b/apps/web/src/store.test.ts index 9bb01ba0be..19b35f8a12 100644 --- a/apps/web/src/store.test.ts +++ b/apps/web/src/store.test.ts @@ -741,11 +741,132 @@ describe("incremental orchestration updates", () => { localEnvironmentId, ); - expect(threadsOf(next)[0]?.session?.status).toBe("running"); + // The final `thread.message-sent` for the active turn optimistically + // flips session.status from "running" → "ready" and clears + // activeTurnId, so the stop button does not remain active while we + // wait for the server's follow-up `thread.session-set` event. + expect(threadsOf(next)[0]?.session?.status).toBe("ready"); + expect(threadsOf(next)[0]?.session?.orchestrationStatus).toBe("ready"); + expect(threadsOf(next)[0]?.session?.activeTurnId).toBeUndefined(); expect(threadsOf(next)[0]?.latestTurn?.state).toBe("completed"); expect(threadsOf(next)[0]?.messages).toHaveLength(1); }); + it("does not reconcile session when the completed turn is not the active turn", () => { + const thread = makeThread({ + latestTurn: { + turnId: TurnId.make("turn-1"), + state: "running", + requestedAt: "2026-02-27T00:00:00.000Z", + startedAt: "2026-02-27T00:00:00.000Z", + completedAt: null, + assistantMessageId: null, + }, + }); + const state = makeState(thread); + + const next = applyOrchestrationEvents( + state, + [ + makeEvent( + "thread.session-set", + { + threadId: thread.id, + session: { + threadId: thread.id, + status: "running", + providerName: "codex", + runtimeMode: "full-access", + activeTurnId: TurnId.make("turn-2"), + lastError: null, + updatedAt: "2026-02-27T00:00:02.000Z", + }, + }, + { sequence: 2 }, + ), + makeEvent( + "thread.message-sent", + { + 
threadId: thread.id, + messageId: MessageId.make("assistant-1"), + role: "assistant", + text: "done", + turnId: TurnId.make("turn-1"), + streaming: false, + createdAt: "2026-02-27T00:00:03.000Z", + updatedAt: "2026-02-27T00:00:03.000Z", + }, + { sequence: 3 }, + ), + ], + localEnvironmentId, + ); + + // activeTurnId is turn-2 but the streaming:false message is for turn-1; + // do not reconcile — the server's session-set is still authoritative. + expect(threadsOf(next)[0]?.session?.status).toBe("running"); + expect(threadsOf(next)[0]?.session?.activeTurnId).toBe(TurnId.make("turn-2")); + }); + + it("does not reconcile session when the final message is for an interrupted turn", () => { + const thread = makeThread(); + const state = makeState(thread); + + const next = applyOrchestrationEvents( + state, + [ + makeEvent( + "thread.session-set", + { + threadId: thread.id, + session: { + threadId: thread.id, + status: "running", + providerName: "codex", + runtimeMode: "full-access", + activeTurnId: TurnId.make("turn-1"), + lastError: null, + updatedAt: "2026-02-27T00:00:02.000Z", + }, + }, + { sequence: 2 }, + ), + makeEvent( + "thread.turn-interrupt-requested", + { + threadId: thread.id, + turnId: TurnId.make("turn-1"), + createdAt: "2026-02-27T00:00:02.500Z", + }, + { sequence: 3 }, + ), + makeEvent( + "thread.message-sent", + { + threadId: thread.id, + messageId: MessageId.make("assistant-1"), + role: "assistant", + text: "partial", + turnId: TurnId.make("turn-1"), + streaming: false, + createdAt: "2026-02-27T00:00:03.000Z", + updatedAt: "2026-02-27T00:00:03.000Z", + }, + { sequence: 4 }, + ), + ], + localEnvironmentId, + ); + + // turn-interrupt-requested moved latestTurn to "interrupted"; the + // final message-sent keeps it interrupted and must NOT flip + // session.status to "ready" — only a cleanly completed turn triggers + // the optimistic reconcile. 
+ expect(threadsOf(next)[0]?.latestTurn?.state).toBe("interrupted"); + expect(threadsOf(next)[0]?.session?.status).toBe("running"); + expect(threadsOf(next)[0]?.session?.activeTurnId).toBe(TurnId.make("turn-1")); + }); + it("does not regress latestTurn when an older turn diff completes late", () => { const state = makeState( makeThread({ diff --git a/apps/web/src/store.ts b/apps/web/src/store.ts index 3b1976bf9a..e9867ca0f0 100644 --- a/apps/web/src/store.ts +++ b/apps/web/src/store.ts @@ -1432,8 +1432,36 @@ function applyEnvironmentOrchestrationEvent( assistantMessageId: event.payload.messageId, }) : thread.latestTurn; + // Optimistically reconcile session state when the final assistant + // message for the active turn arrives. The server emits a separate + // `thread.session-set` event on `turn.completed` which flips + // session.status → "ready" and clears activeTurnId, but that event + // can arrive after the final `thread.message-sent`. In that gap + // the stop button stays active and clicking it dispatches a dead + // interrupt command (no active turn) that the server no-ops. Flip + // the status locally here so the button disappears immediately; the + // later session-set is idempotent over this change. + const shouldReconcileSession = + event.payload.role === "assistant" && + event.payload.streaming === false && + event.payload.turnId !== null && + thread.session !== null && + thread.session.orchestrationStatus === "running" && + thread.session.activeTurnId === event.payload.turnId && + latestTurn?.state === "completed"; + const nextSession: Thread["session"] = + shouldReconcileSession && thread.session !== null + ? 
{ + ...thread.session, + status: "ready", + orchestrationStatus: "ready", + activeTurnId: undefined, + updatedAt: event.occurredAt, + } + : thread.session; return { ...thread, + session: nextSession, messages: cappedMessages, turnDiffSummaries, latestTurn, From 76a3495f6d59e65ec763ce447fd4321398451501 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Tue, 21 Apr 2026 22:09:35 +0200 Subject: [PATCH 12/16] fix: bot review follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Cursor Bugbot + Macroscope findings on #2273: - apps/server/src/cost/Reducer.ts: drop the no-op ternaries in sanitizePersistedFile (`r.version === 1 ? 1 : 1` and `r.kind === expectedKind ? expectedKind : expectedKind`). Both always returned the right-hand value regardless of the stored value, so they were silently forcing the expected defaults — which is actually the intended sanitize-on-mismatch behaviour. Simplify to the constants directly and add a comment explaining the intent. (Macroscope, Reducer.ts:325-326.) - apps/web/src/lib/costQuery.ts: stop duplicating `formatUsd` and instead re-export it from `@t3tools/shared/pricing` (the shared package was already a workspace dep and owns computeTurnCost next to the formatter). Keeping the re-export so CostMeter and any future consumer continue to import from `~/lib/costQuery` as the single cost-UI utility module. (Cursor, duplicated-function.) - apps/web/src/lib/costQuery.ts: remove the dead `useInvalidateCostSummary` hook. The ChatComposer calls `invalidateCostSummary` directly with its own `useQueryClient`, so the hook wrapper was unused surface area. (Cursor, dead-code.) Verified: web typecheck clean, web tests 908/908 pass, server cost tests 19/19 pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/server/src/cost/Reducer.ts | 10 ++++++---- apps/web/src/lib/costQuery.ts | 31 +++++++------------------------ 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/apps/server/src/cost/Reducer.ts b/apps/server/src/cost/Reducer.ts index bb9d4c7cd2..a9ac9e27b2 100644 --- a/apps/server/src/cost/Reducer.ts +++ b/apps/server/src/cost/Reducer.ts @@ -322,14 +322,16 @@ export function sanitizePersistedFile( }; } const r = raw as Record; - const version = r.version === 1 ? 1 : 1; - const kind = r.kind === expectedKind ? expectedKind : expectedKind; + // version and kind are forced to the expected values — any drift from + // what the caller asked for is treated as malformed and silently + // sanitized (the surrounding contract only supports version 1 and the + // requested kind). const key = typeof r.key === "string" && r.key.length > 0 ? r.key : expectedKey; const bucket = sanitizeBucket(r.bucket, now); const lastCumulative = sanitizeLastCumulative(r.lastCumulative); return { - version, - kind, + version: 1, + kind: expectedKind, key, bucket, ...(lastCumulative && expectedKind === "session" ? { lastCumulative } : {}), diff --git a/apps/web/src/lib/costQuery.ts b/apps/web/src/lib/costQuery.ts index 9c301363f6..a8a09af89d 100644 --- a/apps/web/src/lib/costQuery.ts +++ b/apps/web/src/lib/costQuery.ts @@ -10,14 +10,16 @@ * `context-window.updated` activity so the ring updates in near-realtime. 
*/ import type { EnvironmentId, ThreadId } from "@t3tools/contracts"; -import { - queryOptions, - type QueryClient, - useQueryClient, -} from "@tanstack/react-query"; +import { queryOptions, type QueryClient } from "@tanstack/react-query"; import { resolveEnvironmentHttpUrl } from "../environments/runtime"; +// Re-export the shared USD formatter so `~/lib/costQuery` stays the single +// import surface for cost UI consumers (see CostMeter.tsx) while the +// actual implementation lives in @t3tools/shared/pricing alongside +// computeTurnCost. +export { formatUsd } from "@t3tools/shared/pricing"; + const COST_SUMMARY_STALE_TIME_MS = 5_000; /** Bucket shape mirrors apps/server/src/cost/types.ts. Kept duplicated so @@ -181,22 +183,3 @@ export function invalidateCostSummary( return queryClient.invalidateQueries({ queryKey: costQueryKeys.all }); } -/** Convenience hook returning the invalidator for consumers outside React Query's mutation flow. */ -export function useInvalidateCostSummary() { - const queryClient = useQueryClient(); - return (input?: { - readonly environmentId?: EnvironmentId | null; - readonly threadId?: ThreadId | null; - }) => invalidateCostSummary(queryClient, input); -} - -/** Format USD for UI; kept here so the component imports one utility module. 
*/ -export function formatUsd(value: number | null | undefined): string { - if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) { - return "$0.00"; - } - if (value < 0.01) return "<$0.01"; - if (value < 1) return `$${value.toFixed(3).replace(/0$/, "")}`; - if (value < 100) return `$${value.toFixed(2)}`; - return `$${Math.round(value).toLocaleString("en-US")}`; -} From b027c89ef871eacf5b74c3c62a21b89846190957 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Wed, 22 Apr 2026 00:38:02 +0200 Subject: [PATCH 13/16] fix(cost): drop mid-turn snapshots + rewrite usedTokens as input-side MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent bugs in the token-usage pipeline, both user-visible and both rooted in the same conflation between the context-window dimension (what fills the ring) and the billing dimension (what lands in the cost ledger). ## 1. Cost ledger over-counting (CRITICAL) Claude emits `thread.token-usage.updated` events from three places per turn: every `task_progress`, every `task_notification`, and the final `completeTurn`. The mid-turn snapshots carry per-API-call breakdowns *without* `lastXxxTokens` fields, while the turn-complete snapshot carries cumulative totals *with* `lastXxx` deltas. `ProviderRuntimeIngestion` fed every one of these events into `CostTracker.recordUsage`. For the mid-turn events, the Reducer's `hasExplicitLast=false` branch subtracts the payload's cumulative against the session's `lastCumulative` — but what gets stored in `lastCumulative` between mid-turn events is one API call's breakdown, not the session running total, so the resulting "deltas" are arbitrary diffs between per-call snapshots. Net effect: cost over/undercounted unpredictably every turn, and `turnCount` inflated by 3–10× because every mid-turn snapshot with any positive delta bumped it. Fix: gate `recordUsage` in `ProviderRuntimeIngestion` on the presence of any `lastXxxTokens` field. 
Mid-turn snapshots still flow to the `context-window.updated` activity for the ring; they just skip the ledger. Codex only emits one snapshot per turn (and always with `lastXxx`) so it's unaffected. While here, normalise the model slug (`resolveModelSlugForProvider`) before passing it to the ledger so aliased/canonical variants collapse to a single `byModel` key. ## 2. Context-window ring over-reporting Both adapters set `usedTokens = totalTokens`, which for the cost dimension meant *every* billed token including outputs. But the ring consumes `usedTokens / maxTokens`, and output tokens are generated *out* of the model — they don't live in the prompt window, so including them inflated the ring (especially on long-output turns). Reasoning tokens have the same property (ephemeral, not persisted into next-turn context). Fix: redefine `usedTokens` as the input-side total only (`input + cache-read + cache-creation`), in both `normalizeClaudeTokenUsage`/`buildClaudeTurnCompleteUsage` and `normalizeCodexTokenUsage` (`last.inputTokens + last.cachedInputTokens` — Codex V2 has no cache-creation tier). `totalProcessedTokens` keeps the original semantic ("tokens processed so far", billing-side). Added a contract-level JSDoc on `ThreadTokenUsageSnapshot` that spells out the two dimensions and the `lastXxxTokens` "turn-final" signal. Also: the client's `deriveLatestContextWindowSnapshot` was silently dropping `cacheCreationInputTokens` / `lastCacheCreationInputTokens` from the `ContextWindowSnapshot` shape even though the payload carries them. Wire them through. ## 3. Migration Existing ledger files are polluted and can't be repaired in place. Added a `.schema-v2` sentinel in the usage dir: `CostTrackerLive` boots, sees no sentinel, wipes only the known ledger files (`session_*.json`, `YYYY-MM.json`, `alltime.json`) — any stray files are left alone — writes the sentinel, and subsequent boots skip.
Bumping `LEDGER_SCHEMA_VERSION` is the single line needed for any future reducer-incompatible change. ## Tests - Reworked Claude/Codex adapter assertions for the new input-side `usedTokens` semantic (24542 → 23863 for the Claude cumulative case, 126 → 120 for Codex, etc.); explanatory comments added. - New ProviderRuntimeIngestion test: mid-turn snapshot (no `lastXxx`) projects into the activity stream but does NOT bump the ledger; turn-final snapshot records exactly one turn. - New CostTrackerLive tests: first boot wipes pre-v2 ledger files (including a `.json` stray, which survives); subsequent boot with sentinel present leaves ledger files intact. - Existing ingestion tests retargeted at a temp-dir base so the first-boot wipe can't touch the developer's real `/userdata/usage/` directory. All 203 server tests pass in the changed files; 908 web tests pass; 126 shared tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/cost/Layers/CostTracker.test.ts | 92 +++++++++++++++++++ apps/server/src/cost/Layers/CostTracker.ts | 56 +++++++++++ .../Layers/ProviderRuntimeIngestion.test.ts | 78 +++++++++++++++- .../Layers/ProviderRuntimeIngestion.ts | 46 +++++++--- .../src/provider/Layers/ClaudeAdapter.test.ts | 11 ++- .../src/provider/Layers/ClaudeAdapter.ts | 42 +++++++-- .../Layers/ClaudeAdapter.usage.test.ts | 14 ++- .../src/provider/Layers/CodexAdapter.test.ts | 8 +- .../src/provider/Layers/CodexAdapter.ts | 21 +++-- apps/web/src/lib/contextWindow.ts | 2 + packages/contracts/src/providerRuntime.ts | 28 ++++++ 11 files changed, 360 insertions(+), 38 deletions(-) diff --git a/apps/server/src/cost/Layers/CostTracker.test.ts b/apps/server/src/cost/Layers/CostTracker.test.ts index 1a2acb4325..41392ff5f6 100644 --- a/apps/server/src/cost/Layers/CostTracker.test.ts +++ b/apps/server/src/cost/Layers/CostTracker.test.ts @@ -1,3 +1,7 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + import * as NodeServices from 
"@effect/platform-node/NodeServices"; import { assert, it } from "@effect/vitest"; import { Effect, Fiber, FileSystem, Layer, Path, Stream } from "effect"; @@ -14,6 +18,15 @@ const makeLayer = () => { return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer); }; +/** + * Build a layer pointing at a pre-existing temp dir so the migration has + * ledger files to wipe on boot. Caller is responsible for `rmSync` cleanup. + */ +const makeLayerAt = (baseDir: string) => { + const configLayer = ServerConfig.layerTest(process.cwd(), baseDir); + return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer); +}; + it.layer(NodeServices.layer)("CostTrackerLive", (it) => { it.effect("records a turn and persists session/month/alltime files", () => Effect.gen(function* () { @@ -143,4 +156,83 @@ it.layer(NodeServices.layer)("CostTrackerLive", (it) => { assert.equal(summary.monthKey, "2019-12"); }).pipe(Effect.provide(makeLayer())), ); + + it.effect("wipes pre-v2 ledger files on first boot and writes a schema sentinel", () => { + // Seed a usage dir that looks like a pre-migration install: a pair of + // session files, one month bucket, one all-time file, and an + // unrelated stray file we must leave alone. + const baseDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-cost-wipe-")); + const usageDir = path.join(baseDir, "userdata", "usage"); + fs.mkdirSync(usageDir, { recursive: true }); + const seededLedgerFiles = [ + "session_thread-a.json", + "session_thread-b.json", + "2026-04.json", + "alltime.json", + ]; + for (const name of seededLedgerFiles) { + fs.writeFileSync( + path.join(usageDir, name), + JSON.stringify({ version: 1, bucket: { totalUsd: 42, turnCount: 99 } }), + ); + } + // Stray non-ledger file that must survive the wipe. 
+ const strayPath = path.join(usageDir, "notes.txt"); + fs.writeFileSync(strayPath, "unrelated"); + + return Effect.gen(function* () { + // Tracker service is resolved here so the layer effect — and thus + // the migration — runs before we assert. + yield* CostTrackerService; + + for (const name of seededLedgerFiles) { + assert.equal( + fs.existsSync(path.join(usageDir, name)), + false, + `expected ${name} to be wiped`, + ); + } + assert.equal(fs.existsSync(strayPath), true, "expected stray file to survive"); + + const sentinelPath = path.join(usageDir, ".schema-v2"); + assert.equal(fs.existsSync(sentinelPath), true); + const sentinelContents = JSON.parse(fs.readFileSync(sentinelPath, "utf8")) as { + readonly version: number; + readonly wipedFileCount: number; + }; + assert.equal(sentinelContents.version, 2); + assert.equal(sentinelContents.wipedFileCount, seededLedgerFiles.length); + }).pipe( + Effect.provide(makeLayerAt(baseDir)), + Effect.ensuring( + Effect.sync(() => fs.rmSync(baseDir, { recursive: true, force: true })), + ), + ); + }); + + it.effect("skips the wipe on subsequent boots when the sentinel is present", () => { + const baseDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-cost-wipe-idempotent-")); + const usageDir = path.join(baseDir, "userdata", "usage"); + fs.mkdirSync(usageDir, { recursive: true }); + // Pre-existing sentinel → migration is a no-op; ledger files survive. 
+ fs.writeFileSync( + path.join(usageDir, ".schema-v2"), + JSON.stringify({ version: 2, migratedAt: "2026-04-01T00:00:00.000Z" }), + ); + const preservedPath = path.join(usageDir, "session_thread-keep.json"); + fs.writeFileSync( + preservedPath, + JSON.stringify({ version: 1, bucket: { totalUsd: 1, turnCount: 1 } }), + ); + + return Effect.gen(function* () { + yield* CostTrackerService; + assert.equal(fs.existsSync(preservedPath), true); + }).pipe( + Effect.provide(makeLayerAt(baseDir)), + Effect.ensuring( + Effect.sync(() => fs.rmSync(baseDir, { recursive: true, force: true })), + ), + ); + }); }); diff --git a/apps/server/src/cost/Layers/CostTracker.ts b/apps/server/src/cost/Layers/CostTracker.ts index 5d7c9bb0a2..3d6c1282c9 100644 --- a/apps/server/src/cost/Layers/CostTracker.ts +++ b/apps/server/src/cost/Layers/CostTracker.ts @@ -51,6 +51,20 @@ function monthFilename(monthKey: string): string { const ALLTIME_FILENAME = "alltime.json"; +/** + * Ledger schema version. Bump when the on-disk format changes in a way that + * makes older files incompatible with the new reducer — a sentinel file + * `.schema-v` is written to `usageDir` and, if missing on startup, the + * ledger is wiped (only the JSON ledger files; untracked files in the + * directory are left alone). Rationale for v2: prior versions fed mid-turn + * `thread.token-usage.updated` snapshots into the cost reducer, which + * double-counted token totals and inflated `turnCount` by N per real turn. + * Those buckets can't be retroactively repaired, so we reset on upgrade. + */ +const LEDGER_SCHEMA_VERSION = 2 as const; +const LEDGER_SCHEMA_SENTINEL = `.schema-v${LEDGER_SCHEMA_VERSION}`; +const LEDGER_FILE_PATTERN = /^(session_.+|\d{4}-\d{2}|alltime)\.json$/; + const make = Effect.gen(function* () { const { usageDir } = yield* ServerConfig; const fs = yield* FileSystem.FileSystem; @@ -62,6 +76,48 @@ const make = Effect.gen(function* () { // Ensure the directory exists even if config bootstrap skipped it. 
yield* fs.makeDirectory(usageDir, { recursive: true }).pipe(Effect.ignore({ log: true })); + // Migration: wipe ledger files polluted by the pre-v2 reducer. Idempotent + // via the `.schema-vN` sentinel — once present, subsequent boots skip. + yield* Effect.gen(function* () { + const sentinelPath = path.join(usageDir, LEDGER_SCHEMA_SENTINEL); + const sentinelExists = yield* fs + .exists(sentinelPath) + .pipe(Effect.orElseSucceed(() => false)); + if (sentinelExists) return; + + const entries = yield* fs + .readDirectory(usageDir) + .pipe(Effect.orElseSucceed(() => [] as Array)); + const ledgerFiles = entries.filter((entry) => LEDGER_FILE_PATTERN.test(entry)); + if (ledgerFiles.length > 0) { + yield* Effect.logInfo( + `CostTracker: migrating usage ledger to schema v${LEDGER_SCHEMA_VERSION}; wiping ${ledgerFiles.length} pre-migration file(s)`, + ); + yield* Effect.forEach( + ledgerFiles, + (entry) => + fs + .remove(path.join(usageDir, entry), { force: true }) + .pipe(Effect.ignoreCause({ log: true })), + { concurrency: "unbounded", discard: true }, + ); + } + yield* fs + .writeFileString( + sentinelPath, + `${JSON.stringify( + { + version: LEDGER_SCHEMA_VERSION, + migratedAt: new Date().toISOString(), + wipedFileCount: ledgerFiles.length, + }, + null, + 2, + )}\n`, + ) + .pipe(Effect.ignoreCause({ log: true })); + }).pipe(Effect.ignoreCause({ log: true })); + const filePathFor = (kind: PersistedCostFileKind, key: string): string => { switch (kind) { case "session": diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index f334ad5ff1..361875200e 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -34,6 +34,7 @@ import { OrchestrationEngineLive } from "./OrchestrationEngine.ts"; import { OrchestrationProjectionPipelineLive } from "./ProjectionPipeline.ts"; 
import { OrchestrationProjectionSnapshotQueryLive } from "./ProjectionSnapshotQuery.ts"; import { CostTrackerLive } from "../../cost/Layers/CostTracker.ts"; +import { CostTrackerService } from "../../cost/Services/CostTracker.ts"; import { ProviderRuntimeIngestionLive } from "./ProviderRuntimeIngestion.ts"; import { OrchestrationEngineService, @@ -171,7 +172,7 @@ type ProviderRuntimeTestCheckpoint = ProviderRuntimeTestThread["checkpoints"][nu describe("ProviderRuntimeIngestion", () => { let runtime: ManagedRuntime.ManagedRuntime< - OrchestrationEngineService | ProviderRuntimeIngestionService, + OrchestrationEngineService | ProviderRuntimeIngestionService | CostTrackerService, unknown > | null = null; let scope: Scope.Closeable | null = null; @@ -209,7 +210,11 @@ describe("ProviderRuntimeIngestion", () => { Layer.provide(RepositoryIdentityResolverLive), Layer.provide(SqlitePersistenceMemory), ); - const configLayer = ServerConfig.layerTest(process.cwd(), process.cwd()); + // Use a scoped temp dir for the test base — avoids writing into the + // developer's real `/userdata/usage/` when the ingestion harness + // runs `CostTrackerLive` (which now performs a schema-sentinel wipe + // on boot if no sentinel is present). 
+ const configLayer = ServerConfig.layerTest(process.cwd(), { prefix: "t3-ingestion-" }); const layer = ProviderRuntimeIngestionLive.pipe( Layer.provideMerge(orchestrationLayer), Layer.provideMerge(SqlitePersistenceMemory), @@ -222,6 +227,7 @@ describe("ProviderRuntimeIngestion", () => { runtime = ManagedRuntime.make(layer); const engine = await runtime.runPromise(Effect.service(OrchestrationEngineService)); const ingestion = await runtime.runPromise(Effect.service(ProviderRuntimeIngestionService)); + const costTracker = await runtime.runPromise(Effect.service(CostTrackerService)); scope = await Effect.runPromise(Scope.make("sequential")); await Effect.runPromise(ingestion.start().pipe(Scope.provide(scope))); const drain = () => Effect.runPromise(ingestion.drain); @@ -290,6 +296,7 @@ describe("ProviderRuntimeIngestion", () => { emit: provider.emit, setProviderSession: provider.setSession, drain, + costTracker, }; } @@ -2659,6 +2666,73 @@ describe("ProviderRuntimeIngestion", () => { }); }); + it("routes only turn-final token-usage events to the cost ledger", async () => { + const harness = await createHarness(); + const now = new Date().toISOString(); + + // Mid-turn snapshot (what Claude emits from task_progress / + // task_notification): cumulative breakdown present but NO `lastXxx` + // turn-delta fields. This should flow to the activity stream for the + // context-window ring but must not reach the cost ledger — the + // Reducer's cumulative-subtraction fallback would otherwise treat + // each mid-turn snapshot as a separate turn and over-count. 
+ harness.emit({ + type: "thread.token-usage.updated", + eventId: asEventId("evt-token-usage-mid-turn"), + provider: "claudeAgent", + createdAt: now, + threadId: asThreadId("thread-1"), + payload: { + usage: { + usedTokens: 1_000, + inputTokens: 1_000, + outputTokens: 200, + }, + }, + }); + + await waitForThread(harness.engine, (entry) => + entry.activities.some( + (activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated", + ), + ); + await harness.drain(); + + const summaryAfterMidTurn = await runtime!.runPromise( + harness.costTracker.getSummary({ threadId: asThreadId("thread-1") }), + ); + expect(summaryAfterMidTurn.thread?.turnCount ?? 0).toBe(0); + expect(summaryAfterMidTurn.month.turnCount).toBe(0); + + // Turn-final snapshot: `lastXxx` deltas present → cost ledger records + // exactly one turn. + harness.emit({ + type: "thread.token-usage.updated", + eventId: asEventId("evt-token-usage-turn-final"), + provider: "claudeAgent", + createdAt: new Date().toISOString(), + threadId: asThreadId("thread-1"), + turnId: asTurnId("turn-1"), + payload: { + usage: { + usedTokens: 1_000, + inputTokens: 1_000, + outputTokens: 200, + lastInputTokens: 1_000, + lastOutputTokens: 200, + }, + }, + }); + + await harness.drain(); + + const summaryAfterTurnFinal = await runtime!.runPromise( + harness.costTracker.getSummary({ threadId: asThreadId("thread-1") }), + ); + expect(summaryAfterTurnFinal.thread?.turnCount).toBe(1); + expect(summaryAfterTurnFinal.month.turnCount).toBe(1); + }); + it("projects Claude usage snapshots with context window into normalized thread activities", async () => { const harness = await createHarness(); const now = new Date().toISOString(); diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts index 053f04669f..d3fdde10bb 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts +++ 
b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts @@ -15,6 +15,7 @@ import { } from "@t3tools/contracts"; import { Cache, Cause, Duration, Effect, Layer, Option, Stream } from "effect"; import { makeDrainableWorker } from "@t3tools/shared/DrainableWorker"; +import { resolveModelSlugForProvider } from "@t3tools/shared/model"; import { CostTrackerService } from "../../cost/Services/CostTracker.ts"; import { ProviderService } from "../../provider/Services/ProviderService.ts"; @@ -1525,18 +1526,41 @@ const make = Effect.gen(function* () { // Side-channel: feed token usage into the CostTracker so the JSON // ledger stays in sync with the activity stream. Failures never block // ingestion — we log and drop. + // + // Only *turn-final* usage events reach the ledger. Providers (notably + // Claude) emit mid-turn snapshots from each `task_progress` / + // `task_notification` that carry per-API-call breakdowns *without* + // `lastXxxTokens` fields; feeding those through the Reducer's + // cumulative-subtraction fallback would double-count tokens and + // inflate `turnCount` by N per real turn. The presence of any + // `lastXxxTokens` field is the signal that this event represents the + // end of a turn with meaningful deltas — mid-turn snapshots still + // flow through the activity stream for the context-window ring, + // they just skip the cost ledger. if (event.type === "thread.token-usage.updated") { - const model = event.payload.model ?? 
thread.modelSelection.model; - const provider = thread.modelSelection.provider; - yield* costTracker - .recordUsage({ - threadId: thread.id, - model, - provider, - usage: event.payload.usage, - at: new Date(event.createdAt), - }) - .pipe(Effect.asVoid, Effect.ignoreCause({ log: true })); + const usage = event.payload.usage; + const hasTurnDeltas = + usage.lastInputTokens !== undefined || + usage.lastCachedInputTokens !== undefined || + usage.lastCacheCreationInputTokens !== undefined || + usage.lastOutputTokens !== undefined || + usage.lastReasoningOutputTokens !== undefined; + if (hasTurnDeltas) { + const provider = thread.modelSelection.provider; + const rawModel = event.payload.model ?? thread.modelSelection.model; + // Normalize to the canonical slug so the `byModel` ledger key is + // stable across turns that happen to report aliased slugs. + const model = resolveModelSlugForProvider(provider, rawModel); + yield* costTracker + .recordUsage({ + threadId: thread.id, + model, + provider, + usage, + at: new Date(event.createdAt), + }) + .pipe(Effect.asVoid, Effect.ignoreCause({ log: true })); + } } }); diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts index 0846009a49..5e023c07a7 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts @@ -1596,12 +1596,15 @@ describe("ClaudeAdapterLive", () => { assert.equal(usageEvent?.type, "thread.token-usage.updated"); if (usageEvent?.type === "thread.token-usage.updated") { // First turn: no prior cumulative, so last* deltas equal cumulative - // totals. Cache read/write split correctly; usedTokens = cumulative - // total (no task snapshot in this test). + // totals. Cache read/write split correctly. 
`usedTokens` + + // `lastUsedTokens` report input-side only (4 input + 21_144 cached + // + 2_715 cache-write = 23_863); output (679) is billed separately + // and tracked via `outputTokens` / `lastOutputTokens`. + // `totalProcessedTokens` keeps the full billed cumulative (24_542). assert.deepEqual(usageEvent.payload, { usage: { - usedTokens: 24542, - lastUsedTokens: 24542, + usedTokens: 23863, + lastUsedTokens: 23863, totalProcessedTokens: 24542, inputTokens: 4, cachedInputTokens: 21144, diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index 11e7a2569b..d65330a6a7 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -367,9 +367,15 @@ export function parseClaudeUsageBreakdown(value: unknown): ClaudeUsageBreakdown * the current context window size. The four token classes are reported * separately so downstream cost math can apply the correct tier. * - * No capping: `usedTokens` reflects `total_tokens` (or the derived sum) as - * reported. Callers that want to clamp for ring display should do so in the - * UI layer. + * `usedTokens` reports the **input-side** tokens only (context the model + * consumed: input + cache-read + cache-creation). Output + reasoning are + * billed separately and do not live in the prompt window; including them + * inflates the context ring for long-output turns. When the SDK reports + * only an opaque `total_tokens` (no class breakdown), we fall back to that + * number so the ring still shows *something* rather than zero. + * + * No capping: callers that want to clamp for ring display should do so in + * the UI layer. */ function normalizeClaudeTokenUsage( value: unknown, @@ -383,9 +389,12 @@ function normalizeClaudeTokenUsage( typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0 ? 
contextWindow : undefined; + const inputSideTokens = + breakdown.inputTokens + breakdown.cachedInputTokens + breakdown.cacheCreationInputTokens; + const usedTokens = inputSideTokens > 0 ? inputSideTokens : breakdown.totalTokens; return { - usedTokens: breakdown.totalTokens, - lastUsedTokens: breakdown.totalTokens, + usedTokens, + lastUsedTokens: usedTokens, ...(breakdown.inputTokens > 0 ? { inputTokens: breakdown.inputTokens } : {}), ...(breakdown.cachedInputTokens > 0 ? { cachedInputTokens: breakdown.cachedInputTokens } : {}), ...(breakdown.cacheCreationInputTokens > 0 @@ -463,15 +472,28 @@ export function buildClaudeTurnCompleteUsage( cumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens, ); const deltaOutput = Math.max(0, cumulative.outputTokens - prior.outputTokens); - const lastTotal = deltaInput + deltaCached + deltaCacheCreation + deltaOutput; - // usedTokens: prefer the task snapshot (current context size); fall back to - // the cumulative total when no task snapshot was recorded for this turn. - const usedTokens = input.taskSnapshot?.usedTokens ?? cumulative.totalTokens; + // Context-window semantics: `usedTokens` reports input-side only (tokens + // the model actually has in its prompt window). Output + reasoning are + // billed but not persisted into the context, so including them over- + // reports the ring for long-output turns. + const lastInputSideTokens = deltaInput + deltaCached + deltaCacheCreation; + const cumulativeInputSideTokens = + cumulative.inputTokens + cumulative.cachedInputTokens + cumulative.cacheCreationInputTokens; + const cumulativeUsedFallback = + cumulativeInputSideTokens > 0 ? cumulativeInputSideTokens : cumulative.totalTokens; + // Prefer the freshest task snapshot (captured per-API-call → matches the + // real current context size). Fall back to the cumulative input-side. + const usedTokens = input.taskSnapshot?.usedTokens ?? cumulativeUsedFallback; + // `lastUsedTokens` mirrors `usedTokens` at turn scope. 
When this turn + // actually consumed prompt tokens, use its input-side delta; otherwise + // fall back to the cumulative read so we never report 0 for a turn that + // still had billable activity. + const lastUsedTokens = lastInputSideTokens > 0 ? lastInputSideTokens : cumulativeUsedFallback; const snapshot: ThreadTokenUsageSnapshot = { usedTokens, - lastUsedTokens: lastTotal > 0 ? lastTotal : cumulative.totalTokens, + lastUsedTokens, totalProcessedTokens: cumulative.totalTokens, ...(cumulative.inputTokens > 0 ? { inputTokens: cumulative.inputTokens } : {}), ...(cumulative.cachedInputTokens > 0 ? { cachedInputTokens: cumulative.cachedInputTokens } : {}), diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts index c651512d3c..3015d2820b 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts @@ -67,8 +67,13 @@ describe("buildClaudeTurnCompleteUsage", () => { expect(snap.lastCachedInputTokens).toBe(5_000); expect(snap.lastCacheCreationInputTokens).toBe(2_000); expect(snap.lastOutputTokens).toBe(500); - expect(snap.lastUsedTokens).toBe(8_500); - expect(snap.usedTokens).toBe(8_500); + // usedTokens + lastUsedTokens are input-side only (1_000+5_000+2_000 = + // 8_000). Output is billed (`outputTokens`) but excluded from the + // context-window ring since it doesn't live in the prompt. + expect(snap.usedTokens).toBe(8_000); + expect(snap.lastUsedTokens).toBe(8_000); + // totalProcessedTokens keeps the full cumulative billed total for + // informational display ("tokens processed so far"). 
expect(snap.totalProcessedTokens).toBe(8_500); expect(snap.maxTokens).toBe(200_000); expect(res.nextCumulative).toBeDefined(); @@ -107,7 +112,10 @@ describe("buildClaudeTurnCompleteUsage", () => { expect(s.lastCachedInputTokens).toBe(1_000); expect(s.lastCacheCreationInputTokens).toBe(300); expect(s.lastOutputTokens).toBe(200); - expect(s.lastUsedTokens).toBe(500 + 1_000 + 300 + 200); + // lastUsedTokens is input-side only (context consumed this turn): + // 500 + 1_000 + 300 = 1_800. Output (200) is tracked separately in + // lastOutputTokens for billing but not in the context window total. + expect(s.lastUsedTokens).toBe(1_800); }); it("does not cap usedTokens to maxTokens", () => { diff --git a/apps/server/src/provider/Layers/CodexAdapter.test.ts b/apps/server/src/provider/Layers/CodexAdapter.test.ts index 03d4155934..cfc95c19cf 100644 --- a/apps/server/src/provider/Layers/CodexAdapter.test.ts +++ b/apps/server/src/provider/Layers/CodexAdapter.test.ts @@ -883,15 +883,19 @@ lifecycleLayer("CodexAdapterLive lifecycle", (it) => { return; } + // `usedTokens` reports the input-side tokens only (120 input + 0 + // cached = 120) so the context-window ring reflects what's actually + // in the prompt. Output + reasoning are tracked separately for + // billing via `lastOutputTokens` / `lastReasoningOutputTokens`. 
assert.deepEqual(firstEvent.value.payload.usage, { - usedTokens: 126, + usedTokens: 120, totalProcessedTokens: 11_839, maxTokens: 258_400, inputTokens: 120, cachedInputTokens: 0, outputTokens: 6, reasoningOutputTokens: 0, - lastUsedTokens: 126, + lastUsedTokens: 120, lastInputTokens: 120, lastCachedInputTokens: 0, lastOutputTokens: 6, diff --git a/apps/server/src/provider/Layers/CodexAdapter.ts b/apps/server/src/provider/Layers/CodexAdapter.ts index 0111cd013c..fe9992c53f 100644 --- a/apps/server/src/provider/Layers/CodexAdapter.ts +++ b/apps/server/src/provider/Layers/CodexAdapter.ts @@ -137,17 +137,26 @@ function normalizeCodexTokenUsage( usage: EffectCodexSchema.V2ThreadTokenUsageUpdatedNotification["tokenUsage"], ): ThreadTokenUsageSnapshot | undefined { const totalProcessedTokens = usage.total.totalTokens; - const usedTokens = usage.last.totalTokens; - if (usedTokens === undefined || usedTokens <= 0) { - return undefined; - } - const maxTokens = usage.modelContextWindow ?? undefined; const inputTokens = usage.last.inputTokens; const cachedInputTokens = usage.last.cachedInputTokens; const outputTokens = usage.last.outputTokens; const reasoningOutputTokens = usage.last.reasoningOutputTokens; + // Context-window semantics: `usedTokens` reports the input-side only + // (tokens currently sitting in the model's prompt window). Output + + // reasoning are billed but do not persist into context between turns, so + // including them in the ring over-reports utilisation. Codex re-sends + // the full conversation each turn, so `last.inputTokens + + // last.cachedInputTokens` is the closest analogue to current context + // size. Fall back to the raw `last.totalTokens` only when the + // breakdown is zero (defensive — shouldn't happen for any real turn). + const inputSideTokens = inputTokens + cachedInputTokens; + const usedTokens = inputSideTokens > 0 ? 
inputSideTokens : usage.last.totalTokens; + if (usedTokens <= 0) { + return undefined; + } + return { usedTokens, ...(totalProcessedTokens !== undefined && totalProcessedTokens > usedTokens @@ -158,7 +167,7 @@ function normalizeCodexTokenUsage( ...(cachedInputTokens !== undefined ? { cachedInputTokens } : {}), ...(outputTokens !== undefined ? { outputTokens } : {}), ...(reasoningOutputTokens !== undefined ? { reasoningOutputTokens } : {}), - ...(usedTokens !== undefined ? { lastUsedTokens: usedTokens } : {}), + lastUsedTokens: usedTokens, ...(inputTokens !== undefined ? { lastInputTokens: inputTokens } : {}), ...(cachedInputTokens !== undefined ? { lastCachedInputTokens: cachedInputTokens } : {}), ...(outputTokens !== undefined ? { lastOutputTokens: outputTokens } : {}), diff --git a/apps/web/src/lib/contextWindow.ts b/apps/web/src/lib/contextWindow.ts index f668135a13..cf03acae84 100644 --- a/apps/web/src/lib/contextWindow.ts +++ b/apps/web/src/lib/contextWindow.ts @@ -56,11 +56,13 @@ export function deriveLatestContextWindowSnapshot( remainingPercentage, inputTokens: asFiniteNumber(payload?.inputTokens), cachedInputTokens: asFiniteNumber(payload?.cachedInputTokens), + cacheCreationInputTokens: asFiniteNumber(payload?.cacheCreationInputTokens), outputTokens: asFiniteNumber(payload?.outputTokens), reasoningOutputTokens: asFiniteNumber(payload?.reasoningOutputTokens), lastUsedTokens: asFiniteNumber(payload?.lastUsedTokens), lastInputTokens: asFiniteNumber(payload?.lastInputTokens), lastCachedInputTokens: asFiniteNumber(payload?.lastCachedInputTokens), + lastCacheCreationInputTokens: asFiniteNumber(payload?.lastCacheCreationInputTokens), lastOutputTokens: asFiniteNumber(payload?.lastOutputTokens), lastReasoningOutputTokens: asFiniteNumber(payload?.lastReasoningOutputTokens), toolUses: asFiniteNumber(payload?.toolUses), diff --git a/packages/contracts/src/providerRuntime.ts b/packages/contracts/src/providerRuntime.ts index e732451a7b..6a4bf7b7d5 100644 --- 
a/packages/contracts/src/providerRuntime.ts +++ b/packages/contracts/src/providerRuntime.ts @@ -298,6 +298,34 @@ const ThreadMetadataUpdatedPayload = Schema.Struct({ }); export type ThreadMetadataUpdatedPayload = typeof ThreadMetadataUpdatedPayload.Type; +/** + * Snapshot of how many tokens the model has consumed on a thread. + * + * Two distinct dimensions are reported here — don't confuse them: + * + * 1. **Context-window dimension** (for the ring display): how much of the + * model's prompt window is currently occupied. + * - `usedTokens` = input-side tokens **only** (input + cache-read + + * cache-creation). Output and reasoning tokens are generated *out* of + * the model and do not live in the prompt window, so they are + * excluded — including them inflates the ring for long-output turns. + * - `lastUsedTokens` = the same measure scoped to the most recent turn. + * - `maxTokens` = the model's declared context-window size. + * + * 2. **Billing dimension** (for the cost ledger): how many tokens were + * billed for this turn, class-by-class, so downstream pricing can apply + * the correct tier. These are *not* clamped to the context window — + * per-turn output tokens are separate from what persists into context. + * - `inputTokens` / `cachedInputTokens` / `cacheCreationInputTokens` / + * `outputTokens` / `reasoningOutputTokens` — cumulative class totals. + * - `lastXxxTokens` — the delta for the most recent turn. The presence + * of any `lastXxxTokens` field is the canonical signal that this + * snapshot represents the end of a turn; mid-turn snapshots omit + * them and flow only to the context-window activity (not the cost + * ledger, see `ProviderRuntimeIngestion`). + * - `totalProcessedTokens` — cumulative tokens billed across the + * session (for display). 
+ */ export const ThreadTokenUsageSnapshot = Schema.Struct({ usedTokens: NonNegativeInt, totalProcessedTokens: Schema.optional(NonNegativeInt), From d46b444d417b2347c7e9023aecf90166e48c8a79 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Wed, 22 Apr 2026 01:11:10 +0200 Subject: [PATCH 14/16] fix(cost): use per-call input-side from SDKAssistantMessage for ring accuracy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier switch to input-side `usedTokens` still showed inflated values for Claude Opus (and any multi-call turn) because the two signals we trusted are both unreliable sources of current context size: 1. `result.usage` is **session-cumulative** across every API call on the thread, not just this turn. Summing its input-side classes grows linearly with turn count — exactly what users saw on Opus, which makes many API calls per turn. 2. `task_progress.usage` only carries an opaque SDK `total_tokens`; the Anthropic-native per-class breakdown (`input_tokens` / `cache_read_input_tokens` / `cache_creation_input_tokens`) is **not present** on `SDKTaskProgressMessage.usage`. Parsing it always falls through to `total_tokens`. The only source that carries the *exact per-call prompt breakdown* is `SDKAssistantMessage.message.usage` — that's `BetaUsage` from the Anthropic API, refreshed on every assistant frame. Fix: - New `context.lastApiCallInputSideTokens` tracks `input_tokens + cache_read_input_tokens + cache_creation_input_tokens` captured from each `SDKAssistantMessage.message.usage`. Refreshed per frame, cleared after the turn-completion emission so the next turn starts clean. - `handleAssistantMessage` also emits a `thread.token-usage.updated` event on each assistant frame with this input-side sum as `usedTokens`, so the mid-turn ring tracks real prompt size (not the SDK's opaque total). - `buildClaudeTurnCompleteUsage` now takes an optional `lastApiCallInputSide` and uses it as the top-priority `usedTokens` source. 
  Priority:
    1. `lastApiCallInputSide` — exact current context size.
    2. `taskSnapshot.usedTokens` — SDK-opaque `total_tokens` (fallback).
    3. Per-turn *delta* input-side — last-ditch when neither of the
       above is present.
  The old session-cumulative input-side fallback has been removed; it
  inflated any multi-call turn. (`cumulative.totalTokens` is still used
  as the final resort when the per-turn input-side delta is also zero.)
- `lastUsedTokens` mirrors the resolved `usedTokens` when the per-turn
  input-side delta is zero, so it no longer falls back to the
  session-cumulative input-side sum.

Tests:
- Updated the "preserves oversized result totals after task progress"
  test: `lastUsedTokens` is now `190_000` (mirrors `usedTokens`), not
  `535_000` (the removed cumulative fallback).
- New `prefers lastApiCallInputSide over the task snapshot for usedTokens`:
  when both are present, the per-call value wins.
- New `does NOT fall back to cumulative input-side for usedTokens`: with a
  real prior cumulative, the fallback now returns the per-turn delta, not
  the session-wide sum.
- New adapter-level test verifying that an assistant frame with
  Anthropic-native usage emits a `thread.token-usage.updated` event with
  `usedTokens = input + cache_read + cache_creation`.

Important: existing threads retain their pre-fix `usedTokens` values in
stored `context-window.updated` activities until the next turn generates a
new activity. The ring self-heals on the first new turn; old turns in
history keep their stale numbers.

Verified: 206/206 targeted server tests pass (3 new), 908/908 web tests
pass, typecheck + oxlint clean.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/provider/Layers/ClaudeAdapter.test.ts | 81 ++++++++++- .../src/provider/Layers/ClaudeAdapter.ts | 132 ++++++++++++++++-- .../Layers/ClaudeAdapter.usage.test.ts | 55 ++++++++ 3 files changed, 250 insertions(+), 18 deletions(-) diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts index 5e023c07a7..bcaab95283 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts @@ -1544,6 +1544,75 @@ describe("ClaudeAdapterLive", () => { ); }); + it.effect( + "emits thread token usage from assistant frame usage (per-call input side)", + () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 8).pipe( + Stream.runCollect, + Effect.forkChild, + ); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: "claudeAgent", + runtimeMode: "full-access", + }); + + yield* adapter.sendTurn({ + threadId: THREAD_ID, + input: "hello", + attachments: [], + }); + + // Assistant frame carrying Anthropic-native per-call usage. + // input (fresh) + cache_read + cache_creation = 80 + 45_000 + 2_000 + // = 47_080 — the current context-window fill, unlike the + // session-cumulative `result.usage` which would grow across calls. 
+ harness.query.emit({ + type: "assistant", + session_id: "sdk-session-assistant-usage", + uuid: "assistant-usage-1", + parent_tool_use_id: null, + message: { + id: "assistant-message-usage", + content: [{ type: "text", text: "ok" }], + usage: { + input_tokens: 80, + cache_read_input_tokens: 45_000, + cache_creation_input_tokens: 2_000, + output_tokens: 12, + }, + }, + } as unknown as SDKMessage); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const usageEvent = runtimeEvents.find( + (event) => event.type === "thread.token-usage.updated", + ); + assert.equal(usageEvent?.type, "thread.token-usage.updated"); + if (usageEvent?.type === "thread.token-usage.updated") { + assert.deepEqual(usageEvent.payload, { + usage: { + usedTokens: 47_080, + lastUsedTokens: 47_080, + inputTokens: 80, + cachedInputTokens: 45_000, + cacheCreationInputTokens: 2_000, + outputTokens: 12, + }, + }); + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }, + ); + it.effect("emits Claude context window on result completion usage snapshots", () => { const harness = makeHarness(); return Effect.gen(function* () { @@ -1754,13 +1823,17 @@ describe("ClaudeAdapterLive", () => { const finalUsageEvent = usageEvents.at(-1); assert.equal(finalUsageEvent?.type, "thread.token-usage.updated"); if (finalUsageEvent?.type === "thread.token-usage.updated") { - // Task snapshot drives usedTokens (real current-context); result - // cumulative drives totalProcessedTokens. lastUsedTokens reports - // the turn's total (cumulative since there's no prior turn). + // Task snapshot drives `usedTokens` (SDK-opaque current context, + // 190k), `totalProcessedTokens` carries the billing-side + // cumulative (535k). 
`lastUsedTokens` mirrors `usedTokens` + // because the per-turn delta input-side is zero (this test's + // result has only `total_tokens`, no breakdown) and we now + // refuse to fall back to the session-cumulative sum, which + // would inflate the ring over multi-call turns. assert.deepEqual(finalUsageEvent.payload, { usage: { usedTokens: 190000, - lastUsedTokens: 535000, + lastUsedTokens: 190000, totalProcessedTokens: 535000, maxTokens: 200000, }, diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index d65330a6a7..3654645e86 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -158,6 +158,17 @@ interface ClaudeSessionContext { turnState: ClaudeTurnState | undefined; lastKnownContextWindow: number | undefined; lastKnownTokenUsage: ThreadTokenUsageSnapshot | undefined; + /** + * Input-side token sum (input + cache-read + cache-creation) of the + * most recent Anthropic API call captured from `SDKAssistantMessage`. + * This is the authoritative current-context-size signal for the ring: + * unlike `result.usage` (session-cumulative) or `task_progress.usage` + * (SDK-opaque `total_tokens` only), each assistant frame carries the + * exact per-call prompt breakdown. Refreshed on every assistant frame; + * cleared after each turn's completion event so the next turn starts + * without stale carry-over. + */ + lastApiCallInputSideTokens: number | undefined; /** * Cumulative per-class token counts emitted in the prior turn's * `result.usage`. 
Claude's SDK reports `result.usage` as a running total @@ -425,6 +436,15 @@ export interface ClaudeTurnCompleteUsageInput { readonly taskSnapshot: ThreadTokenUsageSnapshot | undefined; readonly contextWindow?: number | undefined; readonly priorCumulative?: ClaudeUsageBreakdown | undefined; + /** + * Input-side token sum (input + cache-read + cache-creation) from the + * *last* Anthropic API call on this turn. When available, this is the + * authoritative current-context-size signal for the ring — the + * cumulative `resultUsage` is a session-wide sum and over-reports + * multi-call turns, and the task-snapshot fallback only exposes an + * opaque SDK `total_tokens`. + */ + readonly lastApiCallInputSide?: number | undefined; } export interface ClaudeTurnCompleteUsageResult { @@ -478,18 +498,31 @@ export function buildClaudeTurnCompleteUsage( // billed but not persisted into the context, so including them over- // reports the ring for long-output turns. const lastInputSideTokens = deltaInput + deltaCached + deltaCacheCreation; - const cumulativeInputSideTokens = - cumulative.inputTokens + cumulative.cachedInputTokens + cumulative.cacheCreationInputTokens; - const cumulativeUsedFallback = - cumulativeInputSideTokens > 0 ? cumulativeInputSideTokens : cumulative.totalTokens; - // Prefer the freshest task snapshot (captured per-API-call → matches the - // real current context size). Fall back to the cumulative input-side. - const usedTokens = input.taskSnapshot?.usedTokens ?? cumulativeUsedFallback; - // `lastUsedTokens` mirrors `usedTokens` at turn scope. When this turn - // actually consumed prompt tokens, use its input-side delta; otherwise - // fall back to the cumulative read so we never report 0 for a turn that - // still had billable activity. - const lastUsedTokens = lastInputSideTokens > 0 ? 
lastInputSideTokens : cumulativeUsedFallback; + // `resultUsage` is a session-wide cumulative across every API call on + // the thread (not just this turn!), so summing its input-side classes + // inflates the ring proportionally to turn count. We only fall back + // to it when nothing else is available, using the per-turn *delta* + // input-side — which represents just the tokens added this turn. + const deltaUsedFallback = + lastInputSideTokens > 0 ? lastInputSideTokens : cumulative.totalTokens; + // Priority order for `usedTokens` (authoritative → approximate): + // 1. `lastApiCallInputSide` — exact current context size, captured + // from the last assistant frame's per-call `usage`. + // 2. `taskSnapshot.usedTokens` — SDK-opaque `total_tokens` from the + // freshest `task_progress`/`task_notification` snapshot. Better + // than cumulative-input but not class-accurate. + // 3. `deltaUsedFallback` — per-turn delta input-side. Last-ditch + // when neither above is present (unusual — no assistant frames + + // no task events means a no-content turn). + const usedTokens = + input.lastApiCallInputSide !== undefined && input.lastApiCallInputSide > 0 + ? input.lastApiCallInputSide + : (input.taskSnapshot?.usedTokens ?? deltaUsedFallback); + // `lastUsedTokens` is the per-turn echo of `usedTokens`. Prefer the + // per-turn input-side delta (tokens *added* this turn); fall back to + // the same resolved `usedTokens` so we never emit 0 for a turn that + // clearly had activity. + const lastUsedTokens = lastInputSideTokens > 0 ? lastInputSideTokens : usedTokens; const snapshot: ThreadTokenUsageSnapshot = { usedTokens, @@ -1553,18 +1586,30 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( // `result.usage` reports running totals across every API call in the // session. 
We combine it with the freshest per-call task snapshot (for - // `usedTokens` — the real current-context value) and with the prior - // turn's cumulative snapshot (to derive this turn's per-class deltas). + // the SDK's opaque `total_tokens`) and with the prior turn's cumulative + // snapshot (to derive this turn's per-class deltas). The preferred + // `usedTokens` source, however, is the input-side token sum of the + // *last Anthropic API call* in this turn — captured directly from the + // freshest `SDKAssistantMessage.usage` via `context.lastApiCallInputSideTokens`. + // That number is the only one that tracks current context size + // precisely for multi-call turns (Opus, extended thinking, heavy tool + // use), because `result.usage` is session-cumulative and the + // task-event `usage` only exposes an opaque `total_tokens`. const turnUsage = buildClaudeTurnCompleteUsage({ resultUsage: result?.usage, taskSnapshot: context.lastKnownTokenUsage, contextWindow: resultContextWindow ?? context.lastKnownContextWindow, priorCumulative: context.lastTurnCumulativeUsage, + lastApiCallInputSide: context.lastApiCallInputSideTokens, }); const usageSnapshot = turnUsage.snapshot; if (turnUsage.nextCumulative !== undefined) { context.lastTurnCumulativeUsage = turnUsage.nextCumulative; } + // Clear per-turn scratch so the next turn starts without stale + // carry-over — `lastApiCallInputSideTokens` is captured fresh from + // the next turn's assistant frames. + context.lastApiCallInputSideTokens = undefined; const turnState = context.turnState; if (!turnState) { @@ -2144,6 +2189,64 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( yield* backfillAssistantTextBlocksFromSnapshot(context, message); } + // Capture the per-API-call input-side token count from this assistant + // frame and emit it as the freshest `usedTokens` for the + // context-window ring. 
Each `SDKAssistantMessage` carries Anthropic's + // native per-call usage (`message.message.usage`), so + // `input_tokens + cache_read_input_tokens + cache_creation_input_tokens` + // is the tokens *currently* in the prompt window — much more accurate + // than the SDK-opaque `total_tokens` on `task_progress` (which lacks + // per-class breakdown) or the session-cumulative `result.usage` + // (which grows with every API call in the turn). + const perCallBreakdown = parseClaudeUsageBreakdown( + (message.message as { usage?: unknown }).usage, + ); + if (perCallBreakdown) { + const inputSide = + perCallBreakdown.inputTokens + + perCallBreakdown.cachedInputTokens + + perCallBreakdown.cacheCreationInputTokens; + if (inputSide > 0) { + context.lastApiCallInputSideTokens = inputSide; + const maxTokens = context.lastKnownContextWindow; + const ringSnapshot: ThreadTokenUsageSnapshot = { + usedTokens: inputSide, + lastUsedTokens: inputSide, + ...(perCallBreakdown.inputTokens > 0 + ? { inputTokens: perCallBreakdown.inputTokens } + : {}), + ...(perCallBreakdown.cachedInputTokens > 0 + ? { cachedInputTokens: perCallBreakdown.cachedInputTokens } + : {}), + ...(perCallBreakdown.cacheCreationInputTokens > 0 + ? { cacheCreationInputTokens: perCallBreakdown.cacheCreationInputTokens } + : {}), + ...(perCallBreakdown.outputTokens > 0 + ? { outputTokens: perCallBreakdown.outputTokens } + : {}), + ...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0 + ? { maxTokens } + : {}), + }; + context.lastKnownTokenUsage = ringSnapshot; + const usageStamp = yield* makeEventStamp(); + yield* offerRuntimeEvent({ + type: "thread.token-usage.updated", + eventId: usageStamp.eventId, + provider: PROVIDER, + createdAt: usageStamp.createdAt, + threadId: context.session.threadId, + ...(context.turnState + ? 
{ turnId: asCanonicalTurnId(context.turnState.turnId) } + : {}), + payload: { + usage: ringSnapshot, + }, + providerRefs: nativeProviderRefs(context), + }); + } + } + context.lastAssistantUuid = message.uuid; yield* updateResumeCursor(context); }); @@ -3070,6 +3173,7 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( turnState: undefined, lastKnownContextWindow: undefined, lastKnownTokenUsage: undefined, + lastApiCallInputSideTokens: undefined, lastTurnCumulativeUsage: undefined, lastAssistantUuid: resumeState?.resumeSessionAt, lastThreadStartedId: undefined, diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts index 3015d2820b..4b9f993c99 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts @@ -154,6 +154,61 @@ describe("buildClaudeTurnCompleteUsage", () => { expect(res.nextCumulative).toBeUndefined(); }); + it("prefers lastApiCallInputSide over the task snapshot for usedTokens", () => { + // Session-cumulative result.usage reports big numbers (multiple calls + // have run across the whole session), but only the last API call's + // input-side count matters for the ring. The SDK's opaque + // `task_progress.total_tokens` (via taskSnapshot.usedTokens) is less + // trustworthy than the per-call input-side captured from + // `SDKAssistantMessage.usage`, so the per-call value wins. 
+ const res = buildClaudeTurnCompleteUsage({ + resultUsage: { + input_tokens: 10_000, // session cumulative across many calls + cache_read_input_tokens: 150_000, + cache_creation_input_tokens: 5_000, + output_tokens: 20_000, + }, + taskSnapshot: { usedTokens: 999_999, lastUsedTokens: 999_999 }, + contextWindow: 200_000, + priorCumulative: undefined, + lastApiCallInputSide: 48_000, + }); + expect(res.snapshot!.usedTokens).toBe(48_000); + // totalProcessedTokens still tracks billing-side cumulative for + // informational display ("tokens processed so far"). + expect(res.snapshot!.totalProcessedTokens).toBe(185_000); + }); + + it("does NOT fall back to cumulative input-side for usedTokens", () => { + // Previously we added `input + cached + cacheCreation` from + // `result.usage` when no task snapshot was available. That sum is + // *session-cumulative* in Claude's SDK — it over-reports for any + // multi-call turn. With no task snapshot and no last-API-call + // capture, we now fall back to the per-turn delta input-side + // (first turn → equals cumulative; subsequent turns → just this + // turn's additions). + const res = buildClaudeTurnCompleteUsage({ + resultUsage: { + input_tokens: 5_000, + cache_read_input_tokens: 200_000, + cache_creation_input_tokens: 10_000, + output_tokens: 3_000, + }, + taskSnapshot: undefined, + contextWindow: 200_000, + priorCumulative: { + inputTokens: 4_000, + cachedInputTokens: 180_000, + cacheCreationInputTokens: 8_000, + outputTokens: 2_500, + totalTokens: 194_500, + }, + }); + // Per-turn input-side delta = 1_000 + 20_000 + 2_000 = 23_000. 
+ expect(res.snapshot!.usedTokens).toBe(23_000); + expect(res.snapshot!.lastUsedTokens).toBe(23_000); + }); + it("clamps negative deltas to zero when cumulative goes backwards", () => { const prior = { inputTokens: 1_000, From 1790ec541cd9d4bc7724e3718755fe931e29925d Mon Sep 17 00:00:00 2001 From: Olympicx Date: Wed, 22 Apr 2026 01:11:25 +0200 Subject: [PATCH 15/16] chore(desktop): bump to 0.0.21 + rebrand to "T3 by Stan" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local rebuild for personal distribution off the feat/token-cost-meter branch. Keeps the app bundle identifier (`com.t3tools.t3code`) untouched so existing auto-update channels aren't disturbed, but changes the user-facing name, dev launcher label, and artifact filename. - apps/desktop/package.json: productName → "T3 by Stan". - apps/desktop/scripts/electron-launcher.mjs: APP_DISPLAY_NAME follows the new name (dev / prod variants). - scripts/build-desktop-artifact.ts: artifactName → `T3-by-Stan-${version}-${arch}.${ext}` so the DMG / zip / blockmap files land as `release/T3-by-Stan-0.0.21-arm64.dmg` etc. - apps/{desktop,server,web}/package.json + bun.lock: version bump 0.0.20 → 0.0.21. The legacy user-data migration constant in `apps/desktop/src/main.ts` (`LEGACY_USER_DATA_DIR_NAME = "T3 Code (Alpha)"`) is intentionally left alone so this build still picks up data from the prior install. Built macOS arm64 DMG sits at release/T3-by-Stan-0.0.21-arm64.dmg (136 MB, unsigned / ad-hoc — Gatekeeper first-launch warning expected). Signing / notarization not configured; would require Apple Developer credentials. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/desktop/package.json | 4 ++-- apps/desktop/scripts/electron-launcher.mjs | 2 +- apps/server/package.json | 2 +- apps/web/package.json | 2 +- bun.lock | 6 +++--- scripts/build-desktop-artifact.ts | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 2a4ced70e7..a38c664b16 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -1,6 +1,6 @@ { "name": "@t3tools/desktop", - "version": "0.0.20", + "version": "0.0.21", "private": true, "type": "module", "main": "dist-electron/main.cjs", @@ -28,5 +28,5 @@ "typescript": "catalog:", "vitest": "catalog:" }, - "productName": "T3 Code (Alpha)" + "productName": "T3 by Stan" } diff --git a/apps/desktop/scripts/electron-launcher.mjs b/apps/desktop/scripts/electron-launcher.mjs index 1453cbe666..12e6f6f6bd 100644 --- a/apps/desktop/scripts/electron-launcher.mjs +++ b/apps/desktop/scripts/electron-launcher.mjs @@ -17,7 +17,7 @@ import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; const isDevelopment = Boolean(process.env.VITE_DEV_SERVER_URL); -const APP_DISPLAY_NAME = isDevelopment ? "T3 Code (Dev)" : "T3 Code (Alpha)"; +const APP_DISPLAY_NAME = isDevelopment ? "T3 by Stan (Dev)" : "T3 by Stan"; const APP_BUNDLE_ID = isDevelopment ? 
"com.t3tools.t3code.dev" : "com.t3tools.t3code"; const LAUNCHER_VERSION = 2; diff --git a/apps/server/package.json b/apps/server/package.json index 14dbe35bcb..13a8124cb2 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -1,6 +1,6 @@ { "name": "t3", - "version": "0.0.20", + "version": "0.0.21", "license": "MIT", "repository": { "type": "git", diff --git a/apps/web/package.json b/apps/web/package.json index b18defebbe..11e69d1248 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -1,6 +1,6 @@ { "name": "@t3tools/web", - "version": "0.0.20", + "version": "0.0.21", "private": true, "type": "module", "scripts": { diff --git a/bun.lock b/bun.lock index e9b7511e34..8fb16c217b 100644 --- a/bun.lock +++ b/bun.lock @@ -14,7 +14,7 @@ }, "apps/desktop": { "name": "@t3tools/desktop", - "version": "0.0.20", + "version": "0.0.21", "dependencies": { "effect": "catalog:", "electron": "40.6.0", @@ -43,7 +43,7 @@ }, "apps/server": { "name": "t3", - "version": "0.0.20", + "version": "0.0.21", "bin": { "t3": "./dist/bin.mjs", }, @@ -75,7 +75,7 @@ }, "apps/web": { "name": "@t3tools/web", - "version": "0.0.20", + "version": "0.0.21", "dependencies": { "@base-ui/react": "^1.2.0", "@dnd-kit/core": "^6.3.1", diff --git a/scripts/build-desktop-artifact.ts b/scripts/build-desktop-artifact.ts index 74e8bed0cb..5f3ae5427c 100644 --- a/scripts/build-desktop-artifact.ts +++ b/scripts/build-desktop-artifact.ts @@ -569,7 +569,7 @@ const createBuildConfig = Effect.fn("createBuildConfig")(function* ( const buildConfig: Record = { appId: "com.t3tools.t3code", productName: resolveDesktopProductName(version), - artifactName: "T3-Code-${version}-${arch}.${ext}", + artifactName: "T3-by-Stan-${version}-${arch}.${ext}", directories: { buildResources: "apps/desktop/resources", }, From bd0fc3bdcdabccfd9123a62af9128b99864d04e7 Mon Sep 17 00:00:00 2001 From: Olympicx Date: Wed, 22 Apr 2026 01:21:26 +0200 Subject: [PATCH 16/16] chore(desktop): bump to 0.0.22 for Opus 
context-ring fix rebuild Rebuilds the personal T3-by-Stan DMG to pick up the per-call input-side usedTokens fix (d46b444d) so the context ring shows accurate values on Opus + multi-call turns. No behavioural change beyond version; bun.lock re-synced. Artifact: release/T3-by-Stan-0.0.22-arm64.dmg (136 MB, unsigned). Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/desktop/package.json | 2 +- apps/server/package.json | 2 +- apps/web/package.json | 2 +- bun.lock | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/desktop/package.json b/apps/desktop/package.json index a38c664b16..8f2211d7df 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -1,6 +1,6 @@ { "name": "@t3tools/desktop", - "version": "0.0.21", + "version": "0.0.22", "private": true, "type": "module", "main": "dist-electron/main.cjs", diff --git a/apps/server/package.json b/apps/server/package.json index 13a8124cb2..c65979430e 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -1,6 +1,6 @@ { "name": "t3", - "version": "0.0.21", + "version": "0.0.22", "license": "MIT", "repository": { "type": "git", diff --git a/apps/web/package.json b/apps/web/package.json index 11e69d1248..99069389d6 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -1,6 +1,6 @@ { "name": "@t3tools/web", - "version": "0.0.21", + "version": "0.0.22", "private": true, "type": "module", "scripts": { diff --git a/bun.lock b/bun.lock index 8fb16c217b..287d35f87b 100644 --- a/bun.lock +++ b/bun.lock @@ -14,7 +14,7 @@ }, "apps/desktop": { "name": "@t3tools/desktop", - "version": "0.0.21", + "version": "0.0.22", "dependencies": { "effect": "catalog:", "electron": "40.6.0", @@ -43,7 +43,7 @@ }, "apps/server": { "name": "t3", - "version": "0.0.21", + "version": "0.0.22", "bin": { "t3": "./dist/bin.mjs", }, @@ -75,7 +75,7 @@ }, "apps/web": { "name": "@t3tools/web", - "version": "0.0.21", + "version": "0.0.22", "dependencies": { "@base-ui/react": "^1.2.0", 
"@dnd-kit/core": "^6.3.1",