From 81c65674ec143be3c7c54965928746804da8f996 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 19:30:58 +0200
Subject: [PATCH 01/16] feat(shared): add model pricing table + computeTurnCost

Seed rates for Claude (sonnet-4.6, opus-4.6/4.7/4.5, haiku-4.5) and
Codex (gpt-5.4, 5.3-codex, spark, mini) in USD per 1M tokens.
getPricing() resolves via provider aliases with zero-rate fallback.
computeTurnCost() splits input / cached / output / reasoning spend.

Prep for session + MTD cost meter.
---
 packages/shared/package.json        |   4 +
 packages/shared/src/pricing.test.ts | 145 +++++++++++++++++
 packages/shared/src/pricing.ts      | 243 ++++++++++++++++++++++++++++
 3 files changed, 392 insertions(+)
 create mode 100644 packages/shared/src/pricing.test.ts
 create mode 100644 packages/shared/src/pricing.ts

diff --git a/packages/shared/package.json b/packages/shared/package.json
index 82085dfcaf..84899e5e31 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -71,6 +71,10 @@
     "./path": {
       "types": "./src/path.ts",
       "import": "./src/path.ts"
+    },
+    "./pricing": {
+      "types": "./src/pricing.ts",
+      "import": "./src/pricing.ts"
     }
   },
   "scripts": {
diff --git a/packages/shared/src/pricing.test.ts b/packages/shared/src/pricing.test.ts
new file mode 100644
index 0000000000..de76bea819
--- /dev/null
+++ b/packages/shared/src/pricing.test.ts
@@ -0,0 +1,145 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  PRICING_TABLE,
+  UNKNOWN_MODEL_PRICING,
+  computeTurnCost,
+  formatUsd,
+  getPricing,
+} from "./pricing.ts";
+
+describe("pricing/getPricing", () => {
+  it("resolves canonical Claude slug", () => {
+    const p = getPricing("claude-sonnet-4-6");
+    expect(p.provider).toBe("claudeAgent");
+    expect(p.inputPerMTok).toBe(3);
+    expect(p.cachedInputPerMTok).toBe(0.3);
+    expect(p.outputPerMTok).toBe(15);
+  });
+
+  it("resolves Claude short alias via provider", () => {
+    const p = getPricing("sonnet", "claudeAgent");
+    expect(p.provider).toBe("claudeAgent");
+    expect(p.inputPerMTok).toBe(3);
+  });
+
+  it("resolves Codex canonical slug", () => {
+    const p = getPricing("gpt-5.4");
+    expect(p.provider).toBe("codex");
+    expect(p.inputPerMTok).toBe(1.25);
+    expect(p.outputPerMTok).toBe(10);
+  });
+
+  it("resolves Codex spark as mini tier", () => {
+    const p = getPricing("gpt-5.3-codex-spark");
+    expect(p.outputPerMTok).toBe(2);
+  });
+
+  it("falls back to zero-rate for unknown model", () => {
+    const p = getPricing("llama-7b-xyz");
+    expect(p).toEqual(UNKNOWN_MODEL_PRICING);
+  });
+
+  it("falls back for empty / null model", () => {
+    expect(getPricing(null)).toEqual(UNKNOWN_MODEL_PRICING);
+    expect(getPricing("")).toEqual(UNKNOWN_MODEL_PRICING);
+    expect(getPricing("   ")).toEqual(UNKNOWN_MODEL_PRICING);
+  });
+
+  it("defaults reasoningOutput rate to output rate", () => {
+    for (const pricing of PRICING_TABLE.values()) {
+      expect(pricing.reasoningOutputPerMTok).toBe(pricing.outputPerMTok);
+    }
+  });
+});
+
+describe("pricing/computeTurnCost", () => {
+  it("computes Claude Sonnet turn cost correctly", () => {
+    const cost = computeTurnCost("claude-sonnet-4-6", {
+      inputTokens: 10_000,
+      cachedInputTokens: 100_000,
+      outputTokens: 2_000,
+      reasoningOutputTokens: 500,
+    });
+    // 10k * $3/Mtok = $0.03
+    expect(cost.inputUsd).toBeCloseTo(0.03, 6);
+    // 100k * $0.30/Mtok = $0.03
+    expect(cost.cachedUsd).toBeCloseTo(0.03, 6);
+    // 2k * $15/Mtok = $0.03
+    expect(cost.outputUsd).toBeCloseTo(0.03, 6);
+    // 500 * $15/Mtok = $0.0075
+    expect(cost.reasoningUsd).toBeCloseTo(0.0075, 6);
+    expect(cost.totalUsd).toBeCloseTo(0.0975, 6);
+  });
+
+  it("computes Codex GPT-5.4 turn cost correctly", () => {
+    const cost = computeTurnCost("gpt-5.4", {
+      inputTokens: 1_000_000,
+      cachedInputTokens: 0,
+      outputTokens: 100_000,
+      reasoningOutputTokens: 50_000,
+    });
+    // 1M * $1.25 = $1.25
+    expect(cost.inputUsd).toBeCloseTo(1.25, 6);
+    expect(cost.cachedUsd).toBe(0);
+    // 100k * $10/Mtok = $1
+    expect(cost.outputUsd).toBeCloseTo(1, 6);
+    // 50k * $10/Mtok = $0.5
+    expect(cost.reasoningUsd).toBeCloseTo(0.5, 6);
+    expect(cost.totalUsd).toBeCloseTo(2.75, 6);
+  });
+
+  it("returns zero cost for unknown model", () => {
+    const cost = computeTurnCost("fake-model", {
+      inputTokens: 10_000,
+      outputTokens: 10_000,
+    });
+    expect(cost.totalUsd).toBe(0);
+  });
+
+  it("ignores negative / non-finite deltas", () => {
+    const cost = computeTurnCost("claude-sonnet-4-6", {
+      inputTokens: -100,
+      outputTokens: Number.NaN,
+      cachedInputTokens: Number.POSITIVE_INFINITY,
+      reasoningOutputTokens: 0,
+    });
+    expect(cost.totalUsd).toBe(0);
+  });
+
+  it("handles missing fields", () => {
+    const cost = computeTurnCost("claude-sonnet-4-6", { outputTokens: 1_000 });
+    expect(cost.outputUsd).toBeCloseTo(0.015, 6);
+    expect(cost.inputUsd).toBe(0);
+    expect(cost.cachedUsd).toBe(0);
+    expect(cost.reasoningUsd).toBe(0);
+    expect(cost.totalUsd).toBeCloseTo(0.015, 6);
+  });
+});
+
+describe("pricing/formatUsd", () => {
+  it("formats zero + invalid", () => {
+    expect(formatUsd(0)).toBe("$0.00");
+    expect(formatUsd(null)).toBe("$0.00");
+    expect(formatUsd(Number.NaN)).toBe("$0.00");
+    expect(formatUsd(-1)).toBe("$0.00");
+  });
+
+  it("formats sub-cent", () => {
+    expect(formatUsd(0.002)).toBe("<$0.01");
+  });
+
+  it("formats cents with 3 digits trimmed", () => {
+    expect(formatUsd(0.125)).toBe("$0.125");
+    expect(formatUsd(0.12)).toBe("$0.12");
+  });
+
+  it("formats 2-digit dollars", () => {
+    expect(formatUsd(1.234)).toBe("$1.23");
+    expect(formatUsd(12.5)).toBe("$12.50");
+  });
+
+  it("formats large dollars rounded", () => {
+    expect(formatUsd(1234.56)).toBe("$1,235");
+  });
+});
diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts
new file mode 100644
index 0000000000..3ab9685348
--- /dev/null
+++ b/packages/shared/src/pricing.ts
@@ -0,0 +1,243 @@
+import { normalizeModelSlug } from "./model.ts";
+import type { ProviderKind } from "@t3tools/contracts";
+
+/**
+ * USD price per 1,000,000 tokens for each token class.
+ *
+ * `cachedInputPerMTok` is the discounted input price applied when the provider
+ * serves cached prefix tokens (Anthropic prompt caching / OpenAI cached input).
+ * `reasoningOutputPerMTok` falls back to `outputPerMTok` when a seed entry does
+ * not list a separate reasoning rate. `provider` is "unknown" for the fallback.
+ */
+export interface ModelPricing {
+  readonly provider: ProviderKind | "unknown";
+  readonly inputPerMTok: number;
+  readonly cachedInputPerMTok: number;
+  readonly outputPerMTok: number;
+  readonly reasoningOutputPerMTok: number;
+}
+
+/** Raw seed rates (USD per 1M tokens). Source: public provider pricing pages. */
+const SEED_PRICING: ReadonlyArray<
+  readonly [string, Omit<ModelPricing, "reasoningOutputPerMTok"> & { reasoningOutputPerMTok?: number }]
+> = [
+  // ── Anthropic / Claude ───────────────────────────────────────────────
+  // Extended-thinking tokens bill as output. TODO(review): re-check opus-4-7 rates.
+  [
+    "claude-sonnet-4-6",
+    {
+      provider: "claudeAgent",
+      inputPerMTok: 3,
+      cachedInputPerMTok: 0.3,
+      outputPerMTok: 15,
+    },
+  ],
+  [
+    "claude-opus-4-7",
+    {
+      provider: "claudeAgent",
+      inputPerMTok: 15,
+      cachedInputPerMTok: 1.5,
+      outputPerMTok: 75,
+    },
+  ],
+  [
+    "claude-opus-4-6",
+    {
+      provider: "claudeAgent",
+      inputPerMTok: 5,
+      cachedInputPerMTok: 0.5,
+      outputPerMTok: 25,
+    },
+  ],
+  [
+    "claude-opus-4-5",
+    {
+      provider: "claudeAgent",
+      inputPerMTok: 5,
+      cachedInputPerMTok: 0.5,
+      outputPerMTok: 25,
+    },
+  ],
+  [
+    "claude-haiku-4-5",
+    {
+      provider: "claudeAgent",
+      inputPerMTok: 1,
+      cachedInputPerMTok: 0.1,
+      outputPerMTok: 5,
+    },
+  ],
+  // ── OpenAI / Codex ───────────────────────────────────────────────────
+  // Codex app routes use GPT-5 family pricing. Reasoning tokens bill as output.
+  [
+    "gpt-5.4",
+    {
+      provider: "codex",
+      inputPerMTok: 1.25,
+      cachedInputPerMTok: 0.125,
+      outputPerMTok: 10,
+    },
+  ],
+  [
+    "gpt-5.3-codex",
+    {
+      provider: "codex",
+      inputPerMTok: 1.25,
+      cachedInputPerMTok: 0.125,
+      outputPerMTok: 10,
+    },
+  ],
+  [
+    "gpt-5.3-codex-spark",
+    {
+      provider: "codex",
+      inputPerMTok: 0.25,
+      cachedInputPerMTok: 0.025,
+      outputPerMTok: 2,
+    },
+  ],
+  [
+    "gpt-5.4-mini",
+    {
+      provider: "codex",
+      inputPerMTok: 0.25,
+      cachedInputPerMTok: 0.025,
+      outputPerMTok: 2,
+    },
+  ],
+];
+
+/**
+ * Pricing table keyed by canonical model slug. Exposed as a `ReadonlyMap` of
+ * frozen entries so consumers can't mutate rates at runtime.
+ */
+export const PRICING_TABLE: ReadonlyMap<string, ModelPricing> = (() => {
+  const map = new Map<string, ModelPricing>();
+  for (const [slug, raw] of SEED_PRICING) {
+    // Reasoning tokens bill at the output rate unless the seed lists its own.
+    const reasoningOutputPerMTok = raw.reasoningOutputPerMTok ?? raw.outputPerMTok;
+    // Freeze the entry: `readonly` is compile-time only and entries are shared.
+    map.set(slug, Object.freeze({ ...raw, reasoningOutputPerMTok }));
+  }
+  return map;
+})();
+
+/** Zero-cost fallback for unknown models. Keeps total cost honest (no fake rate). */
+export const UNKNOWN_MODEL_PRICING: ModelPricing = {
+  provider: "unknown",
+  inputPerMTok: 0,
+  cachedInputPerMTok: 0,
+  outputPerMTok: 0,
+  reasoningOutputPerMTok: 0,
+};
+
+/**
+ * Look up the rates for `model`.
+ *
+ * Resolution order: (1) alias normalization under the caller's `provider`, when
+ * one is given; (2) the trimmed slug exactly as passed; (3) alias normalization
+ * under each of the four listed providers in turn. Non-string, blank, or
+ * unmatched input yields `UNKNOWN_MODEL_PRICING`.
+ */
+export function getPricing(
+  model: string | null | undefined,
+  provider?: ProviderKind,
+): ModelPricing {
+  const slug = typeof model === "string" ? model.trim() : "";
+  if (slug === "") {
+    return UNKNOWN_MODEL_PRICING;
+  }
+
+  // Table hit for `slug` once normalized as one of `kind`'s models, if any.
+  const viaAlias = (kind: ProviderKind): ModelPricing | undefined => {
+    const canonical = normalizeModelSlug(slug, kind);
+    return canonical ? PRICING_TABLE.get(canonical) : undefined;
+  };
+
+  // (1) Provider-aware alias normalization.
+  const preferred = provider ? viaAlias(provider) : undefined;
+  if (preferred) return preferred;
+
+  // (2) Direct lookup (raw slug may already be canonical).
+  const exact = PRICING_TABLE.get(slug);
+  if (exact) return exact;
+
+  // (3) Try each provider's aliases as a last resort.
+  const fallbackOrder: ProviderKind[] = ["codex", "claudeAgent", "cursor", "opencode"];
+  for (const kind of fallbackOrder) {
+    const match = viaAlias(kind);
+    if (match) return match;
+  }
+  return UNKNOWN_MODEL_PRICING;
+}
+
+export interface TurnTokenDeltas {
+  readonly inputTokens: number;
+  readonly cachedInputTokens: number;
+  readonly outputTokens: number;
+  readonly reasoningOutputTokens: number;
+}
+
+export interface TurnCostBreakdown {
+  readonly inputUsd: number;
+  readonly cachedUsd: number;
+  readonly outputUsd: number;
+  readonly reasoningUsd: number;
+  readonly totalUsd: number;
+}
+
+export const ZERO_COST: TurnCostBreakdown = {
+  inputUsd: 0,
+  cachedUsd: 0,
+  outputUsd: 0,
+  reasoningUsd: 0,
+  totalUsd: 0,
+};
+
+function finite(value: number | null | undefined): number {
+  return typeof value !== "number" || !Number.isFinite(value) || value <= 0 ? 0 : value;
+}
+
+/**
+ * Price one turn's token deltas in USD, per token class.
+ *
+ * `inputTokens` is billed at the full input rate and `cachedInputTokens` at the
+ * cached rate, so callers must pass the non-cached count in `inputTokens`.
+ * Missing, negative, or non-finite counts are treated as 0.
+ * NOTE(review): reasoning is billed on top of `outputTokens`; confirm callers
+ * pass an output count that excludes reasoning tokens, else they bill twice.
+ */
+export function computeTurnCost(
+  model: string | null | undefined,
+  deltas: Partial<TurnTokenDeltas>,
+  provider?: ProviderKind,
+): TurnCostBreakdown {
+  const rates = getPricing(model, provider);
+  // USD for a (sanitized) token count at a per-million-token rate.
+  const usd = (tokens: number | undefined, perMTok: number): number =>
+    (finite(tokens) / 1_000_000) * perMTok;
+  const inputUsd = usd(deltas.inputTokens, rates.inputPerMTok);
+  const cachedUsd = usd(deltas.cachedInputTokens, rates.cachedInputPerMTok);
+  const outputUsd = usd(deltas.outputTokens, rates.outputPerMTok);
+  const reasoningUsd = usd(deltas.reasoningOutputTokens, rates.reasoningOutputPerMTok);
+  const totalUsd = inputUsd + cachedUsd + outputUsd + reasoningUsd;
+  return { inputUsd, cachedUsd, outputUsd, reasoningUsd, totalUsd };
+}
+
+/**
+ * Render a USD amount for display. Invalid or non-positive input → "$0.00";
+ * below a cent → "<$0.01"; under $1 → three decimals with one trailing zero
+ * trimmed; under $100 → two decimals; otherwise whole dollars, en-US grouping.
+ */
+export function formatUsd(value: number | null | undefined): string {
+  if (!(typeof value === "number" && Number.isFinite(value) && value > 0)) return "$0.00";
+  if (value < 0.01) return `<$0.01`;
+  if (value < 1) {
+    const thousandths = value.toFixed(3);
+    const trimmed = thousandths.endsWith("0") ? thousandths.slice(0, -1) : thousandths;
+    return `$${trimmed}`;
+  }
+  if (value < 100) return `$${value.toFixed(2)}`;
+  return `$${Math.round(value).toLocaleString("en-US")}`;
+}

From 589d72ca2b1d88bdee9332e13ba0be67cf00ae08 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 19:32:59 +0200
Subject: [PATCH 02/16] feat(web): add cost store with session + month buckets

localStorage-persisted zustand store at t3code:cost-store:v1.
Pure reducers accumulate token + USD spend per thread (session)
and per YYYY-MM in local tz (month-to-date). sanitize*() guards
garbage payloads; selectors expose session/month buckets and
avg cost per turn. Tests: 17 pass.
---
 apps/web/src/lib/costStore.test.ts | 310 ++++++++++++++++++++++++++++
 apps/web/src/lib/costStore.ts      | 318 +++++++++++++++++++++++++++++
 2 files changed, 628 insertions(+)
 create mode 100644 apps/web/src/lib/costStore.test.ts
 create mode 100644 apps/web/src/lib/costStore.ts

diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts
new file mode 100644
index 0000000000..1162f11d12
--- /dev/null
+++ b/apps/web/src/lib/costStore.test.ts
@@ -0,0 +1,310 @@
+import { beforeEach, describe, expect, it } from "vitest";
+
+import {
+  COST_STORE_STORAGE_KEY,
+  localMonthKey,
+  reduceRecordTurnCost,
+  reduceResetSession,
+  sanitizePersistedCostState,
+  selectCostSummary,
+  useCostStore,
+  type PersistedCostState,
+} from "./costStore";
+
+function freshState(): PersistedCostState {
+  return { version: 1, sessions: {}, months: {} };
+}
+
+const cost = (total: number) => ({
+  inputUsd: 0,
+  cachedUsd: 0,
+  outputUsd: 0,
+  reasoningUsd: 0,
+  totalUsd: total,
+});
+
+const deltas = (
+  d: Partial<{
+    inputTokens: number;
+    cachedInputTokens: number;
+    outputTokens: number;
+    reasoningOutputTokens: number;
+  }> = {},
+) => ({
+  inputTokens: d.inputTokens ?? 0,
+  cachedInputTokens: d.cachedInputTokens ?? 0,
+  outputTokens: d.outputTokens ?? 0,
+  reasoningOutputTokens: d.reasoningOutputTokens ?? 0,
+});
+
+describe("localMonthKey", () => {
+  it("formats YYYY-MM in local tz", () => {
+    const date = new Date(2026, 3, 7, 12, 0, 0); // April 7 2026 local
+    expect(localMonthKey(date)).toBe("2026-04");
+  });
+
+  it("pads single-digit months", () => {
+    const date = new Date(2026, 0, 1, 0, 0, 0);
+    expect(localMonthKey(date)).toBe("2026-01");
+  });
+});
+
+describe("reduceRecordTurnCost", () => {
+  const at = new Date(2026, 3, 21, 10, 0, 0); // April 21 2026
+
+  it("accumulates into session + month bucket", () => {
+    let state = freshState();
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }),
+      breakdown: cost(0.01),
+      at,
+    });
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ inputTokens: 500, outputTokens: 200 }),
+      breakdown: cost(0.005),
+      at,
+    });
+
+    const session = state.sessions["t1"]!;
+    expect(session.totalUsd).toBeCloseTo(0.015, 6);
+    expect(session.turnCount).toBe(2);
+    expect(session.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(1_500);
+    expect(session.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(700);
+    expect(session.byModel["claude-sonnet-4-6"]!.turnCount).toBe(2);
+
+    const month = state.months["2026-04"]!;
+    expect(month.totalUsd).toBeCloseTo(0.015, 6);
+    expect(month.turnCount).toBe(2);
+  });
+
+  it("keeps per-model tallies separate", () => {
+    let state = freshState();
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.01),
+      at,
+    });
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "gpt-5.4",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.02),
+      at,
+    });
+    const session = state.sessions["t1"]!;
+    expect(Object.keys(session.byModel).sort()).toEqual(["claude-sonnet-4-6", "gpt-5.4"]);
+    expect(session.totalUsd).toBeCloseTo(0.03, 6);
+  });
+
+  it("isolates sessions by threadId", () => {
+    let state = freshState();
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.01),
+      at,
+    });
+    state = reduceRecordTurnCost(state, {
+      threadId: "t2",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.02),
+      at,
+    });
+    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6);
+    expect(state.sessions["t2"]!.totalUsd).toBeCloseTo(0.02, 6);
+    // Month aggregates both sessions.
+    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.03, 6);
+  });
+
+  it("buckets by local month", () => {
+    let state = freshState();
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.01),
+      at: new Date(2026, 2, 31, 10, 0, 0), // March
+    });
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.02),
+      at: new Date(2026, 3, 1, 10, 0, 0), // April
+    });
+    expect(Object.keys(state.months).sort()).toEqual(["2026-03", "2026-04"]);
+    expect(state.months["2026-03"]!.totalUsd).toBeCloseTo(0.01, 6);
+    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.02, 6);
+    // Session spans both months.
+    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.03, 6);
+  });
+
+  it("ignores zero-token zero-cost turns", () => {
+    const before = freshState();
+    const after = reduceRecordTurnCost(before, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas(),
+      breakdown: cost(0),
+      at,
+    });
+    expect(after).toBe(before);
+  });
+
+  it("ignores blank threadId / model", () => {
+    const before = freshState();
+    const a = reduceRecordTurnCost(before, {
+      threadId: "",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 10 }),
+      breakdown: cost(0.01),
+      at,
+    });
+    const b = reduceRecordTurnCost(before, {
+      threadId: "t1",
+      model: "",
+      deltas: deltas({ outputTokens: 10 }),
+      breakdown: cost(0.01),
+      at,
+    });
+    expect(a).toBe(before);
+    expect(b).toBe(before);
+  });
+});
+
+describe("reduceResetSession", () => {
+  it("removes the session but keeps month", () => {
+    let state = freshState();
+    state = reduceRecordTurnCost(state, {
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.01),
+      at: new Date(2026, 3, 21, 10, 0, 0),
+    });
+    const next = reduceResetSession(state, "t1");
+    expect(next.sessions["t1"]).toBeUndefined();
+    expect(next.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6);
+  });
+
+  it("no-op for unknown threadId", () => {
+    const state = freshState();
+    expect(reduceResetSession(state, "nope")).toBe(state);
+  });
+});
+
+describe("sanitizePersistedCostState", () => {
+  it("returns initial for garbage", () => {
+    expect(sanitizePersistedCostState(null).sessions).toEqual({});
+    expect(sanitizePersistedCostState("bad").months).toEqual({});
+    expect(sanitizePersistedCostState({ version: 99 }).months).toEqual({});
+  });
+
+  it("drops invalid month keys", () => {
+    const cleaned = sanitizePersistedCostState({
+      version: 1,
+      sessions: {},
+      months: {
+        "2026-04": { totalUsd: 1, turnCount: 1, byModel: {} },
+        "bogus": { totalUsd: 99, turnCount: 1, byModel: {} },
+      },
+    });
+    expect(Object.keys(cleaned.months)).toEqual(["2026-04"]);
+  });
+
+  it("coerces non-finite numbers to zero", () => {
+    const cleaned = sanitizePersistedCostState({
+      version: 1,
+      sessions: {
+        t1: {
+          totalUsd: Number.NaN,
+          turnCount: -5,
+          byModel: {
+            "claude-sonnet-4-6": {
+              inputTokens: "abc",
+              outputTokens: 10,
+              totalUsd: 5,
+              turnCount: 1,
+            },
+          },
+        },
+      },
+      months: {},
+    });
+    const s = cleaned.sessions["t1"]!;
+    expect(s.totalUsd).toBe(0);
+    expect(s.turnCount).toBe(0);
+    expect(s.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(0);
+    expect(s.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(10);
+    expect(s.byModel["claude-sonnet-4-6"]!.totalUsd).toBe(5);
+  });
+});
+
+describe("selectCostSummary", () => {
+  it("returns zero summary for empty state", () => {
+    const summary = selectCostSummary(freshState(), "t1", new Date(2026, 3, 21));
+    expect(summary.sessionUsd).toBe(0);
+    expect(summary.monthUsd).toBe(0);
+    expect(summary.averagePerTurnUsd).toBeNull();
+    expect(summary.monthKey).toBe("2026-04");
+  });
+
+  it("computes average per turn", () => {
+    let state = freshState();
+    for (let i = 0; i < 4; i += 1) {
+      state = reduceRecordTurnCost(state, {
+        threadId: "t1",
+        model: "claude-sonnet-4-6",
+        deltas: deltas({ outputTokens: 100 }),
+        breakdown: cost(0.01),
+        at: new Date(2026, 3, 21),
+      });
+    }
+    const summary = selectCostSummary(state, "t1", new Date(2026, 3, 21));
+    expect(summary.sessionUsd).toBeCloseTo(0.04, 6);
+    expect(summary.averagePerTurnUsd).toBeCloseTo(0.01, 6);
+    expect(summary.sessionTurnCount).toBe(4);
+  });
+});
+
+describe("useCostStore (zustand)", () => {
+  beforeEach(() => {
+    useCostStore.getState().resetAll();
+    if (typeof window !== "undefined") {
+      window.localStorage.removeItem(COST_STORE_STORAGE_KEY);
+    }
+  });
+
+  it("records turn cost via action", () => {
+    useCostStore.getState().recordTurnCost({
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }),
+      breakdown: cost(0.01),
+      at: new Date(2026, 3, 21),
+    });
+    const state = useCostStore.getState();
+    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6);
+    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6);
+  });
+
+  it("resetSession clears one thread", () => {
+    useCostStore.getState().recordTurnCost({
+      threadId: "t1",
+      model: "claude-sonnet-4-6",
+      deltas: deltas({ outputTokens: 100 }),
+      breakdown: cost(0.01),
+      at: new Date(2026, 3, 21),
+    });
+    useCostStore.getState().resetSession("t1");
+    expect(useCostStore.getState().sessions["t1"]).toBeUndefined();
+  });
+});
diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts
new file mode 100644
index 0000000000..1cf4fbeb8b
--- /dev/null
+++ b/apps/web/src/lib/costStore.ts
@@ -0,0 +1,318 @@
+import { Debouncer } from "@tanstack/react-pacer";
+import { create } from "zustand";
+import type { TurnCostBreakdown, TurnTokenDeltas } from "@t3tools/shared/pricing";
+import { formatUsd } from "@t3tools/shared/pricing";
+
+export const COST_STORE_STORAGE_KEY = "t3code:cost-store:v1";
+
+/** Cumulative token counts + USD spend for one model within a bucket. */
+export interface ModelCostEntry {
+  inputTokens: number;
+  cachedInputTokens: number;
+  outputTokens: number;
+  reasoningOutputTokens: number;
+  totalUsd: number;
+  turnCount: number;
+}
+
+export interface CostBucket {
+  totalUsd: number;
+  turnCount: number;
+  byModel: Record<string, ModelCostEntry>;
+}
+
+export interface PersistedCostState {
+  version: 1;
+  sessions: Record<string, CostBucket>;
+  months: Record<string, CostBucket>;
+}
+
+export interface CostStoreState extends PersistedCostState {
+  recordTurnCost: (input: RecordTurnCostInput) => void;
+  resetSession: (threadId: string) => void;
+  resetAll: () => void;
+  /** Test-only hook: replace state atomically. */
+  __replaceState: (next: PersistedCostState) => void;
+}
+
+export interface RecordTurnCostInput {
+  threadId: string;
+  model: string;
+  deltas: TurnTokenDeltas;
+  breakdown: TurnCostBreakdown;
+  /** Override "now" for deterministic tests. */
+  at?: Date;
+}
+
+const emptyBucket: () => CostBucket = () => ({ totalUsd: 0, turnCount: 0, byModel: {} });
+const emptyModelEntry: () => ModelCostEntry = () => ({
+  inputTokens: 0,
+  cachedInputTokens: 0,
+  outputTokens: 0,
+  reasoningOutputTokens: 0,
+  totalUsd: 0,
+  turnCount: 0,
+});
+
+const initialState: PersistedCostState = {
+  version: 1,
+  sessions: {},
+  months: {},
+};
+
+/**
+ * Build the `YYYY-MM` bucket key for `date` from its **local-time** year and
+ * month (`getFullYear` / `getMonth`, not `toISOString`), so buckets roll over
+ * on the user's clock rather than UTC's. Defaults to the current time.
+ */
+export function localMonthKey(date: Date = new Date()): string {
+  const yyyy = String(date.getFullYear()).padStart(4, "0");
+  const mm = String(date.getMonth() + 1).padStart(2, "0");
+  return [yyyy, mm].join("-");
+}
+
+/** Copy of `entry` with one turn's token deltas and USD total added. */
+function addTurnToEntry(
+  entry: ModelCostEntry,
+  deltas: TurnTokenDeltas,
+  breakdown: TurnCostBreakdown,
+): ModelCostEntry {
+  const inputTokens = entry.inputTokens + deltas.inputTokens;
+  const cachedInputTokens = entry.cachedInputTokens + deltas.cachedInputTokens;
+  const outputTokens = entry.outputTokens + deltas.outputTokens;
+  const reasoningOutputTokens = entry.reasoningOutputTokens + deltas.reasoningOutputTokens;
+  const totalUsd = entry.totalUsd + breakdown.totalUsd;
+  const tokens = { inputTokens, cachedInputTokens, outputTokens, reasoningOutputTokens };
+  return { ...tokens, totalUsd, turnCount: entry.turnCount + 1 };
+}
+
+/** Copy of `bucket` with the turn added to its totals and to `model`'s entry. */
+function addTurnToBucket(
+  bucket: CostBucket,
+  model: string,
+  deltas: TurnTokenDeltas,
+  breakdown: TurnCostBreakdown,
+): CostBucket {
+  // First turn for this model in the bucket starts from a zeroed entry.
+  const previous = bucket.byModel[model] ?? emptyModelEntry();
+  const byModel = { ...bucket.byModel, [model]: addTurnToEntry(previous, deltas, breakdown) };
+  return {
+    totalUsd: bucket.totalUsd + breakdown.totalUsd,
+    turnCount: bucket.turnCount + 1,
+    byModel,
+  };
+}
+
+/**
+ * Pure reducer: fold one turn into both its thread's session bucket and the
+ * local-time month bucket for `input.at` (default: now). Returns `state`
+ * itself, untouched, when `threadId` / `model` is blank or the turn carries no
+ * tokens and no cost.
+ */
+export function reduceRecordTurnCost(
+  state: PersistedCostState,
+  input: RecordTurnCostInput,
+): PersistedCostState {
+  const { threadId, model, deltas, breakdown } = input;
+  if (!threadId || !model) return state;
+
+  // Skip no-op turns to keep storage tiny.
+  const tokenSum =
+    deltas.inputTokens +
+    deltas.cachedInputTokens +
+    deltas.outputTokens +
+    deltas.reasoningOutputTokens;
+  const isNoOp = tokenSum <= 0 && breakdown.totalUsd <= 0;
+  if (isNoOp) return state;
+
+  const monthKey = localMonthKey(input.at ?? new Date());
+  const sessionBefore = state.sessions[threadId] ?? emptyBucket();
+  const monthBefore = state.months[monthKey] ?? emptyBucket();
+  const sessionAfter = addTurnToBucket(sessionBefore, model, deltas, breakdown);
+  const monthAfter = addTurnToBucket(monthBefore, model, deltas, breakdown);
+  return {
+    ...state,
+    sessions: { ...state.sessions, [threadId]: sessionAfter },
+    months: { ...state.months, [monthKey]: monthAfter },
+  };
+}
+
+/** Drop `threadId`'s session bucket; returns `state` unchanged if it has none. */
+export function reduceResetSession(
+  state: PersistedCostState,
+  threadId: string,
+): PersistedCostState {
+  // Own-property check: `in` would also match inherited keys like "constructor".
+  if (!Object.prototype.hasOwnProperty.call(state.sessions, threadId)) return state;
+  const nextSessions = { ...state.sessions };
+  delete nextSessions[threadId];
+  return { ...state, sessions: nextSessions };
+}
+
+function sanitizeNumber(value: unknown): number {
+  return typeof value !== "number" || !Number.isFinite(value) || value < 0 ? 0 : value;
+}
+
+/** Coerce a persisted per-model entry: non-objects → null, bad numeric fields → 0. */
+function sanitizeModelEntry(raw: unknown): ModelCostEntry | null {
+  if (typeof raw !== "object" || raw === null) return null;
+  const fields = raw as Record<string, unknown>;
+  // Each field is coerced on its own, so one bad value does not void the rest.
+  return {
+    inputTokens: sanitizeNumber(fields.inputTokens),
+    cachedInputTokens: sanitizeNumber(fields.cachedInputTokens),
+    outputTokens: sanitizeNumber(fields.outputTokens),
+    reasoningOutputTokens: sanitizeNumber(fields.reasoningOutputTokens),
+    totalUsd: sanitizeNumber(fields.totalUsd),
+    turnCount: sanitizeNumber(fields.turnCount),
+  };
+}
+
+/**
+ * Coerce a persisted bucket. Non-objects → null. Totals that are not finite
+ * non-negative numbers become 0; `byModel` keeps only non-empty keys whose
+ * value sanitizes to an entry.
+ */
+function sanitizeBucket(raw: unknown): CostBucket | null {
+  if (typeof raw !== "object" || raw === null) return null;
+  const source = raw as Record<string, unknown>;
+  const models: unknown = source.byModel;
+  const byModel: Record<string, ModelCostEntry> = {};
+  if (typeof models === "object" && models !== null) {
+    for (const [name, value] of Object.entries(models as Record<string, unknown>)) {
+      const entry = name ? sanitizeModelEntry(value) : null;
+      if (entry) byModel[name] = entry;
+    }
+  }
+  const totalUsd = sanitizeNumber(source.totalUsd);
+  const turnCount = sanitizeNumber(source.turnCount);
+  return { totalUsd, turnCount, byModel };
+}
+
+/**
+ * Validate an untrusted persisted payload. Anything that is not an object with
+ * `version === 1` yields the shared initial state. Otherwise each bucket is
+ * sanitized on its own; sessions under an empty key and months whose key does
+ * not match `^\d{4}-\d{2}$` are dropped.
+ */
+export function sanitizePersistedCostState(raw: unknown): PersistedCostState {
+  if (typeof raw !== "object" || raw === null) return initialState;
+  const payload = raw as Record<string, unknown>;
+  if (payload.version !== 1) return initialState;
+
+  // Sanitized copy of one bucket map, keeping only keys accepted by `keep`.
+  const collect = (source: unknown, keep: (key: string) => boolean) => {
+    const out: Record<string, CostBucket> = {};
+    if (typeof source !== "object" || source === null) return out;
+    for (const [key, value] of Object.entries(source as Record<string, unknown>)) {
+      const bucket = keep(key) ? sanitizeBucket(value) : null;
+      if (bucket) out[key] = bucket;
+    }
+    return out;
+  };
+
+  // Thread ids only need to be non-empty.
+  const sessions = collect(payload.sessions, (threadId) => threadId !== "");
+  // Month keys must look like `YYYY-MM`.
+  const months = collect(payload.months, (monthKey) => /^\d{4}-\d{2}$/.test(monthKey));
+  return { version: 1, sessions, months };
+}
+
+/** Load persisted state from localStorage; initial state off-browser or on any failure. */
+function readPersistedState(): PersistedCostState {
+  if (typeof window === "undefined") return initialState;
+  try {
+    const stored = window.localStorage.getItem(COST_STORE_STORAGE_KEY);
+    // A missing or empty entry means there is nothing to restore.
+    return stored ? sanitizePersistedCostState(JSON.parse(stored)) : initialState;
+  } catch {
+    // Storage access or JSON parsing threw; start from the initial state.
+    return initialState;
+  }
+}
+
+/** Best-effort write of `version`, `sessions` and `months` to localStorage. */
+function persistState(state: PersistedCostState): void {
+  if (typeof window === "undefined") return;
+  try {
+    // Pick the three data fields explicitly so nothing else on `state` is stored.
+    const { version, sessions, months } = state;
+    const snapshot = { version, sessions, months } satisfies PersistedCostState;
+    window.localStorage.setItem(COST_STORE_STORAGE_KEY, JSON.stringify(snapshot));
+  } catch {
+    // ignore quota / serialization errors
+  }
+}
+
+const debouncedPersist = new Debouncer(persistState, { wait: 400 });
+
+export const useCostStore = create<CostStoreState>((set) => ({
+  ...readPersistedState(),
+  recordTurnCost: (input) => set((state) => reduceRecordTurnCost(state, input)),
+  resetSession: (threadId) => set((state) => reduceResetSession(state, threadId)),
+  resetAll: () => set(() => ({ ...initialState })),
+  __replaceState: (next) => set(() => ({ ...next })),
+}));
+
+useCostStore.subscribe((state) => {
+  const { version, sessions, months } = state;
+  debouncedPersist.maybeExecute({ version, sessions, months });
+});
+
+if (typeof window !== "undefined" && typeof window.addEventListener === "function") {
+  window.addEventListener("beforeunload", () => {
+    debouncedPersist.flush();
+  });
+}
+
+// ── Selectors ────────────────────────────────────────────────────────────
+
+/** Bucket for `threadId`; a fresh empty bucket when the id is blank or unseen. */
+export function selectSessionBucket(
+  state: PersistedCostState,
+  threadId: string | null | undefined,
+): CostBucket {
+  return (threadId ? state.sessions[threadId] : undefined) ?? emptyBucket();
+}
+
+export function selectMonthBucket(
+  state: PersistedCostState,
+  monthKey: string = localMonthKey(),
+): CostBucket {
+  return state.months[monthKey] ?? emptyBucket();
+}
+
+export interface CostSummary {
+  readonly sessionUsd: number;
+  readonly monthUsd: number;
+  readonly sessionTurnCount: number;
+  readonly monthTurnCount: number;
+  readonly monthKey: string;
+  readonly session: CostBucket;
+  readonly month: CostBucket;
+  readonly averagePerTurnUsd: number | null;
+}
+
+/** Session totals plus the totals for `now`'s local month, with mean USD per session turn. */
+export function selectCostSummary(
+  state: PersistedCostState,
+  threadId: string | null | undefined,
+  now: Date = new Date(),
+): CostSummary {
+  const monthKey = localMonthKey(now);
+  const session = selectSessionBucket(state, threadId);
+  const month = selectMonthBucket(state, monthKey);
+  return {
+    sessionUsd: session.totalUsd,
+    monthUsd: month.totalUsd,
+    sessionTurnCount: session.turnCount,
+    monthTurnCount: month.turnCount,
+    monthKey,
+    session,
+    month,
+    // No average until at least one turn is recorded (avoids 0 / 0).
+    averagePerTurnUsd: session.turnCount > 0 ? session.totalUsd / session.turnCount : null,
+  };
+}
+
+export { formatUsd };

From bf76cc687eaf2112aa7624685b1158f268f8ce88 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 19:36:13 +0200
Subject: [PATCH 03/16] feat(web): wire token-usage events to cost store

useCostTracking hook observes activeThread activities and records
each new context-window.updated event (with lastXxxTokens deltas)
into the cost store. Seeds seen-set on mount / thread switch so
historical activity is not retroactively charged to this month.
Pure processActivitiesForCost reducer is unit-tested; the hook is
a thin ref+effect wrapper. Tests: 9 pass.
---
 apps/web/src/lib/useCostTracking.test.ts | 160 +++++++++++++++++++++++
 apps/web/src/lib/useCostTracking.ts      | 118 +++++++++++++++++
 2 files changed, 278 insertions(+)
 create mode 100644 apps/web/src/lib/useCostTracking.test.ts
 create mode 100644 apps/web/src/lib/useCostTracking.ts

diff --git a/apps/web/src/lib/useCostTracking.test.ts b/apps/web/src/lib/useCostTracking.test.ts
new file mode 100644
index 0000000000..9590cf820e
--- /dev/null
+++ b/apps/web/src/lib/useCostTracking.test.ts
@@ -0,0 +1,160 @@
+import { describe, expect, it } from "vitest";
+import { EventId, type ModelSelection, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts";
+
+import { processActivitiesForCost } from "./useCostTracking";
+
+function makeContextWindowActivity(
+  id: string,
+  payload: Record<string, unknown>,
+  createdAt = "2026-04-21T10:00:00.000Z",
+): OrchestrationThreadActivity {
+  return {
+    id: EventId.make(id),
+    tone: "info",
+    kind: "context-window.updated",
+    summary: "Context window updated",
+    payload,
+    turnId: TurnId.make("turn-1"),
+    createdAt,
+  };
+}
+
+const sonnet: ModelSelection = {
+  provider: "claudeAgent",
+  model: "claude-sonnet-4-6",
+};
+
+describe("processActivitiesForCost", () => {
+  it("returns empty records with null threadId", () => {
+    const result = processActivitiesForCost(null, [], sonnet, null);
+    expect(result.records).toEqual([]);
+    expect(result.nextSeen.size).toBe(0);
+  });
+
+  it("seeds existing activities without recording on first mount", () => {
+    const acts = [
+      makeContextWindowActivity("evt-a", { lastOutputTokens: 1000 }),
+      makeContextWindowActivity("evt-b", { lastOutputTokens: 500 }),
+    ];
+    const result = processActivitiesForCost("t1", acts, sonnet, null);
+    expect(result.records).toEqual([]);
+    expect(result.nextSeen.size).toBe(2);
+  });
+
+  it("records only new activities on subsequent call", () => {
+    const seed = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-a", { lastOutputTokens: 100 })],
+      sonnet,
+      null,
+    );
+    const next = processActivitiesForCost(
+      "t1",
+      [
+        makeContextWindowActivity("evt-a", { lastOutputTokens: 100 }),
+        makeContextWindowActivity("evt-b", {
+          lastInputTokens: 1_000,
+          lastCachedInputTokens: 500,
+          lastOutputTokens: 200,
+        }),
+      ],
+      sonnet,
+      seed.nextSeen,
+    );
+    expect(next.records).toHaveLength(1);
+    const record = next.records[0]!;
+    expect(record.threadId).toBe("t1");
+    expect(record.model).toBe("claude-sonnet-4-6");
+    expect(record.deltas.inputTokens).toBe(1_000);
+    expect(record.deltas.outputTokens).toBe(200);
+    // 1000*3 + 500*0.3 + 200*15 = 3000+150+3000 = 6150 / 1M = $0.00615
+    expect(record.breakdown.totalUsd).toBeCloseTo(0.00615, 6);
+  });
+
+  it("skips events without per-turn deltas", () => {
+    const seed = processActivitiesForCost("t1", [], sonnet, null);
+    const next = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-1", { usedTokens: 10_000 })],
+      sonnet,
+      seed.nextSeen,
+    );
+    expect(next.records).toEqual([]);
+    expect(next.nextSeen.has("evt-1")).toBe(true);
+  });
+
+  it("skips non-context-window activity kinds", () => {
+    const seed = processActivitiesForCost("t1", [], sonnet, null);
+    const other: OrchestrationThreadActivity = {
+      id: EventId.make("evt-tool"),
+      tone: "info",
+      kind: "tool.started",
+      summary: "tool.started",
+      payload: { lastOutputTokens: 1_000 },
+      turnId: TurnId.make("turn-1"),
+      createdAt: "2026-04-21T10:00:00.000Z",
+    };
+    const next = processActivitiesForCost("t1", [other], sonnet, seed.nextSeen);
+    expect(next.records).toEqual([]);
+    expect(next.nextSeen.has("evt-tool")).toBe(true);
+  });
+
+  it("skips when model selection missing", () => {
+    const seed = processActivitiesForCost("t1", [], null, null);
+    const next = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
+      null,
+      seed.nextSeen,
+    );
+    expect(next.records).toEqual([]);
+  });
+
+  it("skips when pricing resolves to zero (unknown model)", () => {
+    const seed = processActivitiesForCost("t1", [], sonnet, null);
+    const next = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
+      { provider: "opencode", model: "some/unknown-model" },
+      seed.nextSeen,
+    );
+    expect(next.records).toEqual([]);
+    expect(next.nextSeen.has("evt-1")).toBe(true);
+  });
+
+  it("deduplicates by activity id", () => {
+    const seed = processActivitiesForCost("t1", [], sonnet, null);
+    const firstPass = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
+      sonnet,
+      seed.nextSeen,
+    );
+    expect(firstPass.records).toHaveLength(1);
+    const secondPass = processActivitiesForCost(
+      "t1",
+      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
+      sonnet,
+      firstPass.nextSeen,
+    );
+    expect(secondPass.records).toEqual([]);
+  });
+
+  it("uses activity.createdAt as `at` timestamp", () => {
+    const seed = processActivitiesForCost("t1", [], sonnet, null);
+    const next = processActivitiesForCost(
+      "t1",
+      [
+        makeContextWindowActivity(
+          "evt-1",
+          { lastOutputTokens: 1_000 },
+          "2026-03-15T00:00:00.000Z",
+        ),
+      ],
+      sonnet,
+      seed.nextSeen,
+    );
+    const record = next.records[0]!;
+    expect(record.at?.toISOString()).toBe("2026-03-15T00:00:00.000Z");
+  });
+});
diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts
new file mode 100644
index 0000000000..237f656262
--- /dev/null
+++ b/apps/web/src/lib/useCostTracking.ts
@@ -0,0 +1,118 @@
+import { useEffect, useRef } from "react";
+import type { ModelSelection, OrchestrationThreadActivity } from "@t3tools/contracts";
+import {
+  computeTurnCost,
+  type TurnCostBreakdown,
+  type TurnTokenDeltas,
+} from "@t3tools/shared/pricing";
+
+import { useCostStore, type RecordTurnCostInput } from "./costStore";
+
+interface SeenRef {
+  threadId: string | null | undefined;
+  ids: Set<string>;
+}
+
+function toNonNegative(value: unknown): number {
+  return typeof value !== "number" || !Number.isFinite(value) || value <= 0 ? 0 : value;
+}
+
+/** Pulls the per-turn token counters out of an activity payload, or null if none is positive. */
+function extractDeltas(payload: unknown): TurnTokenDeltas | null {
+  // Anything that is not a non-null object cannot hold the counters.
+  if (typeof payload !== "object" || payload === null) return null;
+  const fields = payload as Record<string, unknown>;
+  // Missing, non-numeric, non-finite and negative counters all read as 0.
+  const inputTokens = toNonNegative(fields.lastInputTokens);
+  const cachedInputTokens = toNonNegative(fields.lastCachedInputTokens);
+  const outputTokens = toNonNegative(fields.lastOutputTokens);
+  const reasoningOutputTokens = toNonNegative(fields.lastReasoningOutputTokens);
+  const total = inputTokens + cachedInputTokens + outputTokens + reasoningOutputTokens;
+  // All-zero events (e.g. only `usedTokens` present) carry no per-turn usage.
+  if (total <= 0) return null;
+  return { inputTokens, cachedInputTokens, outputTokens, reasoningOutputTokens };
+}
+
+export interface ProcessActivitiesResult {
+  readonly records: ReadonlyArray<RecordTurnCostInput>;
+  readonly nextSeen: Set<string>;
+}
+
+/**
+ * Pure: find new `context-window.updated` events that carry per-turn
+ * token deltas and translate them into cost-store inputs. Returns updated
+ * "seen" set for caller to persist.
+ *
+ * Behaviour:
+ *   - No thread or no activities: no records, and `prevSeen` is handed
+ *     back unchanged (a fresh empty set when it was `null`).
+ *   - If `prevSeen` is `null`, treat all activities as "already seen" and
+ *     emit no records — used for initial mount / thread switch.
+ *   - Otherwise, only new activity IDs are considered. Each one is marked
+ *     seen even when skipped (other kind, no deltas, no model, zero cost).
+ *   - A record's `at` is the activity's `createdAt`; the current time is
+ *     used when that is missing or does not parse to a valid date.
+ */
+export function processActivitiesForCost(
+  threadId: string | null | undefined,
+  activities: ReadonlyArray<OrchestrationThreadActivity> | undefined,
+  modelSelection: ModelSelection | null | undefined,
+  prevSeen: Set<string> | null,
+): ProcessActivitiesResult {
+  if (!threadId || !activities || activities.length === 0) {
+    return { records: [], nextSeen: prevSeen ?? new Set() };
+  }
+  if (prevSeen === null) {
+    // Initial mount / thread switch: seed seen set with current activity IDs.
+    return { records: [], nextSeen: new Set(activities.map((a) => a.id as string)) };
+  }
+  const seen = new Set(prevSeen);
+  const model = modelSelection?.model;
+  const provider = modelSelection?.provider;
+  const records: RecordTurnCostInput[] = [];
+  for (const activity of activities) {
+    const id = activity.id as string;
+    if (seen.has(id)) continue;
+    // Mark before filtering so a skipped event is not revisited next call.
+    seen.add(id);
+    if (activity.kind !== "context-window.updated") continue;
+    const deltas = extractDeltas(activity.payload);
+    if (!deltas) continue;
+    if (!model) continue;
+    const breakdown: TurnCostBreakdown = computeTurnCost(model, deltas, provider);
+    if (breakdown.totalUsd <= 0) continue;
+    // `new Date(badString)` yields an Invalid Date rather than throwing; never store one.
+    const created = activity.createdAt ? new Date(activity.createdAt) : new Date();
+    const at = Number.isNaN(created.getTime()) ? new Date() : created;
+    records.push({ threadId, model, deltas, breakdown, at });
+  }
+  return { records, nextSeen: seen };
+}
+
+/**
+ * React hook: feeds each newly seen, priced `context-window.updated` event
+ * of the given thread into the cost store. The seen-ID set lives in a ref
+ * and is dropped whenever `threadId` changes, so a thread's existing
+ * activities are seeded as seen instead of being charged.
+ * NOTE(review): if the first run for a thread gets an empty `activities`
+ * list, nothing is seeded and history arriving later counts as new — confirm.
+ */
+export function useCostTracking(
+  threadId: string | null | undefined,
+  activities: ReadonlyArray<OrchestrationThreadActivity> | undefined,
+  modelSelection: ModelSelection | null | undefined,
+): void {
+  const recordTurnCost = useCostStore((state) => state.recordTurnCost);
+  const seenRef = useRef<SeenRef>({ threadId: undefined, ids: new Set() });
+
+  useEffect(() => {
+    const tracked = seenRef.current;
+    // A thread change invalidates the stored IDs; `null` requests re-seeding.
+    const previousIds = tracked.threadId === threadId ? tracked.ids : null;
+    const result = processActivitiesForCost(threadId, activities, modelSelection, previousIds);
+    seenRef.current = { threadId, ids: result.nextSeen };
+    result.records.forEach((record) => {
+      recordTurnCost(record);
+    });
+  }, [threadId, activities, modelSelection, recordTurnCost]);
+}

From 7769f248beecca3a69d55eb85a788619f98249a1 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 19:39:25 +0200
Subject: [PATCH 04/16] feat(web): add CostMeter + mount in composer toolbar

CostMeter mirrors ContextWindowMeter's ring + Popover style.
Fill ratio uses VITE_MONTHLY_BUDGET_USD if set, else a compressed
log scale. Popover shows session/MTD totals, budget %, turn count,
avg cost per turn, and per-model breakdown. Turns destructive
color when over budget.

useCostSummary zustand hook reads sessions + months slices and
recomputes summary; cheap enough to recompute per render since
selector is O(models).

Composer wires useCostTracking side-effect + passes summary to
ComposerFooterPrimaryActions next to ContextWindowMeter.
---
 apps/web/src/components/chat/ChatComposer.tsx |  12 ++
 apps/web/src/components/chat/CostMeter.tsx    | 156 ++++++++++++++++++
 apps/web/src/lib/costStore.ts                 |  10 ++
 3 files changed, 178 insertions(+)
 create mode 100644 apps/web/src/components/chat/CostMeter.tsx

diff --git a/apps/web/src/components/chat/ChatComposer.tsx b/apps/web/src/components/chat/ChatComposer.tsx
index 3d3b081af9..da3184e8ad 100644
--- a/apps/web/src/components/chat/ChatComposer.tsx
+++ b/apps/web/src/components/chat/ChatComposer.tsx
@@ -77,6 +77,7 @@ import {
   renderProviderTraitsPicker,
 } from "./composerProviderRegistry";
 import { ContextWindowMeter } from "./ContextWindowMeter";
+import { CostMeter } from "./CostMeter";
 import { buildExpandedImagePreview, type ExpandedImagePreview } from "./ExpandedImagePreview";
 import { basenameOfPath } from "../../vscode-icons";
 import { cn, randomUUID } from "~/lib/utils";
@@ -102,6 +103,8 @@ import type { SessionPhase, Thread } from "../../types";
 import type { PendingUserInputDraftAnswer } from "../../pendingUserInput";
 import type { PendingApproval, PendingUserInput } from "../../session-logic";
 import { deriveLatestContextWindowSnapshot } from "../../lib/contextWindow";
+import { useCostSummary, type CostSummary } from "../../lib/costStore";
+import { useCostTracking } from "../../lib/useCostTracking";
 import { formatProviderSkillDisplayName } from "../../providerSkillPresentation";
 import { searchProviderSkills } from "../../providerSkillSearch";
 
@@ -269,6 +272,7 @@ const ComposerFooterModeControls = memo(function ComposerFooterModeControls(prop
 const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions(props: {
   compact: boolean;
   activeContextWindow: ReturnType<typeof deriveLatestContextWindowSnapshot>;
+  costSummary: CostSummary;
   isPreparingWorktree: boolean;
   pendingAction: {
     questionIndex: number;
@@ -290,6 +294,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions(
   return (
     <>
       {props.activeContextWindow ? <ContextWindowMeter usage={props.activeContextWindow} /> : null}
+      <CostMeter summary={props.costSummary} />
       {props.isPreparingWorktree ? (
         <span className="text-muted-foreground/70 text-xs">Preparing worktree...</span>
       ) : null}
@@ -639,6 +644,12 @@ export const ChatComposer = memo(
       [activeThreadActivities],
     );
 
+    // ------------------------------------------------------------------
+    // Cost tracking (session + month-to-date spend)
+    // ------------------------------------------------------------------
+    useCostTracking(activeThreadId, activeThreadActivities, activeThreadModelSelection);
+    const costSummary = useCostSummary(activeThreadId);
+
     // ------------------------------------------------------------------
     // Composer-local state
     // ------------------------------------------------------------------
@@ -1953,6 +1964,7 @@ export const ChatComposer = memo(
                   <ComposerFooterPrimaryActions
                     compact={isComposerPrimaryActionsCompact}
                     activeContextWindow={activeContextWindow}
+                    costSummary={costSummary}
                     pendingAction={pendingPrimaryAction}
                     isRunning={phase === "running"}
                     showPlanFollowUpPrompt={
diff --git a/apps/web/src/components/chat/CostMeter.tsx b/apps/web/src/components/chat/CostMeter.tsx
new file mode 100644
index 0000000000..6c52d60762
--- /dev/null
+++ b/apps/web/src/components/chat/CostMeter.tsx
@@ -0,0 +1,156 @@
+import { cn } from "~/lib/utils";
+import { formatUsd, type CostSummary } from "~/lib/costStore";
+import { Popover, PopoverPopup, PopoverTrigger } from "../ui/popover";
+
+function readBudget(): number | null {
+  const configured = (import.meta as { env?: Record<string, string | undefined> }).env
+    ?.VITE_MONTHLY_BUDGET_USD;
+  // Unset, empty, non-numeric, non-finite, zero and negative values all mean "no budget".
+  const budget = Number.parseFloat(configured ?? "");
+  return Number.isFinite(budget) && budget > 0 ? budget : null;
+}
+
+/** Shortest readable amount for the ring centre: "$0", "¢42", "$3.5", "$42", "$1.2k". */
+function formatCompactUsd(value: number): string {
+  if (value <= 0) return "$0";
+  // Cut over at the rounding boundary: 0.996 reads "$1" (not "¢100"), 999.6 reads "$1k".
+  if (value < 0.995) return `¢${Math.round(value * 100)}`;
+  if (value < 100) return `$${value.toFixed(value < 10 ? 1 : 0).replace(/\.0$/, "")}`;
+  if (value < 999.5) return `$${Math.round(value)}`;
+  return `$${(value / 1_000).toFixed(1).replace(/\.0$/, "")}k`;
+}
+
+/** Percentage text: one decimal below 10 (a trailing ".0" is dropped), whole numbers otherwise. */
+function formatPercentage(value: number): string {
+  return value < 10 ? `${value.toFixed(1).replace(/\.0$/, "")}%` : `${Math.round(value)}%`;
+}
+
+/** Ring meter with a hover popover for session and month-to-date spend (and budget, if set). */
+export function CostMeter(props: { summary: CostSummary }) {
+  const { summary } = props;
+  const budget = readBudget();
+
+  // Labels report the true share of the budget, which may exceed 100%. The ring
+  // fill is capped at 100 and, without a budget, uses a compressed log scale.
+  const percentUsed = budget ? (summary.monthUsd / budget) * 100 : 0;
+  const ratio = budget
+    ? Math.min(100, percentUsed)
+    : summary.monthUsd <= 0
+      ? 0
+      : Math.min(100, Math.log10(summary.monthUsd + 1) * 25);
+
+  const radius = 9.75;
+  const circumference = 2 * Math.PI * radius;
+  const dashOffset = circumference - (ratio / 100) * circumference;
+
+  const overBudget = budget ? summary.monthUsd >= budget : false;
+
+  const centerLabel = summary.monthUsd > 0 ? formatCompactUsd(summary.monthUsd) : "$0";
+  const ariaLabel = budget
+    ? `Cost ${formatUsd(summary.monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(percentUsed)})`
+    : `Cost ${formatUsd(summary.monthUsd)} this month, ${formatUsd(summary.sessionUsd)} this session`;
+
+  return (
+    <Popover>
+      <PopoverTrigger
+        openOnHover
+        delay={150}
+        closeDelay={0}
+        render={
+          <button
+            type="button"
+            className="group inline-flex items-center justify-center rounded-full transition-opacity hover:opacity-85"
+            aria-label={ariaLabel}
+          >
+            <span className="relative flex h-6 w-6 items-center justify-center">
+              <svg
+                viewBox="0 0 24 24"
+                className="-rotate-90 absolute inset-0 h-full w-full transform-gpu"
+                aria-hidden="true"
+              >
+                <circle
+                  cx="12"
+                  cy="12"
+                  r={radius}
+                  fill="none"
+                  stroke="color-mix(in oklab, var(--color-muted) 70%, transparent)"
+                  strokeWidth="3"
+                />
+                <circle
+                  cx="12"
+                  cy="12"
+                  r={radius}
+                  fill="none"
+                  stroke={overBudget ? "var(--color-destructive)" : "var(--color-muted-foreground)"}
+                  strokeWidth="3"
+                  strokeLinecap="round"
+                  strokeDasharray={circumference}
+                  strokeDashoffset={dashOffset}
+                  className="transition-[stroke-dashoffset] duration-500 ease-out motion-reduce:transition-none"
+                />
+              </svg>
+              <span
+                className={cn(
+                  "relative flex h-[15px] w-[15px] items-center justify-center rounded-full bg-background text-[8px] font-medium",
+                  overBudget ? "text-destructive" : "text-muted-foreground",
+                )}
+              >
+                {centerLabel}
+              </span>
+            </span>
+          </button>
+        }
+      />
+      <PopoverPopup tooltipStyle side="top" align="end" className="w-max max-w-none px-3 py-2">
+        <div className="space-y-1.5 leading-tight">
+          <div className="text-[11px] font-medium uppercase tracking-[0.08em] text-muted-foreground">
+            Cost
+          </div>
+          <div className="whitespace-nowrap text-xs font-medium text-foreground">
+            <span>{formatUsd(summary.sessionUsd)}</span>
+            <span className="mx-1 text-muted-foreground">session</span>
+            <span className="mx-1">⋅</span>
+            <span>{formatUsd(summary.monthUsd)}</span>
+            <span className="mx-1 text-muted-foreground">MTD</span>
+          </div>
+          {budget ? (
+            <div className={cn("text-xs", overBudget ? "text-destructive" : "text-muted-foreground")}>
+              Budget: {formatUsd(budget)} ({formatPercentage(percentUsed)} used)
+            </div>
+          ) : null}
+          {summary.sessionTurnCount > 0 && summary.averagePerTurnUsd !== null ? (
+            <div className="text-xs text-muted-foreground">
+              {summary.sessionTurnCount}
+              {summary.sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "}
+              {formatUsd(summary.averagePerTurnUsd)}/turn avg
+            </div>
+          ) : null}
+          {summary.month.turnCount > 0 ? <ModelBreakdown summary={summary} /> : null}
+        </div>
+      </PopoverPopup>
+    </Popover>
+  );
+}
+
+/** Per-model month-to-date rows, most expensive first; renders nothing without paid usage. */
+function ModelBreakdown(props: { summary: CostSummary }) {
+  const rows = Object.entries(props.summary.month.byModel)
+    .filter(([, usage]) => usage.totalUsd > 0)
+    .sort(([, a], [, b]) => b.totalUsd - a.totalUsd);
+  if (rows.length === 0) return null;
+  return (
+    <div className="space-y-0.5 pt-1">
+      <div className="text-[10px] uppercase tracking-[0.08em] text-muted-foreground/80">
+        Models (this month)
+      </div>
+      {rows.map(([name, { totalUsd, turnCount }]) => (
+        <div key={name} className="flex items-center justify-between gap-3 text-xs">
+          <span className="truncate font-medium text-foreground">{name}</span>
+          <span className="text-muted-foreground">
+            {formatUsd(totalUsd)} · {turnCount}{turnCount === 1 ? " turn" : " turns"}
+          </span>
+        </div>
+      ))}
+    </div>
+  );
+}
diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts
index 1cf4fbeb8b..509276b5e2 100644
--- a/apps/web/src/lib/costStore.ts
+++ b/apps/web/src/lib/costStore.ts
@@ -293,6 +293,16 @@ export interface CostSummary {
   readonly averagePerTurnUsd: number | null;
 }
 
+/**
+ * Hook: cost summary for `threadId`, rebuilt from the `sessions` and `months` store slices.
+ * NOTE(review): yields a new object every render, which defeats `memo` on consumers — confirm.
+ */
+export function useCostSummary(threadId: string | null | undefined, now?: Date): CostSummary {
+  const months = useCostStore((state) => state.months);
+  const sessions = useCostStore((state) => state.sessions);
+  return selectCostSummary({ version: 1, sessions, months }, threadId, now);
+}
+
 export function selectCostSummary(
   state: PersistedCostState,
   threadId: string | null | undefined,

From d9225e86dd5921b5fdd53d902d970c0eea62b087 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 20:39:57 +0200
Subject: [PATCH 05/16] feat(server): T3CODE_STATE_SUBDIR + --use-userdata flag

Let dev mode point at the installed app's "userdata" state for
history continuity, and pave the way for a server-side usage/ JSON
store that both dev and prod reuse.

- deriveServerPaths accepts optional stateSubdir; env wins over the
  default (dev/userdata selection via devUrl).
- Adds usageDir (<stateDir>/usage) to derived paths + ensures it
  exists at startup.
- dev-runner: new --state-subdir flag + --use-userdata shortcut;
  forwards to T3CODE_STATE_SUBDIR. Startup logs warn loudly when
  dev is aimed at userdata.
- Tests: dev-runner env matrix (22 pass), cli-config subdir override
  + usageDir derivation (10 pass).
---
 apps/server/src/cli-config.test.ts | 55 +++++++++++++++++++++++++
 apps/server/src/cli.ts             | 12 +++++-
 apps/server/src/config.ts          | 12 +++++-
 scripts/dev-runner.test.ts         | 66 ++++++++++++++++++++++++++++++
 scripts/dev-runner.ts              | 40 +++++++++++++++++-
 5 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/apps/server/src/cli-config.test.ts b/apps/server/src/cli-config.test.ts
index 5adece7302..71c71648c3 100644
--- a/apps/server/src/cli-config.test.ts
+++ b/apps/server/src/cli-config.test.ts
@@ -525,4 +525,59 @@ it.layer(NodeServices.layer)("cli config resolution", (it) => {
       });
     }),
   );
+
+  it.effect("T3CODE_STATE_SUBDIR overrides default dev/userdata selection", () =>
+    Effect.gen(function* () {
+      const { join } = yield* Path.Path;
+      const baseDir = join(os.tmpdir(), "t3-cli-config-state-subdir");
+      const resolved = yield* resolveServerConfig(
+        {
+          mode: Option.some("web"),
+          port: Option.some(3773),
+          host: Option.none(),
+          baseDir: Option.some(baseDir),
+          cwd: Option.none(),
+          devUrl: Option.some(new URL("http://127.0.0.1:5173")),
+          noBrowser: Option.some(true),
+          bootstrapFd: Option.none(),
+          autoBootstrapProjectFromCwd: Option.none(),
+          logWebSocketEvents: Option.none(),
+        },
+        Option.none(),
+      ).pipe(
+        Effect.provide(
+          Layer.mergeAll(
+            ConfigProvider.layer(
+              ConfigProvider.fromEnv({
+                env: { T3CODE_STATE_SUBDIR: "userdata" },
+              }),
+            ),
+            NetService.layer,
+          ),
+        ),
+      );
+
+      // Even though devUrl is set (would normally pick "dev"), env override wins.
+      assert.equal(resolved.stateDir, join(baseDir, "userdata"));
+      assert.equal(resolved.dbPath, join(baseDir, "userdata", "state.sqlite"));
+      assert.equal(resolved.usageDir, join(baseDir, "userdata", "usage"));
+    }),
+  );
+
+  it.effect("deriveServerPaths exposes usageDir under stateDir", () =>
+    Effect.gen(function* () {
+      const { join } = yield* Path.Path;
+      const baseDir = join(os.tmpdir(), "t3-derive-paths-usage");
+      const prodPaths = yield* deriveServerPaths(baseDir, undefined);
+      assert.equal(prodPaths.usageDir, join(baseDir, "userdata", "usage"));
+      const devPaths = yield* deriveServerPaths(baseDir, new URL("http://localhost:5173"));
+      assert.equal(devPaths.usageDir, join(baseDir, "dev", "usage"));
+      const overridePaths = yield* deriveServerPaths(
+        baseDir,
+        new URL("http://localhost:5173"),
+        "userdata",
+      );
+      assert.equal(overridePaths.usageDir, join(baseDir, "userdata", "usage"));
+    }),
+  );
 });
diff --git a/apps/server/src/cli.ts b/apps/server/src/cli.ts
index 4fc23a1ded..34648ec51b 100644
--- a/apps/server/src/cli.ts
+++ b/apps/server/src/cli.ts
@@ -174,6 +174,10 @@ const EnvServerConfig = Config.all({
     Config.option,
     Config.map(Option.getOrUndefined),
   ),
+  stateSubdir: Config.string("T3CODE_STATE_SUBDIR").pipe(
+    Config.option,
+    Config.map(Option.getOrUndefined),
+  ),
 });
 
 interface CliServerFlags {
@@ -286,8 +290,14 @@ export const resolveServerConfig = (
     const rawCwd = Option.getOrElse(normalizedFlags.cwd, () => process.cwd());
     const cwd = path.resolve(yield* expandHomePath(rawCwd.trim()));
     yield* fs.makeDirectory(cwd, { recursive: true });
-    const derivedPaths = yield* deriveServerPaths(baseDir, devUrl);
+    const derivedPaths = yield* deriveServerPaths(baseDir, devUrl, env.stateSubdir);
     yield* ensureServerDirectories(derivedPaths);
+    if (devUrl !== undefined && env.stateSubdir?.trim() === "userdata") {
+      yield* Effect.logWarning(
+        "⚠️  T3CODE_STATE_SUBDIR=userdata: dev server is reading/writing the installed app's state directory. Quit the installed app first to avoid corruption.",
+        { stateDir: derivedPaths.stateDir },
+      );
+    }
     const persistedObservabilitySettings = yield* loadPersistedObservabilitySettings(
       derivedPaths.settingsPath,
     );
diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts
index 7840c76115..c885a47554 100644
--- a/apps/server/src/config.ts
+++ b/apps/server/src/config.ts
@@ -37,6 +37,7 @@ export interface ServerDerivedPaths {
   readonly environmentIdPath: string;
   readonly serverRuntimeStatePath: string;
   readonly secretsDir: string;
+  readonly usageDir: string;
 }
 
 /**
@@ -70,14 +71,21 @@ export interface ServerConfigShape extends ServerDerivedPaths {
 export const deriveServerPaths = Effect.fn(function* (
   baseDir: ServerConfigShape["baseDir"],
   devUrl: ServerConfigShape["devUrl"],
+  stateSubdir?: string | undefined,
 ): Effect.fn.Return<ServerDerivedPaths, never, Path.Path> {
   const { join } = yield* Path.Path;
-  const stateDir = join(baseDir, devUrl !== undefined ? "dev" : "userdata");
+  // When T3CODE_STATE_SUBDIR is set, honor it verbatim — lets a dev-mode
+  // server read/write the installed app's "userdata" store when explicitly
+  // opted in. Falls back to the default: "dev" in dev mode, "userdata" in
+  // production.
+  const subdir = stateSubdir?.trim() || (devUrl !== undefined ? "dev" : "userdata");
+  const stateDir = join(baseDir, subdir);
   const dbPath = join(stateDir, "state.sqlite");
   const attachmentsDir = join(stateDir, "attachments");
   const logsDir = join(stateDir, "logs");
   const providerLogsDir = join(logsDir, "provider");
   const providerStatusCacheDir = join(baseDir, "caches");
+  const usageDir = join(stateDir, "usage");
   return {
     stateDir,
     dbPath,
@@ -96,6 +104,7 @@ export const deriveServerPaths = Effect.fn(function* (
     environmentIdPath: join(stateDir, "environment-id"),
     serverRuntimeStatePath: join(stateDir, "server-runtime.json"),
     secretsDir: join(stateDir, "secrets"),
+    usageDir,
   };
 });
 
@@ -116,6 +125,7 @@ export const ensureServerDirectories = Effect.fn(function* (derivedPaths: Server
       fs.makeDirectory(derivedPaths.providerStatusCacheDir, { recursive: true }),
       fs.makeDirectory(path.dirname(derivedPaths.anonymousIdPath), { recursive: true }),
       fs.makeDirectory(path.dirname(derivedPaths.serverRuntimeStatePath), { recursive: true }),
+      fs.makeDirectory(derivedPaths.usageDir, { recursive: true }),
     ],
     { concurrency: "unbounded" },
   );
diff --git a/scripts/dev-runner.test.ts b/scripts/dev-runner.test.ts
index ce4865eced..64e593b38f 100644
--- a/scripts/dev-runner.test.ts
+++ b/scripts/dev-runner.test.ts
@@ -93,6 +93,72 @@ it.layer(NodeServices.layer)("dev-runner", (it) => {
         assert.equal(env.T3CODE_LOG_WS_EVENTS, "1");
         assert.equal(env.T3CODE_HOST, "0.0.0.0");
         assert.equal(env.VITE_DEV_SERVER_URL, "http://localhost:7331/");
+        // No stateSubdir override by default.
+        assert.equal(env.T3CODE_STATE_SUBDIR, undefined);
+      }),
+    );
+
+    it.effect("forwards explicit stateSubdir to T3CODE_STATE_SUBDIR", () =>
+      Effect.gen(function* () {
+        const env = yield* createDevRunnerEnv({
+          mode: "dev",
+          baseEnv: {},
+          serverOffset: 0,
+          webOffset: 0,
+          t3Home: undefined,
+          stateSubdir: "userdata",
+          noBrowser: undefined,
+          autoBootstrapProjectFromCwd: undefined,
+          logWebSocketEvents: undefined,
+          host: undefined,
+          port: undefined,
+          devUrl: undefined,
+        });
+
+        assert.equal(env.T3CODE_STATE_SUBDIR, "userdata");
+      }),
+    );
+
+    it.effect("--use-userdata shortcut sets T3CODE_STATE_SUBDIR=userdata", () =>
+      Effect.gen(function* () {
+        const env = yield* createDevRunnerEnv({
+          mode: "dev",
+          baseEnv: {},
+          serverOffset: 0,
+          webOffset: 0,
+          t3Home: undefined,
+          useUserdata: true,
+          noBrowser: undefined,
+          autoBootstrapProjectFromCwd: undefined,
+          logWebSocketEvents: undefined,
+          host: undefined,
+          port: undefined,
+          devUrl: undefined,
+        });
+
+        assert.equal(env.T3CODE_STATE_SUBDIR, "userdata");
+      }),
+    );
+
+    it.effect("explicit stateSubdir overrides --use-userdata", () =>
+      Effect.gen(function* () {
+        const env = yield* createDevRunnerEnv({
+          mode: "dev",
+          baseEnv: {},
+          serverOffset: 0,
+          webOffset: 0,
+          t3Home: undefined,
+          stateSubdir: "custom",
+          useUserdata: true,
+          noBrowser: undefined,
+          autoBootstrapProjectFromCwd: undefined,
+          logWebSocketEvents: undefined,
+          host: undefined,
+          port: undefined,
+          devUrl: undefined,
+        });
+
+        assert.equal(env.T3CODE_STATE_SUBDIR, "custom");
       }),
     );
 
diff --git a/scripts/dev-runner.ts b/scripts/dev-runner.ts
index 1621b60da7..61fcae04d0 100644
--- a/scripts/dev-runner.ts
+++ b/scripts/dev-runner.ts
@@ -122,6 +122,8 @@ interface CreateDevRunnerEnvInput {
   readonly serverOffset: number;
   readonly webOffset: number;
   readonly t3Home: string | undefined;
+  readonly stateSubdir?: string | undefined;
+  readonly useUserdata?: boolean | undefined;
   readonly noBrowser: boolean | undefined;
   readonly autoBootstrapProjectFromCwd: boolean | undefined;
   readonly logWebSocketEvents: boolean | undefined;
@@ -136,6 +138,8 @@ export function createDevRunnerEnv({
   serverOffset,
   webOffset,
   t3Home,
+  stateSubdir,
+  useUserdata,
   noBrowser,
   autoBootstrapProjectFromCwd,
   logWebSocketEvents,
@@ -148,6 +152,8 @@ export function createDevRunnerEnv({
     const webPort = BASE_WEB_PORT + webOffset;
     const resolvedBaseDir = yield* resolveBaseDir(t3Home);
     const isDesktopMode = mode === "dev:desktop";
+    const resolvedStateSubdir =
+      stateSubdir?.trim() || (useUserdata === true ? "userdata" : undefined);
 
     const output: NodeJS.ProcessEnv = {
       ...baseEnv,
@@ -158,6 +164,12 @@ export function createDevRunnerEnv({
       T3CODE_HOME: resolvedBaseDir,
     };
 
+    if (resolvedStateSubdir !== undefined) {
+      output.T3CODE_STATE_SUBDIR = resolvedStateSubdir;
+    } else {
+      delete output.T3CODE_STATE_SUBDIR;
+    }
+
     if (!isDesktopMode) {
       output.T3CODE_PORT = String(serverPort);
       output.VITE_HTTP_URL = `http://localhost:${serverPort}`;
@@ -365,6 +377,8 @@ export function resolveModePortOffsets<R = NetService>({
 interface DevRunnerCliInput {
   readonly mode: DevMode;
   readonly t3Home: string | undefined;
+  readonly stateSubdir?: string | undefined;
+  readonly useUserdata?: boolean | undefined;
   readonly noBrowser: boolean | undefined;
   readonly autoBootstrapProjectFromCwd: boolean | undefined;
   readonly logWebSocketEvents: boolean | undefined;
@@ -409,6 +423,8 @@ export function runDevRunnerWithInput(input: DevRunnerCliInput) {
       serverOffset,
       webOffset,
       t3Home: input.t3Home,
+      stateSubdir: input.stateSubdir,
+      useUserdata: input.useUserdata,
       noBrowser: input.noBrowser,
       autoBootstrapProjectFromCwd: input.autoBootstrapProjectFromCwd,
       logWebSocketEvents: input.logWebSocketEvents,
@@ -422,10 +438,20 @@ export function runDevRunnerWithInput(input: DevRunnerCliInput) {
         ? ` selectedOffset(server=${serverOffset},web=${webOffset})`
         : "";
 
+    const subdirSuffix = env.T3CODE_STATE_SUBDIR
+      ? ` stateSubdir=${env.T3CODE_STATE_SUBDIR}`
+      : "";
+
     yield* Effect.logInfo(
-      `[dev-runner] mode=${input.mode} source=${source}${selectionSuffix} serverPort=${String(env.T3CODE_PORT)} webPort=${String(env.PORT)} baseDir=${String(env.T3CODE_HOME)}`,
+      `[dev-runner] mode=${input.mode} source=${source}${selectionSuffix} serverPort=${String(env.T3CODE_PORT)} webPort=${String(env.PORT)} baseDir=${String(env.T3CODE_HOME)}${subdirSuffix}`,
     );
 
+    // "userdata" may come from --use-userdata, --state-subdir or T3CODE_STATE_SUBDIR, so name the resolved value.
+    if (env.T3CODE_STATE_SUBDIR === "userdata") {
+      const warning = "⚠️  dev-runner: state subdir 'userdata' is active; server will write to the installed app's state. Quit the installed app first.";
+      yield* Effect.logWarning(warning);
+    }
+
     if (input.dryRun) {
       return;
     }
@@ -475,6 +501,18 @@ const devRunnerCli = Command.make("dev-runner", {
     Flag.withDescription("Base directory for all T3 Code data (equivalent to T3CODE_HOME)."),
     Flag.withFallbackConfig(optionalStringConfig("T3CODE_HOME")),
   ),
+  stateSubdir: Flag.string("state-subdir").pipe(
+    Flag.withDescription(
+      "State directory name under T3CODE_HOME (e.g. 'dev' or 'userdata'; equivalent to T3CODE_STATE_SUBDIR).",
+    ),
+    Flag.withFallbackConfig(optionalStringConfig("T3CODE_STATE_SUBDIR")),
+  ),
+  useUserdata: Flag.boolean("use-userdata").pipe(
+    Flag.withDescription(
+      "Shortcut for --state-subdir=userdata; reads/writes the installed app's state. Quit the installed app first.",
+    ),
+    Flag.withDefault(false),
+  ),
   noBrowser: Flag.boolean("no-browser").pipe(
     Flag.withDescription("Browser auto-open toggle (equivalent to T3CODE_NO_BROWSER)."),
     Flag.withFallbackConfig(optionalBooleanConfig("T3CODE_NO_BROWSER")),

From 33cb77a399b39e2d5d95a0f73593a0c1b31adc42 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 20:41:05 +0200
Subject: [PATCH 06/16] feat(contracts): cache-creation tokens + model on
 token-usage payload

- Add cacheCreationInputTokens + lastCacheCreationInputTokens to
  ThreadTokenUsageSnapshot. Anthropic charges cache-write at 1.25x
  input; reporting it separately lets the cost meter bill correctly.
- Add optional model field to ThreadTokenUsageUpdatedPayload so the
  server-side cost tracker can resolve pricing without a lookup
  against thread state.
---
 packages/contracts/src/providerRuntime.ts | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/packages/contracts/src/providerRuntime.ts b/packages/contracts/src/providerRuntime.ts
index 8387880ea3..e732451a7b 100644
--- a/packages/contracts/src/providerRuntime.ts
+++ b/packages/contracts/src/providerRuntime.ts
@@ -304,11 +304,17 @@ export const ThreadTokenUsageSnapshot = Schema.Struct({
   maxTokens: Schema.optional(PositiveInt),
   inputTokens: Schema.optional(NonNegativeInt),
   cachedInputTokens: Schema.optional(NonNegativeInt),
+  /**
+   * Prompt-cache write tokens (per-turn count: `lastCacheCreationInputTokens`).
+   * Anthropic bills cache-write at 1.25× the base input rate; cache-read at 0.1×.
+   */
+  cacheCreationInputTokens: Schema.optional(NonNegativeInt),
   outputTokens: Schema.optional(NonNegativeInt),
   reasoningOutputTokens: Schema.optional(NonNegativeInt),
   lastUsedTokens: Schema.optional(NonNegativeInt),
   lastInputTokens: Schema.optional(NonNegativeInt),
   lastCachedInputTokens: Schema.optional(NonNegativeInt),
+  lastCacheCreationInputTokens: Schema.optional(NonNegativeInt),
   lastOutputTokens: Schema.optional(NonNegativeInt),
   lastReasoningOutputTokens: Schema.optional(NonNegativeInt),
   toolUses: Schema.optional(NonNegativeInt),
@@ -319,6 +325,8 @@ export type ThreadTokenUsageSnapshot = typeof ThreadTokenUsageSnapshot.Type;
 
 const ThreadTokenUsageUpdatedPayload = Schema.Struct({
   usage: ThreadTokenUsageSnapshot,
+  /** Resolved model slug for the turn this usage belongs to, if known. */
+  model: Schema.optional(TrimmedNonEmptyStringSchema),
 });
 export type ThreadTokenUsageUpdatedPayload = typeof ThreadTokenUsageUpdatedPayload.Type;
 

From 615967d80ed6c65cf866aed61385c575d39fac70 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 20:43:43 +0200
Subject: [PATCH 07/16] feat(shared): 4th pricing tier for cache-creation
 tokens

Anthropic bills cache-writes at 1.25x input; OpenAI has no separate
write tier. Model a distinct cacheCreationInputPerMTok rate (with
provider-aware defaults) so the cost meter no longer conflates
cache hits, cache writes, and fresh input.

- ModelPricing gains cacheCreationInputPerMTok; Claude auto-applies
  the 1.25x multiplier, OpenAI defaults to inputPerMTok.
- TurnTokenDeltas + TurnCostBreakdown gain cacheCreation slots; zero
  for providers that don't distinguish the tier.
- computeTurnCost bills each class additively.
- Client extractDeltas reads lastCacheCreationInputTokens; helpers +
  fixtures carry the new field through.
- Tests: +2 cases covering Anthropic cache-write premium and the
  OpenAI default.
---
 apps/web/src/lib/costStore.test.ts  |  3 ++
 apps/web/src/lib/useCostTracking.ts |  4 +-
 packages/shared/src/pricing.test.ts | 23 +++++++++-
 packages/shared/src/pricing.ts      | 67 +++++++++++++++++++++++------
 4 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts
index 1162f11d12..0602f2ce6a 100644
--- a/apps/web/src/lib/costStore.test.ts
+++ b/apps/web/src/lib/costStore.test.ts
@@ -18,6 +18,7 @@ function freshState(): PersistedCostState {
 const cost = (total: number) => ({
   inputUsd: 0,
   cachedUsd: 0,
+  cacheCreationUsd: 0,
   outputUsd: 0,
   reasoningUsd: 0,
   totalUsd: total,
@@ -27,12 +28,14 @@ const deltas = (
   d: Partial<{
     inputTokens: number;
     cachedInputTokens: number;
+    cacheCreationInputTokens: number;
     outputTokens: number;
     reasoningOutputTokens: number;
   }> = {},
 ) => ({
   inputTokens: d.inputTokens ?? 0,
   cachedInputTokens: d.cachedInputTokens ?? 0,
+  cacheCreationInputTokens: d.cacheCreationInputTokens ?? 0,
   outputTokens: d.outputTokens ?? 0,
   reasoningOutputTokens: d.reasoningOutputTokens ?? 0,
 });
diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts
index 237f656262..6757ecc305 100644
--- a/apps/web/src/lib/useCostTracking.ts
+++ b/apps/web/src/lib/useCostTracking.ts
@@ -22,12 +22,14 @@ function extractDeltas(payload: unknown): TurnTokenDeltas | null {
   const p = payload as Record<string, unknown>;
   const input = toNonNegative(p.lastInputTokens);
   const cached = toNonNegative(p.lastCachedInputTokens);
+  const cacheCreation = toNonNegative(p.lastCacheCreationInputTokens);
   const output = toNonNegative(p.lastOutputTokens);
   const reasoning = toNonNegative(p.lastReasoningOutputTokens);
-  if (input + cached + output + reasoning <= 0) return null;
+  if (input + cached + cacheCreation + output + reasoning <= 0) return null;
   return {
     inputTokens: input,
     cachedInputTokens: cached,
+    cacheCreationInputTokens: cacheCreation,
     outputTokens: output,
     reasoningOutputTokens: reasoning,
   };
diff --git a/packages/shared/src/pricing.test.ts b/packages/shared/src/pricing.test.ts
index de76bea819..dfa437cd79 100644
--- a/packages/shared/src/pricing.test.ts
+++ b/packages/shared/src/pricing.test.ts
@@ -15,6 +15,13 @@ describe("pricing/getPricing", () => {
     expect(p.inputPerMTok).toBe(3);
     expect(p.cachedInputPerMTok).toBe(0.3);
     expect(p.outputPerMTok).toBe(15);
+    // Anthropic cache-write = 1.25× input.
+    expect(p.cacheCreationInputPerMTok).toBeCloseTo(3 * 1.25, 6);
+  });
+
+  it("defaults OpenAI cacheCreation rate to input rate", () => {
+    const p = getPricing("gpt-5.4");
+    expect(p.cacheCreationInputPerMTok).toBe(p.inputPerMTok);
   });
 
   it("resolves Claude short alias via provider", () => {
@@ -58,6 +65,7 @@ describe("pricing/computeTurnCost", () => {
     const cost = computeTurnCost("claude-sonnet-4-6", {
       inputTokens: 10_000,
       cachedInputTokens: 100_000,
+      cacheCreationInputTokens: 20_000,
       outputTokens: 2_000,
       reasoningOutputTokens: 500,
     });
@@ -65,23 +73,27 @@ describe("pricing/computeTurnCost", () => {
     expect(cost.inputUsd).toBeCloseTo(0.03, 6);
     // 100k * $0.30/Mtok = $0.03
     expect(cost.cachedUsd).toBeCloseTo(0.03, 6);
+    // 20k * ($3 * 1.25 = $3.75)/Mtok = $0.075
+    expect(cost.cacheCreationUsd).toBeCloseTo(0.075, 6);
     // 2k * $15/Mtok = $0.03
     expect(cost.outputUsd).toBeCloseTo(0.03, 6);
     // 500 * $15/Mtok = $0.0075
     expect(cost.reasoningUsd).toBeCloseTo(0.0075, 6);
-    expect(cost.totalUsd).toBeCloseTo(0.0975, 6);
+    expect(cost.totalUsd).toBeCloseTo(0.1725, 6);
   });
 
   it("computes Codex GPT-5.4 turn cost correctly", () => {
     const cost = computeTurnCost("gpt-5.4", {
       inputTokens: 1_000_000,
       cachedInputTokens: 0,
+      cacheCreationInputTokens: 0,
       outputTokens: 100_000,
       reasoningOutputTokens: 50_000,
     });
     // 1M * $1.25 = $1.25
     expect(cost.inputUsd).toBeCloseTo(1.25, 6);
     expect(cost.cachedUsd).toBe(0);
+    expect(cost.cacheCreationUsd).toBe(0);
     // 100k * $10/Mtok = $1
     expect(cost.outputUsd).toBeCloseTo(1, 6);
     // 50k * $10/Mtok = $0.5
@@ -89,6 +101,15 @@ describe("pricing/computeTurnCost", () => {
     expect(cost.totalUsd).toBeCloseTo(2.75, 6);
   });
 
+  it("applies Anthropic cache-write premium correctly", () => {
+    // Pure cache-creation: 1M tokens at 1.25× base rate
+    const cost = computeTurnCost("claude-sonnet-4-6", {
+      cacheCreationInputTokens: 1_000_000,
+    });
+    expect(cost.cacheCreationUsd).toBeCloseTo(3 * 1.25, 6);
+    expect(cost.totalUsd).toBeCloseTo(3.75, 6);
+  });
+
   it("returns zero cost for unknown model", () => {
     const cost = computeTurnCost("fake-model", {
       inputTokens: 10_000,
diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts
index 3ab9685348..96bcda1505 100644
--- a/packages/shared/src/pricing.ts
+++ b/packages/shared/src/pricing.ts
@@ -4,25 +4,44 @@ import type { ProviderKind } from "@t3tools/contracts";
 /**
  * USD price per 1,000,000 tokens for each token class.
  *
- * `cachedInput` is the discounted input price applied when the provider
- * serves cached prefix tokens (Anthropic prompt caching / OpenAI cached input).
- * `reasoningOutput` defaults to `output` when a model does not bill reasoning
- * tokens separately.
+ * - `inputPerMTok` — non-cached prompt tokens.
+ * - `cachedInputPerMTok` — cache-READ tokens (Anthropic 0.1× / OpenAI cached input).
+ * - `cacheCreationInputPerMTok` — cache-WRITE premium tier (Anthropic 1.25×).
+ *   Providers without a distinct cache-write tier (OpenAI, etc.) set this equal
+ *   to `inputPerMTok`.
+ * - `outputPerMTok` — model output tokens.
+ * - `reasoningOutputPerMTok` — reasoning output. Defaults to `outputPerMTok`
+ *   when a model does not bill reasoning tokens separately.
  */
 export interface ModelPricing {
   readonly provider: ProviderKind | "unknown";
   readonly inputPerMTok: number;
   readonly cachedInputPerMTok: number;
+  readonly cacheCreationInputPerMTok: number;
   readonly outputPerMTok: number;
   readonly reasoningOutputPerMTok: number;
 }
 
+/**
+ * Shape of one seed entry. The cache-creation and reasoning rates are optional;
+ * PRICING_TABLE derives them when omitted so the table below stays readable.
+ */
+type SeedPricing = {
+  readonly provider: ProviderKind | "unknown";
+  readonly inputPerMTok: number;
+  readonly cachedInputPerMTok: number;
+  readonly outputPerMTok: number;
+  readonly cacheCreationInputPerMTok?: number;
+  readonly reasoningOutputPerMTok?: number;
+};
+
+const ANTHROPIC_CACHE_WRITE_MULTIPLIER = 1.25;
+
 /** Raw seed rates (USD per 1M tokens). Source: public provider pricing pages. */
-const SEED_PRICING: ReadonlyArray<
-  readonly [string, Omit<ModelPricing, "reasoningOutputPerMTok"> & { reasoningOutputPerMTok?: number }]
-> = [
+const SEED_PRICING: ReadonlyArray<readonly [string, SeedPricing]> = [
   // ── Anthropic / Claude ───────────────────────────────────────────────
-  // Extended-thinking tokens are billed as output tokens.
+  // Cache-read = 0.1× input; cache-write = 1.25× input.
+  // Extended-thinking tokens bill as output.
   [
     "claude-sonnet-4-6",
     {
@@ -69,7 +88,9 @@ const SEED_PRICING: ReadonlyArray<
     },
   ],
   // ── OpenAI / Codex ───────────────────────────────────────────────────
-  // Codex app routes use GPT-5 family pricing. Reasoning tokens bill as output.
+  // OpenAI does not bill a separate cache-creation tier — cached-input rate
+  // applies on hits; misses price at the normal input rate. We therefore
+  // default cacheCreationInputPerMTok to inputPerMTok below.
   [
     "gpt-5.4",
     {
@@ -115,8 +136,14 @@ const SEED_PRICING: ReadonlyArray<
 export const PRICING_TABLE: ReadonlyMap<string, ModelPricing> = (() => {
   const map = new Map<string, ModelPricing>();
   for (const [slug, raw] of SEED_PRICING) {
+    const cacheCreationInputPerMTok =
+      raw.cacheCreationInputPerMTok ??
+      (raw.provider === "claudeAgent"
+        ? raw.inputPerMTok * ANTHROPIC_CACHE_WRITE_MULTIPLIER
+        : raw.inputPerMTok);
     map.set(slug, {
       ...raw,
+      cacheCreationInputPerMTok,
       reasoningOutputPerMTok: raw.reasoningOutputPerMTok ?? raw.outputPerMTok,
     });
   }
@@ -128,6 +155,7 @@ export const UNKNOWN_MODEL_PRICING: ModelPricing = {
   provider: "unknown",
   inputPerMTok: 0,
   cachedInputPerMTok: 0,
+  cacheCreationInputPerMTok: 0,
   outputPerMTok: 0,
   reasoningOutputPerMTok: 0,
 };
@@ -175,6 +203,7 @@ export function getPricing(
 export interface TurnTokenDeltas {
   readonly inputTokens: number;
   readonly cachedInputTokens: number;
+  readonly cacheCreationInputTokens: number;
   readonly outputTokens: number;
   readonly reasoningOutputTokens: number;
 }
@@ -182,6 +211,7 @@ export interface TurnTokenDeltas {
 export interface TurnCostBreakdown {
   readonly inputUsd: number;
   readonly cachedUsd: number;
+  readonly cacheCreationUsd: number;
   readonly outputUsd: number;
   readonly reasoningUsd: number;
   readonly totalUsd: number;
@@ -190,6 +220,7 @@ export interface TurnCostBreakdown {
 export const ZERO_COST: TurnCostBreakdown = {
   inputUsd: 0,
   cachedUsd: 0,
+  cacheCreationUsd: 0,
   outputUsd: 0,
   reasoningUsd: 0,
   totalUsd: 0,
@@ -201,9 +232,15 @@ function finite(value: number | null | undefined): number {
 
 /**
  * Compute USD cost for one turn's token deltas.
- * Anthropic bills cached-input tokens at a reduced rate *instead of* the
- * full input rate — so callers pass the non-cached input count in
- * `inputTokens` and the cached prefix count in `cachedInputTokens`.
+ *
+ * Token classes:
+ *   - `inputTokens` — non-cached input.
+ *   - `cachedInputTokens` — cache-READ tokens (discounted).
+ *   - `cacheCreationInputTokens` — cache-WRITE tokens (premium on Anthropic).
+ *   - `outputTokens` — model output.
+ *   - `reasoningOutputTokens` — reasoning output. Defaults to output rate.
+ *
+ * Each class is billed *additively*, matching how providers invoice.
  */
 export function computeTurnCost(
   model: string | null | undefined,
@@ -213,16 +250,18 @@ export function computeTurnCost(
   const pricing = getPricing(model, provider);
   const input = finite(deltas.inputTokens);
   const cached = finite(deltas.cachedInputTokens);
+  const cacheCreation = finite(deltas.cacheCreationInputTokens);
   const output = finite(deltas.outputTokens);
   const reasoning = finite(deltas.reasoningOutputTokens);
 
   const inputUsd = (input / 1_000_000) * pricing.inputPerMTok;
   const cachedUsd = (cached / 1_000_000) * pricing.cachedInputPerMTok;
+  const cacheCreationUsd = (cacheCreation / 1_000_000) * pricing.cacheCreationInputPerMTok;
   const outputUsd = (output / 1_000_000) * pricing.outputPerMTok;
   const reasoningUsd = (reasoning / 1_000_000) * pricing.reasoningOutputPerMTok;
-  const totalUsd = inputUsd + cachedUsd + outputUsd + reasoningUsd;
+  const totalUsd = inputUsd + cachedUsd + cacheCreationUsd + outputUsd + reasoningUsd;
 
-  return { inputUsd, cachedUsd, outputUsd, reasoningUsd, totalUsd };
+  return { inputUsd, cachedUsd, cacheCreationUsd, outputUsd, reasoningUsd, totalUsd };
 }
 
 /** Format USD amount for UI display. */

From 37f692fb6eeae008feb4232af35b8b3a84f0af64 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 20:51:48 +0200
Subject: [PATCH 08/16] fix(server): Claude adapter reports correct per-turn +
 per-tier token usage

The Claude adapter lumped cache_read / cache_creation / fresh input
into a single inputTokens field and emitted no per-turn deltas,
leaving the cost meter silently $0 for every Claude turn and
over-charging cached contexts by ~10x when it did fire. It also
clamped usedTokens at maxTokens on cumulative totals, pinning the
context ring at 100% once totalProcessedTokens exceeded the window.

Changes:
- Extract parseClaudeUsageBreakdown: splits SDK usage into four
  tiers (input / cachedInput / cacheCreationInput / output) with an
  explicit totalTokens.
- normalizeClaudeTokenUsage emits all four tiers and drops the
  min(total, max) cap; callers decide how to render overflow.
- Add buildClaudeTurnCompleteUsage: maintains a per-session
  lastTurnCumulativeUsage accumulator, subtracts from each
  result.usage to produce lastInputTokens / lastCachedInputTokens /
  lastCacheCreationInputTokens / lastOutputTokens deltas for the
  cost tracker. usedTokens prefers the task snapshot (real current
  context) over the cumulative total.
- Context state gains lastTurnCumulativeUsage; initialized at
  session start, advanced on each turn-complete emission.

Tests:
- New ClaudeAdapter.usage.test.ts: 10 unit tests cover parseBreakdown
  semantics, first-turn vs second-turn deltas, clamp behaviour,
  task-snapshot fallback, and negative-delta guards.
- ClaudeAdapter.test.ts updated: three existing cases now assert the
  split tiers + uncapped usedTokens (what the SDK actually reports).
- Full server suite: 894 pass.
---
 .../src/provider/Layers/ClaudeAdapter.test.ts |  25 +-
 .../src/provider/Layers/ClaudeAdapter.ts      | 265 +++++++++++++-----
 .../Layers/ClaudeAdapter.usage.test.ts        | 173 ++++++++++++
 3 files changed, 391 insertions(+), 72 deletions(-)
 create mode 100644 apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts

diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
index 79c66bdfcf..0846009a49 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
@@ -1595,12 +1595,22 @@ describe("ClaudeAdapterLive", () => {
       const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
       assert.equal(usageEvent?.type, "thread.token-usage.updated");
       if (usageEvent?.type === "thread.token-usage.updated") {
+        // First turn: no prior cumulative, so last* deltas equal cumulative
+        // totals. Cache read/write split correctly; usedTokens = cumulative
+        // total (no task snapshot in this test).
         assert.deepEqual(usageEvent.payload, {
           usage: {
             usedTokens: 24542,
             lastUsedTokens: 24542,
-            inputTokens: 23863,
+            totalProcessedTokens: 24542,
+            inputTokens: 4,
+            cachedInputTokens: 21144,
+            cacheCreationInputTokens: 2715,
             outputTokens: 679,
+            lastInputTokens: 4,
+            lastCachedInputTokens: 21144,
+            lastCacheCreationInputTokens: 2715,
+            lastOutputTokens: 679,
             maxTokens: 200000,
           },
         });
@@ -1611,7 +1621,7 @@ describe("ClaudeAdapterLive", () => {
     );
   });
 
-  it.effect("clamps oversized Claude usage to the reported context window", () => {
+  it.effect("reports Claude usage uncapped when cumulative exceeds context window", () => {
     const harness = makeHarness();
     return Effect.gen(function* () {
       const adapter = yield* ClaudeAdapter;
@@ -1659,10 +1669,12 @@ describe("ClaudeAdapterLive", () => {
       const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
       assert.equal(usageEvent?.type, "thread.token-usage.updated");
       if (usageEvent?.type === "thread.token-usage.updated") {
+        // usedTokens is no longer clamped: the cumulative result total is
+        // reported as-is. UI clamps for ring display; callers get truth.
         assert.deepEqual(usageEvent.payload, {
           usage: {
-            usedTokens: 200000,
-            lastUsedTokens: 200000,
+            usedTokens: 535000,
+            lastUsedTokens: 535000,
             totalProcessedTokens: 535000,
             maxTokens: 200000,
           },
@@ -1739,10 +1751,13 @@ describe("ClaudeAdapterLive", () => {
         const finalUsageEvent = usageEvents.at(-1);
         assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
         if (finalUsageEvent?.type === "thread.token-usage.updated") {
+          // Task snapshot drives usedTokens (real current-context); result
+          // cumulative drives totalProcessedTokens. lastUsedTokens reports
+          // the turn's total (cumulative since there's no prior turn).
           assert.deepEqual(finalUsageEvent.payload, {
             usage: {
               usedTokens: 190000,
-              lastUsedTokens: 190000,
+              lastUsedTokens: 535000,
               totalProcessedTokens: 535000,
               maxTokens: 200000,
             },
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts
index 81980acb9b..11e7a2569b 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts
@@ -158,6 +158,14 @@ interface ClaudeSessionContext {
   turnState: ClaudeTurnState | undefined;
   lastKnownContextWindow: number | undefined;
   lastKnownTokenUsage: ThreadTokenUsageSnapshot | undefined;
+  /**
+   * Cumulative per-class token counts emitted in the prior turn's
+   * `result.usage`. Claude's SDK reports `result.usage` as a running total
+   * across every API call in the session, so per-turn cost requires
+   * subtracting this snapshot from the current cumulative totals. Cleared
+   * on session start; advanced to the new totals after each turn-complete emission.
+   */
+  lastTurnCumulativeUsage: ClaudeUsageBreakdown | undefined;
   lastAssistantUuid: string | undefined;
   lastThreadStartedId: string | undefined;
   stopped: boolean;
@@ -290,61 +298,197 @@ function maxClaudeContextWindowFromModelUsage(
   return maxContextWindow;
 }
 
-function normalizeClaudeTokenUsage(
-  value: unknown,
-  contextWindow?: number,
-): ThreadTokenUsageSnapshot | undefined {
+/**
+ * Breakdown of a Claude SDK usage record across the four token classes we
+ * price separately. Pure — no derived totals, no capping. Callers combine
+ * with prior session state to compute context/ring values or per-turn deltas.
+ */
+interface ClaudeUsageBreakdown {
+  readonly inputTokens: number;
+  readonly cachedInputTokens: number;
+  readonly cacheCreationInputTokens: number;
+  readonly outputTokens: number;
+  /**
+   * `usage.total_tokens` when the SDK reports it explicitly, otherwise the
+   * sum of the four classes. Used to drive `usedTokens` when no task
+   * snapshot is available.
+   */
+  readonly totalTokens: number;
+  readonly toolUses?: number;
+  readonly durationMs?: number;
+}
+
+function nonNegativeNumber(value: unknown): number {
+  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
+}
+
+export function parseClaudeUsageBreakdown(value: unknown): ClaudeUsageBreakdown | undefined {
   if (!value || typeof value !== "object") {
     return undefined;
   }
-
   const usage = value as Record<string, unknown>;
-  const inputTokens =
-    (typeof usage.input_tokens === "number" && Number.isFinite(usage.input_tokens)
-      ? usage.input_tokens
-      : 0) +
-    (typeof usage.cache_creation_input_tokens === "number" &&
-    Number.isFinite(usage.cache_creation_input_tokens)
-      ? usage.cache_creation_input_tokens
-      : 0) +
-    (typeof usage.cache_read_input_tokens === "number" &&
-    Number.isFinite(usage.cache_read_input_tokens)
-      ? usage.cache_read_input_tokens
-      : 0);
-  const outputTokens =
-    typeof usage.output_tokens === "number" && Number.isFinite(usage.output_tokens)
-      ? usage.output_tokens
-      : 0;
-  const derivedTotalProcessedTokens = inputTokens + outputTokens;
-  const totalProcessedTokens =
-    (typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens)
+  const inputTokens = nonNegativeNumber(usage.input_tokens);
+  const cachedInputTokens = nonNegativeNumber(usage.cache_read_input_tokens);
+  const cacheCreationInputTokens = nonNegativeNumber(usage.cache_creation_input_tokens);
+  const outputTokens = nonNegativeNumber(usage.output_tokens);
+  const derivedTotal =
+    inputTokens + cachedInputTokens + cacheCreationInputTokens + outputTokens;
+  const totalTokens =
+    typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens) && usage.total_tokens > 0
       ? usage.total_tokens
-      : undefined) ?? (derivedTotalProcessedTokens > 0 ? derivedTotalProcessedTokens : undefined);
-  if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) {
+      : derivedTotal;
+  if (totalTokens <= 0) {
     return undefined;
   }
+  const toolUses =
+    typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses)
+      ? usage.tool_uses
+      : undefined;
+  const durationMs =
+    typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms)
+      ? usage.duration_ms
+      : undefined;
+  return {
+    inputTokens,
+    cachedInputTokens,
+    cacheCreationInputTokens,
+    outputTokens,
+    totalTokens,
+    ...(toolUses !== undefined ? { toolUses } : {}),
+    ...(durationMs !== undefined ? { durationMs } : {}),
+  };
+}
 
+/**
+ * Normalize a single Claude usage record into a `ThreadTokenUsageSnapshot`.
+ *
+ * Used for mid-turn snapshots (task_progress / task_notification) — each such
+ * event represents one API call's usage, which (for the latest call) matches
+ * the current context window size. The four token classes are reported
+ * separately so downstream cost math can apply the correct tier.
+ *
+ * No capping: `usedTokens` reflects `total_tokens` (or the derived sum) as
+ * reported. Callers that want to clamp for ring display should do so in the
+ * UI layer.
+ */
+function normalizeClaudeTokenUsage(
+  value: unknown,
+  contextWindow?: number,
+): ThreadTokenUsageSnapshot | undefined {
+  const breakdown = parseClaudeUsageBreakdown(value);
+  if (!breakdown) {
+    return undefined;
+  }
   const maxTokens =
     typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
       ? contextWindow
       : undefined;
-  const usedTokens =
-    maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens;
-
   return {
-    usedTokens,
-    lastUsedTokens: usedTokens,
-    ...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}),
-    ...(inputTokens > 0 ? { inputTokens } : {}),
-    ...(outputTokens > 0 ? { outputTokens } : {}),
-    ...(maxTokens !== undefined ? { maxTokens } : {}),
-    ...(typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses)
-      ? { toolUses: usage.tool_uses }
+    usedTokens: breakdown.totalTokens,
+    lastUsedTokens: breakdown.totalTokens,
+    ...(breakdown.inputTokens > 0 ? { inputTokens: breakdown.inputTokens } : {}),
+    ...(breakdown.cachedInputTokens > 0 ? { cachedInputTokens: breakdown.cachedInputTokens } : {}),
+    ...(breakdown.cacheCreationInputTokens > 0
+      ? { cacheCreationInputTokens: breakdown.cacheCreationInputTokens }
       : {}),
-    ...(typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms)
-      ? { durationMs: usage.duration_ms }
+    ...(breakdown.outputTokens > 0 ? { outputTokens: breakdown.outputTokens } : {}),
+    ...(maxTokens !== undefined ? { maxTokens } : {}),
+    ...(breakdown.toolUses !== undefined ? { toolUses: breakdown.toolUses } : {}),
+    ...(breakdown.durationMs !== undefined ? { durationMs: breakdown.durationMs } : {}),
+  };
+}
+
+/**
+ * Build the turn-complete usage snapshot. Combines:
+ *   - Mid-turn task snapshot (current context size) for `usedTokens`.
+ *   - Cumulative session totals from `result.usage` for `totalProcessedTokens`
+ *     and the cumulative per-class counts.
+ *   - Per-turn deltas via subtraction against the prior turn's cumulative —
+ *     this populates `lastInputTokens / lastCachedInputTokens /
+ *     lastCacheCreationInputTokens / lastOutputTokens` for the downstream
+ *     cost meter.
+ *
+ * `priorCumulative` is never mutated here; the caller is expected to store the
+ * returned `nextCumulative` after emission so the next turn has a fresh baseline.
+ */
+export interface ClaudeTurnCompleteUsageInput {
+  readonly resultUsage: unknown;
+  readonly taskSnapshot: ThreadTokenUsageSnapshot | undefined;
+  readonly contextWindow?: number | undefined;
+  readonly priorCumulative?: ClaudeUsageBreakdown | undefined;
+}
+
+export interface ClaudeTurnCompleteUsageResult {
+  readonly snapshot: ThreadTokenUsageSnapshot | undefined;
+  readonly nextCumulative: ClaudeUsageBreakdown | undefined;
+}
+
+export function buildClaudeTurnCompleteUsage(
+  input: ClaudeTurnCompleteUsageInput,
+): ClaudeTurnCompleteUsageResult {
+  const cumulative = parseClaudeUsageBreakdown(input.resultUsage);
+  const maxTokens =
+    typeof input.contextWindow === "number" &&
+    Number.isFinite(input.contextWindow) &&
+    input.contextWindow > 0
+      ? input.contextWindow
+      : undefined;
+
+  if (!cumulative) {
+    // No result.usage — fall back to whatever task snapshot we have, stamped
+    // with the freshest maxTokens.
+    if (!input.taskSnapshot) {
+      return { snapshot: undefined, nextCumulative: input.priorCumulative };
+    }
+    return {
+      snapshot: {
+        ...input.taskSnapshot,
+        ...(maxTokens !== undefined ? { maxTokens } : {}),
+      },
+      nextCumulative: input.priorCumulative,
+    };
+  }
+
+  const prior = input.priorCumulative ?? {
+    inputTokens: 0,
+    cachedInputTokens: 0,
+    cacheCreationInputTokens: 0,
+    outputTokens: 0,
+    totalTokens: 0,
+  };
+  const deltaInput = Math.max(0, cumulative.inputTokens - prior.inputTokens);
+  const deltaCached = Math.max(0, cumulative.cachedInputTokens - prior.cachedInputTokens);
+  const deltaCacheCreation = Math.max(
+    0,
+    cumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens,
+  );
+  const deltaOutput = Math.max(0, cumulative.outputTokens - prior.outputTokens);
+  const lastTotal = deltaInput + deltaCached + deltaCacheCreation + deltaOutput;
+
+  // usedTokens: prefer the task snapshot (current context size); fall back to
+  // the cumulative total when no task snapshot was recorded for this turn.
+  const usedTokens = input.taskSnapshot?.usedTokens ?? cumulative.totalTokens;
+
+  const snapshot: ThreadTokenUsageSnapshot = {
+    usedTokens,
+    lastUsedTokens: lastTotal > 0 ? lastTotal : cumulative.totalTokens,
+    totalProcessedTokens: cumulative.totalTokens,
+    ...(cumulative.inputTokens > 0 ? { inputTokens: cumulative.inputTokens } : {}),
+    ...(cumulative.cachedInputTokens > 0 ? { cachedInputTokens: cumulative.cachedInputTokens } : {}),
+    ...(cumulative.cacheCreationInputTokens > 0
+      ? { cacheCreationInputTokens: cumulative.cacheCreationInputTokens }
       : {}),
+    ...(cumulative.outputTokens > 0 ? { outputTokens: cumulative.outputTokens } : {}),
+    ...(deltaInput > 0 ? { lastInputTokens: deltaInput } : {}),
+    ...(deltaCached > 0 ? { lastCachedInputTokens: deltaCached } : {}),
+    ...(deltaCacheCreation > 0 ? { lastCacheCreationInputTokens: deltaCacheCreation } : {}),
+    ...(deltaOutput > 0 ? { lastOutputTokens: deltaOutput } : {}),
+    ...(maxTokens !== undefined ? { maxTokens } : {}),
+    ...(cumulative.toolUses !== undefined ? { toolUses: cumulative.toolUses } : {}),
+    ...(cumulative.durationMs !== undefined ? { durationMs: cumulative.durationMs } : {}),
   };
+
+  return { snapshot, nextCumulative: cumulative };
 }
 
 function asCanonicalTurnId(value: TurnId): TurnId {
@@ -1385,34 +1529,20 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
       context.lastKnownContextWindow = resultContextWindow;
     }
 
-    // The SDK result.usage contains *accumulated* totals across all API calls
-    // (input_tokens, cache_read_input_tokens, etc. summed over every request).
-    // This does NOT represent the current context window size.
-    // Instead, use the last known context-window-accurate usage from task_progress
-    // events and treat the accumulated total as totalProcessedTokens.
-    const accumulatedSnapshot = normalizeClaudeTokenUsage(
-      result?.usage,
-      resultContextWindow ?? context.lastKnownContextWindow,
-    );
-    const accumulatedTotalProcessedTokens =
-      accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
-    const lastGoodUsage = context.lastKnownTokenUsage;
-    const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
-    const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
-      ? {
-          ...lastGoodUsage,
-          ...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
-            ? { maxTokens }
-            : {}),
-          ...(typeof accumulatedTotalProcessedTokens === "number" &&
-          Number.isFinite(accumulatedTotalProcessedTokens) &&
-          accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
-            ? {
-                totalProcessedTokens: accumulatedTotalProcessedTokens,
-              }
-            : {}),
-        }
-      : accumulatedSnapshot;
+    // `result.usage` reports running totals across every API call in the
+    // session. We combine it with the freshest per-call task snapshot (for
+    // `usedTokens` — the real current-context value) and with the prior
+    // turn's cumulative snapshot (to derive this turn's per-class deltas).
+    const turnUsage = buildClaudeTurnCompleteUsage({
+      resultUsage: result?.usage,
+      taskSnapshot: context.lastKnownTokenUsage,
+      contextWindow: resultContextWindow ?? context.lastKnownContextWindow,
+      priorCumulative: context.lastTurnCumulativeUsage,
+    });
+    const usageSnapshot = turnUsage.snapshot;
+    if (turnUsage.nextCumulative !== undefined) {
+      context.lastTurnCumulativeUsage = turnUsage.nextCumulative;
+    }
 
     const turnState = context.turnState;
     if (!turnState) {
@@ -2918,6 +3048,7 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
         turnState: undefined,
         lastKnownContextWindow: undefined,
         lastKnownTokenUsage: undefined,
+        lastTurnCumulativeUsage: undefined,
         lastAssistantUuid: resumeState?.resumeSessionAt,
         lastThreadStartedId: undefined,
         stopped: false,
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
new file mode 100644
index 0000000000..c651512d3c
--- /dev/null
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
@@ -0,0 +1,173 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  buildClaudeTurnCompleteUsage,
+  parseClaudeUsageBreakdown,
+  type ClaudeTurnCompleteUsageResult,
+} from "./ClaudeAdapter.ts";
+
+describe("parseClaudeUsageBreakdown", () => {
+  it("splits Anthropic fields into four token tiers", () => {
+    const b = parseClaudeUsageBreakdown({
+      input_tokens: 4,
+      cache_creation_input_tokens: 2715,
+      cache_read_input_tokens: 21144,
+      output_tokens: 679,
+    });
+    expect(b).toEqual({
+      inputTokens: 4,
+      cachedInputTokens: 21144,
+      cacheCreationInputTokens: 2715,
+      outputTokens: 679,
+      totalTokens: 4 + 2715 + 21144 + 679,
+    });
+  });
+
+  it("prefers explicit total_tokens over the derived sum", () => {
+    const b = parseClaudeUsageBreakdown({
+      total_tokens: 999,
+      input_tokens: 1,
+      output_tokens: 2,
+    });
+    expect(b?.totalTokens).toBe(999);
+  });
+
+  it("derives total when only total_tokens reported", () => {
+    const b = parseClaudeUsageBreakdown({ total_tokens: 42 });
+    expect(b?.totalTokens).toBe(42);
+    expect(b?.inputTokens).toBe(0);
+  });
+
+  it("returns undefined for empty / malformed input", () => {
+    expect(parseClaudeUsageBreakdown(null)).toBeUndefined();
+    expect(parseClaudeUsageBreakdown({})).toBeUndefined();
+    expect(parseClaudeUsageBreakdown({ total_tokens: 0 })).toBeUndefined();
+  });
+});
+
+describe("buildClaudeTurnCompleteUsage", () => {
+  it("builds first-turn deltas equal to cumulative totals", () => {
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        input_tokens: 1_000,
+        cache_read_input_tokens: 5_000,
+        cache_creation_input_tokens: 2_000,
+        output_tokens: 500,
+      },
+      taskSnapshot: undefined,
+      contextWindow: 200_000,
+      priorCumulative: undefined,
+    });
+    const snap = res.snapshot!;
+    expect(snap.inputTokens).toBe(1_000);
+    expect(snap.cachedInputTokens).toBe(5_000);
+    expect(snap.cacheCreationInputTokens).toBe(2_000);
+    expect(snap.outputTokens).toBe(500);
+    expect(snap.lastInputTokens).toBe(1_000);
+    expect(snap.lastCachedInputTokens).toBe(5_000);
+    expect(snap.lastCacheCreationInputTokens).toBe(2_000);
+    expect(snap.lastOutputTokens).toBe(500);
+    expect(snap.lastUsedTokens).toBe(8_500);
+    expect(snap.usedTokens).toBe(8_500);
+    expect(snap.totalProcessedTokens).toBe(8_500);
+    expect(snap.maxTokens).toBe(200_000);
+    expect(res.nextCumulative).toBeDefined();
+  });
+
+  it("computes second-turn deltas against the prior cumulative", () => {
+    const turn1 = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        input_tokens: 1_000,
+        cache_read_input_tokens: 5_000,
+        output_tokens: 500,
+      },
+      taskSnapshot: undefined,
+      contextWindow: 200_000,
+      priorCumulative: undefined,
+    });
+    const turn2 = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        // Cumulative totals have grown — turn 2 added 500 input, 1k cached,
+        // 300 cache-creation, 200 output.
+        input_tokens: 1_500,
+        cache_read_input_tokens: 6_000,
+        cache_creation_input_tokens: 300,
+        output_tokens: 700,
+      },
+      taskSnapshot: undefined,
+      contextWindow: 200_000,
+      priorCumulative: turn1.nextCumulative,
+    });
+    const s = turn2.snapshot!;
+    expect(s.inputTokens).toBe(1_500);
+    expect(s.cachedInputTokens).toBe(6_000);
+    expect(s.cacheCreationInputTokens).toBe(300);
+    expect(s.outputTokens).toBe(700);
+    expect(s.lastInputTokens).toBe(500);
+    expect(s.lastCachedInputTokens).toBe(1_000);
+    expect(s.lastCacheCreationInputTokens).toBe(300);
+    expect(s.lastOutputTokens).toBe(200);
+    expect(s.lastUsedTokens).toBe(500 + 1_000 + 300 + 200);
+  });
+
+  it("does not cap usedTokens to maxTokens", () => {
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: { total_tokens: 535_000 },
+      taskSnapshot: undefined,
+      contextWindow: 200_000,
+      priorCumulative: undefined,
+    });
+    expect(res.snapshot!.usedTokens).toBe(535_000);
+    expect(res.snapshot!.maxTokens).toBe(200_000);
+  });
+
+  it("uses task snapshot usedTokens when available (current context)", () => {
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: { total_tokens: 535_000 },
+      taskSnapshot: {
+        usedTokens: 190_000,
+        lastUsedTokens: 190_000,
+      },
+      contextWindow: 200_000,
+      priorCumulative: undefined,
+    });
+    expect(res.snapshot!.usedTokens).toBe(190_000);
+    expect(res.snapshot!.totalProcessedTokens).toBe(535_000);
+  });
+
+  it("falls back to task snapshot when result.usage is absent", () => {
+    const res: ClaudeTurnCompleteUsageResult = buildClaudeTurnCompleteUsage({
+      resultUsage: undefined,
+      taskSnapshot: { usedTokens: 500, lastUsedTokens: 500 },
+      contextWindow: 100_000,
+      priorCumulative: undefined,
+    });
+    expect(res.snapshot?.usedTokens).toBe(500);
+    expect(res.nextCumulative).toBeUndefined();
+  });
+
+  it("clamps negative deltas to zero when cumulative goes backwards", () => {
+    const prior = {
+      inputTokens: 1_000,
+      cachedInputTokens: 5_000,
+      cacheCreationInputTokens: 0,
+      outputTokens: 500,
+      totalTokens: 6_500,
+    };
+    // Unexpected: SDK reports lower cumulative (shouldn't happen, but guard
+    // against it so cost math never goes negative).
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        input_tokens: 900,
+        cache_read_input_tokens: 4_000,
+        output_tokens: 400,
+      },
+      taskSnapshot: undefined,
+      priorCumulative: prior,
+    });
+    const s = res.snapshot!;
+    expect(s.lastInputTokens).toBeUndefined(); // delta was 0
+    expect(s.lastCachedInputTokens).toBeUndefined();
+    expect(s.lastOutputTokens).toBeUndefined();
+  });
+});

From f38801193d4ea61becf898e66e17dbad1d5d42e1 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 20:59:39 +0200
Subject: [PATCH 09/16] =?UTF-8?q?feat(server):=20CostTracker=20layer=20?=
 =?UTF-8?q?=E2=80=94=20JSON-backed=20cost=20ledger?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a server-owned cost ledger that writes three atomic JSON
files per recorded turn:
  - session_<threadId>.json  per-thread cumulative
  - YYYY-MM.json             month bucket (local tz)
  - alltime.json             running total since install

Works across dev, installed app, and standalone binaries because
persistence lives next to the server's existing SQLite state at
<T3CODE_HOME>/<state>/usage/. Atomic writes mirror serverSettings:
write .tmp, rename into place; errors log and swallow so
orchestration never blocks on FS failure.

Components:
- types.ts: plain-TS interfaces + local-tz month key helper +
  empty-bucket constructors.
- Reducer.ts: pure deriveTurnDeltas / processTurn / isTurnNoOp /
  sanitizePersistedFile. Prefers lastXxxTokens from the payload
  (Codex + post-fix Claude); falls back to delta-vs-lastCumulative
  for older providers. Zero-cost unknown models still record their
  token usage.
- Services/CostTracker.ts: Effect Context.Service API
  (recordUsage / getSummary / updates stream).
- Layers/CostTracker.ts: FS-backed live layer; semaphore-serialized
  writes; PubSub exposes live updates for WS broadcast.
- shared/pricing: re-export ProviderKind so server consumers don't
  reach into contracts for it.

Tests: 14 pure reducer cases + 5 live-layer cases (record, idempotent
no-op, accumulate, stream emission, zero-summary). All green.
---
 .../src/cost/Layers/CostTracker.test.ts       | 146 ++++++++
 apps/server/src/cost/Layers/CostTracker.ts    | 199 +++++++++++
 apps/server/src/cost/Reducer.test.ts          | 311 ++++++++++++++++
 apps/server/src/cost/Reducer.ts               | 337 ++++++++++++++++++
 apps/server/src/cost/Services/CostTracker.ts  |  49 +++
 apps/server/src/cost/types.ts                 | 120 +++++++
 packages/shared/src/pricing.ts                |   2 +
 7 files changed, 1164 insertions(+)
 create mode 100644 apps/server/src/cost/Layers/CostTracker.test.ts
 create mode 100644 apps/server/src/cost/Layers/CostTracker.ts
 create mode 100644 apps/server/src/cost/Reducer.test.ts
 create mode 100644 apps/server/src/cost/Reducer.ts
 create mode 100644 apps/server/src/cost/Services/CostTracker.ts
 create mode 100644 apps/server/src/cost/types.ts

diff --git a/apps/server/src/cost/Layers/CostTracker.test.ts b/apps/server/src/cost/Layers/CostTracker.test.ts
new file mode 100644
index 0000000000..1a2acb4325
--- /dev/null
+++ b/apps/server/src/cost/Layers/CostTracker.test.ts
@@ -0,0 +1,146 @@
+import * as NodeServices from "@effect/platform-node/NodeServices";
+import { assert, it } from "@effect/vitest";
+import { Effect, Fiber, FileSystem, Layer, Path, Stream } from "effect";
+
+import { ServerConfig } from "../../config.ts";
+import { CostTrackerLive } from "./CostTracker.ts";
+import { CostTrackerService } from "../Services/CostTracker.ts";
+import { localMonthKey } from "../types.ts";
+
+const SONNET = "claude-sonnet-4-6";
+
+const makeLayer = () => {
+  const configLayer = ServerConfig.layerTest(process.cwd(), { prefix: "t3-cost-" });
+  return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer);
+};
+
+it.layer(NodeServices.layer)("CostTrackerLive", (it) => {
+  it.effect("records a turn and persists session/month/alltime files", () =>
+    Effect.gen(function* () {
+      const tracker = yield* CostTrackerService;
+      const config = yield* ServerConfig;
+      const fs = yield* FileSystem.FileSystem;
+      const path = yield* Path.Path;
+
+      const at = new Date(2026, 3, 21, 10, 0, 0);
+      const monthKey = localMonthKey(at);
+      const summary = yield* tracker.recordUsage({
+        threadId: "thread-1",
+        model: SONNET,
+        usage: {
+          inputTokens: 1_000,
+          cachedInputTokens: 5_000,
+          outputTokens: 500,
+          lastInputTokens: 1_000,
+          lastCachedInputTokens: 5_000,
+          lastOutputTokens: 500,
+        },
+        at,
+      });
+
+      assert.equal(summary.thread?.turnCount, 1);
+      assert.equal(summary.month.turnCount, 1);
+      assert.equal(summary.allTime.turnCount, 1);
+      assert.equal(summary.monthKey, monthKey);
+      assert.ok(summary.month.totalUsd > 0);
+
+      const sessionPath = path.join(config.usageDir, "session_thread-1.json");
+      const monthPath = path.join(config.usageDir, `${monthKey}.json`);
+      const alltimePath = path.join(config.usageDir, "alltime.json");
+      assert.equal(yield* fs.exists(sessionPath), true);
+      assert.equal(yield* fs.exists(monthPath), true);
+      assert.equal(yield* fs.exists(alltimePath), true);
+
+      const monthRaw = yield* fs.readFileString(monthPath);
+      const monthParsed = JSON.parse(monthRaw) as {
+        readonly kind: string;
+        readonly bucket: { readonly turnCount: number };
+      };
+      assert.equal(monthParsed.kind, "month");
+      assert.equal(monthParsed.bucket.turnCount, 1);
+    }).pipe(Effect.provide(makeLayer())),
+  );
+
+  it.effect("is idempotent for zero-delta turns", () =>
+    Effect.gen(function* () {
+      const tracker = yield* CostTrackerService;
+      const summary = yield* tracker.recordUsage({
+        threadId: "thread-1",
+        model: SONNET,
+        usage: {},
+        at: new Date(2026, 3, 21, 10, 0, 0),
+      });
+      assert.equal(summary.month.turnCount, 0);
+      assert.equal(summary.allTime.turnCount, 0);
+    }).pipe(Effect.provide(makeLayer())),
+  );
+
+  it.effect("accumulates multiple turns", () =>
+    Effect.gen(function* () {
+      const tracker = yield* CostTrackerService;
+      const at = new Date(2026, 3, 21, 10, 0, 0);
+      yield* tracker.recordUsage({
+        threadId: "thread-1",
+        model: SONNET,
+        usage: {
+          inputTokens: 1_000,
+          outputTokens: 500,
+          lastInputTokens: 1_000,
+          lastOutputTokens: 500,
+        },
+        at,
+      });
+      const second = yield* tracker.recordUsage({
+        threadId: "thread-1",
+        model: SONNET,
+        usage: {
+          inputTokens: 2_000,
+          outputTokens: 900,
+          lastInputTokens: 1_000,
+          lastOutputTokens: 400,
+        },
+        at,
+      });
+      assert.equal(second.thread?.turnCount, 2);
+      assert.equal(second.month.turnCount, 2);
+      assert.equal(second.allTime.turnCount, 2);
+    }).pipe(Effect.provide(makeLayer())),
+  );
+
+  it.effect("emits summary on the updates stream after a write", () =>
+    Effect.gen(function* () {
+      const tracker = yield* CostTrackerService;
+      const fiber = yield* Effect.forkChild(
+        Stream.take(tracker.updates, 1).pipe(Stream.runCollect),
+      );
+      yield* tracker.recordUsage({
+        threadId: "thread-stream",
+        model: SONNET,
+        usage: {
+          lastInputTokens: 100,
+          lastOutputTokens: 50,
+        },
+        at: new Date(2026, 3, 21),
+      });
+      const chunk = yield* Fiber.join(fiber);
+      const events = Array.from(chunk);
+      assert.equal(events.length, 1);
+      assert.ok(events[0]!.month.turnCount >= 1);
+    }).pipe(Effect.provide(makeLayer())),
+  );
+
+  it.effect("getSummary returns zero for an unused session/month", () =>
+    Effect.gen(function* () {
+      const tracker = yield* CostTrackerService;
+      // Fresh layer per test, but be defensive: pin to a month no other test
+      // has touched. The thread-level bucket is per-threadId so that's safe.
+      const summary = yield* tracker.getSummary({
+        threadId: "never-seen",
+        at: new Date(2019, 11, 1),
+      });
+      assert.equal(summary.thread?.turnCount, 0);
+      assert.equal(summary.month.turnCount, 0);
+      assert.equal(summary.monthKey, "2019-12");
+    }).pipe(Effect.provide(makeLayer())),
+  );
+});
diff --git a/apps/server/src/cost/Layers/CostTracker.ts b/apps/server/src/cost/Layers/CostTracker.ts
new file mode 100644
index 0000000000..5d7c9bb0a2
--- /dev/null
+++ b/apps/server/src/cost/Layers/CostTracker.ts
@@ -0,0 +1,199 @@
+/**
+ * CostTrackerLive - JSON-backed cost ledger.
+ *
+ * Writes three atomic files per recorded turn:
+ *   - `session_<threadId>.json`
+ *   - `<YYYY-MM>.json`  (local tz)
+ *   - `alltime.json`
+ *
+ * Atomic pattern mirrors `serverSettings`: write `.tmp`, rename into place.
+ * Errors never block orchestration — the caller wraps `recordUsage` in
+ * `Effect.catchAll(logError)`.
+ *
+ * @module CostTrackerLive
+ */
+import { Data, Effect, FileSystem, Layer, Path, PubSub, Semaphore, Stream } from "effect";
+
+class CostFileParseError extends Data.TaggedError("CostFileParseError")<{
+  readonly path: string;
+  readonly cause: unknown;
+}> {}
+
+import { ServerConfig } from "../../config.ts";
+import { CostTrackerService, type CostTrackerShape } from "../Services/CostTracker.ts";
+import {
+  processTurn,
+  sanitizePersistedFile,
+  type ProcessTurnResult,
+} from "../Reducer.ts";
+import type {
+  CostBucket,
+  CostSummary,
+  PersistedCostFile,
+  PersistedCostFileKind,
+  RecordUsageInput,
+} from "../types.ts";
+import { emptyCostBucket, localMonthKey } from "../types.ts";
+
+function encodeFile(file: PersistedCostFile): string {
+  return `${JSON.stringify(file, null, 2)}\n`;
+}
+
+function sessionFilename(threadId: string): string {
+  // Normalize threadId for a flat filename — threadIds are UUID-like, but
+  // encodeURIComponent keeps us safe if a provider ever emits special chars.
+  return `session_${encodeURIComponent(threadId)}.json`;
+}
+
+function monthFilename(monthKey: string): string {
+  return `${monthKey}.json`;
+}
+
+const ALLTIME_FILENAME = "alltime.json";
+
+const make = Effect.gen(function* () {
+  const { usageDir } = yield* ServerConfig;
+  const fs = yield* FileSystem.FileSystem;
+  const path = yield* Path.Path;
+  // One writer at a time so concurrent turns don't clobber the same file.
+  const writeSemaphore = yield* Semaphore.make(1);
+  const updatesPubSub = yield* PubSub.unbounded<CostSummary>();
+
+  // Ensure the directory exists even if config bootstrap skipped it.
+  yield* fs.makeDirectory(usageDir, { recursive: true }).pipe(Effect.ignore({ log: true }));
+
+  const filePathFor = (kind: PersistedCostFileKind, key: string): string => {
+    switch (kind) {
+      case "session":
+        return path.join(usageDir, sessionFilename(key));
+      case "month":
+        return path.join(usageDir, monthFilename(key));
+      case "alltime":
+        return path.join(usageDir, ALLTIME_FILENAME);
+    }
+  };
+
+  const readFileIfExists = (absPath: string) =>
+    Effect.gen(function* () {
+      // A missing or unreadable file reads as "" below, i.e. "nothing persisted yet".
+      const raw = yield* fs.readFileString(absPath).pipe(Effect.orElseSucceed(() => ""));
+      if (!raw.trim()) return undefined;
+      // Corrupt JSON also yields `undefined`, but is logged so the resulting reset is visible.
+      return yield* Effect.try({
+        try: () => JSON.parse(raw) as unknown,
+        catch: (cause) => new CostFileParseError({ path: absPath, cause }),
+      }).pipe(Effect.tapError(Effect.logWarning), Effect.orElseSucceed(() => undefined));
+    });
+
+  const loadFile = (
+    kind: PersistedCostFileKind,
+    key: string,
+    now: Date,
+  ): Effect.Effect<PersistedCostFile> =>
+    Effect.gen(function* () {
+      const raw = yield* readFileIfExists(filePathFor(kind, key));
+      return sanitizePersistedFile(raw, kind, key, now);
+    });
+
+  const writeFileAtomically = (file: PersistedCostFile) =>
+    Effect.gen(function* () {
+      const target = filePathFor(file.kind, file.key);
+      // pid + timestamp + random suffix keeps temp names distinct between overlapping writes.
+      const nonce = Math.random().toString(36).slice(2, 8);
+      const tmpPath = `${target}.${process.pid}.${Date.now()}.${nonce}.tmp`;
+      // Runs on every exit path so a failed write cannot strand a partial temp file; after a
+      // successful rename the forced remove is expected to find nothing to delete.
+      const cleanup = fs.remove(tmpPath, { force: true }).pipe(Effect.ignore({ log: true }));
+      const write = fs.writeFileString(tmpPath, encodeFile(file));
+      yield* write.pipe(Effect.flatMap(() => fs.rename(tmpPath, target)), Effect.ensuring(cleanup));
+    }).pipe(Effect.ignoreCause({ log: true }));
+
+  const summaryFromFiles = (
+    session: PersistedCostFile | null,
+    month: PersistedCostFile,
+    allTime: PersistedCostFile,
+    monthKey: string,
+  ): CostSummary => ({
+    thread: session?.bucket ?? null,
+    month: month.bucket,
+    allTime: allTime.bucket,
+    monthKey,
+  });
+
+  const emptyBucketFile = (
+    kind: PersistedCostFileKind,
+    key: string,
+    now: Date,
+  ): PersistedCostFile => ({
+    version: 1,
+    kind,
+    key,
+    bucket: emptyCostBucket(now),
+  });
+
+  const getSummary: CostTrackerShape["getSummary"] = (input) =>
+    Effect.gen(function* () {
+      const now = input.at ?? new Date();
+      const monthKey = localMonthKey(now);
+      const [month, allTime, threadFile] = yield* Effect.all(
+        [
+          loadFile("month", monthKey, now),
+          loadFile("alltime", "alltime", now),
+          input.threadId ? loadFile("session", input.threadId, now) : Effect.succeed(null),
+        ],
+        { concurrency: "unbounded" },
+      );
+      return summaryFromFiles(threadFile, month, allTime, monthKey);
+    });
+
+  const recordUsage: CostTrackerShape["recordUsage"] = (input: RecordUsageInput) =>
+    writeSemaphore.withPermits(1)(
+      Effect.gen(function* () {
+        const now = input.at ?? new Date();
+        const monthKey = localMonthKey(now);
+        const session = yield* loadFile("session", input.threadId, now);
+        const month = yield* loadFile("month", monthKey, now);
+        const allTime = yield* loadFile("alltime", "alltime", now);
+
+        const result: ProcessTurnResult = processTurn({
+          input,
+          session,
+          month,
+          allTime,
+          now,
+        });
+
+        if (result.applied) {
+          yield* Effect.all(
+            [
+              writeFileAtomically(result.session),
+              writeFileAtomically(result.month),
+              writeFileAtomically(result.allTime),
+            ],
+            { concurrency: "unbounded" },
+          );
+        }
+
+        const summary: CostSummary = {
+          thread: result.session.bucket,
+          month: result.month.bucket,
+          allTime: result.allTime.bucket,
+          monthKey: result.monthKey,
+        };
+
+        if (result.applied) {
+          yield* PubSub.publish(updatesPubSub, summary).pipe(Effect.asVoid);
+        }
+        return summary;
+      }),
+    );
+
+  const shape: CostTrackerShape = {
+    recordUsage,
+    getSummary,
+    updates: Stream.fromPubSub(updatesPubSub),
+  };
+  return shape;
+});
+
+export const CostTrackerLive = Layer.effect(CostTrackerService, make);
diff --git a/apps/server/src/cost/Reducer.test.ts b/apps/server/src/cost/Reducer.test.ts
new file mode 100644
index 0000000000..689bc83d0b
--- /dev/null
+++ b/apps/server/src/cost/Reducer.test.ts
@@ -0,0 +1,311 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  deriveTurnDeltas,
+  isTurnNoOp,
+  processTurn,
+  sanitizePersistedFile,
+} from "./Reducer.ts";
+import type { CumulativeUsageSnapshot, PersistedCostFile, RecordUsageInput } from "./types.ts";
+import { localMonthKey, zeroCumulativeUsage } from "./types.ts";
+
+const SONNET = "claude-sonnet-4-6";
+
+describe("deriveTurnDeltas", () => {
+  it("prefers explicit lastXxx fields", () => {
+    const { deltas, nextCumulative } = deriveTurnDeltas(
+      {
+        inputTokens: 1_000,
+        cachedInputTokens: 5_000,
+        cacheCreationInputTokens: 500,
+        outputTokens: 200,
+        lastInputTokens: 800,
+        lastCachedInputTokens: 3_000,
+        lastCacheCreationInputTokens: 100,
+        lastOutputTokens: 50,
+      },
+      {
+        inputTokens: 200,
+        cachedInputTokens: 2_000,
+        cacheCreationInputTokens: 400,
+        outputTokens: 150,
+        reasoningOutputTokens: 0,
+      },
+    );
+    expect(deltas.inputTokens).toBe(800);
+    expect(deltas.cachedInputTokens).toBe(3_000);
+    expect(deltas.cacheCreationInputTokens).toBe(100);
+    expect(deltas.outputTokens).toBe(50);
+    // Cumulative reported in payload is used verbatim.
+    expect(nextCumulative.inputTokens).toBe(1_000);
+    expect(nextCumulative.cachedInputTokens).toBe(5_000);
+  });
+
+  it("subtracts cumulative snapshot when no lastXxx present", () => {
+    const prior: CumulativeUsageSnapshot = {
+      inputTokens: 100,
+      cachedInputTokens: 50,
+      cacheCreationInputTokens: 0,
+      outputTokens: 40,
+      reasoningOutputTokens: 0,
+    };
+    const { deltas, nextCumulative } = deriveTurnDeltas(
+      {
+        inputTokens: 250,
+        cachedInputTokens: 300,
+        outputTokens: 100,
+      },
+      prior,
+    );
+    expect(deltas.inputTokens).toBe(150);
+    expect(deltas.cachedInputTokens).toBe(250);
+    expect(deltas.cacheCreationInputTokens).toBe(0);
+    expect(deltas.outputTokens).toBe(60);
+    expect(nextCumulative.inputTokens).toBe(250);
+  });
+
+  it("clamps negative deltas to zero", () => {
+    const prior: CumulativeUsageSnapshot = {
+      inputTokens: 500,
+      cachedInputTokens: 0,
+      cacheCreationInputTokens: 0,
+      outputTokens: 200,
+      reasoningOutputTokens: 0,
+    };
+    const { deltas } = deriveTurnDeltas(
+      { inputTokens: 300, outputTokens: 150 },
+      prior,
+    );
+    expect(deltas.inputTokens).toBe(0);
+    expect(deltas.outputTokens).toBe(0);
+  });
+
+  it("rolls lastXxx onto prior cumulative when cumulative is absent", () => {
+    const { nextCumulative } = deriveTurnDeltas(
+      { lastInputTokens: 400, lastOutputTokens: 200 },
+      zeroCumulativeUsage(),
+    );
+    expect(nextCumulative.inputTokens).toBe(400);
+    expect(nextCumulative.outputTokens).toBe(200);
+  });
+});
+
+describe("processTurn", () => {
+  const at = new Date(2026, 3, 21, 10, 0, 0); // local April 2026
+  const monthKey = localMonthKey(at);
+
+  const baseInput: RecordUsageInput = {
+    threadId: "thread-1",
+    model: SONNET,
+    usage: {
+      inputTokens: 1_000,
+      cachedInputTokens: 5_000,
+      cacheCreationInputTokens: 0,
+      outputTokens: 500,
+      lastInputTokens: 1_000,
+      lastCachedInputTokens: 5_000,
+      lastOutputTokens: 500,
+    },
+    at,
+  };
+
+  it("records a new turn across all three buckets", () => {
+    const res = processTurn({ input: baseInput, session: undefined, month: undefined, allTime: undefined });
+    expect(res.applied).toBe(true);
+    expect(res.monthKey).toBe(monthKey);
+    // Sonnet 4.6 rates per 1M tokens: $3 input, $0.30 cached input, $15 output.
+    // 1k*3/1M + 5k*0.3/1M + 500*15/1M = 0.003 + 0.0015 + 0.0075 = $0.012
+    expect(res.costUsd).toBeCloseTo(0.012, 6);
+    expect(res.session.bucket.totalUsd).toBeCloseTo(0.012, 6);
+    expect(res.session.bucket.turnCount).toBe(1);
+    expect(res.session.bucket.byModel[SONNET]!.inputTokens).toBe(1_000);
+    expect(res.session.lastCumulative?.inputTokens).toBe(1_000);
+    expect(res.month.bucket.turnCount).toBe(1);
+    expect(res.allTime.bucket.turnCount).toBe(1);
+  });
+
+  it("accumulates a second turn", () => {
+    const turn1 = processTurn({
+      input: baseInput,
+      session: undefined,
+      month: undefined,
+      allTime: undefined,
+    });
+    const turn2Input: RecordUsageInput = {
+      ...baseInput,
+      usage: {
+        inputTokens: 1_500,
+        cachedInputTokens: 6_000,
+        outputTokens: 700,
+        lastInputTokens: 500,
+        lastCachedInputTokens: 1_000,
+        lastOutputTokens: 200,
+      },
+    };
+    const res = processTurn({
+      input: turn2Input,
+      session: turn1.session,
+      month: turn1.month,
+      allTime: turn1.allTime,
+    });
+    expect(res.applied).toBe(true);
+    expect(res.session.bucket.turnCount).toBe(2);
+    // 500*3 + 1000*0.3 + 200*15 = 1500+300+3000 = 4800 / 1M = $0.0048
+    expect(res.costUsd).toBeCloseTo(0.0048, 6);
+    expect(res.session.bucket.totalUsd).toBeCloseTo(0.012 + 0.0048, 6);
+  });
+
+  it("is a no-op when no tokens flow (zero deltas)", () => {
+    const emptyInput: RecordUsageInput = {
+      threadId: "thread-1",
+      model: SONNET,
+      usage: { inputTokens: 0, outputTokens: 0 },
+      at,
+    };
+    const res = processTurn({
+      input: emptyInput,
+      session: undefined,
+      month: undefined,
+      allTime: undefined,
+    });
+    expect(res.applied).toBe(false);
+    expect(res.session.bucket.turnCount).toBe(0);
+    expect(res.costUsd).toBe(0);
+  });
+
+  it("buckets by local month", () => {
+    const marchInput: RecordUsageInput = {
+      ...baseInput,
+      at: new Date(2026, 2, 31, 23, 0, 0), // last day of March local
+    };
+    const turn1 = processTurn({
+      input: marchInput,
+      session: undefined,
+      month: undefined,
+      allTime: undefined,
+    });
+    expect(turn1.monthKey).toBe("2026-03");
+    const aprilInput: RecordUsageInput = {
+      ...baseInput,
+      at: new Date(2026, 3, 1, 1, 0, 0),
+      usage: {
+        ...baseInput.usage,
+        inputTokens: 2_000,
+        cachedInputTokens: 10_000,
+        outputTokens: 1_000,
+        lastInputTokens: 1_000,
+        lastCachedInputTokens: 5_000,
+        lastOutputTokens: 500,
+      },
+    };
+    const turn2 = processTurn({
+      input: aprilInput,
+      session: turn1.session,
+      // April file is empty — new month means a new month bucket, not last month's.
+      month: undefined,
+      allTime: turn1.allTime,
+    });
+    expect(turn2.monthKey).toBe("2026-04");
+    expect(turn2.month.bucket.turnCount).toBe(1);
+    expect(turn2.allTime.bucket.turnCount).toBe(2);
+    expect(turn2.session.bucket.turnCount).toBe(2);
+  });
+
+  it("zero-cost unknown model still records token usage", () => {
+    const input: RecordUsageInput = {
+      threadId: "t1",
+      model: "some-unknown-model",
+      usage: {
+        lastInputTokens: 1_000,
+        lastOutputTokens: 500,
+      },
+      at,
+    };
+    const res = processTurn({ input, session: undefined, month: undefined, allTime: undefined });
+    expect(res.applied).toBe(true);
+    expect(res.costUsd).toBe(0);
+    expect(res.session.bucket.byModel["some-unknown-model"]!.inputTokens).toBe(1_000);
+    expect(res.session.bucket.byModel["some-unknown-model"]!.outputTokens).toBe(500);
+    expect(res.session.bucket.byModel["some-unknown-model"]!.totalUsd).toBe(0);
+  });
+});
+
+describe("isTurnNoOp", () => {
+  it("detects zero across all tiers", () => {
+    expect(
+      isTurnNoOp({
+        inputTokens: 0,
+        cachedInputTokens: 0,
+        cacheCreationInputTokens: 0,
+        outputTokens: 0,
+        reasoningOutputTokens: 0,
+      }),
+    ).toBe(true);
+  });
+  it("detects non-zero in any tier", () => {
+    expect(
+      isTurnNoOp({
+        inputTokens: 0,
+        cachedInputTokens: 1,
+        cacheCreationInputTokens: 0,
+        outputTokens: 0,
+        reasoningOutputTokens: 0,
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("sanitizePersistedFile", () => {
+  it("returns an empty bucket when raw is garbage", () => {
+    const file = sanitizePersistedFile(null, "session", "thread-1");
+    expect(file.bucket.turnCount).toBe(0);
+    expect(file.kind).toBe("session");
+    expect(file.key).toBe("thread-1");
+  });
+
+  it("coerces invalid numeric fields to zero", () => {
+    const file = sanitizePersistedFile(
+      {
+        version: 1,
+        kind: "session",
+        key: "t1",
+        bucket: {
+          totalUsd: "bad" as unknown as number,
+          turnCount: -5,
+          byModel: {
+            [SONNET]: {
+              inputTokens: 100,
+              outputTokens: "bad" as unknown as number,
+            },
+          },
+          updatedAt: "2026-04-21",
+        },
+        lastCumulative: {
+          inputTokens: 100,
+          outputTokens: 50,
+        },
+      },
+      "session",
+      "t1",
+    );
+    expect(file.bucket.totalUsd).toBe(0);
+    expect(file.bucket.turnCount).toBe(0);
+    expect(file.bucket.byModel[SONNET]!.outputTokens).toBe(0);
+    expect(file.lastCumulative?.inputTokens).toBe(100);
+  });
+
+  it("drops lastCumulative for non-session files", () => {
+    const file = sanitizePersistedFile(
+      {
+        version: 1,
+        kind: "month",
+        key: "2026-04",
+        bucket: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" },
+        lastCumulative: { inputTokens: 1 },
+      } as unknown as PersistedCostFile,
+      "month",
+      "2026-04",
+    );
+    expect(file.lastCumulative).toBeUndefined();
+  });
+});
diff --git a/apps/server/src/cost/Reducer.ts b/apps/server/src/cost/Reducer.ts
new file mode 100644
index 0000000000..bb9d4c7cd2
--- /dev/null
+++ b/apps/server/src/cost/Reducer.ts
@@ -0,0 +1,337 @@
+/**
+ * Pure cost-tracker reducers. No filesystem, no Effect — just math on plain
+ * objects so the write-path logic is trivial to unit-test.
+ */
+import { computeTurnCost, type ProviderKind, type TurnTokenDeltas } from "@t3tools/shared/pricing";
+import type {
+  CostBucket,
+  CumulativeUsageSnapshot,
+  ModelCostEntry,
+  PersistedCostFile,
+  PersistedCostFileKind,
+  RecordUsageInput,
+  UsageSnapshotLite,
+} from "./types.ts";
+import {
+  emptyCostBucket,
+  emptyModelCostEntry,
+  localMonthKey,
+  zeroCumulativeUsage,
+} from "./types.ts";
+
+function finiteNonNeg(value: unknown): number {
+  return typeof value !== "number" || !Number.isFinite(value) || value < 0 ? 0 : value; // junk → 0
+}
+
+/**
+ * Derive this turn's deltas and the next cumulative snapshot. Prefers the
+ * payload's `lastXxxTokens` fields when any is present (Codex and post-fix
+ * Claude); otherwise subtracts the session file's `lastCumulative` per field,
+ * clamped at 0 (older providers / recovered sessions; no prior → zeros).
+ */
+export function deriveTurnDeltas(
+  usage: UsageSnapshotLite,
+  priorCumulative: CumulativeUsageSnapshot | undefined,
+): {
+  readonly deltas: TurnTokenDeltas;
+  readonly nextCumulative: CumulativeUsageSnapshot;
+} {
+  const hasExplicitLast =
+    usage.lastInputTokens !== undefined ||
+    usage.lastCachedInputTokens !== undefined ||
+    usage.lastCacheCreationInputTokens !== undefined ||
+    usage.lastOutputTokens !== undefined ||
+    usage.lastReasoningOutputTokens !== undefined; // one present field is enough
+
+  const currentCumulative: CumulativeUsageSnapshot = {
+    inputTokens: finiteNonNeg(usage.inputTokens),
+    cachedInputTokens: finiteNonNeg(usage.cachedInputTokens),
+    cacheCreationInputTokens: finiteNonNeg(usage.cacheCreationInputTokens),
+    outputTokens: finiteNonNeg(usage.outputTokens),
+    reasoningOutputTokens: finiteNonNeg(usage.reasoningOutputTokens),
+  };
+
+  if (hasExplicitLast) {
+    const deltas: TurnTokenDeltas = {
+      inputTokens: finiteNonNeg(usage.lastInputTokens),
+      cachedInputTokens: finiteNonNeg(usage.lastCachedInputTokens),
+      cacheCreationInputTokens: finiteNonNeg(usage.lastCacheCreationInputTokens),
+      outputTokens: finiteNonNeg(usage.lastOutputTokens),
+      reasoningOutputTokens: finiteNonNeg(usage.lastReasoningOutputTokens),
+    };
+    // Prefer the payload's own cumulative totals when they are non-zero. If
+    // they sum to zero (absent or all 0), roll the deltas into the prior
+    // cumulative instead so the snapshot keeps advancing.
+    const nextCumulative =
+      currentCumulative.inputTokens +
+        currentCumulative.cachedInputTokens +
+        currentCumulative.cacheCreationInputTokens +
+        currentCumulative.outputTokens +
+        currentCumulative.reasoningOutputTokens >
+      0
+        ? currentCumulative
+        : addCumulative(priorCumulative ?? zeroCumulativeUsage(), deltas);
+    return { deltas, nextCumulative };
+  }
+
+  const prior = priorCumulative ?? zeroCumulativeUsage(); // first event of a session subtracts zeros
+  const deltas: TurnTokenDeltas = {
+    inputTokens: Math.max(0, currentCumulative.inputTokens - prior.inputTokens),
+    cachedInputTokens: Math.max(0, currentCumulative.cachedInputTokens - prior.cachedInputTokens),
+    cacheCreationInputTokens: Math.max(
+      0,
+      currentCumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens,
+    ),
+    outputTokens: Math.max(0, currentCumulative.outputTokens - prior.outputTokens),
+    reasoningOutputTokens: Math.max(
+      0,
+      currentCumulative.reasoningOutputTokens - prior.reasoningOutputTokens,
+    ),
+  };
+  return { deltas, nextCumulative: currentCumulative }; // a counter that went backwards yields a 0 delta
+}
+
+// Field-wise sum of a cumulative snapshot and one turn's deltas.
+// Builds a new snapshot; neither argument is modified.
+function addCumulative(base: CumulativeUsageSnapshot, deltas: TurnTokenDeltas): CumulativeUsageSnapshot {
+  const sum = (field: keyof CumulativeUsageSnapshot): number => base[field] + deltas[field];
+  return {
+    inputTokens: sum("inputTokens"),
+    cachedInputTokens: sum("cachedInputTokens"),
+    cacheCreationInputTokens: sum("cacheCreationInputTokens"),
+    outputTokens: sum("outputTokens"),
+    reasoningOutputTokens: sum("reasoningOutputTokens"),
+  };
+}
+
+/**
+ * Fold one turn into a per-model tally (tokens, USD, +1 turn); returns a new entry.
+ */
+function addEntry(entry: ModelCostEntry, deltas: TurnTokenDeltas, costUsd: number): ModelCostEntry {
+  const grow = (field: keyof CumulativeUsageSnapshot): number => entry[field] + deltas[field];
+  return {
+    inputTokens: grow("inputTokens"),
+    cachedInputTokens: grow("cachedInputTokens"),
+    cacheCreationInputTokens: grow("cacheCreationInputTokens"),
+    outputTokens: grow("outputTokens"),
+    reasoningOutputTokens: grow("reasoningOutputTokens"),
+    totalUsd: entry.totalUsd + costUsd,
+    turnCount: entry.turnCount + 1,
+  };
+}
+
+/** Apply one priced turn to a bucket: bumps the bucket totals and the `model` entry. */
+export function addTurnToBucket(
+  bucket: CostBucket,
+  model: string,
+  deltas: TurnTokenDeltas,
+  costUsd: number,
+  now: Date,
+): CostBucket {
+  // A model seen for the first time starts from an all-zero entry.
+  const modelEntry = addEntry(bucket.byModel[model] ?? emptyModelCostEntry(), deltas, costUsd);
+  const byModel = { ...bucket.byModel, [model]: modelEntry };
+  return {
+    totalUsd: bucket.totalUsd + costUsd,
+    turnCount: bucket.turnCount + 1,
+    byModel,
+    updatedAt: now.toISOString(),
+  };
+}
+
+/**
+ * True when the five token classes of `deltas` sum to zero or less — nothing
+ * billable changed, so the tracker should no-op.
+ */
+export function isTurnNoOp(deltas: TurnTokenDeltas): boolean {
+  const { inputTokens, cachedInputTokens, cacheCreationInputTokens } = deltas;
+  const { outputTokens, reasoningOutputTokens } = deltas;
+  const promptSide = inputTokens + cachedInputTokens + cacheCreationInputTokens;
+  const billable = promptSide + outputTokens + reasoningOutputTokens;
+  return billable <= 0;
+}
+
+export interface ProcessTurnArgs {
+  readonly input: RecordUsageInput;
+  readonly session: PersistedCostFile | undefined; // undefined → processTurn starts from an empty bucket
+  readonly month: PersistedCostFile | undefined; // same empty-bucket fallback
+  readonly allTime: PersistedCostFile | undefined; // same empty-bucket fallback
+  readonly now?: Date; // clock override; processTurn falls back to input.at, then new Date()
+}
+
+export interface ProcessTurnResult {
+  readonly session: PersistedCostFile;
+  readonly month: PersistedCostFile;
+  readonly allTime: PersistedCostFile;
+  readonly monthKey: string; // localMonthKey of the clock processTurn resolved
+  readonly deltas: TurnTokenDeltas;
+  readonly costUsd: number; // 0 on the no-op path
+  readonly applied: boolean; // false when the turn was a no-op and no bucket changed
+}
+
+/**
+ * Pure reducer: given the persisted state for the three buckets and one usage
+ * event, produce the three next files. A zero-token turn returns the prior
+ * buckets untouched (empty ones when absent) with `applied: false`.
+ */
+export function processTurn(args: ProcessTurnArgs): ProcessTurnResult {
+  const now = args.now ?? args.input.at ?? new Date(); // explicit clock, then event time, then wall clock
+  const monthKey = localMonthKey(now);
+
+  const priorSessionBucket =
+    args.session?.bucket ?? emptyCostBucket(now);
+  const priorMonthBucket = args.month?.bucket ?? emptyCostBucket(now);
+  const priorAllTimeBucket = args.allTime?.bucket ?? emptyCostBucket(now);
+
+  const { deltas, nextCumulative } = deriveTurnDeltas(
+    args.input.usage,
+    args.session?.lastCumulative,
+  );
+
+  if (isTurnNoOp(deltas)) {
+    return {
+      session: {
+        version: 1,
+        kind: "session",
+        key: args.input.threadId,
+        bucket: priorSessionBucket,
+        // NOTE(review): the baseline is not advanced to nextCumulative here — confirm that is intended.
+        ...(args.session?.lastCumulative
+          ? { lastCumulative: args.session.lastCumulative }
+          : {}),
+      },
+      month: {
+        version: 1,
+        kind: "month",
+        key: args.month?.key ?? monthKey, // keeps the loaded file's key when one was passed in
+        bucket: priorMonthBucket,
+      },
+      allTime: {
+        version: 1,
+        kind: "alltime",
+        key: "alltime",
+        bucket: priorAllTimeBucket,
+      },
+      monthKey,
+      deltas,
+      costUsd: 0,
+      applied: false,
+    };
+  }
+
+  const breakdown = computeTurnCost(
+    args.input.model,
+    deltas,
+    args.input.provider as ProviderKind | undefined, // free-form string on the input; narrowed by assertion only
+  );
+  const costUsd = breakdown.totalUsd;
+
+  const nextSession: PersistedCostFile = {
+    version: 1,
+    kind: "session",
+    key: args.input.threadId,
+    bucket: addTurnToBucket(priorSessionBucket, args.input.model, deltas, costUsd, now),
+    lastCumulative: nextCumulative, // baseline for the next delta computation
+  };
+  const nextMonth: PersistedCostFile = {
+    version: 1,
+    kind: "month",
+    key: monthKey, // always the key derived from `now`, regardless of args.month.key
+    bucket: addTurnToBucket(priorMonthBucket, args.input.model, deltas, costUsd, now),
+  };
+  const nextAllTime: PersistedCostFile = {
+    version: 1,
+    kind: "alltime",
+    key: "alltime",
+    bucket: addTurnToBucket(priorAllTimeBucket, args.input.model, deltas, costUsd, now),
+  };
+
+  return {
+    session: nextSession,
+    month: nextMonth,
+    allTime: nextAllTime,
+    monthKey,
+    deltas,
+    costUsd,
+    applied: true,
+  };
+}
+
+// ── Sanitization ────────────────────────────────────────────────────────
+
+function sanitizeNumber(value: unknown): number {
+  return finiteNonNeg(value); // one clamp rule for the file: non-number / non-finite / negative → 0
+}
+
+function sanitizeModelEntry(raw: unknown): ModelCostEntry | null {
+  if (typeof raw !== "object" || raw === null) return null; // not an object → caller drops the key
+  const num = (field: string): number => sanitizeNumber((raw as Record<string, unknown>)[field]);
+  return {
+    inputTokens: num("inputTokens"),
+    cachedInputTokens: num("cachedInputTokens"),
+    cacheCreationInputTokens: num("cacheCreationInputTokens"),
+    outputTokens: num("outputTokens"),
+    reasoningOutputTokens: num("reasoningOutputTokens"),
+    totalUsd: num("totalUsd"),
+    turnCount: num("turnCount"),
+  };
+}
+
+// Rebuild a bucket from untrusted JSON: bad scalars clamp to 0, bad entries are dropped.
+function sanitizeBucket(raw: unknown, now: Date): CostBucket {
+  if (!raw || typeof raw !== "object") return emptyCostBucket(now);
+  const r = raw as Record<string, unknown>;
+  // Only a plain object is a valid model map; an array or string would be
+  // enumerated by index and could surface as bogus "0", "1", … models.
+  const plain = typeof r.byModel === "object" && r.byModel !== null && !Array.isArray(r.byModel);
+  const cleaned = Object.entries(plain ? (r.byModel as Record<string, unknown>) : {})
+    .map(([model, entry]) => [model, sanitizeModelEntry(entry)] as const)
+    .filter((pair): pair is readonly [string, ModelCostEntry] => pair[0] !== "" && pair[1] !== null);
+  return {
+    totalUsd: sanitizeNumber(r.totalUsd),
+    turnCount: sanitizeNumber(r.turnCount),
+    byModel: Object.fromEntries(cleaned), // defines own properties, so a "__proto__" key stays an entry
+    updatedAt: typeof r.updatedAt === "string" ? r.updatedAt : now.toISOString(),
+  };
+}
+
+function sanitizeLastCumulative(raw: unknown): CumulativeUsageSnapshot | undefined {
+  if (raw === null || typeof raw !== "object") return undefined; // absent / non-object → no baseline
+  const read = (field: string): number => sanitizeNumber((raw as Record<string, unknown>)[field]);
+  return {
+    inputTokens: read("inputTokens"),
+    cachedInputTokens: read("cachedInputTokens"),
+    cacheCreationInputTokens: read("cacheCreationInputTokens"),
+    outputTokens: read("outputTokens"),
+    reasoningOutputTokens: read("reasoningOutputTokens"),
+  };
+}
+
+/**
+ * Coerce an already-parsed JSON value into a `PersistedCostFile`, never
+ * throwing. `version` and `kind` are always normalized to `1` and
+ * `expectedKind` (a mismatch on disk is overwritten, not rejected); `key`
+ * keeps a non-empty persisted string and otherwise falls back to
+ * `expectedKey`. `lastCumulative` survives only for session files.
+ */
+export function sanitizePersistedFile(
+  raw: unknown,
+  expectedKind: PersistedCostFileKind,
+  expectedKey: string,
+  now: Date = new Date(),
+): PersistedCostFile {
+  if (!raw || typeof raw !== "object") {
+    // Garbage in → fresh empty file under the expected identity.
+    return { version: 1, kind: expectedKind, key: expectedKey, bucket: emptyCostBucket(now) };
+  }
+  const r = raw as Record<string, unknown>;
+  const key = typeof r.key === "string" && r.key.length > 0 ? r.key : expectedKey;
+  const lastCumulative = sanitizeLastCumulative(r.lastCumulative);
+  return {
+    version: 1,
+    kind: expectedKind,
+    key,
+    bucket: sanitizeBucket(r.bucket, now),
+    // Month / all-time files never carry a delta baseline.
+    ...(lastCumulative && expectedKind === "session" ? { lastCumulative } : {}),
+  };
+}
diff --git a/apps/server/src/cost/Services/CostTracker.ts b/apps/server/src/cost/Services/CostTracker.ts
new file mode 100644
index 0000000000..e6cb746baa
--- /dev/null
+++ b/apps/server/src/cost/Services/CostTracker.ts
@@ -0,0 +1,49 @@
+/**
+ * CostTrackerService - USD + token ledger for every Claude/Codex turn.
+ *
+ * Backed by plain JSON under `<T3CODE_HOME>/<state>/usage/`:
+ *   - `session_<threadId>.json` — per-thread cumulative.
+ *   - `YYYY-MM.json` — month bucket (local tz).
+ *   - `alltime.json` — running total since install.
+ *
+ * Works in dev, installed-app, and standalone binaries because persistence
+ * lives next to the server's SQLite state. Client reads via a snapshot
+ * endpoint; the tracker also exposes a Stream of post-write summaries so
+ * the web UI can subscribe to live updates.
+ *
+ * @module CostTrackerService
+ */
+import { Context } from "effect";
+import type { Effect, Stream } from "effect";
+
+import type { CostSummary, RecordUsageInput } from "../types.ts";
+
+export interface CostTrackerShape {
+  /**
+   * Record a single turn's usage. Idempotent when deltas sum to zero (e.g.
+   * a redelivered no-op snapshot). Resolves to the post-write summary so the
+   * caller can broadcast without a second read; the error channel is `never`.
+   */
+  readonly recordUsage: (input: RecordUsageInput) => Effect.Effect<CostSummary>;
+
+  /**
+   * Read the current summary for a given thread. `threadId` may be omitted
+   * to get just month + all-time totals (e.g. the user is between threads).
+   */
+  readonly getSummary: (input: {
+    readonly threadId?: string | undefined;
+    readonly at?: Date | undefined; // NOTE(review): presumably selects the month bucket — confirm in the Live layer
+  }) => Effect.Effect<CostSummary>;
+
+  /**
+   * Live stream of summaries emitted after each `recordUsage` write.
+   * Consumers pair it with `getSummary` for the initial value, then follow
+   * the stream.
+   */
+  readonly updates: Stream.Stream<CostSummary>;
+}
+
+export class CostTrackerService extends Context.Service<
+  CostTrackerService,
+  CostTrackerShape
+>()("t3/cost/Services/CostTracker/CostTrackerService") {} // Effect service tag whose implementation has the CostTrackerShape contract
diff --git a/apps/server/src/cost/types.ts b/apps/server/src/cost/types.ts
new file mode 100644
index 0000000000..fd640660f6
--- /dev/null
+++ b/apps/server/src/cost/types.ts
@@ -0,0 +1,120 @@
+/**
+ * Shared cost-tracker types. Persisted to disk verbatim under
+ * `<T3CODE_HOME>/<state>/usage/*.json`. Loose interfaces + a sanitizer pass
+ * — we're the only writer, so round-tripping through Effect.Schema is
+ * overkill here. The sanitizer tolerates garbage and returns a fresh empty
+ * bucket rather than crashing.
+ */
+
+/** Running tallies for a single (model, bucket) pair. */
+export interface ModelCostEntry {
+  readonly inputTokens: number;
+  readonly cachedInputTokens: number;
+  readonly cacheCreationInputTokens: number;
+  readonly outputTokens: number;
+  readonly reasoningOutputTokens: number;
+  readonly totalUsd: number; // sum of the per-turn USD cost folded into this entry
+  readonly turnCount: number; // number of turns folded into this entry
+}
+
+export const emptyModelCostEntry = (): ModelCostEntry => ({ // fresh all-zero tally; a new object on every call
+  inputTokens: 0,
+  cachedInputTokens: 0,
+  cacheCreationInputTokens: 0,
+  outputTokens: 0,
+  reasoningOutputTokens: 0,
+  totalUsd: 0,
+  turnCount: 0,
+});
+
+/**
+ * A cost bucket — used for per-thread (session), per-month, and all-time
+ * aggregates. Same shape, different persistence files.
+ */
+export interface CostBucket {
+  readonly totalUsd: number; // USD across all models in this bucket
+  readonly turnCount: number; // turns across all models in this bucket
+  readonly byModel: Record<string, ModelCostEntry>; // keyed by the model string recorded with each turn
+  readonly updatedAt: string; // ISO timestamp when produced by emptyCostBucket; otherwise any string
+}
+
+export const emptyCostBucket = (now: Date = new Date()): CostBucket => ({ // zeroed bucket stamped with `now`
+  totalUsd: 0,
+  turnCount: 0,
+  byModel: {},
+  updatedAt: now.toISOString(),
+});
+
+export type PersistedCostFileKind = "session" | "month" | "alltime"; // discriminates the three persisted file flavours
+
+/** Last cumulative usage snapshot — the baseline subtracted from the next payload when it lacks lastXxx fields. */
+export interface CumulativeUsageSnapshot {
+  readonly inputTokens: number;
+  readonly cachedInputTokens: number;
+  readonly cacheCreationInputTokens: number;
+  readonly outputTokens: number;
+  readonly reasoningOutputTokens: number;
+}
+
+export const zeroCumulativeUsage = (): CumulativeUsageSnapshot => ({ // all-zero baseline; a new object per call
+  inputTokens: 0,
+  cachedInputTokens: 0,
+  cacheCreationInputTokens: 0,
+  outputTokens: 0,
+  reasoningOutputTokens: 0,
+});
+
+export interface PersistedCostFile {
+  readonly version: 1; // only schema version declared so far
+  readonly kind: PersistedCostFileKind;
+  readonly key: string; // identifies the bucket within its kind (e.g. a thread id or a `YYYY-MM` month key)
+  readonly bucket: CostBucket;
+  /**
+   * Session files only. Runtime payloads from Claude/Codex carry cumulative
+   * totals across the whole thread; we subtract this snapshot to get the
+   * just-completed turn's deltas.
+   */
+  readonly lastCumulative?: CumulativeUsageSnapshot;
+}
+
+export interface CostSummary {
+  readonly thread: CostBucket | null; // NOTE(review): presumably null when no thread was requested — confirm
+  readonly month: CostBucket;
+  readonly allTime: CostBucket;
+  readonly monthKey: string; // key of the month bucket reported in `month`
+}
+
+export interface RecordUsageInput {
+  readonly threadId: string; // becomes the session file's key
+  readonly model: string; // used for pricing and as the byModel key
+  readonly usage: UsageSnapshotLite;
+  readonly provider?: string | undefined; // passed to pricing as a provider hint
+  readonly at?: Date; // event time; used as the clock when no explicit `now` is supplied
+}
+
+/**
+ * Minimal shape we need from `ThreadTokenUsageSnapshot`; accepting a plain
+ * record keeps tests independent of the contracts package.
+ */
+export interface UsageSnapshotLite {
+  readonly inputTokens?: number | undefined; // un-prefixed fields: cumulative totals for the thread
+  readonly cachedInputTokens?: number | undefined;
+  readonly cacheCreationInputTokens?: number | undefined;
+  readonly outputTokens?: number | undefined;
+  readonly reasoningOutputTokens?: number | undefined;
+  readonly lastInputTokens?: number | undefined; // `last*` fields: the latest turn only, when the provider reports them
+  readonly lastCachedInputTokens?: number | undefined;
+  readonly lastCacheCreationInputTokens?: number | undefined;
+  readonly lastOutputTokens?: number | undefined;
+  readonly lastReasoningOutputTokens?: number | undefined;
+}
+
+/**
+ * Month bucket key (`YYYY-MM`) read off the local-timezone calendar fields of
+ * `date`, so buckets roll over on the user's clock rather than at UTC midnight.
+ */
+export function localMonthKey(date: Date = new Date()): string {
+  const pad = (n: number, width: number): string => String(n).padStart(width, "0");
+  const monthNumber = date.getMonth() + 1; // getMonth() is zero-based
+  return [pad(date.getFullYear(), 4), pad(monthNumber, 2)].join("-");
+}
diff --git a/packages/shared/src/pricing.ts b/packages/shared/src/pricing.ts
index 96bcda1505..bd4e22ef84 100644
--- a/packages/shared/src/pricing.ts
+++ b/packages/shared/src/pricing.ts
@@ -1,6 +1,8 @@
 import { normalizeModelSlug } from "./model.ts";
 import type { ProviderKind } from "@t3tools/contracts";
 
+export type { ProviderKind };
+
 /**
  * USD price per 1,000,000 tokens for each token class.
  *

From f41104167fa7cef247784655d28a8f4f733aa771 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 21:10:41 +0200
Subject: [PATCH 10/16] feat(cost): server-owned ledger + client migrates off
 localStorage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire the runtime event stream into the new CostTracker and expose
the ledger over HTTP so web + desktop + standalone binaries all
share the same authoritative cost data.

Server (c11 + c12)
- ProviderRuntimeIngestion now calls CostTracker.recordUsage after
  appending the context-window.updated activity. Errors are logged
  and swallowed so orchestration is never blocked by FS faults.
- Model comes from event.payload.model (set by adapters) with a
  fallback to thread.modelSelection.model.
- CostTrackerLive added to the server composition root + wired into
  test + integration layers (stub mock for server.test.ts).
- New GET /api/cost/summary?threadId=X route returns the freshest
  session + month + all-time summary. CORS handled via the existing
  browserApi layer.

Client (c13)
- Drop zustand + localStorage. The old costStore.ts /
  useCostTracking.ts (plus their tests) are gone — server is now
  source of truth.
- New lib/costQuery.ts: react-query queryOptions + sanitizer for
  the HTTP response, plus formatUsd utility. Invalidation helper
  bumps the cache whenever the active thread receives a new
  context-window.updated activity, so the ring updates within one
  render of the server write.
- ChatComposer replaces useCostTracking/useCostSummary with a
  useQuery subscription and a tiny effect that invalidates on new
  usage activities. Plumbs activeProvider through to the meter.
- CostMeter: rebuild around the new {thread, month, allTime}
  shape. Popover now shows session ⋅ MTD ⋅ all-time and gracefully
  renders "—" for providers without token-usage telemetry (cursor /
  opencode) instead of a misleading $0.

Tests: 913 server pass, 906 web pass (26 old localStorage tests
deleted, replaced by server-owned CostTracker coverage from c10).
---
 .../OrchestrationEngineHarness.integration.ts |   5 +-
 apps/server/src/cost/http.ts                  |  50 +++
 .../Layers/ProviderRuntimeIngestion.test.ts   |   5 +-
 .../Layers/ProviderRuntimeIngestion.ts        |  23 ++
 apps/server/src/server.test.ts                |  20 ++
 apps/server/src/server.ts                     |   4 +
 apps/web/src/components/chat/ChatComposer.tsx |  51 ++-
 apps/web/src/components/chat/CostMeter.tsx    | 112 ++++--
 apps/web/src/lib/costQuery.ts                 | 202 +++++++++++
 apps/web/src/lib/costStore.test.ts            | 313 -----------------
 apps/web/src/lib/costStore.ts                 | 328 ------------------
 apps/web/src/lib/useCostTracking.test.ts      | 160 ---------
 apps/web/src/lib/useCostTracking.ts           | 120 -------
 13 files changed, 427 insertions(+), 966 deletions(-)
 create mode 100644 apps/server/src/cost/http.ts
 create mode 100644 apps/web/src/lib/costQuery.ts
 delete mode 100644 apps/web/src/lib/costStore.test.ts
 delete mode 100644 apps/web/src/lib/costStore.ts
 delete mode 100644 apps/web/src/lib/useCostTracking.test.ts
 delete mode 100644 apps/web/src/lib/useCostTracking.ts

diff --git a/apps/server/integration/OrchestrationEngineHarness.integration.ts b/apps/server/integration/OrchestrationEngineHarness.integration.ts
index 6f9f4c6f44..7015aeea9e 100644
--- a/apps/server/integration/OrchestrationEngineHarness.integration.ts
+++ b/apps/server/integration/OrchestrationEngineHarness.integration.ts
@@ -23,6 +23,7 @@ import {
 } from "effect";
 
 import { CheckpointStoreLive } from "../src/checkpointing/Layers/CheckpointStore.ts";
+import { CostTrackerLive } from "../src/cost/Layers/CostTracker.ts";
 import { CheckpointStore } from "../src/checkpointing/Services/CheckpointStore.ts";
 import { GitCoreLive } from "../src/git/Layers/GitCore.ts";
 import { GitCore, type GitCoreShape } from "../src/git/Services/GitCore.ts";
@@ -359,13 +360,15 @@ export const makeOrchestrationIntegrationHarness = (
         }),
       ),
     );
+    const configLayer = ServerConfig.layerTest(workspaceDir, rootDir);
     const layer = Layer.empty.pipe(
       Layer.provideMerge(runtimeServicesLayer),
       Layer.provideMerge(orchestrationReactorLayer),
       Layer.provide(persistenceLayer),
       Layer.provideMerge(RepositoryIdentityResolverLive),
       Layer.provideMerge(ServerSettingsService.layerTest()),
-      Layer.provideMerge(ServerConfig.layerTest(workspaceDir, rootDir)),
+      Layer.provideMerge(CostTrackerLive.pipe(Layer.provide(configLayer))),
+      Layer.provideMerge(configLayer),
       Layer.provideMerge(NodeServices.layer),
     );
 
diff --git a/apps/server/src/cost/http.ts b/apps/server/src/cost/http.ts
new file mode 100644
index 0000000000..a8bea550e6
--- /dev/null
+++ b/apps/server/src/cost/http.ts
@@ -0,0 +1,50 @@
+/**
+ * HTTP routes for the CostTracker ledger.
+ *
+ * One endpoint for now: `GET /api/cost/summary?threadId=X` returning the
+ * live CostSummary (session + month + all-time). The client refetches on
+ * each context-window.updated activity; no WS push needed for v1 since the user
+ * watching their own session is already on a refresh cadence driven by
+ * the orchestration event stream.
+ */
+import { Effect } from "effect";
+import { HttpRouter, HttpServerRequest, HttpServerResponse } from "effect/unstable/http";
+
+import { ServerAuth } from "../auth/Services/ServerAuth.ts";
+import { respondToAuthError } from "../auth/http.ts";
+import { CostTrackerService } from "./Services/CostTracker.ts";
+import { localMonthKey } from "./types.ts";
+
+export const costSummaryRouteLayer = HttpRouter.add(
+  "GET",
+  "/api/cost/summary",
+  Effect.gen(function* () {
+    const request = yield* HttpServerRequest.HttpServerRequest;
+    const serverAuth = yield* ServerAuth;
+    yield* serverAuth.authenticateHttpRequest(request); // runs before the ledger is read
+
+    const tracker = yield* CostTrackerService;
+
+    const url = HttpServerRequest.toURL(request);
+    const threadId = (() => {
+      if (url._tag === "None") return undefined; // no URL available → treat as "no thread"
+      const raw = url.value.searchParams.get("threadId");
+      return typeof raw === "string" && raw.length > 0 ? raw : undefined; // absent or empty ?threadId → undefined
+    })();
+
+    const summary = yield* tracker.getSummary({
+      threadId,
+      at: new Date(),
+    });
+
+    return HttpServerResponse.jsonUnsafe(
+      {
+        monthKey: summary.monthKey ?? localMonthKey(), // NOTE(review): monthKey is typed as string, so this fallback looks unreachable — confirm
+        thread: summary.thread,
+        month: summary.month,
+        allTime: summary.allTime,
+      },
+      { status: 200 },
+    );
+  }).pipe(Effect.catchTag("AuthError", respondToAuthError)), // errors tagged "AuthError" are handed to respondToAuthError
+);
diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
index 577c5050ea..f334ad5ff1 100644
--- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
+++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
@@ -33,6 +33,7 @@ import { RepositoryIdentityResolverLive } from "../../project/Layers/RepositoryI
 import { OrchestrationEngineLive } from "./OrchestrationEngine.ts";
 import { OrchestrationProjectionPipelineLive } from "./ProjectionPipeline.ts";
 import { OrchestrationProjectionSnapshotQueryLive } from "./ProjectionSnapshotQuery.ts";
+import { CostTrackerLive } from "../../cost/Layers/CostTracker.ts";
 import { ProviderRuntimeIngestionLive } from "./ProviderRuntimeIngestion.ts";
 import {
   OrchestrationEngineService,
@@ -208,12 +209,14 @@ describe("ProviderRuntimeIngestion", () => {
       Layer.provide(RepositoryIdentityResolverLive),
       Layer.provide(SqlitePersistenceMemory),
     );
+    const configLayer = ServerConfig.layerTest(process.cwd(), process.cwd());
     const layer = ProviderRuntimeIngestionLive.pipe(
       Layer.provideMerge(orchestrationLayer),
       Layer.provideMerge(SqlitePersistenceMemory),
       Layer.provideMerge(Layer.succeed(ProviderService, provider.service)),
       Layer.provideMerge(makeTestServerSettingsLayer(options?.serverSettings)),
-      Layer.provideMerge(ServerConfig.layerTest(process.cwd(), process.cwd())),
+      Layer.provideMerge(CostTrackerLive.pipe(Layer.provide(configLayer))),
+      Layer.provideMerge(configLayer),
       Layer.provideMerge(NodeServices.layer),
     );
     runtime = ManagedRuntime.make(layer);
diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
index 7eeeed2d51..053f04669f 100644
--- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
+++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
@@ -16,6 +16,7 @@ import {
 import { Cache, Cause, Duration, Effect, Layer, Option, Stream } from "effect";
 import { makeDrainableWorker } from "@t3tools/shared/DrainableWorker";
 
+import { CostTrackerService } from "../../cost/Services/CostTracker.ts";
 import { ProviderService } from "../../provider/Services/ProviderService.ts";
 import { ProjectionTurnRepository } from "../../persistence/Services/ProjectionTurns.ts";
 import { ProjectionTurnRepositoryLive } from "../../persistence/Layers/ProjectionTurns.ts";
@@ -525,6 +526,7 @@ const make = Effect.gen(function* () {
   const providerService = yield* ProviderService;
   const projectionTurnRepository = yield* ProjectionTurnRepository;
   const serverSettingsService = yield* ServerSettingsService;
+  const costTracker = yield* CostTrackerService;
 
   const turnMessageIdsByTurnKey = yield* Cache.make<string, Set<MessageId>>({
     capacity: TURN_MESSAGE_IDS_BY_TURN_CACHE_CAPACITY,
@@ -1519,6 +1521,23 @@ const make = Effect.gen(function* () {
           createdAt: activity.createdAt,
         }),
       ).pipe(Effect.asVoid);
+
+      // Side-channel: feed token usage into the CostTracker so the JSON
+      // ledger stays in sync with the activity stream. Failures never block
+      // ingestion — we log and drop.
+      if (event.type === "thread.token-usage.updated") {
+        const model = event.payload.model ?? thread.modelSelection.model;
+        const provider = thread.modelSelection.provider;
+        yield* costTracker
+          .recordUsage({
+            threadId: thread.id,
+            model,
+            provider,
+            usage: event.payload.usage,
+            at: new Date(event.createdAt),
+          })
+          .pipe(Effect.asVoid, Effect.ignoreCause({ log: true }));
+      }
     });
 
   const processDomainEvent = (_event: TurnStartRequestedDomainEvent) => Effect.void;
@@ -1570,3 +1589,7 @@ export const ProviderRuntimeIngestionLive = Layer.effect(
   ProviderRuntimeIngestionService,
   make,
 ).pipe(Layer.provide(ProjectionTurnRepositoryLive));
+
+// Note: CostTrackerLive must be provided in the composition root (bin.ts or
+// server runtime layer). Keeping it out of ProviderRuntimeIngestionLive keeps
+// the dep graph explicit and lets tests substitute a stub CostTracker.
diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts
index 47e159d303..e7b05dfc8a 100644
--- a/apps/server/src/server.test.ts
+++ b/apps/server/src/server.test.ts
@@ -51,6 +51,7 @@ import { vi } from "vitest";
 import type { ServerConfigShape } from "./config.ts";
 import { deriveServerPaths, ServerConfig } from "./config.ts";
 import { makeRoutesLayer } from "./server.ts";
+import { CostTrackerService } from "./cost/Services/CostTracker.ts";
 import { resolveAttachmentRelativePath } from "./attachmentPaths.ts";
 import {
   CheckpointDiffQuery,
@@ -504,6 +505,25 @@ const buildAppUnderTest = (options?: {
     );
 
     const appLayer = servedRoutesLayer.pipe(
+      Layer.provide(
+        Layer.mock(CostTrackerService)({
+          recordUsage: () =>
+            Effect.succeed({
+              thread: null,
+              month: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" },
+              allTime: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" },
+              monthKey: "1970-01",
+            }),
+          getSummary: () =>
+            Effect.succeed({
+              thread: null,
+              month: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" },
+              allTime: { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" },
+              monthKey: "1970-01",
+            }),
+          updates: Stream.empty,
+        }),
+      ),
       Layer.provide(
         Layer.mock(BrowserTraceCollector)({
           record: () => Effect.void,
diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts
index f94bbb34b5..0530dce3b3 100644
--- a/apps/server/src/server.ts
+++ b/apps/server/src/server.ts
@@ -36,6 +36,7 @@ import { TerminalManagerLive } from "./terminal/Layers/Manager.ts";
 import { GitManagerLive } from "./git/Layers/GitManager.ts";
 import { KeybindingsLive } from "./keybindings.ts";
 import { ServerRuntimeStartup, ServerRuntimeStartupLive } from "./serverRuntimeStartup.ts";
+import { CostTrackerLive } from "./cost/Layers/CostTracker.ts";
 import { OrchestrationReactorLive } from "./orchestration/Layers/OrchestrationReactor.ts";
 import { RuntimeReceiptBusLive } from "./orchestration/Layers/RuntimeReceiptBus.ts";
 import { ProviderRuntimeIngestionLive } from "./orchestration/Layers/ProviderRuntimeIngestion.ts";
@@ -76,6 +77,7 @@ import {
   orchestrationDispatchRouteLayer,
   orchestrationSnapshotRouteLayer,
 } from "./orchestration/http.ts";
+import { costSummaryRouteLayer } from "./cost/http.ts";
 import { NetService } from "@t3tools/shared/Net";
 
 const PtyAdapterLive = Layer.unwrap(
@@ -133,6 +135,7 @@ const ReactorLayerLive = Layer.empty.pipe(
   Layer.provideMerge(CheckpointReactorLive),
   Layer.provideMerge(ThreadDeletionReactorLive),
   Layer.provideMerge(RuntimeReceiptBusLive),
+  Layer.provideMerge(CostTrackerLive),
 );
 
 const CheckpointingLayerLive = Layer.empty.pipe(
@@ -263,6 +266,7 @@ export const makeRoutesLayer = Layer.mergeAll(
   authSessionRouteLayer,
   authWebSocketTokenRouteLayer,
   attachmentsRouteLayer,
+  costSummaryRouteLayer,
   orchestrationDispatchRouteLayer,
   orchestrationSnapshotRouteLayer,
   otlpTracesProxyRouteLayer,
diff --git a/apps/web/src/components/chat/ChatComposer.tsx b/apps/web/src/components/chat/ChatComposer.tsx
index da3184e8ad..8e5471e5a9 100644
--- a/apps/web/src/components/chat/ChatComposer.tsx
+++ b/apps/web/src/components/chat/ChatComposer.tsx
@@ -29,7 +29,7 @@ import {
   useRef,
   useState,
 } from "react";
-import { useQuery } from "@tanstack/react-query";
+import { useQuery, useQueryClient } from "@tanstack/react-query";
 import { useDebouncedValue } from "@tanstack/react-pacer";
 import { projectSearchEntriesQueryOptions } from "~/lib/projectReactQuery";
 import {
@@ -103,8 +103,12 @@ import type { SessionPhase, Thread } from "../../types";
 import type { PendingUserInputDraftAnswer } from "../../pendingUserInput";
 import type { PendingApproval, PendingUserInput } from "../../session-logic";
 import { deriveLatestContextWindowSnapshot } from "../../lib/contextWindow";
-import { useCostSummary, type CostSummary } from "../../lib/costStore";
-import { useCostTracking } from "../../lib/useCostTracking";
+import {
+  costSummaryQueryOptions,
+  invalidateCostSummary,
+  EMPTY_COST_SUMMARY,
+  type CostSummary,
+} from "../../lib/costQuery";
 import { formatProviderSkillDisplayName } from "../../providerSkillPresentation";
 import { searchProviderSkills } from "../../providerSkillSearch";
 
@@ -273,6 +277,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions(
   compact: boolean;
   activeContextWindow: ReturnType<typeof deriveLatestContextWindowSnapshot>;
   costSummary: CostSummary;
+  activeProvider: ProviderKind | null;
   isPreparingWorktree: boolean;
   pendingAction: {
     questionIndex: number;
@@ -294,7 +299,7 @@ const ComposerFooterPrimaryActions = memo(function ComposerFooterPrimaryActions(
   return (
     <>
       {props.activeContextWindow ? <ContextWindowMeter usage={props.activeContextWindow} /> : null}
-      <CostMeter summary={props.costSummary} />
+      <CostMeter summary={props.costSummary} activeProvider={props.activeProvider} />
       {props.isPreparingWorktree ? (
         <span className="text-muted-foreground/70 text-xs">Preparing worktree...</span>
       ) : null}
@@ -476,7 +481,7 @@ export const ChatComposer = memo(
       routeThreadRef,
       draftId,
       activeThreadId,
-      activeThreadEnvironmentId: _activeThreadEnvironmentId,
+      activeThreadEnvironmentId,
       activeThread,
       isServerThread: _isServerThread,
       isLocalDraftThread: _isLocalDraftThread,
@@ -646,9 +651,40 @@ export const ChatComposer = memo(
 
     // ------------------------------------------------------------------
     // Cost tracking (session + month-to-date spend)
+    // Server owns the ledger (apps/server/src/cost/*). Client fetches
+    // summary + invalidates on each new context-window.updated activity
+    // so the ring reflects the freshest write.
     // ------------------------------------------------------------------
-    useCostTracking(activeThreadId, activeThreadActivities, activeThreadModelSelection);
-    const costSummary = useCostSummary(activeThreadId);
+    const costQueryClient = useQueryClient();
+    const costSummaryQuery = useQuery(
+      costSummaryQueryOptions({
+        environmentId: activeThreadEnvironmentId ?? null,
+        threadId: activeThreadId,
+      }),
+    );
+    const costSummary: CostSummary = costSummaryQuery.data ?? EMPTY_COST_SUMMARY;
+    const latestContextWindowActivityId = useMemo(() => {
+      if (!activeThreadActivities) return null;
+      for (let index = activeThreadActivities.length - 1; index >= 0; index -= 1) {
+        const activity = activeThreadActivities[index];
+        if (activity?.kind === "context-window.updated") {
+          return String(activity.id);
+        }
+      }
+      return null;
+    }, [activeThreadActivities]);
+    useEffect(() => {
+      if (!latestContextWindowActivityId || !activeThreadEnvironmentId) return;
+      void invalidateCostSummary(costQueryClient, {
+        environmentId: activeThreadEnvironmentId,
+        threadId: activeThreadId,
+      });
+    }, [
+      latestContextWindowActivityId,
+      activeThreadEnvironmentId,
+      activeThreadId,
+      costQueryClient,
+    ]);
 
     // ------------------------------------------------------------------
     // Composer-local state
@@ -1965,6 +2001,7 @@ export const ChatComposer = memo(
                     compact={isComposerPrimaryActionsCompact}
                     activeContextWindow={activeContextWindow}
                     costSummary={costSummary}
+                    activeProvider={activeThreadModelSelection?.provider ?? selectedProvider ?? null}
                     pendingAction={pendingPrimaryAction}
                     isRunning={phase === "running"}
                     showPlanFollowUpPrompt={
diff --git a/apps/web/src/components/chat/CostMeter.tsx b/apps/web/src/components/chat/CostMeter.tsx
index 6c52d60762..b7646f6415 100644
--- a/apps/web/src/components/chat/CostMeter.tsx
+++ b/apps/web/src/components/chat/CostMeter.tsx
@@ -1,5 +1,7 @@
+import type { ProviderKind } from "@t3tools/contracts";
+
 import { cn } from "~/lib/utils";
-import { formatUsd, type CostSummary } from "~/lib/costStore";
+import { formatUsd, type CostSummary } from "~/lib/costQuery";
 import { Popover, PopoverPopup, PopoverTrigger } from "../ui/popover";
 
 function readBudget(): number | null {
@@ -25,28 +27,52 @@ function formatPercentage(value: number): string {
   return `${Math.round(value)}%`;
 }
 
-export function CostMeter(props: { summary: CostSummary }) {
-  const { summary } = props;
+/**
+ * Providers whose server adapters don't yet emit token-usage events. We
+ * surface "—" to avoid a misleading $0. (See c15: full provider-variance
+ * UI in a follow-up commit.)
+ */
+const PROVIDERS_WITHOUT_USAGE_TELEMETRY = new Set<ProviderKind>(["cursor", "opencode"]);
+
+export function CostMeter(props: {
+  summary: CostSummary;
+  activeProvider?: ProviderKind | null | undefined;
+}) {
+  const { summary, activeProvider } = props;
   const budget = readBudget();
 
-  // Ring: if budget set, fill by MTD/budget ratio; else fill by bucket of
-  // session-vs-month (bounded 0–100) so it still animates.
-  const ratio = budget
-    ? Math.min(100, (summary.monthUsd / budget) * 100)
-    : summary.monthUsd <= 0
-      ? 0
-      : Math.min(100, Math.log10(summary.monthUsd + 1) * 25);
+  const sessionUsd = summary.thread?.totalUsd ?? 0;
+  const sessionTurnCount = summary.thread?.turnCount ?? 0;
+  const monthUsd = summary.month.totalUsd;
+  const averagePerTurnUsd = sessionTurnCount > 0 ? sessionUsd / sessionTurnCount : null;
+  const providerUnsupported = activeProvider
+    ? PROVIDERS_WITHOUT_USAGE_TELEMETRY.has(activeProvider)
+    : false;
+
+  const ratio = providerUnsupported
+    ? 0
+    : budget
+      ? Math.min(100, (monthUsd / budget) * 100)
+      : monthUsd <= 0
+        ? 0
+        : Math.min(100, Math.log10(monthUsd + 1) * 25);
 
   const radius = 9.75;
   const circumference = 2 * Math.PI * radius;
   const dashOffset = circumference - (ratio / 100) * circumference;
 
-  const overBudget = budget ? summary.monthUsd >= budget : false;
+  const overBudget = budget ? monthUsd >= budget : false;
+  const centerLabel = providerUnsupported
+    ? "—"
+    : monthUsd > 0
+      ? formatCompactUsd(monthUsd)
+      : "$0";
 
-  const centerLabel = summary.monthUsd > 0 ? formatCompactUsd(summary.monthUsd) : "$0";
-  const ariaLabel = budget
-    ? `Cost ${formatUsd(summary.monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(ratio)})`
-    : `Cost ${formatUsd(summary.monthUsd)} this month, ${formatUsd(summary.sessionUsd)} this session`;
+  const ariaLabel = providerUnsupported
+    ? `Cost tracking unavailable for ${activeProvider}`
+    : budget
+      ? `Cost ${formatUsd(monthUsd)} of ${formatUsd(budget)} this month (${formatPercentage(ratio)})`
+      : `Cost ${formatUsd(monthUsd)} this month, ${formatUsd(sessionUsd)} this session`;
 
   return (
     <Popover>
@@ -104,28 +130,42 @@ export function CostMeter(props: { summary: CostSummary }) {
           <div className="text-[11px] font-medium uppercase tracking-[0.08em] text-muted-foreground">
             Cost
           </div>
-          <div className="whitespace-nowrap text-xs font-medium text-foreground">
-            <span>{formatUsd(summary.sessionUsd)}</span>
-            <span className="mx-1 text-muted-foreground">session</span>
-            <span className="mx-1">⋅</span>
-            <span>{formatUsd(summary.monthUsd)}</span>
-            <span className="mx-1 text-muted-foreground">MTD</span>
-          </div>
-          {budget ? (
-            <div className={cn("text-xs", overBudget ? "text-destructive" : "text-muted-foreground")}>
-              Budget: {formatUsd(budget)} ({formatPercentage(ratio)} used)
-            </div>
-          ) : null}
-          {summary.sessionTurnCount > 0 && summary.averagePerTurnUsd !== null ? (
+          {providerUnsupported ? (
             <div className="text-xs text-muted-foreground">
-              {summary.sessionTurnCount}
-              {summary.sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "}
-              {formatUsd(summary.averagePerTurnUsd)}/turn avg
+              Usage telemetry not available for this provider.
             </div>
-          ) : null}
-          {summary.month.turnCount > 0 ? (
-            <ModelBreakdown summary={summary} />
-          ) : null}
+          ) : (
+            <>
+              <div className="whitespace-nowrap text-xs font-medium text-foreground">
+                <span>{formatUsd(sessionUsd)}</span>
+                <span className="mx-1 text-muted-foreground">session</span>
+                <span className="mx-1">⋅</span>
+                <span>{formatUsd(monthUsd)}</span>
+                <span className="mx-1 text-muted-foreground">MTD</span>
+                <span className="mx-1">⋅</span>
+                <span>{formatUsd(summary.allTime.totalUsd)}</span>
+                <span className="mx-1 text-muted-foreground">all-time</span>
+              </div>
+              {budget ? (
+                <div
+                  className={cn(
+                    "text-xs",
+                    overBudget ? "text-destructive" : "text-muted-foreground",
+                  )}
+                >
+                  Budget: {formatUsd(budget)} ({formatPercentage(ratio)} used)
+                </div>
+              ) : null}
+              {sessionTurnCount > 0 && averagePerTurnUsd !== null ? (
+                <div className="text-xs text-muted-foreground">
+                  {sessionTurnCount}
+                  {sessionTurnCount === 1 ? " turn" : " turns"} this session ·{" "}
+                  {formatUsd(averagePerTurnUsd)}/turn avg
+                </div>
+              ) : null}
+              {summary.month.turnCount > 0 ? <ModelBreakdown summary={summary} /> : null}
+            </>
+          )}
         </div>
       </PopoverPopup>
     </Popover>
@@ -134,7 +174,7 @@ export function CostMeter(props: { summary: CostSummary }) {
 
 function ModelBreakdown(props: { summary: CostSummary }) {
   const entries = Object.entries(props.summary.month.byModel)
-    .filter(([, entry]) => entry.totalUsd > 0)
+    .filter(([, entry]) => entry.totalUsd > 0 || entry.turnCount > 0)
     .sort((left, right) => right[1].totalUsd - left[1].totalUsd);
   if (entries.length === 0) return null;
   return (
diff --git a/apps/web/src/lib/costQuery.ts b/apps/web/src/lib/costQuery.ts
new file mode 100644
index 0000000000..9c301363f6
--- /dev/null
+++ b/apps/web/src/lib/costQuery.ts
@@ -0,0 +1,202 @@
+/**
+ * Cost summary queries.
+ *
+ * Reads from the server's `/api/cost/summary` endpoint. Server owns the
+ * ledger (see apps/server/src/cost/*) so the client is a read-only
+ * consumer — localStorage is no longer involved.
+ *
+ * React Query caches the summary per (environment, thread). The composer
+ * invalidates this query whenever the active thread receives a new
+ * `context-window.updated` activity so the ring updates in near-realtime.
+ */
+import type { EnvironmentId, ThreadId } from "@t3tools/contracts";
+import {
+  queryOptions,
+  type QueryClient,
+  useQueryClient,
+} from "@tanstack/react-query";
+
+import { resolveEnvironmentHttpUrl } from "../environments/runtime";
+
+const COST_SUMMARY_STALE_TIME_MS = 5_000; // staleTime (ms) passed to the cost summary query options
+
+/** Per-model token counters and spend. These shapes mirror apps/server/src/cost/types.ts
+ * and are duplicated so the client doesn't import server-only modules. */
+export interface ModelCostEntry {
+  readonly inputTokens: number;
+  readonly cachedInputTokens: number;
+  readonly cacheCreationInputTokens: number;
+  readonly outputTokens: number;
+  readonly reasoningOutputTokens: number;
+  readonly totalUsd: number; // spend in USD for this model
+  readonly turnCount: number;
+}
+
+export interface CostBucket {
+  readonly totalUsd: number; // sanitized on read: finite and >= 0
+  readonly turnCount: number;
+  readonly byModel: Record<string, ModelCostEntry>; // keyed by model name
+  readonly updatedAt: string; // "" when unknown; presumably a timestamp — confirm against server types
+}
+
+export interface CostSummary {
+  readonly monthKey: string; // falls back to the client's local `YYYY-MM` when the payload has none
+  readonly thread: CostBucket | null; // null when the payload carries no thread bucket
+  readonly month: CostBucket;
+  readonly allTime: CostBucket;
+}
+
+/**
+ * Builds a zeroed bucket. Each call returns a new object with its own `byModel` map.
+ */
+export const emptyBucket = (): CostBucket => {
+  return { totalUsd: 0, turnCount: 0, byModel: {}, updatedAt: "" };
+};
+
+const monthKeyNow = (): string => {
+  const today = new Date(); // local clock, so the `YYYY-MM` key follows the user's timezone
+  return [today.getFullYear(), String(today.getMonth() + 1).padStart(2, "0")].join("-");
+};
+
+export const EMPTY_COST_SUMMARY: CostSummary = {
+  monthKey: monthKeyNow(), // evaluated once at module load, not on each use
+  thread: null,
+  month: emptyBucket(),
+  allTime: emptyBucket(),
+};
+
+export const costQueryKeys = {
+  all: ["cost"] as const, // shared prefix of every cost query key
+  summary: (environmentId: EnvironmentId | null, threadId: ThreadId | null) =>
+    ["cost", "summary", environmentId ?? null, threadId ?? null] as const, // per (environment, thread)
+};
+
+/**
+ * Loads the cost summary for one environment from `/api/cost/summary`.
+ * Rejects when the response is not ok; the JSON body is sanitized before being returned.
+ */
+async function fetchCostSummary(input: {
+  readonly environmentId: EnvironmentId;
+  readonly threadId: ThreadId | null;
+  readonly signal?: AbortSignal;
+}): Promise<CostSummary> {
+  const { environmentId, threadId, signal } = input;
+  const url = resolveEnvironmentHttpUrl({
+    environmentId,
+    pathname: "/api/cost/summary",
+    searchParams: threadId ? { threadId: String(threadId) } : {},
+  });
+  const init: RequestInit = { method: "GET", credentials: "include" };
+  if (signal) init.signal = signal;
+  const response = await fetch(url, init);
+  if (!response.ok) throw new Error(`Failed to load cost summary: HTTP ${response.status}`);
+  return sanitizeSummary((await response.json()) as unknown);
+}
+
+function sanitizeSummary(raw: unknown): CostSummary {
+  if (typeof raw !== "object" || raw === null) return EMPTY_COST_SUMMARY; // primitives and null
+  const { monthKey, thread, month, allTime } = raw as Record<string, unknown>;
+  return {
+    monthKey: typeof monthKey === "string" ? monthKey : monthKeyNow(),
+    thread: sanitizeBucketOrNull(thread),
+    month: sanitizeBucket(month),
+    allTime: sanitizeBucket(allTime),
+  };
+}
+
+function sanitizeBucket(raw: unknown): CostBucket {
+  if (!raw || typeof raw !== "object") return emptyBucket(); // primitives and null → empty bucket
+  const r = raw as Record<string, unknown>;
+  const m = r.byModel; // must be a keyed map: an array would leak its indices as model names
+  const models = m && typeof m === "object" && !Array.isArray(m) ? m : {};
+  const byModel: Record<string, ModelCostEntry> = {};
+  for (const [model, entry] of Object.entries(models)) {
+    if (model && entry && typeof entry === "object") byModel[model] = sanitizeEntry(entry);
+  }
+  return {
+    totalUsd: toNonNeg(r.totalUsd),
+    turnCount: toNonNeg(r.turnCount),
+    byModel,
+    updatedAt: typeof r.updatedAt === "string" ? r.updatedAt : "",
+  };
+}
+
+function sanitizeBucketOrNull(raw: unknown): CostBucket | null {
+  const present = typeof raw === "object" && raw !== null; // no thread bucket in payload → null
+  return present ? sanitizeBucket(raw) : null;
+}
+
+function sanitizeEntry(raw: unknown): ModelCostEntry {
+  const r = raw as Record<string, unknown>; // unchecked cast; sanitizeBucket only passes non-null objects
+  return {
+    inputTokens: toNonNeg(r.inputTokens), // every field: missing or invalid values become 0
+    cachedInputTokens: toNonNeg(r.cachedInputTokens),
+    cacheCreationInputTokens: toNonNeg(r.cacheCreationInputTokens),
+    outputTokens: toNonNeg(r.outputTokens),
+    reasoningOutputTokens: toNonNeg(r.reasoningOutputTokens),
+    totalUsd: toNonNeg(r.totalUsd),
+    turnCount: toNonNeg(r.turnCount),
+  };
+}
+
+function toNonNeg(value: unknown): number {
+  return typeof value !== "number" || !Number.isFinite(value) || value < 0 ? 0 : value; // junk → 0
+}
+
+/**
+ * React Query options for the cost summary of one (environment, thread) pair.
+ * The query is disabled without an environment and uses the empty summary as placeholderData.
+ */
+export function costSummaryQueryOptions(input: {
+  readonly environmentId: EnvironmentId | null;
+  readonly threadId: ThreadId | null;
+}) {
+  const { environmentId, threadId } = input;
+  return queryOptions({
+    queryKey: costQueryKeys.summary(environmentId, threadId),
+    enabled: environmentId !== null,
+    // Guarded again here so the fetcher never runs with a missing environment id.
+    queryFn: ({ signal }) =>
+      environmentId
+        ? fetchCostSummary({ environmentId, threadId, signal })
+        : Promise.resolve(EMPTY_COST_SUMMARY),
+    staleTime: COST_SUMMARY_STALE_TIME_MS,
+    placeholderData: EMPTY_COST_SUMMARY,
+  });
+}
+
+/** Invalidates one (environment, thread) summary key, or every cost query when no id is given. */
+export function invalidateCostSummary(
+  queryClient: QueryClient,
+  input?: {
+    readonly environmentId?: EnvironmentId | null;
+    readonly threadId?: ThreadId | null;
+  },
+) {
+  const { environmentId, threadId } = input ?? ({} as NonNullable<typeof input>);
+  const queryKey =
+    environmentId === undefined && threadId === undefined
+      ? costQueryKeys.all
+      : costQueryKeys.summary(environmentId ?? null, threadId ?? null);
+  return queryClient.invalidateQueries({ queryKey });
+}
+
+/**
+ * Hook form of `invalidateCostSummary`; the returned function is re-created on each render.
+ */
+export function useInvalidateCostSummary() {
+  const client = useQueryClient();
+  type Target = Parameters<typeof invalidateCostSummary>[1];
+  return (input?: Target) => invalidateCostSummary(client, input);
+}
+
+/** Formats a USD amount for the UI; missing, non-finite, zero and negative values show as "$0.00". */
+export function formatUsd(value: number | null | undefined): string {
+  const amount = typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
+  if (amount === 0) return "$0.00";
+  if (amount < 0.01) return "<$0.01";
+  // Under a dollar: three decimals minus one trailing zero (0.5 -> "$0.50", 0.125 -> "$0.125").
+  if (amount < 1) return "$" + amount.toFixed(3).replace(/0$/, "");
+  const text = amount < 100 ? amount.toFixed(2) : Math.round(amount).toLocaleString("en-US");
+  return "$" + text;
+}
diff --git a/apps/web/src/lib/costStore.test.ts b/apps/web/src/lib/costStore.test.ts
deleted file mode 100644
index 0602f2ce6a..0000000000
--- a/apps/web/src/lib/costStore.test.ts
+++ /dev/null
@@ -1,313 +0,0 @@
-import { beforeEach, describe, expect, it } from "vitest";
-
-import {
-  COST_STORE_STORAGE_KEY,
-  localMonthKey,
-  reduceRecordTurnCost,
-  reduceResetSession,
-  sanitizePersistedCostState,
-  selectCostSummary,
-  useCostStore,
-  type PersistedCostState,
-} from "./costStore";
-
-function freshState(): PersistedCostState {
-  return { version: 1, sessions: {}, months: {} };
-}
-
-const cost = (total: number) => ({
-  inputUsd: 0,
-  cachedUsd: 0,
-  cacheCreationUsd: 0,
-  outputUsd: 0,
-  reasoningUsd: 0,
-  totalUsd: total,
-});
-
-const deltas = (
-  d: Partial<{
-    inputTokens: number;
-    cachedInputTokens: number;
-    cacheCreationInputTokens: number;
-    outputTokens: number;
-    reasoningOutputTokens: number;
-  }> = {},
-) => ({
-  inputTokens: d.inputTokens ?? 0,
-  cachedInputTokens: d.cachedInputTokens ?? 0,
-  cacheCreationInputTokens: d.cacheCreationInputTokens ?? 0,
-  outputTokens: d.outputTokens ?? 0,
-  reasoningOutputTokens: d.reasoningOutputTokens ?? 0,
-});
-
-describe("localMonthKey", () => {
-  it("formats YYYY-MM in local tz", () => {
-    const date = new Date(2026, 3, 7, 12, 0, 0); // April 7 2026 local
-    expect(localMonthKey(date)).toBe("2026-04");
-  });
-
-  it("pads single-digit months", () => {
-    const date = new Date(2026, 0, 1, 0, 0, 0);
-    expect(localMonthKey(date)).toBe("2026-01");
-  });
-});
-
-describe("reduceRecordTurnCost", () => {
-  const at = new Date(2026, 3, 21, 10, 0, 0); // April 21 2026
-
-  it("accumulates into session + month bucket", () => {
-    let state = freshState();
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }),
-      breakdown: cost(0.01),
-      at,
-    });
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ inputTokens: 500, outputTokens: 200 }),
-      breakdown: cost(0.005),
-      at,
-    });
-
-    const session = state.sessions["t1"]!;
-    expect(session.totalUsd).toBeCloseTo(0.015, 6);
-    expect(session.turnCount).toBe(2);
-    expect(session.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(1_500);
-    expect(session.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(700);
-    expect(session.byModel["claude-sonnet-4-6"]!.turnCount).toBe(2);
-
-    const month = state.months["2026-04"]!;
-    expect(month.totalUsd).toBeCloseTo(0.015, 6);
-    expect(month.turnCount).toBe(2);
-  });
-
-  it("keeps per-model tallies separate", () => {
-    let state = freshState();
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.01),
-      at,
-    });
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "gpt-5.4",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.02),
-      at,
-    });
-    const session = state.sessions["t1"]!;
-    expect(Object.keys(session.byModel).sort()).toEqual(["claude-sonnet-4-6", "gpt-5.4"]);
-    expect(session.totalUsd).toBeCloseTo(0.03, 6);
-  });
-
-  it("isolates sessions by threadId", () => {
-    let state = freshState();
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.01),
-      at,
-    });
-    state = reduceRecordTurnCost(state, {
-      threadId: "t2",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.02),
-      at,
-    });
-    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6);
-    expect(state.sessions["t2"]!.totalUsd).toBeCloseTo(0.02, 6);
-    // Month aggregates both sessions.
-    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.03, 6);
-  });
-
-  it("buckets by local month", () => {
-    let state = freshState();
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.01),
-      at: new Date(2026, 2, 31, 10, 0, 0), // March
-    });
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.02),
-      at: new Date(2026, 3, 1, 10, 0, 0), // April
-    });
-    expect(Object.keys(state.months).sort()).toEqual(["2026-03", "2026-04"]);
-    expect(state.months["2026-03"]!.totalUsd).toBeCloseTo(0.01, 6);
-    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.02, 6);
-    // Session spans both months.
-    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.03, 6);
-  });
-
-  it("ignores zero-token zero-cost turns", () => {
-    const before = freshState();
-    const after = reduceRecordTurnCost(before, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas(),
-      breakdown: cost(0),
-      at,
-    });
-    expect(after).toBe(before);
-  });
-
-  it("ignores blank threadId / model", () => {
-    const before = freshState();
-    const a = reduceRecordTurnCost(before, {
-      threadId: "",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 10 }),
-      breakdown: cost(0.01),
-      at,
-    });
-    const b = reduceRecordTurnCost(before, {
-      threadId: "t1",
-      model: "",
-      deltas: deltas({ outputTokens: 10 }),
-      breakdown: cost(0.01),
-      at,
-    });
-    expect(a).toBe(before);
-    expect(b).toBe(before);
-  });
-});
-
-describe("reduceResetSession", () => {
-  it("removes the session but keeps month", () => {
-    let state = freshState();
-    state = reduceRecordTurnCost(state, {
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.01),
-      at: new Date(2026, 3, 21, 10, 0, 0),
-    });
-    const next = reduceResetSession(state, "t1");
-    expect(next.sessions["t1"]).toBeUndefined();
-    expect(next.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6);
-  });
-
-  it("no-op for unknown threadId", () => {
-    const state = freshState();
-    expect(reduceResetSession(state, "nope")).toBe(state);
-  });
-});
-
-describe("sanitizePersistedCostState", () => {
-  it("returns initial for garbage", () => {
-    expect(sanitizePersistedCostState(null).sessions).toEqual({});
-    expect(sanitizePersistedCostState("bad").months).toEqual({});
-    expect(sanitizePersistedCostState({ version: 99 }).months).toEqual({});
-  });
-
-  it("drops invalid month keys", () => {
-    const cleaned = sanitizePersistedCostState({
-      version: 1,
-      sessions: {},
-      months: {
-        "2026-04": { totalUsd: 1, turnCount: 1, byModel: {} },
-        "bogus": { totalUsd: 99, turnCount: 1, byModel: {} },
-      },
-    });
-    expect(Object.keys(cleaned.months)).toEqual(["2026-04"]);
-  });
-
-  it("coerces non-finite numbers to zero", () => {
-    const cleaned = sanitizePersistedCostState({
-      version: 1,
-      sessions: {
-        t1: {
-          totalUsd: Number.NaN,
-          turnCount: -5,
-          byModel: {
-            "claude-sonnet-4-6": {
-              inputTokens: "abc",
-              outputTokens: 10,
-              totalUsd: 5,
-              turnCount: 1,
-            },
-          },
-        },
-      },
-      months: {},
-    });
-    const s = cleaned.sessions["t1"]!;
-    expect(s.totalUsd).toBe(0);
-    expect(s.turnCount).toBe(0);
-    expect(s.byModel["claude-sonnet-4-6"]!.inputTokens).toBe(0);
-    expect(s.byModel["claude-sonnet-4-6"]!.outputTokens).toBe(10);
-    expect(s.byModel["claude-sonnet-4-6"]!.totalUsd).toBe(5);
-  });
-});
-
-describe("selectCostSummary", () => {
-  it("returns zero summary for empty state", () => {
-    const summary = selectCostSummary(freshState(), "t1", new Date(2026, 3, 21));
-    expect(summary.sessionUsd).toBe(0);
-    expect(summary.monthUsd).toBe(0);
-    expect(summary.averagePerTurnUsd).toBeNull();
-    expect(summary.monthKey).toBe("2026-04");
-  });
-
-  it("computes average per turn", () => {
-    let state = freshState();
-    for (let i = 0; i < 4; i += 1) {
-      state = reduceRecordTurnCost(state, {
-        threadId: "t1",
-        model: "claude-sonnet-4-6",
-        deltas: deltas({ outputTokens: 100 }),
-        breakdown: cost(0.01),
-        at: new Date(2026, 3, 21),
-      });
-    }
-    const summary = selectCostSummary(state, "t1", new Date(2026, 3, 21));
-    expect(summary.sessionUsd).toBeCloseTo(0.04, 6);
-    expect(summary.averagePerTurnUsd).toBeCloseTo(0.01, 6);
-    expect(summary.sessionTurnCount).toBe(4);
-  });
-});
-
-describe("useCostStore (zustand)", () => {
-  beforeEach(() => {
-    useCostStore.getState().resetAll();
-    if (typeof window !== "undefined") {
-      window.localStorage.removeItem(COST_STORE_STORAGE_KEY);
-    }
-  });
-
-  it("records turn cost via action", () => {
-    useCostStore.getState().recordTurnCost({
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ inputTokens: 1_000, outputTokens: 500 }),
-      breakdown: cost(0.01),
-      at: new Date(2026, 3, 21),
-    });
-    const state = useCostStore.getState();
-    expect(state.sessions["t1"]!.totalUsd).toBeCloseTo(0.01, 6);
-    expect(state.months["2026-04"]!.totalUsd).toBeCloseTo(0.01, 6);
-  });
-
-  it("resetSession clears one thread", () => {
-    useCostStore.getState().recordTurnCost({
-      threadId: "t1",
-      model: "claude-sonnet-4-6",
-      deltas: deltas({ outputTokens: 100 }),
-      breakdown: cost(0.01),
-      at: new Date(2026, 3, 21),
-    });
-    useCostStore.getState().resetSession("t1");
-    expect(useCostStore.getState().sessions["t1"]).toBeUndefined();
-  });
-});
diff --git a/apps/web/src/lib/costStore.ts b/apps/web/src/lib/costStore.ts
deleted file mode 100644
index 509276b5e2..0000000000
--- a/apps/web/src/lib/costStore.ts
+++ /dev/null
@@ -1,328 +0,0 @@
-import { Debouncer } from "@tanstack/react-pacer";
-import { create } from "zustand";
-import type { TurnCostBreakdown, TurnTokenDeltas } from "@t3tools/shared/pricing";
-import { formatUsd } from "@t3tools/shared/pricing";
-
-export const COST_STORE_STORAGE_KEY = "t3code:cost-store:v1";
-
-/** Cumulative token counts + USD spend for one model within a bucket. */
-export interface ModelCostEntry {
-  inputTokens: number;
-  cachedInputTokens: number;
-  outputTokens: number;
-  reasoningOutputTokens: number;
-  totalUsd: number;
-  turnCount: number;
-}
-
-export interface CostBucket {
-  totalUsd: number;
-  turnCount: number;
-  byModel: Record<string, ModelCostEntry>;
-}
-
-export interface PersistedCostState {
-  version: 1;
-  sessions: Record<string, CostBucket>;
-  months: Record<string, CostBucket>;
-}
-
-export interface CostStoreState extends PersistedCostState {
-  recordTurnCost: (input: RecordTurnCostInput) => void;
-  resetSession: (threadId: string) => void;
-  resetAll: () => void;
-  /** Test-only hook: replace state atomically. */
-  __replaceState: (next: PersistedCostState) => void;
-}
-
-export interface RecordTurnCostInput {
-  threadId: string;
-  model: string;
-  deltas: TurnTokenDeltas;
-  breakdown: TurnCostBreakdown;
-  /** Override "now" for deterministic tests. */
-  at?: Date;
-}
-
-const emptyBucket: () => CostBucket = () => ({ totalUsd: 0, turnCount: 0, byModel: {} });
-const emptyModelEntry: () => ModelCostEntry = () => ({
-  inputTokens: 0,
-  cachedInputTokens: 0,
-  outputTokens: 0,
-  reasoningOutputTokens: 0,
-  totalUsd: 0,
-  turnCount: 0,
-});
-
-const initialState: PersistedCostState = {
-  version: 1,
-  sessions: {},
-  months: {},
-};
-
-/**
- * Compute `YYYY-MM` key for a Date in the **local** timezone.
- * Done via `getFullYear/getMonth` (not toISOString) so the month rolls over
- * on the user's clock, not UTC's.
- */
-export function localMonthKey(date: Date = new Date()): string {
-  const year = date.getFullYear().toString().padStart(4, "0");
-  const month = (date.getMonth() + 1).toString().padStart(2, "0");
-  return `${year}-${month}`;
-}
-
-function addTurnToEntry(
-  entry: ModelCostEntry,
-  deltas: TurnTokenDeltas,
-  breakdown: TurnCostBreakdown,
-): ModelCostEntry {
-  return {
-    inputTokens: entry.inputTokens + deltas.inputTokens,
-    cachedInputTokens: entry.cachedInputTokens + deltas.cachedInputTokens,
-    outputTokens: entry.outputTokens + deltas.outputTokens,
-    reasoningOutputTokens: entry.reasoningOutputTokens + deltas.reasoningOutputTokens,
-    totalUsd: entry.totalUsd + breakdown.totalUsd,
-    turnCount: entry.turnCount + 1,
-  };
-}
-
-function addTurnToBucket(
-  bucket: CostBucket,
-  model: string,
-  deltas: TurnTokenDeltas,
-  breakdown: TurnCostBreakdown,
-): CostBucket {
-  const existing = bucket.byModel[model] ?? emptyModelEntry();
-  return {
-    totalUsd: bucket.totalUsd + breakdown.totalUsd,
-    turnCount: bucket.turnCount + 1,
-    byModel: {
-      ...bucket.byModel,
-      [model]: addTurnToEntry(existing, deltas, breakdown),
-    },
-  };
-}
-
-/** Pure reducer: record one turn into the given state. */
-export function reduceRecordTurnCost(
-  state: PersistedCostState,
-  input: RecordTurnCostInput,
-): PersistedCostState {
-  const { threadId, model, deltas, breakdown } = input;
-  if (!threadId || !model) {
-    return state;
-  }
-  // Skip no-op turns to keep storage tiny.
-  const totalTokens =
-    deltas.inputTokens +
-    deltas.cachedInputTokens +
-    deltas.outputTokens +
-    deltas.reasoningOutputTokens;
-  if (totalTokens <= 0 && breakdown.totalUsd <= 0) {
-    return state;
-  }
-  const monthKey = localMonthKey(input.at ?? new Date());
-  const session = state.sessions[threadId] ?? emptyBucket();
-  const month = state.months[monthKey] ?? emptyBucket();
-  return {
-    ...state,
-    sessions: {
-      ...state.sessions,
-      [threadId]: addTurnToBucket(session, model, deltas, breakdown),
-    },
-    months: {
-      ...state.months,
-      [monthKey]: addTurnToBucket(month, model, deltas, breakdown),
-    },
-  };
-}
-
-export function reduceResetSession(
-  state: PersistedCostState,
-  threadId: string,
-): PersistedCostState {
-  if (!(threadId in state.sessions)) {
-    return state;
-  }
-  const nextSessions = { ...state.sessions };
-  delete nextSessions[threadId];
-  return { ...state, sessions: nextSessions };
-}
-
-function sanitizeNumber(value: unknown): number {
-  return typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : 0;
-}
-
-function sanitizeModelEntry(raw: unknown): ModelCostEntry | null {
-  if (!raw || typeof raw !== "object") {
-    return null;
-  }
-  const r = raw as Record<string, unknown>;
-  return {
-    inputTokens: sanitizeNumber(r.inputTokens),
-    cachedInputTokens: sanitizeNumber(r.cachedInputTokens),
-    outputTokens: sanitizeNumber(r.outputTokens),
-    reasoningOutputTokens: sanitizeNumber(r.reasoningOutputTokens),
-    totalUsd: sanitizeNumber(r.totalUsd),
-    turnCount: sanitizeNumber(r.turnCount),
-  };
-}
-
-function sanitizeBucket(raw: unknown): CostBucket | null {
-  if (!raw || typeof raw !== "object") {
-    return null;
-  }
-  const r = raw as Record<string, unknown>;
-  const byModelRaw = (r.byModel ?? {}) as Record<string, unknown>;
-  const byModel: Record<string, ModelCostEntry> = {};
-  if (byModelRaw && typeof byModelRaw === "object") {
-    for (const [model, entry] of Object.entries(byModelRaw)) {
-      if (!model) continue;
-      const cleaned = sanitizeModelEntry(entry);
-      if (cleaned) byModel[model] = cleaned;
-    }
-  }
-  return {
-    totalUsd: sanitizeNumber(r.totalUsd),
-    turnCount: sanitizeNumber(r.turnCount),
-    byModel,
-  };
-}
-
-export function sanitizePersistedCostState(raw: unknown): PersistedCostState {
-  if (!raw || typeof raw !== "object") {
-    return initialState;
-  }
-  const r = raw as Record<string, unknown>;
-  if (r.version !== 1) {
-    return initialState;
-  }
-  const sessions: Record<string, CostBucket> = {};
-  const months: Record<string, CostBucket> = {};
-  const sessionsRaw = (r.sessions ?? {}) as Record<string, unknown>;
-  const monthsRaw = (r.months ?? {}) as Record<string, unknown>;
-  if (sessionsRaw && typeof sessionsRaw === "object") {
-    for (const [threadId, bucket] of Object.entries(sessionsRaw)) {
-      if (!threadId) continue;
-      const cleaned = sanitizeBucket(bucket);
-      if (cleaned) sessions[threadId] = cleaned;
-    }
-  }
-  if (monthsRaw && typeof monthsRaw === "object") {
-    for (const [monthKey, bucket] of Object.entries(monthsRaw)) {
-      if (!/^\d{4}-\d{2}$/.test(monthKey)) continue;
-      const cleaned = sanitizeBucket(bucket);
-      if (cleaned) months[monthKey] = cleaned;
-    }
-  }
-  return { version: 1, sessions, months };
-}
-
-function readPersistedState(): PersistedCostState {
-  if (typeof window === "undefined") {
-    return initialState;
-  }
-  try {
-    const raw = window.localStorage.getItem(COST_STORE_STORAGE_KEY);
-    if (!raw) return initialState;
-    return sanitizePersistedCostState(JSON.parse(raw));
-  } catch {
-    return initialState;
-  }
-}
-
-function persistState(state: PersistedCostState): void {
-  if (typeof window === "undefined") return;
-  try {
-    const { version, sessions, months } = state;
-    window.localStorage.setItem(
-      COST_STORE_STORAGE_KEY,
-      JSON.stringify({ version, sessions, months } satisfies PersistedCostState),
-    );
-  } catch {
-    // ignore quota / serialization errors
-  }
-}
-
-const debouncedPersist = new Debouncer(persistState, { wait: 400 });
-
-export const useCostStore = create<CostStoreState>((set) => ({
-  ...readPersistedState(),
-  recordTurnCost: (input) => set((state) => reduceRecordTurnCost(state, input)),
-  resetSession: (threadId) => set((state) => reduceResetSession(state, threadId)),
-  resetAll: () => set(() => ({ ...initialState })),
-  __replaceState: (next) => set(() => ({ ...next })),
-}));
-
-useCostStore.subscribe((state) => {
-  const { version, sessions, months } = state;
-  debouncedPersist.maybeExecute({ version, sessions, months });
-});
-
-if (typeof window !== "undefined" && typeof window.addEventListener === "function") {
-  window.addEventListener("beforeunload", () => {
-    debouncedPersist.flush();
-  });
-}
-
-// ── Selectors ────────────────────────────────────────────────────────────
-
-export function selectSessionBucket(
-  state: PersistedCostState,
-  threadId: string | null | undefined,
-): CostBucket {
-  if (!threadId) return emptyBucket();
-  return state.sessions[threadId] ?? emptyBucket();
-}
-
-export function selectMonthBucket(
-  state: PersistedCostState,
-  monthKey: string = localMonthKey(),
-): CostBucket {
-  return state.months[monthKey] ?? emptyBucket();
-}
-
-export interface CostSummary {
-  readonly sessionUsd: number;
-  readonly monthUsd: number;
-  readonly sessionTurnCount: number;
-  readonly monthTurnCount: number;
-  readonly monthKey: string;
-  readonly session: CostBucket;
-  readonly month: CostBucket;
-  readonly averagePerTurnUsd: number | null;
-}
-
-export function useCostSummary(
-  threadId: string | null | undefined,
-  now?: Date,
-): CostSummary {
-  const sessions = useCostStore((state) => state.sessions);
-  const months = useCostStore((state) => state.months);
-  // Intentionally rebuild on any change to sessions/months — selector is cheap.
-  return selectCostSummary({ version: 1, sessions, months }, threadId, now);
-}
-
-export function selectCostSummary(
-  state: PersistedCostState,
-  threadId: string | null | undefined,
-  now: Date = new Date(),
-): CostSummary {
-  const monthKey = localMonthKey(now);
-  const session = selectSessionBucket(state, threadId);
-  const month = selectMonthBucket(state, monthKey);
-  const averagePerTurnUsd =
-    session.turnCount > 0 ? session.totalUsd / session.turnCount : null;
-  return {
-    sessionUsd: session.totalUsd,
-    monthUsd: month.totalUsd,
-    sessionTurnCount: session.turnCount,
-    monthTurnCount: month.turnCount,
-    monthKey,
-    session,
-    month,
-    averagePerTurnUsd,
-  };
-}
-
-export { formatUsd };
diff --git a/apps/web/src/lib/useCostTracking.test.ts b/apps/web/src/lib/useCostTracking.test.ts
deleted file mode 100644
index 9590cf820e..0000000000
--- a/apps/web/src/lib/useCostTracking.test.ts
+++ /dev/null
@@ -1,160 +0,0 @@
-import { describe, expect, it } from "vitest";
-import { EventId, type ModelSelection, type OrchestrationThreadActivity, TurnId } from "@t3tools/contracts";
-
-import { processActivitiesForCost } from "./useCostTracking";
-
-function makeContextWindowActivity(
-  id: string,
-  payload: Record<string, unknown>,
-  createdAt = "2026-04-21T10:00:00.000Z",
-): OrchestrationThreadActivity {
-  return {
-    id: EventId.make(id),
-    tone: "info",
-    kind: "context-window.updated",
-    summary: "Context window updated",
-    payload,
-    turnId: TurnId.make("turn-1"),
-    createdAt,
-  };
-}
-
-const sonnet: ModelSelection = {
-  provider: "claudeAgent",
-  model: "claude-sonnet-4-6",
-};
-
-describe("processActivitiesForCost", () => {
-  it("returns empty records with null threadId", () => {
-    const result = processActivitiesForCost(null, [], sonnet, null);
-    expect(result.records).toEqual([]);
-    expect(result.nextSeen.size).toBe(0);
-  });
-
-  it("seeds existing activities without recording on first mount", () => {
-    const acts = [
-      makeContextWindowActivity("evt-a", { lastOutputTokens: 1000 }),
-      makeContextWindowActivity("evt-b", { lastOutputTokens: 500 }),
-    ];
-    const result = processActivitiesForCost("t1", acts, sonnet, null);
-    expect(result.records).toEqual([]);
-    expect(result.nextSeen.size).toBe(2);
-  });
-
-  it("records only new activities on subsequent call", () => {
-    const seed = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-a", { lastOutputTokens: 100 })],
-      sonnet,
-      null,
-    );
-    const next = processActivitiesForCost(
-      "t1",
-      [
-        makeContextWindowActivity("evt-a", { lastOutputTokens: 100 }),
-        makeContextWindowActivity("evt-b", {
-          lastInputTokens: 1_000,
-          lastCachedInputTokens: 500,
-          lastOutputTokens: 200,
-        }),
-      ],
-      sonnet,
-      seed.nextSeen,
-    );
-    expect(next.records).toHaveLength(1);
-    const record = next.records[0]!;
-    expect(record.threadId).toBe("t1");
-    expect(record.model).toBe("claude-sonnet-4-6");
-    expect(record.deltas.inputTokens).toBe(1_000);
-    expect(record.deltas.outputTokens).toBe(200);
-    // 1000*3 + 500*0.3 + 200*15 = 3000+150+3000 = 6150 / 1M = $0.00615
-    expect(record.breakdown.totalUsd).toBeCloseTo(0.00615, 6);
-  });
-
-  it("skips events without per-turn deltas", () => {
-    const seed = processActivitiesForCost("t1", [], sonnet, null);
-    const next = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-1", { usedTokens: 10_000 })],
-      sonnet,
-      seed.nextSeen,
-    );
-    expect(next.records).toEqual([]);
-    expect(next.nextSeen.has("evt-1")).toBe(true);
-  });
-
-  it("skips non-context-window activity kinds", () => {
-    const seed = processActivitiesForCost("t1", [], sonnet, null);
-    const other: OrchestrationThreadActivity = {
-      id: EventId.make("evt-tool"),
-      tone: "info",
-      kind: "tool.started",
-      summary: "tool.started",
-      payload: { lastOutputTokens: 1_000 },
-      turnId: TurnId.make("turn-1"),
-      createdAt: "2026-04-21T10:00:00.000Z",
-    };
-    const next = processActivitiesForCost("t1", [other], sonnet, seed.nextSeen);
-    expect(next.records).toEqual([]);
-    expect(next.nextSeen.has("evt-tool")).toBe(true);
-  });
-
-  it("skips when model selection missing", () => {
-    const seed = processActivitiesForCost("t1", [], null, null);
-    const next = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
-      null,
-      seed.nextSeen,
-    );
-    expect(next.records).toEqual([]);
-  });
-
-  it("skips when pricing resolves to zero (unknown model)", () => {
-    const seed = processActivitiesForCost("t1", [], sonnet, null);
-    const next = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
-      { provider: "opencode", model: "some/unknown-model" },
-      seed.nextSeen,
-    );
-    expect(next.records).toEqual([]);
-    expect(next.nextSeen.has("evt-1")).toBe(true);
-  });
-
-  it("deduplicates by activity id", () => {
-    const seed = processActivitiesForCost("t1", [], sonnet, null);
-    const firstPass = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
-      sonnet,
-      seed.nextSeen,
-    );
-    expect(firstPass.records).toHaveLength(1);
-    const secondPass = processActivitiesForCost(
-      "t1",
-      [makeContextWindowActivity("evt-1", { lastOutputTokens: 1_000 })],
-      sonnet,
-      firstPass.nextSeen,
-    );
-    expect(secondPass.records).toEqual([]);
-  });
-
-  it("uses activity.createdAt as `at` timestamp", () => {
-    const seed = processActivitiesForCost("t1", [], sonnet, null);
-    const next = processActivitiesForCost(
-      "t1",
-      [
-        makeContextWindowActivity(
-          "evt-1",
-          { lastOutputTokens: 1_000 },
-          "2026-03-15T00:00:00.000Z",
-        ),
-      ],
-      sonnet,
-      seed.nextSeen,
-    );
-    const record = next.records[0]!;
-    expect(record.at?.toISOString()).toBe("2026-03-15T00:00:00.000Z");
-  });
-});
diff --git a/apps/web/src/lib/useCostTracking.ts b/apps/web/src/lib/useCostTracking.ts
deleted file mode 100644
index 6757ecc305..0000000000
--- a/apps/web/src/lib/useCostTracking.ts
+++ /dev/null
@@ -1,120 +0,0 @@
-import { useEffect, useRef } from "react";
-import type { ModelSelection, OrchestrationThreadActivity } from "@t3tools/contracts";
-import {
-  computeTurnCost,
-  type TurnCostBreakdown,
-  type TurnTokenDeltas,
-} from "@t3tools/shared/pricing";
-
-import { useCostStore, type RecordTurnCostInput } from "./costStore";
-
-interface SeenRef {
-  threadId: string | null | undefined;
-  ids: Set<string>;
-}
-
-function toNonNegative(value: unknown): number {
-  return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
-}
-
-function extractDeltas(payload: unknown): TurnTokenDeltas | null {
-  if (!payload || typeof payload !== "object") return null;
-  const p = payload as Record<string, unknown>;
-  const input = toNonNegative(p.lastInputTokens);
-  const cached = toNonNegative(p.lastCachedInputTokens);
-  const cacheCreation = toNonNegative(p.lastCacheCreationInputTokens);
-  const output = toNonNegative(p.lastOutputTokens);
-  const reasoning = toNonNegative(p.lastReasoningOutputTokens);
-  if (input + cached + cacheCreation + output + reasoning <= 0) return null;
-  return {
-    inputTokens: input,
-    cachedInputTokens: cached,
-    cacheCreationInputTokens: cacheCreation,
-    outputTokens: output,
-    reasoningOutputTokens: reasoning,
-  };
-}
-
-export interface ProcessActivitiesResult {
-  readonly records: ReadonlyArray<RecordTurnCostInput>;
-  readonly nextSeen: Set<string>;
-}
-
-/**
- * Pure: find new `context-window.updated` events that carry per-turn
- * token deltas and translate them into cost-store inputs. Returns updated
- * "seen" set for caller to persist.
- *
- * Behaviour:
- *   - If `prevSeen` is `null`, treat all activities as "already seen" and
- *     emit no records — used for initial mount / thread switch.
- *   - Otherwise, only new activity IDs are considered.
- */
-export function processActivitiesForCost(
-  threadId: string | null | undefined,
-  activities: ReadonlyArray<OrchestrationThreadActivity> | undefined,
-  modelSelection: ModelSelection | null | undefined,
-  prevSeen: Set<string> | null,
-): ProcessActivitiesResult {
-  if (!threadId || !activities || activities.length === 0) {
-    return { records: [], nextSeen: prevSeen ?? new Set() };
-  }
-  if (prevSeen === null) {
-    // Initial mount / thread switch: seed seen set with current activity IDs.
-    return {
-      records: [],
-      nextSeen: new Set(activities.map((a) => a.id as string)),
-    };
-  }
-  const seen = new Set(prevSeen);
-  const model = modelSelection?.model;
-  const provider = modelSelection?.provider;
-  const records: RecordTurnCostInput[] = [];
-  for (const activity of activities) {
-    const id = activity.id as string;
-    if (seen.has(id)) continue;
-    seen.add(id);
-    if (activity.kind !== "context-window.updated") continue;
-    const deltas = extractDeltas(activity.payload);
-    if (!deltas) continue;
-    if (!model) continue;
-    const breakdown: TurnCostBreakdown = computeTurnCost(model, deltas, provider);
-    if (breakdown.totalUsd <= 0) continue;
-    records.push({
-      threadId,
-      model,
-      deltas,
-      breakdown,
-      at: activity.createdAt ? new Date(activity.createdAt) : new Date(),
-    });
-  }
-  return { records, nextSeen: seen };
-}
-
-/**
- * Observe thread activity stream and record cost for each new
- * `context-window.updated` event. Seeds on first mount so historical
- * activities aren't retroactively charged.
- */
-export function useCostTracking(
-  threadId: string | null | undefined,
-  activities: ReadonlyArray<OrchestrationThreadActivity> | undefined,
-  modelSelection: ModelSelection | null | undefined,
-): void {
-  const recordTurnCost = useCostStore((state) => state.recordTurnCost);
-  const seenRef = useRef<SeenRef>({ threadId: undefined, ids: new Set() });
-
-  useEffect(() => {
-    const prev = seenRef.current.threadId === threadId ? seenRef.current.ids : null;
-    const { records, nextSeen } = processActivitiesForCost(
-      threadId,
-      activities,
-      modelSelection,
-      prev,
-    );
-    seenRef.current = { threadId, ids: nextSeen };
-    for (const record of records) {
-      recordTurnCost(record);
-    }
-  }, [threadId, activities, modelSelection, recordTurnCost]);
-}

From 96768f185b859bee8510b270ad97a317a834f7e7 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 21:53:13 +0200
Subject: [PATCH 11/16] fix(web): stop button stays active after model response
 completes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the final `thread.message-sent` (streaming:false) arrives, the
client marks `latestTurn.state` as "completed" but leaves
`session.status === "running"` until the separate `thread.session-set`
event (emitted server-side on `turn.completed`) arrives.  In that gap:

- The stop button stays red because visibility is derived from
  `derivePhase(session)` → `"running"` via `session.status`.
- Clicking it dispatches `thread.turn.interrupt`; the server has no
  active turn so the command is a no-op, and the UI stays stuck until
  the late `thread.session-set` lands.

Fix:

- `store.ts` `thread.message-sent` handler: when the final assistant
  message for the currently active turn arrives and `latestTurn`
  resolves to "completed", optimistically flip `session.status` /
  `orchestrationStatus` to "ready" and clear `activeTurnId`.  The
  later server-sent `thread.session-set` overwrites the session via
  `mapSession`, so the server's state still wins and the optimistic
  change is harmless.  Interrupted and errored turns are excluded
  (checked via `latestTurn.state === "completed"` and the
  `activeTurnId === event.turnId` guard).

- `ChatView.tsx` `onInterrupt`: defensive guard — if `latestTurn` is
  already in a terminal state (completed / interrupted / error), skip
  the dispatch.  This closes the small window where a click lands
  before React re-renders the composer.

Tests:

- Updated the existing replay-batch test: after a final assistant
  `message-sent` for the active turn, `session.status` is now "ready"
  and `activeTurnId` is cleared.
- Added a test that a mismatched turnId (active turn ≠ streaming:false
  message turn) does NOT reconcile — the server's session-set remains
  authoritative.
- Added a test that an interrupted turn's final message does NOT
  reconcile session to "ready".

All 908 web tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/web/src/components/ChatView.tsx |  14 +++
 apps/web/src/store.test.ts           | 123 ++++++++++++++++++++++++++-
 apps/web/src/store.ts                |  28 ++++++
 3 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx
index 0c76059b6a..552d9fcaf9 100644
--- a/apps/web/src/components/ChatView.tsx
+++ b/apps/web/src/components/ChatView.tsx
@@ -2667,6 +2667,20 @@ export default function ChatView(props: ChatViewProps) {
   const onInterrupt = async () => {
     const api = readEnvironmentApi(environmentId);
     if (!api || !activeThread) return;
+    // Defensive: if the latest turn is already in a terminal state the
+    // server has no active turn to interrupt, so the dispatch would be a
+    // no-op round-trip.  Skip it — the store reconciles session.status in
+    // the `thread.message-sent` handler, so the stop button should have
+    // already disappeared; this guard handles the small window where a
+    // click landed before the React re-render.
+    const latestTurnState = activeThread.latestTurn?.state;
+    if (
+      latestTurnState === "completed" ||
+      latestTurnState === "interrupted" ||
+      latestTurnState === "error"
+    ) {
+      return;
+    }
     await api.orchestration.dispatchCommand({
       type: "thread.turn.interrupt",
       commandId: newCommandId(),
diff --git a/apps/web/src/store.test.ts b/apps/web/src/store.test.ts
index 9bb01ba0be..19b35f8a12 100644
--- a/apps/web/src/store.test.ts
+++ b/apps/web/src/store.test.ts
@@ -741,11 +741,132 @@ describe("incremental orchestration updates", () => {
       localEnvironmentId,
     );
 
-    expect(threadsOf(next)[0]?.session?.status).toBe("running");
+    // The final `thread.message-sent` for the active turn optimistically
+    // flips session.status from "running" → "ready" and clears
+    // activeTurnId, so the stop button does not remain active while we
+    // wait for the server's follow-up `thread.session-set` event.
+    expect(threadsOf(next)[0]?.session?.status).toBe("ready");
+    expect(threadsOf(next)[0]?.session?.orchestrationStatus).toBe("ready");
+    expect(threadsOf(next)[0]?.session?.activeTurnId).toBeUndefined();
     expect(threadsOf(next)[0]?.latestTurn?.state).toBe("completed");
     expect(threadsOf(next)[0]?.messages).toHaveLength(1);
   });
 
+  it("does not reconcile session when the completed turn is not the active turn", () => {
+    const thread = makeThread({
+      latestTurn: {
+        turnId: TurnId.make("turn-1"),
+        state: "running",
+        requestedAt: "2026-02-27T00:00:00.000Z",
+        startedAt: "2026-02-27T00:00:00.000Z",
+        completedAt: null,
+        assistantMessageId: null,
+      },
+    });
+    const state = makeState(thread);
+
+    const next = applyOrchestrationEvents(
+      state,
+      [
+        makeEvent(
+          "thread.session-set",
+          {
+            threadId: thread.id,
+            session: {
+              threadId: thread.id,
+              status: "running",
+              providerName: "codex",
+              runtimeMode: "full-access",
+              activeTurnId: TurnId.make("turn-2"),
+              lastError: null,
+              updatedAt: "2026-02-27T00:00:02.000Z",
+            },
+          },
+          { sequence: 2 },
+        ),
+        makeEvent(
+          "thread.message-sent",
+          {
+            threadId: thread.id,
+            messageId: MessageId.make("assistant-1"),
+            role: "assistant",
+            text: "done",
+            turnId: TurnId.make("turn-1"),
+            streaming: false,
+            createdAt: "2026-02-27T00:00:03.000Z",
+            updatedAt: "2026-02-27T00:00:03.000Z",
+          },
+          { sequence: 3 },
+        ),
+      ],
+      localEnvironmentId,
+    );
+
+    // activeTurnId is turn-2 but the streaming:false message is for turn-1;
+    // do not reconcile — the server's session-set is still authoritative.
+    expect(threadsOf(next)[0]?.session?.status).toBe("running");
+    expect(threadsOf(next)[0]?.session?.activeTurnId).toBe(TurnId.make("turn-2"));
+  });
+
+  it("does not reconcile session when the final message is for an interrupted turn", () => {
+    const thread = makeThread();
+    const state = makeState(thread);
+
+    const next = applyOrchestrationEvents(
+      state,
+      [
+        makeEvent(
+          "thread.session-set",
+          {
+            threadId: thread.id,
+            session: {
+              threadId: thread.id,
+              status: "running",
+              providerName: "codex",
+              runtimeMode: "full-access",
+              activeTurnId: TurnId.make("turn-1"),
+              lastError: null,
+              updatedAt: "2026-02-27T00:00:02.000Z",
+            },
+          },
+          { sequence: 2 },
+        ),
+        makeEvent(
+          "thread.turn-interrupt-requested",
+          {
+            threadId: thread.id,
+            turnId: TurnId.make("turn-1"),
+            createdAt: "2026-02-27T00:00:02.500Z",
+          },
+          { sequence: 3 },
+        ),
+        makeEvent(
+          "thread.message-sent",
+          {
+            threadId: thread.id,
+            messageId: MessageId.make("assistant-1"),
+            role: "assistant",
+            text: "partial",
+            turnId: TurnId.make("turn-1"),
+            streaming: false,
+            createdAt: "2026-02-27T00:00:03.000Z",
+            updatedAt: "2026-02-27T00:00:03.000Z",
+          },
+          { sequence: 4 },
+        ),
+      ],
+      localEnvironmentId,
+    );
+
+    // turn-interrupt-requested moved latestTurn to "interrupted"; the
+    // final message-sent keeps it interrupted and must NOT flip
+    // session.status to "ready" — only a cleanly completed turn triggers
+    // the optimistic reconcile.
+    expect(threadsOf(next)[0]?.latestTurn?.state).toBe("interrupted");
+    expect(threadsOf(next)[0]?.session?.status).toBe("running");
+    expect(threadsOf(next)[0]?.session?.activeTurnId).toBe(TurnId.make("turn-1"));
+  });
+
   it("does not regress latestTurn when an older turn diff completes late", () => {
     const state = makeState(
       makeThread({
diff --git a/apps/web/src/store.ts b/apps/web/src/store.ts
index 3b1976bf9a..e9867ca0f0 100644
--- a/apps/web/src/store.ts
+++ b/apps/web/src/store.ts
@@ -1432,8 +1432,36 @@ function applyEnvironmentOrchestrationEvent(
                 assistantMessageId: event.payload.messageId,
               })
             : thread.latestTurn;
+        // Optimistically reconcile session state when the final assistant
+        // message for the active turn arrives.  The server emits a separate
+        // `thread.session-set` event on `turn.completed` which flips
+        // session.status → "ready" and clears activeTurnId, but that event
+        // can arrive after the final `thread.message-sent`.  In that gap
+        // the stop button stays active and clicking it dispatches a dead
+        // interrupt command (no active turn) that the server no-ops.  Flip
+        // the status locally here so the button disappears immediately; the
+        // later session-set is idempotent over this change.
+        const shouldReconcileSession =
+          event.payload.role === "assistant" &&
+          event.payload.streaming === false &&
+          event.payload.turnId !== null &&
+          thread.session !== null &&
+          thread.session.orchestrationStatus === "running" &&
+          thread.session.activeTurnId === event.payload.turnId &&
+          latestTurn?.state === "completed";
+        const nextSession: Thread["session"] =
+          shouldReconcileSession && thread.session !== null
+            ? {
+                ...thread.session,
+                status: "ready",
+                orchestrationStatus: "ready",
+                activeTurnId: undefined,
+                updatedAt: event.occurredAt,
+              }
+            : thread.session;
         return {
           ...thread,
+          session: nextSession,
           messages: cappedMessages,
           turnDiffSummaries,
           latestTurn,

From 76a3495f6d59e65ec763ce447fd4321398451501 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Tue, 21 Apr 2026 22:09:35 +0200
Subject: [PATCH 12/16] fix: bot review follow-ups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address Cursor Bugbot + Macroscope findings on #2273:

- apps/server/src/cost/Reducer.ts: drop the no-op ternaries in
  sanitizePersistedFile (`r.version === 1 ? 1 : 1` and
  `r.kind === expectedKind ? expectedKind : expectedKind`).  Both
  branches of each ternary were identical, so the stored value was
  ignored and the expected value was always used — which is actually
  the intended sanitize-on-mismatch behaviour.  Use the expected
  values directly and add a comment explaining the intent.
  (Macroscope, Reducer.ts:325-326.)

- apps/web/src/lib/costQuery.ts: stop duplicating `formatUsd` and
  instead re-export it from `@t3tools/shared/pricing` (the shared
  package was already a workspace dep and owns computeTurnCost next
  to the formatter).  The re-export is kept so that CostMeter and
  any future consumer can continue to import from `~/lib/costQuery`,
  the single cost-UI utility module.  (Cursor, duplicated-function.)

- apps/web/src/lib/costQuery.ts: remove the dead
  `useInvalidateCostSummary` hook.  The ChatComposer calls
  `invalidateCostSummary` directly with its own `useQueryClient`, so
  the hook wrapper was unused surface area.  (Cursor, dead-code.)

Verified: web typecheck clean, web tests 908/908 pass, server cost
tests 19/19 pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/server/src/cost/Reducer.ts | 10 ++++++----
 apps/web/src/lib/costQuery.ts   | 31 +++++++------------------------
 2 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/apps/server/src/cost/Reducer.ts b/apps/server/src/cost/Reducer.ts
index bb9d4c7cd2..a9ac9e27b2 100644
--- a/apps/server/src/cost/Reducer.ts
+++ b/apps/server/src/cost/Reducer.ts
@@ -322,14 +322,16 @@ export function sanitizePersistedFile(
     };
   }
   const r = raw as Record<string, unknown>;
-  const version = r.version === 1 ? 1 : 1;
-  const kind = r.kind === expectedKind ? expectedKind : expectedKind;
+  // version and kind are forced to the expected values — any drift from
+  // what the caller asked for is treated as malformed and silently
+  // sanitized (the surrounding contract only supports version 1 and the
+  // requested kind).
   const key = typeof r.key === "string" && r.key.length > 0 ? r.key : expectedKey;
   const bucket = sanitizeBucket(r.bucket, now);
   const lastCumulative = sanitizeLastCumulative(r.lastCumulative);
   return {
-    version,
-    kind,
+    version: 1,
+    kind: expectedKind,
     key,
     bucket,
     ...(lastCumulative && expectedKind === "session" ? { lastCumulative } : {}),
diff --git a/apps/web/src/lib/costQuery.ts b/apps/web/src/lib/costQuery.ts
index 9c301363f6..a8a09af89d 100644
--- a/apps/web/src/lib/costQuery.ts
+++ b/apps/web/src/lib/costQuery.ts
@@ -10,14 +10,16 @@
  * `context-window.updated` activity so the ring updates in near-realtime.
  */
 import type { EnvironmentId, ThreadId } from "@t3tools/contracts";
-import {
-  queryOptions,
-  type QueryClient,
-  useQueryClient,
-} from "@tanstack/react-query";
+import { queryOptions, type QueryClient } from "@tanstack/react-query";
 
 import { resolveEnvironmentHttpUrl } from "../environments/runtime";
 
+// Re-export the shared USD formatter so `~/lib/costQuery` stays the single
+// import surface for cost UI consumers (see CostMeter.tsx) while the
+// actual implementation lives in @t3tools/shared/pricing alongside
+// computeTurnCost.
+export { formatUsd } from "@t3tools/shared/pricing";
+
 const COST_SUMMARY_STALE_TIME_MS = 5_000;
 
 /** Bucket shape mirrors apps/server/src/cost/types.ts. Kept duplicated so
@@ -181,22 +183,3 @@ export function invalidateCostSummary(
   return queryClient.invalidateQueries({ queryKey: costQueryKeys.all });
 }
 
-/** Convenience hook returning the invalidator for consumers outside React Query's mutation flow. */
-export function useInvalidateCostSummary() {
-  const queryClient = useQueryClient();
-  return (input?: {
-    readonly environmentId?: EnvironmentId | null;
-    readonly threadId?: ThreadId | null;
-  }) => invalidateCostSummary(queryClient, input);
-}
-
-/** Format USD for UI; kept here so the component imports one utility module. */
-export function formatUsd(value: number | null | undefined): string {
-  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
-    return "$0.00";
-  }
-  if (value < 0.01) return "<$0.01";
-  if (value < 1) return `$${value.toFixed(3).replace(/0$/, "")}`;
-  if (value < 100) return `$${value.toFixed(2)}`;
-  return `$${Math.round(value).toLocaleString("en-US")}`;
-}

From b027c89ef871eacf5b74c3c62a21b89846190957 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Wed, 22 Apr 2026 00:38:02 +0200
Subject: [PATCH 13/16] fix(cost): drop mid-turn snapshots + rewrite usedTokens
 as input-side
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two independent bugs in the token-usage pipeline, both user-visible
and both rooted in the same conflation between the context-window
dimension (what fills the ring) and the billing dimension (what
lands in the cost ledger).

## 1. Cost ledger over-counting (CRITICAL)

Claude emits `thread.token-usage.updated` events from three places
per turn: every `task_progress`, every `task_notification`, and the
final `completeTurn`. The mid-turn snapshots carry per-API-call
breakdowns *without* `lastXxxTokens` fields, while the turn-complete
snapshot carries cumulative totals *with* `lastXxx` deltas.

`ProviderRuntimeIngestion` fed every one of these events into
`CostTracker.recordUsage`. For the mid-turn events, the Reducer's
`hasExplicitLast=false` branch subtracts the payload's cumulative
against the session's `lastCumulative` — but what gets stored in
`lastCumulative` between mid-turn events is one API call's
breakdown, not the session running total, so the resulting "deltas"
are arbitrary diffs between per-call snapshots. Net effect: cost
over/undercounted unpredictably every turn, and `turnCount`
inflated by 3–10× because every mid-turn snapshot with any positive
delta bumped it.

Fix: gate `recordUsage` in `ProviderRuntimeIngestion` on the
presence of any `lastXxxTokens` field. Mid-turn snapshots still
flow to the `context-window.updated` activity for the ring, they
just skip the ledger. Codex only emits one snapshot per turn (and
always with `lastXxx`) so it's unaffected.

While here, normalise the model slug (`resolveModelSlugForProvider`)
before passing it to the ledger so aliased/canonical variants
collapse to a single `byModel` key.

## 2. Context-window ring over-reporting

Both adapters set `usedTokens = totalTokens`, which for the cost
dimension meant *every* billed token including outputs. But the
ring consumes `usedTokens / maxTokens`, and output tokens are
generated *out* of the model — they don't live in the prompt
window, so including them inflated the ring (especially on long-
output turns). Reasoning tokens have the same property (ephemeral,
not persisted into next-turn context).

Fix: redefine `usedTokens` as the input-side total only
(`input + cache-read + cache-creation`), in both
`normalizeClaudeTokenUsage`/`buildClaudeTurnCompleteUsage` and
`normalizeCodexTokenUsage` (`last.inputTokens +
last.cachedInputTokens` — Codex V2 has no cache-creation tier).
`totalProcessedTokens` keeps the original semantic ("tokens
processed so far", billing-side). Added a contract-level JSDoc on
`ThreadTokenUsageSnapshot` that spells out the two dimensions and
the `lastXxxTokens` "turn-final" signal.

Also: the client's `deriveLatestContextWindowSnapshot` was silently
dropping `cacheCreationInputTokens` / `lastCacheCreationInputTokens`
from the `ContextWindowSnapshot` shape even though the payload
carries them. Wire them through.

## 3. Migration

Existing ledger files are polluted and can't be repaired in-place.
Added a `.schema-v2` sentinel in the usage dir: `CostTrackerLive`
boots, sees no sentinel, wipes only the known ledger files
(`session_*.json`, `YYYY-MM.json`, `alltime.json`) — any stray
files are left alone — writes the sentinel, and subsequent boots
skip. Bumping `LEDGER_SCHEMA_VERSION` is the single line needed
for any future reducer-incompatible change.

## Tests

- Reworked Claude/Codex adapter assertions for the new input-side
  `usedTokens` semantic (24542 → 23863 for the Claude cumulative
  case, 126 → 120 for Codex, etc.); explanatory comments added.
- New ProviderRuntimeIngestion test: mid-turn snapshot (no
  `lastXxx`) projects into the activity stream but does NOT bump
  the ledger; turn-final snapshot records exactly one turn.
- New CostTrackerLive tests: first boot wipes pre-v2 ledger files
  while a stray non-ledger file (`notes.txt`) survives; subsequent
  boot with sentinel present leaves ledger files intact.
- Existing ingestion tests retargeted at a temp-dir base so the
  first-boot wipe can't touch the developer's real
  `<cwd>/userdata/usage/` directory.

All 203 server tests pass in the changed files; 908 web tests
pass; 126 shared tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../src/cost/Layers/CostTracker.test.ts       | 92 +++++++++++++++++++
 apps/server/src/cost/Layers/CostTracker.ts    | 56 +++++++++++
 .../Layers/ProviderRuntimeIngestion.test.ts   | 78 +++++++++++++++-
 .../Layers/ProviderRuntimeIngestion.ts        | 46 +++++++---
 .../src/provider/Layers/ClaudeAdapter.test.ts | 11 ++-
 .../src/provider/Layers/ClaudeAdapter.ts      | 42 +++++++--
 .../Layers/ClaudeAdapter.usage.test.ts        | 14 ++-
 .../src/provider/Layers/CodexAdapter.test.ts  |  8 +-
 .../src/provider/Layers/CodexAdapter.ts       | 21 +++--
 apps/web/src/lib/contextWindow.ts             |  2 +
 packages/contracts/src/providerRuntime.ts     | 28 ++++++
 11 files changed, 360 insertions(+), 38 deletions(-)

diff --git a/apps/server/src/cost/Layers/CostTracker.test.ts b/apps/server/src/cost/Layers/CostTracker.test.ts
index 1a2acb4325..41392ff5f6 100644
--- a/apps/server/src/cost/Layers/CostTracker.test.ts
+++ b/apps/server/src/cost/Layers/CostTracker.test.ts
@@ -1,3 +1,7 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
 import * as NodeServices from "@effect/platform-node/NodeServices";
 import { assert, it } from "@effect/vitest";
 import { Effect, Fiber, FileSystem, Layer, Path, Stream } from "effect";
@@ -14,6 +18,15 @@ const makeLayer = () => {
   return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer);
 };
 
+/**
+ * Build a layer pointing at a pre-existing temp dir so the migration has
+ * ledger files to wipe on boot. Caller is responsible for `rmSync` cleanup.
+ */
+const makeLayerAt = (baseDir: string) => {
+  const configLayer = ServerConfig.layerTest(process.cwd(), baseDir);
+  return Layer.mergeAll(CostTrackerLive.pipe(Layer.provide(configLayer)), configLayer);
+};
+
 it.layer(NodeServices.layer)("CostTrackerLive", (it) => {
   it.effect("records a turn and persists session/month/alltime files", () =>
     Effect.gen(function* () {
@@ -143,4 +156,83 @@ it.layer(NodeServices.layer)("CostTrackerLive", (it) => {
       assert.equal(summary.monthKey, "2019-12");
     }).pipe(Effect.provide(makeLayer())),
   );
+
+  it.effect("wipes pre-v2 ledger files on first boot and writes a schema sentinel", () => {
+    // Seed a usage dir that looks like a pre-migration install: a pair of
+    // session files, one month bucket, one all-time file, and an
+    // unrelated stray file we must leave alone.
+    const baseDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-cost-wipe-"));
+    const usageDir = path.join(baseDir, "userdata", "usage");
+    fs.mkdirSync(usageDir, { recursive: true });
+    const seededLedgerFiles = [
+      "session_thread-a.json",
+      "session_thread-b.json",
+      "2026-04.json",
+      "alltime.json",
+    ];
+    for (const name of seededLedgerFiles) {
+      fs.writeFileSync(
+        path.join(usageDir, name),
+        JSON.stringify({ version: 1, bucket: { totalUsd: 42, turnCount: 99 } }),
+      );
+    }
+    // Stray non-ledger file that must survive the wipe.
+    const strayPath = path.join(usageDir, "notes.txt");
+    fs.writeFileSync(strayPath, "unrelated");
+
+    return Effect.gen(function* () {
+      // Tracker service is resolved here so the layer effect — and thus
+      // the migration — runs before we assert.
+      yield* CostTrackerService;
+
+      for (const name of seededLedgerFiles) {
+        assert.equal(
+          fs.existsSync(path.join(usageDir, name)),
+          false,
+          `expected ${name} to be wiped`,
+        );
+      }
+      assert.equal(fs.existsSync(strayPath), true, "expected stray file to survive");
+
+      const sentinelPath = path.join(usageDir, ".schema-v2");
+      assert.equal(fs.existsSync(sentinelPath), true);
+      const sentinelContents = JSON.parse(fs.readFileSync(sentinelPath, "utf8")) as {
+        readonly version: number;
+        readonly wipedFileCount: number;
+      };
+      assert.equal(sentinelContents.version, 2);
+      assert.equal(sentinelContents.wipedFileCount, seededLedgerFiles.length);
+    }).pipe(
+      Effect.provide(makeLayerAt(baseDir)),
+      Effect.ensuring(
+        Effect.sync(() => fs.rmSync(baseDir, { recursive: true, force: true })),
+      ),
+    );
+  });
+
+  it.effect("skips the wipe on subsequent boots when the sentinel is present", () => {
+    const baseDir = fs.mkdtempSync(path.join(os.tmpdir(), "t3-cost-wipe-idempotent-"));
+    const usageDir = path.join(baseDir, "userdata", "usage");
+    fs.mkdirSync(usageDir, { recursive: true });
+    // Pre-existing sentinel → migration is a no-op; ledger files survive.
+    fs.writeFileSync(
+      path.join(usageDir, ".schema-v2"),
+      JSON.stringify({ version: 2, migratedAt: "2026-04-01T00:00:00.000Z" }),
+    );
+    const preservedPath = path.join(usageDir, "session_thread-keep.json");
+    fs.writeFileSync(
+      preservedPath,
+      JSON.stringify({ version: 1, bucket: { totalUsd: 1, turnCount: 1 } }),
+    );
+
+    return Effect.gen(function* () {
+      yield* CostTrackerService;
+      assert.equal(fs.existsSync(preservedPath), true);
+    }).pipe(
+      Effect.provide(makeLayerAt(baseDir)),
+      Effect.ensuring(
+        Effect.sync(() => fs.rmSync(baseDir, { recursive: true, force: true })),
+      ),
+    );
+  });
 });
diff --git a/apps/server/src/cost/Layers/CostTracker.ts b/apps/server/src/cost/Layers/CostTracker.ts
index 5d7c9bb0a2..3d6c1282c9 100644
--- a/apps/server/src/cost/Layers/CostTracker.ts
+++ b/apps/server/src/cost/Layers/CostTracker.ts
@@ -51,6 +51,20 @@ function monthFilename(monthKey: string): string {
 
 const ALLTIME_FILENAME = "alltime.json";
 
+/**
+ * Ledger schema version. Bump when the on-disk format changes in a way that
+ * makes older files incompatible with the new reducer — a sentinel file
+ * `.schema-v<N>` is written to `usageDir` and, if missing on startup, the
+ * ledger is wiped (only the JSON ledger files; untracked files in the
+ * directory are left alone). Rationale for v2: prior versions fed mid-turn
+ * `thread.token-usage.updated` snapshots into the cost reducer, which
+ * double-counted token totals and inflated `turnCount` by N per real turn.
+ * Those buckets can't be retroactively repaired, so we reset on upgrade.
+ */
+const LEDGER_SCHEMA_VERSION = 2 as const;
+const LEDGER_SCHEMA_SENTINEL = `.schema-v${LEDGER_SCHEMA_VERSION}`;
+const LEDGER_FILE_PATTERN = /^(session_.+|\d{4}-\d{2}|alltime)\.json$/;
+
 const make = Effect.gen(function* () {
   const { usageDir } = yield* ServerConfig;
   const fs = yield* FileSystem.FileSystem;
@@ -62,6 +76,48 @@ const make = Effect.gen(function* () {
   // Ensure the directory exists even if config bootstrap skipped it.
   yield* fs.makeDirectory(usageDir, { recursive: true }).pipe(Effect.ignore({ log: true }));
 
+  // Migration: wipe ledger files polluted by the pre-v2 reducer.  Idempotent
+  // via the `.schema-vN` sentinel — once present, subsequent boots skip.
+  yield* Effect.gen(function* () {
+    const sentinelPath = path.join(usageDir, LEDGER_SCHEMA_SENTINEL);
+    const sentinelExists = yield* fs
+      .exists(sentinelPath)
+      .pipe(Effect.orElseSucceed(() => false));
+    if (sentinelExists) return;
+
+    const entries = yield* fs
+      .readDirectory(usageDir)
+      .pipe(Effect.orElseSucceed(() => [] as Array<string>));
+    const ledgerFiles = entries.filter((entry) => LEDGER_FILE_PATTERN.test(entry));
+    if (ledgerFiles.length > 0) {
+      yield* Effect.logInfo(
+        `CostTracker: migrating usage ledger to schema v${LEDGER_SCHEMA_VERSION}; wiping ${ledgerFiles.length} pre-migration file(s)`,
+      );
+      yield* Effect.forEach(
+        ledgerFiles,
+        (entry) =>
+          fs
+            .remove(path.join(usageDir, entry), { force: true })
+            .pipe(Effect.ignoreCause({ log: true })),
+        { concurrency: "unbounded", discard: true },
+      );
+    }
+    yield* fs
+      .writeFileString(
+        sentinelPath,
+        `${JSON.stringify(
+          {
+            version: LEDGER_SCHEMA_VERSION,
+            migratedAt: new Date().toISOString(),
+            wipedFileCount: ledgerFiles.length,
+          },
+          null,
+          2,
+        )}\n`,
+      )
+      .pipe(Effect.ignoreCause({ log: true }));
+  }).pipe(Effect.ignoreCause({ log: true }));
+
   const filePathFor = (kind: PersistedCostFileKind, key: string): string => {
     switch (kind) {
       case "session":
diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
index f334ad5ff1..361875200e 100644
--- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
+++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts
@@ -34,6 +34,7 @@ import { OrchestrationEngineLive } from "./OrchestrationEngine.ts";
 import { OrchestrationProjectionPipelineLive } from "./ProjectionPipeline.ts";
 import { OrchestrationProjectionSnapshotQueryLive } from "./ProjectionSnapshotQuery.ts";
 import { CostTrackerLive } from "../../cost/Layers/CostTracker.ts";
+import { CostTrackerService } from "../../cost/Services/CostTracker.ts";
 import { ProviderRuntimeIngestionLive } from "./ProviderRuntimeIngestion.ts";
 import {
   OrchestrationEngineService,
@@ -171,7 +172,7 @@ type ProviderRuntimeTestCheckpoint = ProviderRuntimeTestThread["checkpoints"][nu
 
 describe("ProviderRuntimeIngestion", () => {
   let runtime: ManagedRuntime.ManagedRuntime<
-    OrchestrationEngineService | ProviderRuntimeIngestionService,
+    OrchestrationEngineService | ProviderRuntimeIngestionService | CostTrackerService,
     unknown
   > | null = null;
   let scope: Scope.Closeable | null = null;
@@ -209,7 +210,11 @@ describe("ProviderRuntimeIngestion", () => {
       Layer.provide(RepositoryIdentityResolverLive),
       Layer.provide(SqlitePersistenceMemory),
     );
-    const configLayer = ServerConfig.layerTest(process.cwd(), process.cwd());
+    // Use a scoped temp dir for the test base — avoids writing into the
+    // developer's real `<cwd>/userdata/usage/` when the ingestion harness
+    // runs `CostTrackerLive` (which now performs a schema-sentinel wipe
+    // on boot if no sentinel is present).
+    const configLayer = ServerConfig.layerTest(process.cwd(), { prefix: "t3-ingestion-" });
     const layer = ProviderRuntimeIngestionLive.pipe(
       Layer.provideMerge(orchestrationLayer),
       Layer.provideMerge(SqlitePersistenceMemory),
@@ -222,6 +227,7 @@ describe("ProviderRuntimeIngestion", () => {
     runtime = ManagedRuntime.make(layer);
     const engine = await runtime.runPromise(Effect.service(OrchestrationEngineService));
     const ingestion = await runtime.runPromise(Effect.service(ProviderRuntimeIngestionService));
+    const costTracker = await runtime.runPromise(Effect.service(CostTrackerService));
     scope = await Effect.runPromise(Scope.make("sequential"));
     await Effect.runPromise(ingestion.start().pipe(Scope.provide(scope)));
     const drain = () => Effect.runPromise(ingestion.drain);
@@ -290,6 +296,7 @@ describe("ProviderRuntimeIngestion", () => {
       emit: provider.emit,
       setProviderSession: provider.setSession,
       drain,
+      costTracker,
     };
   }
 
@@ -2659,6 +2666,73 @@ describe("ProviderRuntimeIngestion", () => {
     });
   });
 
+  it("routes only turn-final token-usage events to the cost ledger", async () => {
+    const harness = await createHarness();
+    const now = new Date().toISOString();
+
+    // Mid-turn snapshot (what Claude emits from task_progress /
+    // task_notification): cumulative breakdown present but NO `lastXxx`
+    // turn-delta fields. This should flow to the activity stream for the
+    // context-window ring but must not reach the cost ledger — the
+    // Reducer's cumulative-subtraction fallback would otherwise treat
+    // each mid-turn snapshot as a separate turn and over-count.
+    harness.emit({
+      type: "thread.token-usage.updated",
+      eventId: asEventId("evt-token-usage-mid-turn"),
+      provider: "claudeAgent",
+      createdAt: now,
+      threadId: asThreadId("thread-1"),
+      payload: {
+        usage: {
+          usedTokens: 1_000,
+          inputTokens: 1_000,
+          outputTokens: 200,
+        },
+      },
+    });
+
+    await waitForThread(harness.engine, (entry) =>
+      entry.activities.some(
+        (activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
+      ),
+    );
+    await harness.drain();
+
+    const summaryAfterMidTurn = await runtime!.runPromise(
+      harness.costTracker.getSummary({ threadId: asThreadId("thread-1") }),
+    );
+    expect(summaryAfterMidTurn.thread?.turnCount ?? 0).toBe(0);
+    expect(summaryAfterMidTurn.month.turnCount).toBe(0);
+
+    // Turn-final snapshot: `lastXxx` deltas present → cost ledger records
+    // exactly one turn.
+    harness.emit({
+      type: "thread.token-usage.updated",
+      eventId: asEventId("evt-token-usage-turn-final"),
+      provider: "claudeAgent",
+      createdAt: new Date().toISOString(),
+      threadId: asThreadId("thread-1"),
+      turnId: asTurnId("turn-1"),
+      payload: {
+        usage: {
+          usedTokens: 1_000,
+          inputTokens: 1_000,
+          outputTokens: 200,
+          lastInputTokens: 1_000,
+          lastOutputTokens: 200,
+        },
+      },
+    });
+
+    await harness.drain();
+
+    const summaryAfterTurnFinal = await runtime!.runPromise(
+      harness.costTracker.getSummary({ threadId: asThreadId("thread-1") }),
+    );
+    expect(summaryAfterTurnFinal.thread?.turnCount).toBe(1);
+    expect(summaryAfterTurnFinal.month.turnCount).toBe(1);
+  });
+
   it("projects Claude usage snapshots with context window into normalized thread activities", async () => {
     const harness = await createHarness();
     const now = new Date().toISOString();
diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
index 053f04669f..d3fdde10bb 100644
--- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
+++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
@@ -15,6 +15,7 @@ import {
 } from "@t3tools/contracts";
 import { Cache, Cause, Duration, Effect, Layer, Option, Stream } from "effect";
 import { makeDrainableWorker } from "@t3tools/shared/DrainableWorker";
+import { resolveModelSlugForProvider } from "@t3tools/shared/model";
 
 import { CostTrackerService } from "../../cost/Services/CostTracker.ts";
 import { ProviderService } from "../../provider/Services/ProviderService.ts";
@@ -1525,18 +1526,41 @@ const make = Effect.gen(function* () {
       // Side-channel: feed token usage into the CostTracker so the JSON
       // ledger stays in sync with the activity stream. Failures never block
       // ingestion — we log and drop.
+      //
+      // Only *turn-final* usage events reach the ledger. Providers (notably
+      // Claude) emit mid-turn snapshots from each `task_progress` /
+      // `task_notification` that carry per-API-call breakdowns *without*
+      // `lastXxxTokens` fields; feeding those through the Reducer's
+      // cumulative-subtraction fallback would double-count tokens and
+      // inflate `turnCount` by N per real turn. The presence of any
+      // `lastXxxTokens` field is the signal that this event represents the
+      // end of a turn with meaningful deltas — mid-turn snapshots still
+      // flow through the activity stream for the context-window ring,
+      // they just skip the cost ledger.
       if (event.type === "thread.token-usage.updated") {
-        const model = event.payload.model ?? thread.modelSelection.model;
-        const provider = thread.modelSelection.provider;
-        yield* costTracker
-          .recordUsage({
-            threadId: thread.id,
-            model,
-            provider,
-            usage: event.payload.usage,
-            at: new Date(event.createdAt),
-          })
-          .pipe(Effect.asVoid, Effect.ignoreCause({ log: true }));
+        const usage = event.payload.usage;
+        const hasTurnDeltas =
+          usage.lastInputTokens !== undefined ||
+          usage.lastCachedInputTokens !== undefined ||
+          usage.lastCacheCreationInputTokens !== undefined ||
+          usage.lastOutputTokens !== undefined ||
+          usage.lastReasoningOutputTokens !== undefined;
+        if (hasTurnDeltas) {
+          const provider = thread.modelSelection.provider;
+          const rawModel = event.payload.model ?? thread.modelSelection.model;
+          // Normalize to the canonical slug so the `byModel` ledger key is
+          // stable across turns that happen to report aliased slugs.
+          const model = resolveModelSlugForProvider(provider, rawModel);
+          yield* costTracker
+            .recordUsage({
+              threadId: thread.id,
+              model,
+              provider,
+              usage,
+              at: new Date(event.createdAt),
+            })
+            .pipe(Effect.asVoid, Effect.ignoreCause({ log: true }));
+        }
       }
     });
 
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
index 0846009a49..5e023c07a7 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
@@ -1596,12 +1596,15 @@ describe("ClaudeAdapterLive", () => {
       assert.equal(usageEvent?.type, "thread.token-usage.updated");
       if (usageEvent?.type === "thread.token-usage.updated") {
         // First turn: no prior cumulative, so last* deltas equal cumulative
-        // totals. Cache read/write split correctly; usedTokens = cumulative
-        // total (no task snapshot in this test).
+        // totals. Cache read/write split correctly. `usedTokens` +
+        // `lastUsedTokens` report input-side only (4 input + 21_144 cached
+        // + 2_715 cache-write = 23_863); output (679) is billed separately
+        // and tracked via `outputTokens` / `lastOutputTokens`.
+        // `totalProcessedTokens` keeps the full billed cumulative (24_542).
         assert.deepEqual(usageEvent.payload, {
           usage: {
-            usedTokens: 24542,
-            lastUsedTokens: 24542,
+            usedTokens: 23863,
+            lastUsedTokens: 23863,
             totalProcessedTokens: 24542,
             inputTokens: 4,
             cachedInputTokens: 21144,
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts
index 11e7a2569b..d65330a6a7 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts
@@ -367,9 +367,15 @@ export function parseClaudeUsageBreakdown(value: unknown): ClaudeUsageBreakdown
  * the current context window size. The four token classes are reported
  * separately so downstream cost math can apply the correct tier.
  *
- * No capping: `usedTokens` reflects `total_tokens` (or the derived sum) as
- * reported. Callers that want to clamp for ring display should do so in the
- * UI layer.
+ * `usedTokens` reports the **input-side** tokens only (context the model
+ * consumed: input + cache-read + cache-creation). Output + reasoning are
+ * billed separately and do not live in the prompt window; including them
+ * inflates the context ring for long-output turns. When the SDK reports
+ * only an opaque `total_tokens` (no class breakdown), we fall back to that
+ * number so the ring still shows *something* rather than zero.
+ *
+ * No capping: callers that want to clamp for ring display should do so in
+ * the UI layer.
  */
 function normalizeClaudeTokenUsage(
   value: unknown,
@@ -383,9 +389,12 @@ function normalizeClaudeTokenUsage(
     typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
       ? contextWindow
       : undefined;
+  const inputSideTokens =
+    breakdown.inputTokens + breakdown.cachedInputTokens + breakdown.cacheCreationInputTokens;
+  const usedTokens = inputSideTokens > 0 ? inputSideTokens : breakdown.totalTokens;
   return {
-    usedTokens: breakdown.totalTokens,
-    lastUsedTokens: breakdown.totalTokens,
+    usedTokens,
+    lastUsedTokens: usedTokens,
     ...(breakdown.inputTokens > 0 ? { inputTokens: breakdown.inputTokens } : {}),
     ...(breakdown.cachedInputTokens > 0 ? { cachedInputTokens: breakdown.cachedInputTokens } : {}),
     ...(breakdown.cacheCreationInputTokens > 0
@@ -463,15 +472,28 @@ export function buildClaudeTurnCompleteUsage(
     cumulative.cacheCreationInputTokens - prior.cacheCreationInputTokens,
   );
   const deltaOutput = Math.max(0, cumulative.outputTokens - prior.outputTokens);
-  const lastTotal = deltaInput + deltaCached + deltaCacheCreation + deltaOutput;
 
-  // usedTokens: prefer the task snapshot (current context size); fall back to
-  // the cumulative total when no task snapshot was recorded for this turn.
-  const usedTokens = input.taskSnapshot?.usedTokens ?? cumulative.totalTokens;
+  // Context-window semantics: `usedTokens` reports input-side only (tokens
+  // the model actually has in its prompt window). Output + reasoning are
+  // billed but not persisted into the context, so including them over-
+  // reports the ring for long-output turns.
+  const lastInputSideTokens = deltaInput + deltaCached + deltaCacheCreation;
+  const cumulativeInputSideTokens =
+    cumulative.inputTokens + cumulative.cachedInputTokens + cumulative.cacheCreationInputTokens;
+  const cumulativeUsedFallback =
+    cumulativeInputSideTokens > 0 ? cumulativeInputSideTokens : cumulative.totalTokens;
+  // Prefer the freshest task snapshot (captured per-API-call → matches the
+  // real current context size).  Fall back to the cumulative input-side.
+  const usedTokens = input.taskSnapshot?.usedTokens ?? cumulativeUsedFallback;
+  // `lastUsedTokens` is the turn-scoped counterpart of `usedTokens`.  When
+  // this turn actually consumed prompt tokens, use its input-side delta;
+  // otherwise fall back to `cumulativeUsedFallback` so we never report 0
+  // for a turn that still had billable activity.
+  const lastUsedTokens = lastInputSideTokens > 0 ? lastInputSideTokens : cumulativeUsedFallback;
 
   const snapshot: ThreadTokenUsageSnapshot = {
     usedTokens,
-    lastUsedTokens: lastTotal > 0 ? lastTotal : cumulative.totalTokens,
+    lastUsedTokens,
     totalProcessedTokens: cumulative.totalTokens,
     ...(cumulative.inputTokens > 0 ? { inputTokens: cumulative.inputTokens } : {}),
     ...(cumulative.cachedInputTokens > 0 ? { cachedInputTokens: cumulative.cachedInputTokens } : {}),
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
index c651512d3c..3015d2820b 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
@@ -67,8 +67,13 @@ describe("buildClaudeTurnCompleteUsage", () => {
     expect(snap.lastCachedInputTokens).toBe(5_000);
     expect(snap.lastCacheCreationInputTokens).toBe(2_000);
     expect(snap.lastOutputTokens).toBe(500);
-    expect(snap.lastUsedTokens).toBe(8_500);
-    expect(snap.usedTokens).toBe(8_500);
+    // usedTokens + lastUsedTokens are input-side only (1_000+5_000+2_000 =
+    // 8_000). Output is billed (`outputTokens`) but excluded from the
+    // context-window ring since it doesn't live in the prompt.
+    expect(snap.usedTokens).toBe(8_000);
+    expect(snap.lastUsedTokens).toBe(8_000);
+    // totalProcessedTokens keeps the full cumulative billed total for
+    // informational display ("tokens processed so far").
     expect(snap.totalProcessedTokens).toBe(8_500);
     expect(snap.maxTokens).toBe(200_000);
     expect(res.nextCumulative).toBeDefined();
@@ -107,7 +112,10 @@ describe("buildClaudeTurnCompleteUsage", () => {
     expect(s.lastCachedInputTokens).toBe(1_000);
     expect(s.lastCacheCreationInputTokens).toBe(300);
     expect(s.lastOutputTokens).toBe(200);
-    expect(s.lastUsedTokens).toBe(500 + 1_000 + 300 + 200);
+    // lastUsedTokens is input-side only (context consumed this turn):
+    // 500 + 1_000 + 300 = 1_800.  Output (200) is tracked separately in
+    // lastOutputTokens for billing but not in the context window total.
+    expect(s.lastUsedTokens).toBe(1_800);
   });
 
   it("does not cap usedTokens to maxTokens", () => {
diff --git a/apps/server/src/provider/Layers/CodexAdapter.test.ts b/apps/server/src/provider/Layers/CodexAdapter.test.ts
index 03d4155934..cfc95c19cf 100644
--- a/apps/server/src/provider/Layers/CodexAdapter.test.ts
+++ b/apps/server/src/provider/Layers/CodexAdapter.test.ts
@@ -883,15 +883,19 @@ lifecycleLayer("CodexAdapterLive lifecycle", (it) => {
         return;
       }
 
+      // `usedTokens` reports the input-side tokens only (120 input + 0
+      // cached = 120) so the context-window ring reflects what's actually
+      // in the prompt. Output + reasoning are tracked separately for
+      // billing via `lastOutputTokens` / `lastReasoningOutputTokens`.
       assert.deepEqual(firstEvent.value.payload.usage, {
-        usedTokens: 126,
+        usedTokens: 120,
         totalProcessedTokens: 11_839,
         maxTokens: 258_400,
         inputTokens: 120,
         cachedInputTokens: 0,
         outputTokens: 6,
         reasoningOutputTokens: 0,
-        lastUsedTokens: 126,
+        lastUsedTokens: 120,
         lastInputTokens: 120,
         lastCachedInputTokens: 0,
         lastOutputTokens: 6,
diff --git a/apps/server/src/provider/Layers/CodexAdapter.ts b/apps/server/src/provider/Layers/CodexAdapter.ts
index 0111cd013c..fe9992c53f 100644
--- a/apps/server/src/provider/Layers/CodexAdapter.ts
+++ b/apps/server/src/provider/Layers/CodexAdapter.ts
@@ -137,17 +137,26 @@ function normalizeCodexTokenUsage(
   usage: EffectCodexSchema.V2ThreadTokenUsageUpdatedNotification["tokenUsage"],
 ): ThreadTokenUsageSnapshot | undefined {
   const totalProcessedTokens = usage.total.totalTokens;
-  const usedTokens = usage.last.totalTokens;
-  if (usedTokens === undefined || usedTokens <= 0) {
-    return undefined;
-  }
-
   const maxTokens = usage.modelContextWindow ?? undefined;
   const inputTokens = usage.last.inputTokens;
   const cachedInputTokens = usage.last.cachedInputTokens;
   const outputTokens = usage.last.outputTokens;
   const reasoningOutputTokens = usage.last.reasoningOutputTokens;
 
+  // Context-window semantics: `usedTokens` is input-side only (tokens in
+  // the prompt window); output + reasoning are billed but do not persist
+  // into context, so including them over-reports the ring.  Codex re-sends
+  // the full conversation each turn, so `last.inputTokens +
+  // last.cachedInputTokens` approximates current context size; fall back
+  // to raw `last.totalTokens` only when that sum is zero (defensive).
+  // NOTE(review): if Codex reports cached tokens as a subset of
+  // `inputTokens` (OpenAI convention), this sum double-counts — confirm.
+  const inputSideTokens = inputTokens + cachedInputTokens;
+  const usedTokens = inputSideTokens > 0 ? inputSideTokens : usage.last.totalTokens;
+  if (usedTokens <= 0) {
+    return undefined;
+  }
+
   return {
     usedTokens,
     ...(totalProcessedTokens !== undefined && totalProcessedTokens > usedTokens
@@ -158,7 +167,7 @@ function normalizeCodexTokenUsage(
     ...(cachedInputTokens !== undefined ? { cachedInputTokens } : {}),
     ...(outputTokens !== undefined ? { outputTokens } : {}),
     ...(reasoningOutputTokens !== undefined ? { reasoningOutputTokens } : {}),
-    ...(usedTokens !== undefined ? { lastUsedTokens: usedTokens } : {}),
+    lastUsedTokens: usedTokens,
     ...(inputTokens !== undefined ? { lastInputTokens: inputTokens } : {}),
     ...(cachedInputTokens !== undefined ? { lastCachedInputTokens: cachedInputTokens } : {}),
     ...(outputTokens !== undefined ? { lastOutputTokens: outputTokens } : {}),
diff --git a/apps/web/src/lib/contextWindow.ts b/apps/web/src/lib/contextWindow.ts
index f668135a13..cf03acae84 100644
--- a/apps/web/src/lib/contextWindow.ts
+++ b/apps/web/src/lib/contextWindow.ts
@@ -56,11 +56,13 @@ export function deriveLatestContextWindowSnapshot(
       remainingPercentage,
       inputTokens: asFiniteNumber(payload?.inputTokens),
       cachedInputTokens: asFiniteNumber(payload?.cachedInputTokens),
+      cacheCreationInputTokens: asFiniteNumber(payload?.cacheCreationInputTokens),
       outputTokens: asFiniteNumber(payload?.outputTokens),
       reasoningOutputTokens: asFiniteNumber(payload?.reasoningOutputTokens),
       lastUsedTokens: asFiniteNumber(payload?.lastUsedTokens),
       lastInputTokens: asFiniteNumber(payload?.lastInputTokens),
       lastCachedInputTokens: asFiniteNumber(payload?.lastCachedInputTokens),
+      lastCacheCreationInputTokens: asFiniteNumber(payload?.lastCacheCreationInputTokens),
       lastOutputTokens: asFiniteNumber(payload?.lastOutputTokens),
       lastReasoningOutputTokens: asFiniteNumber(payload?.lastReasoningOutputTokens),
       toolUses: asFiniteNumber(payload?.toolUses),
diff --git a/packages/contracts/src/providerRuntime.ts b/packages/contracts/src/providerRuntime.ts
index e732451a7b..6a4bf7b7d5 100644
--- a/packages/contracts/src/providerRuntime.ts
+++ b/packages/contracts/src/providerRuntime.ts
@@ -298,6 +298,34 @@ const ThreadMetadataUpdatedPayload = Schema.Struct({
 });
 export type ThreadMetadataUpdatedPayload = typeof ThreadMetadataUpdatedPayload.Type;
 
+/**
+ * Snapshot of how many tokens the model has consumed on a thread.
+ *
+ * Two distinct dimensions are reported here — don't confuse them:
+ *
+ * 1. **Context-window dimension** (for the ring display): how much of the
+ *    model's prompt window is currently occupied.
+ *    - `usedTokens` = input-side tokens **only** (input + cache-read +
+ *      cache-creation). Output and reasoning tokens are generated *out* of
+ *      the model and do not live in the prompt window, so they are
+ *      excluded — including them inflates the ring for long-output turns.
+ *    - `lastUsedTokens` = the same measure scoped to the most recent turn.
+ *    - `maxTokens` = the model's declared context-window size.
+ *
+ * 2. **Billing dimension** (for the cost ledger): how many tokens were
+ *    billed for this turn, class-by-class, so downstream pricing can apply
+ *    the correct tier.  These are *not* clamped to the context window —
+ *    per-turn output tokens are separate from what persists into context.
+ *    - `inputTokens` / `cachedInputTokens` / `cacheCreationInputTokens` /
+ *      `outputTokens` / `reasoningOutputTokens` — cumulative class totals.
+ *    - `lastXxxTokens` — the delta for the most recent turn.  The presence
+ *      of any `lastXxxTokens` field is the canonical signal that this
+ *      snapshot represents the end of a turn; mid-turn snapshots omit
+ *      them and flow only to the context-window activity (not the cost
+ *      ledger, see `ProviderRuntimeIngestion`).
+ *    - `totalProcessedTokens` — cumulative tokens billed across the
+ *      session (for display).
+ */
 export const ThreadTokenUsageSnapshot = Schema.Struct({
   usedTokens: NonNegativeInt,
   totalProcessedTokens: Schema.optional(NonNegativeInt),

From d46b444d417b2347c7e9023aecf90166e48c8a79 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Wed, 22 Apr 2026 01:11:10 +0200
Subject: [PATCH 14/16] fix(cost): use per-call input-side from
 SDKAssistantMessage for ring accuracy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The earlier switch to input-side `usedTokens` still showed inflated
values for Claude Opus (and any multi-call turn) because the two
signals we trusted are both unreliable sources of current context
size:

1. `result.usage` is **session-cumulative** across every API call on
   the thread, not just this turn. Summing its input-side classes
   grows linearly with turn count — exactly what users saw on Opus,
   which makes many API calls per turn.
2. `task_progress.usage` only carries an opaque SDK
   `total_tokens`; the Anthropic-native per-class breakdown
   (`input_tokens` / `cache_read_input_tokens` /
   `cache_creation_input_tokens`) is **not present** on
   `SDKTaskProgressMessage.usage`. Parsing it always falls through
   to `total_tokens`.

The only source that carries the *exact per-call prompt breakdown*
is `SDKAssistantMessage.message.usage` — that's `BetaUsage` from
the Anthropic API, refreshed on every assistant frame.

Fix:

- New `context.lastApiCallInputSideTokens` tracks `input_tokens +
  cache_read_input_tokens + cache_creation_input_tokens` captured
  from each `SDKAssistantMessage.message.usage`. Refreshed per
  frame, cleared after the turn-completion emission so the next
  turn starts clean.
- `handleAssistantMessage` also emits a
  `thread.token-usage.updated` event on each assistant frame with
  this input-side sum as `usedTokens`, so the mid-turn ring tracks
  real prompt size (not the SDK's opaque total).
- `buildClaudeTurnCompleteUsage` now takes an optional
  `lastApiCallInputSide` and uses it as the top-priority
  `usedTokens` source. Priority:
    1. `lastApiCallInputSide` — exact current context.
    2. `taskSnapshot.usedTokens` — SDK opaque (fallback).
    3. Per-turn *delta* input-side — last-ditch when neither
       above is present. The old session-cumulative input-side
       fallback has been removed; it inflated any multi-call turn.
       (If the delta is zero, `cumulative.totalTokens` is still used.)
- `lastUsedTokens` mirrors `usedTokens` when the per-turn input-side
  delta is zero, so we never fall back to the session-cumulative sum.

Tests:

- Updated the "preserves oversized result totals after task
  progress" test: `lastUsedTokens` is now `190_000` (mirrors
  `usedTokens`), not `535_000` (the removed cumulative fallback).
- New `prefers lastApiCallInputSide over the task snapshot for
  usedTokens`: when both are present, per-call wins.
- New `does NOT fall back to cumulative input-side for usedTokens`:
  with a real prior cumulative, fallback now returns the per-turn
  delta, not the session-wide sum.
- New adapter-level test verifying an assistant frame with
  Anthropic-native usage emits a `thread.token-usage.updated`
  event with `usedTokens = input + cache_read + cache_creation`.

Important: existing threads retain their pre-fix `usedTokens`
values in stored `context-window.updated` activities until the
next turn generates a new activity. The ring self-heals on the
first new turn; old turns in-history keep their stale numbers.

Verified: 206/206 targeted server tests pass (3 new), 908/908 web
tests pass, typecheck + oxlint clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../src/provider/Layers/ClaudeAdapter.test.ts |  81 ++++++++++-
 .../src/provider/Layers/ClaudeAdapter.ts      | 132 ++++++++++++++++--
 .../Layers/ClaudeAdapter.usage.test.ts        |  55 ++++++++
 3 files changed, 250 insertions(+), 18 deletions(-)

diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
index 5e023c07a7..bcaab95283 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts
@@ -1544,6 +1544,75 @@ describe("ClaudeAdapterLive", () => {
     );
   });
 
+  it.effect(
+    "emits thread token usage from assistant frame usage (per-call input side)",
+    () => {
+      const harness = makeHarness();
+      return Effect.gen(function* () {
+        const adapter = yield* ClaudeAdapter;
+
+        const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 8).pipe(
+          Stream.runCollect,
+          Effect.forkChild,
+        );
+
+        yield* adapter.startSession({
+          threadId: THREAD_ID,
+          provider: "claudeAgent",
+          runtimeMode: "full-access",
+        });
+
+        yield* adapter.sendTurn({
+          threadId: THREAD_ID,
+          input: "hello",
+          attachments: [],
+        });
+
+        // Assistant frame carrying Anthropic-native per-call usage.
+        // input (fresh) + cache_read + cache_creation = 80 + 45_000 + 2_000
+        // = 47_080 — the current context-window fill, unlike the
+        // session-cumulative `result.usage` which would grow across calls.
+        harness.query.emit({
+          type: "assistant",
+          session_id: "sdk-session-assistant-usage",
+          uuid: "assistant-usage-1",
+          parent_tool_use_id: null,
+          message: {
+            id: "assistant-message-usage",
+            content: [{ type: "text", text: "ok" }],
+            usage: {
+              input_tokens: 80,
+              cache_read_input_tokens: 45_000,
+              cache_creation_input_tokens: 2_000,
+              output_tokens: 12,
+            },
+          },
+        } as unknown as SDKMessage);
+
+        const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
+        const usageEvent = runtimeEvents.find(
+          (event) => event.type === "thread.token-usage.updated",
+        );
+        assert.equal(usageEvent?.type, "thread.token-usage.updated");
+        if (usageEvent?.type === "thread.token-usage.updated") {
+          assert.deepEqual(usageEvent.payload, {
+            usage: {
+              usedTokens: 47_080,
+              lastUsedTokens: 47_080,
+              inputTokens: 80,
+              cachedInputTokens: 45_000,
+              cacheCreationInputTokens: 2_000,
+              outputTokens: 12,
+            },
+          });
+        }
+      }).pipe(
+        Effect.provideService(Random.Random, makeDeterministicRandomService()),
+        Effect.provide(harness.layer),
+      );
+    },
+  );
+
   it.effect("emits Claude context window on result completion usage snapshots", () => {
     const harness = makeHarness();
     return Effect.gen(function* () {
@@ -1754,13 +1823,17 @@ describe("ClaudeAdapterLive", () => {
         const finalUsageEvent = usageEvents.at(-1);
         assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
         if (finalUsageEvent?.type === "thread.token-usage.updated") {
-          // Task snapshot drives usedTokens (real current-context); result
-          // cumulative drives totalProcessedTokens. lastUsedTokens reports
-          // the turn's total (cumulative since there's no prior turn).
+          // Task snapshot drives `usedTokens` (SDK-opaque current context,
+          // 190k), `totalProcessedTokens` carries the billing-side
+          // cumulative (535k). `lastUsedTokens` mirrors `usedTokens`
+          // because the per-turn delta input-side is zero (this test's
+          // result has only `total_tokens`, no breakdown) and we now
+          // refuse to fall back to the session-cumulative sum, which
+          // would inflate the ring over multi-call turns.
           assert.deepEqual(finalUsageEvent.payload, {
             usage: {
               usedTokens: 190000,
-              lastUsedTokens: 535000,
+              lastUsedTokens: 190000,
               totalProcessedTokens: 535000,
               maxTokens: 200000,
             },
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts
index d65330a6a7..3654645e86 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts
@@ -158,6 +158,17 @@ interface ClaudeSessionContext {
   turnState: ClaudeTurnState | undefined;
   lastKnownContextWindow: number | undefined;
   lastKnownTokenUsage: ThreadTokenUsageSnapshot | undefined;
+  /**
+   * Input-side token sum (input + cache-read + cache-creation) of the
+   * most recent Anthropic API call captured from `SDKAssistantMessage`.
+   * This is the authoritative current-context-size signal for the ring:
+   * unlike `result.usage` (session-cumulative) or `task_progress.usage`
+   * (SDK-opaque `total_tokens` only), each assistant frame carries the
+   * exact per-call prompt breakdown. Refreshed on every assistant frame;
+   * cleared after each turn's completion event so the next turn starts
+   * without stale carry-over.
+   */
+  lastApiCallInputSideTokens: number | undefined;
   /**
    * Cumulative per-class token counts emitted in the prior turn's
    * `result.usage`. Claude's SDK reports `result.usage` as a running total
@@ -425,6 +436,15 @@ export interface ClaudeTurnCompleteUsageInput {
   readonly taskSnapshot: ThreadTokenUsageSnapshot | undefined;
   readonly contextWindow?: number | undefined;
   readonly priorCumulative?: ClaudeUsageBreakdown | undefined;
+  /**
+   * Input-side token sum (input + cache-read + cache-creation) from the
+   * *last* Anthropic API call on this turn.  When available, this is the
+   * authoritative current-context-size signal for the ring — the
+   * cumulative `resultUsage` is a session-wide sum and over-reports
+   * multi-call turns, and the task-snapshot fallback only exposes an
+   * opaque SDK `total_tokens`.
+   */
+  readonly lastApiCallInputSide?: number | undefined;
 }
 
 export interface ClaudeTurnCompleteUsageResult {
@@ -478,18 +498,31 @@ export function buildClaudeTurnCompleteUsage(
   // billed but not persisted into the context, so including them over-
   // reports the ring for long-output turns.
   const lastInputSideTokens = deltaInput + deltaCached + deltaCacheCreation;
-  const cumulativeInputSideTokens =
-    cumulative.inputTokens + cumulative.cachedInputTokens + cumulative.cacheCreationInputTokens;
-  const cumulativeUsedFallback =
-    cumulativeInputSideTokens > 0 ? cumulativeInputSideTokens : cumulative.totalTokens;
-  // Prefer the freshest task snapshot (captured per-API-call → matches the
-  // real current context size).  Fall back to the cumulative input-side.
-  const usedTokens = input.taskSnapshot?.usedTokens ?? cumulativeUsedFallback;
-  // `lastUsedTokens` mirrors `usedTokens` at turn scope.  When this turn
-  // actually consumed prompt tokens, use its input-side delta; otherwise
-  // fall back to the cumulative read so we never report 0 for a turn that
-  // still had billable activity.
-  const lastUsedTokens = lastInputSideTokens > 0 ? lastInputSideTokens : cumulativeUsedFallback;
+  // `resultUsage` is a session-wide cumulative across every API call on
+  // the thread (not just this turn!), so summing its input-side classes
+  // inflates the ring proportionally to turn count.  The last-ditch
+  // fallback is therefore the per-turn *delta* input-side; only when that
+  // delta is zero does it degrade to the cumulative `totalTokens`.
+  const deltaUsedFallback =
+    lastInputSideTokens > 0 ? lastInputSideTokens : cumulative.totalTokens;
+  // Priority order for `usedTokens` (authoritative → approximate):
+  //   1. `lastApiCallInputSide`  — exact current context size, captured
+  //      from the last assistant frame's per-call `usage`.
+  //   2. `taskSnapshot.usedTokens` — SDK-opaque `total_tokens` from the
+  //      freshest `task_progress`/`task_notification` snapshot.  Better
+  //      than cumulative-input but not class-accurate.
+  //   3. `deltaUsedFallback` — per-turn delta input-side.  Last-ditch
+  //      when neither above is present (unusual — no assistant frames +
+  //      no task events means a no-content turn).
+  const usedTokens =
+    input.lastApiCallInputSide !== undefined && input.lastApiCallInputSide > 0
+      ? input.lastApiCallInputSide
+      : (input.taskSnapshot?.usedTokens ?? deltaUsedFallback);
+  // `lastUsedTokens` is the per-turn echo of `usedTokens`.  Prefer the
+  // per-turn input-side delta (tokens *added* this turn); fall back to
+  // the same resolved `usedTokens` so we never emit 0 for a turn that
+  // clearly had activity.
+  const lastUsedTokens = lastInputSideTokens > 0 ? lastInputSideTokens : usedTokens;
 
   const snapshot: ThreadTokenUsageSnapshot = {
     usedTokens,
@@ -1553,18 +1586,30 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
 
     // `result.usage` reports running totals across every API call in the
     // session. We combine it with the freshest per-call task snapshot (for
-    // `usedTokens` — the real current-context value) and with the prior
-    // turn's cumulative snapshot (to derive this turn's per-class deltas).
+    // the SDK's opaque `total_tokens`) and with the prior turn's cumulative
+    // snapshot (to derive this turn's per-class deltas). The preferred
+    // `usedTokens` source, however, is the input-side token sum of the
+    // *last Anthropic API call* in this turn — captured directly from the
+    // freshest `SDKAssistantMessage.usage` via `context.lastApiCallInputSideTokens`.
+    // That number is the only one that tracks current context size
+    // precisely for multi-call turns (Opus, extended thinking, heavy tool
+    // use), because `result.usage` is session-cumulative and the
+    // task-event `usage` only exposes an opaque `total_tokens`.
     const turnUsage = buildClaudeTurnCompleteUsage({
       resultUsage: result?.usage,
       taskSnapshot: context.lastKnownTokenUsage,
       contextWindow: resultContextWindow ?? context.lastKnownContextWindow,
       priorCumulative: context.lastTurnCumulativeUsage,
+      lastApiCallInputSide: context.lastApiCallInputSideTokens,
     });
     const usageSnapshot = turnUsage.snapshot;
     if (turnUsage.nextCumulative !== undefined) {
       context.lastTurnCumulativeUsage = turnUsage.nextCumulative;
     }
+    // Clear per-turn scratch so the next turn starts without stale
+    // carry-over — `lastApiCallInputSideTokens` is captured fresh from
+    // the next turn's assistant frames.
+    context.lastApiCallInputSideTokens = undefined;
 
     const turnState = context.turnState;
     if (!turnState) {
@@ -2144,6 +2189,64 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
       yield* backfillAssistantTextBlocksFromSnapshot(context, message);
     }
 
+    // Capture this assistant frame's per-API-call input-side token count
+    // and emit it as the freshest `usedTokens` for the context-window ring.
+    // `message.message.usage` is Anthropic's native per-call usage, so
+    // input + cache_read + cache_creation is what is *currently* in the
+    // prompt window — unlike `task_progress` (opaque `total_tokens`, no
+    // class breakdown) or `result.usage` (session-cumulative).
+    // NOTE(review): this mid-turn snapshot sets `lastUsedTokens`, which the
+    // `ThreadTokenUsageSnapshot` docs call an end-of-turn signal — confirm
+    // `ProviderRuntimeIngestion` does not feed it to the cost ledger.
+    const perCallBreakdown = parseClaudeUsageBreakdown(
+      (message.message as { usage?: unknown }).usage,
+    );
+    if (perCallBreakdown) {
+      const inputSide =
+        perCallBreakdown.inputTokens +
+        perCallBreakdown.cachedInputTokens +
+        perCallBreakdown.cacheCreationInputTokens;
+      if (inputSide > 0) {
+        context.lastApiCallInputSideTokens = inputSide;
+        const maxTokens = context.lastKnownContextWindow;
+        const ringSnapshot: ThreadTokenUsageSnapshot = {
+          usedTokens: inputSide,
+          lastUsedTokens: inputSide,
+          ...(perCallBreakdown.inputTokens > 0
+            ? { inputTokens: perCallBreakdown.inputTokens }
+            : {}),
+          ...(perCallBreakdown.cachedInputTokens > 0
+            ? { cachedInputTokens: perCallBreakdown.cachedInputTokens }
+            : {}),
+          ...(perCallBreakdown.cacheCreationInputTokens > 0
+            ? { cacheCreationInputTokens: perCallBreakdown.cacheCreationInputTokens }
+            : {}),
+          ...(perCallBreakdown.outputTokens > 0
+            ? { outputTokens: perCallBreakdown.outputTokens }
+            : {}),
+          ...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
+            ? { maxTokens }
+            : {}),
+        };
+        context.lastKnownTokenUsage = ringSnapshot;
+        const usageStamp = yield* makeEventStamp();
+        yield* offerRuntimeEvent({
+          type: "thread.token-usage.updated",
+          eventId: usageStamp.eventId,
+          provider: PROVIDER,
+          createdAt: usageStamp.createdAt,
+          threadId: context.session.threadId,
+          ...(context.turnState
+            ? { turnId: asCanonicalTurnId(context.turnState.turnId) }
+            : {}),
+          payload: {
+            usage: ringSnapshot,
+          },
+          providerRefs: nativeProviderRefs(context),
+        });
+      }
+    }
+
     context.lastAssistantUuid = message.uuid;
     yield* updateResumeCursor(context);
   });
@@ -3070,6 +3173,7 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
         turnState: undefined,
         lastKnownContextWindow: undefined,
         lastKnownTokenUsage: undefined,
+        lastApiCallInputSideTokens: undefined,
         lastTurnCumulativeUsage: undefined,
         lastAssistantUuid: resumeState?.resumeSessionAt,
         lastThreadStartedId: undefined,
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
index 3015d2820b..4b9f993c99 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.usage.test.ts
@@ -154,6 +154,61 @@ describe("buildClaudeTurnCompleteUsage", () => {
     expect(res.nextCumulative).toBeUndefined();
   });
 
+  it("prefers lastApiCallInputSide over the task snapshot for usedTokens", () => {
+    // Session-cumulative result.usage reports big numbers (multiple calls
+    // have run across the whole session), but only the last API call's
+    // input-side count matters for the ring. The SDK's opaque
+    // `task_progress.total_tokens` (via taskSnapshot.usedTokens) is less
+    // trustworthy than the per-call input-side captured from
+    // `SDKAssistantMessage.usage`, so the per-call value wins.
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        input_tokens: 10_000, // session cumulative across many calls
+        cache_read_input_tokens: 150_000,
+        cache_creation_input_tokens: 5_000,
+        output_tokens: 20_000,
+      },
+      taskSnapshot: { usedTokens: 999_999, lastUsedTokens: 999_999 },
+      contextWindow: 200_000,
+      priorCumulative: undefined,
+      lastApiCallInputSide: 48_000,
+    });
+    expect(res.snapshot!.usedTokens).toBe(48_000);
+    // totalProcessedTokens still tracks billing-side cumulative for
+    // informational display ("tokens processed so far").
+    expect(res.snapshot!.totalProcessedTokens).toBe(185_000);
+  });
+
+  it("does NOT fall back to cumulative input-side for usedTokens", () => {
+    // Previously we added `input + cached + cacheCreation` from
+    // `result.usage` when no task snapshot was available.  That sum is
+    // *session-cumulative* in Claude's SDK — it over-reports for any
+    // multi-call turn.  With no task snapshot and no last-API-call
+    // capture, we now fall back to the per-turn delta input-side
+    // (first turn → equals cumulative; subsequent turns → just this
+    // turn's additions).
+    const res = buildClaudeTurnCompleteUsage({
+      resultUsage: {
+        input_tokens: 5_000,
+        cache_read_input_tokens: 200_000,
+        cache_creation_input_tokens: 10_000,
+        output_tokens: 3_000,
+      },
+      taskSnapshot: undefined,
+      contextWindow: 200_000,
+      priorCumulative: {
+        inputTokens: 4_000,
+        cachedInputTokens: 180_000,
+        cacheCreationInputTokens: 8_000,
+        outputTokens: 2_500,
+        totalTokens: 194_500,
+      },
+    });
+    // Per-turn input-side delta = 1_000 + 20_000 + 2_000 = 23_000.
+    expect(res.snapshot!.usedTokens).toBe(23_000);
+    expect(res.snapshot!.lastUsedTokens).toBe(23_000);
+  });
+
   it("clamps negative deltas to zero when cumulative goes backwards", () => {
     const prior = {
       inputTokens: 1_000,

From 1790ec541cd9d4bc7724e3718755fe931e29925d Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Wed, 22 Apr 2026 01:11:25 +0200
Subject: [PATCH 15/16] chore(desktop): bump to 0.0.21 + rebrand to "T3 by
 Stan"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Local rebuild for personal distribution off the
feat/token-cost-meter branch. Keeps the app bundle identifier
(`com.t3tools.t3code`) untouched so existing auto-update channels
aren't disturbed, but changes the user-facing name, dev launcher
label, and artifact filename.

- apps/desktop/package.json: productName → "T3 by Stan".
- apps/desktop/scripts/electron-launcher.mjs: APP_DISPLAY_NAME
  follows the new name (dev / prod variants).
- scripts/build-desktop-artifact.ts: artifactName →
  `T3-by-Stan-${version}-${arch}.${ext}` so the DMG / zip /
  blockmap files land as `release/T3-by-Stan-0.0.21-arm64.dmg` etc.
- apps/{desktop,server,web}/package.json + bun.lock: version bump
  0.0.20 → 0.0.21.

The legacy user-data migration constant in `apps/desktop/src/main.ts`
(`LEGACY_USER_DATA_DIR_NAME = "T3 Code (Alpha)"`) is intentionally
left alone so this build still picks up data from the prior install.

Built macOS arm64 DMG sits at release/T3-by-Stan-0.0.21-arm64.dmg
(136 MB, unsigned / ad-hoc — Gatekeeper first-launch warning
expected). Signing / notarization not configured; would require
Apple Developer credentials.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/desktop/package.json                  | 4 ++--
 apps/desktop/scripts/electron-launcher.mjs | 2 +-
 apps/server/package.json                   | 2 +-
 apps/web/package.json                      | 2 +-
 bun.lock                                   | 6 +++---
 scripts/build-desktop-artifact.ts          | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/apps/desktop/package.json b/apps/desktop/package.json
index 2a4ced70e7..a38c664b16 100644
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@t3tools/desktop",
-  "version": "0.0.20",
+  "version": "0.0.21",
   "private": true,
   "type": "module",
   "main": "dist-electron/main.cjs",
@@ -28,5 +28,5 @@
     "typescript": "catalog:",
     "vitest": "catalog:"
   },
-  "productName": "T3 Code (Alpha)"
+  "productName": "T3 by Stan"
 }
diff --git a/apps/desktop/scripts/electron-launcher.mjs b/apps/desktop/scripts/electron-launcher.mjs
index 1453cbe666..12e6f6f6bd 100644
--- a/apps/desktop/scripts/electron-launcher.mjs
+++ b/apps/desktop/scripts/electron-launcher.mjs
@@ -17,7 +17,7 @@ import { dirname, join, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 
 const isDevelopment = Boolean(process.env.VITE_DEV_SERVER_URL);
-const APP_DISPLAY_NAME = isDevelopment ? "T3 Code (Dev)" : "T3 Code (Alpha)";
+const APP_DISPLAY_NAME = isDevelopment ? "T3 by Stan (Dev)" : "T3 by Stan";
 const APP_BUNDLE_ID = isDevelopment ? "com.t3tools.t3code.dev" : "com.t3tools.t3code";
 const LAUNCHER_VERSION = 2;
 
diff --git a/apps/server/package.json b/apps/server/package.json
index 14dbe35bcb..13a8124cb2 100644
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -1,6 +1,6 @@
 {
   "name": "t3",
-  "version": "0.0.20",
+  "version": "0.0.21",
   "license": "MIT",
   "repository": {
     "type": "git",
diff --git a/apps/web/package.json b/apps/web/package.json
index b18defebbe..11e69d1248 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@t3tools/web",
-  "version": "0.0.20",
+  "version": "0.0.21",
   "private": true,
   "type": "module",
   "scripts": {
diff --git a/bun.lock b/bun.lock
index e9b7511e34..8fb16c217b 100644
--- a/bun.lock
+++ b/bun.lock
@@ -14,7 +14,7 @@
     },
     "apps/desktop": {
       "name": "@t3tools/desktop",
-      "version": "0.0.20",
+      "version": "0.0.21",
       "dependencies": {
         "effect": "catalog:",
         "electron": "40.6.0",
@@ -43,7 +43,7 @@
     },
     "apps/server": {
       "name": "t3",
-      "version": "0.0.20",
+      "version": "0.0.21",
       "bin": {
         "t3": "./dist/bin.mjs",
       },
@@ -75,7 +75,7 @@
     },
     "apps/web": {
       "name": "@t3tools/web",
-      "version": "0.0.20",
+      "version": "0.0.21",
       "dependencies": {
         "@base-ui/react": "^1.2.0",
         "@dnd-kit/core": "^6.3.1",
diff --git a/scripts/build-desktop-artifact.ts b/scripts/build-desktop-artifact.ts
index 74e8bed0cb..5f3ae5427c 100644
--- a/scripts/build-desktop-artifact.ts
+++ b/scripts/build-desktop-artifact.ts
@@ -569,7 +569,7 @@ const createBuildConfig = Effect.fn("createBuildConfig")(function* (
   const buildConfig: Record<string, unknown> = {
     appId: "com.t3tools.t3code",
     productName: resolveDesktopProductName(version),
-    artifactName: "T3-Code-${version}-${arch}.${ext}",
+    artifactName: "T3-by-Stan-${version}-${arch}.${ext}",
     directories: {
       buildResources: "apps/desktop/resources",
     },

From bd0fc3bdcdabccfd9123a62af9128b99864d04e7 Mon Sep 17 00:00:00 2001
From: Olympicx <kosenkosv.dev@gmail.com>
Date: Wed, 22 Apr 2026 01:21:26 +0200
Subject: [PATCH 16/16] chore(desktop): bump to 0.0.22 for Opus context-ring
 fix rebuild

Rebuilds the personal T3-by-Stan DMG to pick up the per-call
input-side usedTokens fix (d46b444d) so the context ring shows
accurate values on Opus + multi-call turns.

No behavioural change beyond version; bun.lock re-synced.

Artifact: release/T3-by-Stan-0.0.22-arm64.dmg (136 MB, unsigned).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/desktop/package.json | 2 +-
 apps/server/package.json  | 2 +-
 apps/web/package.json     | 2 +-
 bun.lock                  | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/apps/desktop/package.json b/apps/desktop/package.json
index a38c664b16..8f2211d7df 100644
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@t3tools/desktop",
-  "version": "0.0.21",
+  "version": "0.0.22",
   "private": true,
   "type": "module",
   "main": "dist-electron/main.cjs",
diff --git a/apps/server/package.json b/apps/server/package.json
index 13a8124cb2..c65979430e 100644
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -1,6 +1,6 @@
 {
   "name": "t3",
-  "version": "0.0.21",
+  "version": "0.0.22",
   "license": "MIT",
   "repository": {
     "type": "git",
diff --git a/apps/web/package.json b/apps/web/package.json
index 11e69d1248..99069389d6 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@t3tools/web",
-  "version": "0.0.21",
+  "version": "0.0.22",
   "private": true,
   "type": "module",
   "scripts": {
diff --git a/bun.lock b/bun.lock
index 8fb16c217b..287d35f87b 100644
--- a/bun.lock
+++ b/bun.lock
@@ -14,7 +14,7 @@
     },
     "apps/desktop": {
       "name": "@t3tools/desktop",
-      "version": "0.0.21",
+      "version": "0.0.22",
       "dependencies": {
         "effect": "catalog:",
         "electron": "40.6.0",
@@ -43,7 +43,7 @@
     },
     "apps/server": {
       "name": "t3",
-      "version": "0.0.21",
+      "version": "0.0.22",
       "bin": {
         "t3": "./dist/bin.mjs",
       },
@@ -75,7 +75,7 @@
     },
     "apps/web": {
       "name": "@t3tools/web",
-      "version": "0.0.21",
+      "version": "0.0.22",
       "dependencies": {
         "@base-ui/react": "^1.2.0",
         "@dnd-kit/core": "^6.3.1",