scarson · scarson · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.github/docker/Dockerfile.ci b/.github/docker/Dockerfile.ci
@@ -4,34 +4,86 @@ FROM ubuntu:24.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 
-# System deps
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    git curl unzip ca-certificates jq bc gpg \
+# Point apt at Hetzner's public Ubuntu mirror instead of the upstream hosts.
+# Ubicloud runners (Hetzner FSN1-DC21) repeatedly time out connecting to
+# archive.ubuntu.com:80 — outages of 90+ seconds were seen on multiple builds.
+# The Hetzner mirror is reachable from any cloud and route-local for Ubicloud,
+# which addresses both reliability and latency. Ubuntu 24.04 keeps its apt
+# sources in deb822 format at /etc/apt/sources.list.d/ubuntu.sources.
+#
+# Plain HTTP (not HTTPS) is deliberate: the ubuntu:24.04 base image ships
+# without ca-certificates, so HTTPS apt fails with "No system certificates
+# available." Apt verifies packages via GPG-signed Release files rather than
+# TLS, so HTTP here is no weaker than the upstream defaults.
+RUN sed --in-place \
+    --expression='s|http://archive.ubuntu.com/ubuntu|http://mirror.hetzner.com/ubuntu/packages|g' \
+    --expression='s|http://security.ubuntu.com/ubuntu|http://mirror.hetzner.com/ubuntu/packages|g' \
+    /etc/apt/sources.list.d/ubuntu.sources
+
+# Harden apt itself as well: per-package retries plus generous timeouts.
+# The Hetzner mirror is reliable, but an individual package fetch can still
+# blip; with retries configured, one failed fetch doesn't sink the whole build.
+RUN printf '%s\n' 'Acquire::Retries "5";' 'Acquire::http::Timeout "30";' 'Acquire::https::Timeout "30";' \
+    > /etc/apt/apt.conf.d/80-retries
+
+# System deps; xz-utils is needed to unpack the Node.js .tar.xz below. update+install
+# retry as a unit (even Hetzner can blip); a failed final attempt fails the build.
+RUN for i in 1 2 3; do \
+      apt-get update && apt-get install -y --no-install-recommends \
+        git curl unzip xz-utils ca-certificates jq bc gpg && break || \
+      { echo "apt retry $i/3 after failure"; [ "$i" -lt 3 ] && sleep 10; }; \
+    done \
     && rm -rf /var/lib/apt/lists/*
 
 # GitHub CLI
-RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+RUN curl --retry 5 --retry-delay 5 --retry-connrefused -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
     | gpg --dearmor -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
     && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
     | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
-    && apt-get update && apt-get install -y --no-install-recommends gh \
+    && for i in 1 2 3; do \
+         apt-get update && apt-get install -y --no-install-recommends gh && break || \
+         { echo "gh install retry $i/3"; [ "$i" -lt 3 ] && sleep 10; }; \
+       done \
     && rm -rf /var/lib/apt/lists/*
 
-# Node.js 22 LTS (needed for claude CLI)
-RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
-    && apt-get install -y --no-install-recommends nodejs \
-    && rm -rf /var/lib/apt/lists/*
+# Node.js 22 LTS (needed for claude CLI).
+# Install from the official nodejs.org tarball instead of NodeSource's apt setup.
+# NodeSource's setup_22.x script runs its own `apt-get update` + `apt-get install gnupg`,
+# both of which depend on archive.ubuntu.com / security.ubuntu.com being reachable.
+# Ubicloud CI runners frequently can't reach those mirrors (connection timeouts),
+# and "gnupg" was renamed to "gpg" on Ubuntu 24.04 anyway, so NodeSource's script
+# fails before it can add its own repo. The tarball needs one host (nodejs.org), no
+# apt, and its arch suffix is derived from dpkg (amd64 -> x64) instead of hardcoded.
+ENV NODE_VERSION=22.20.0
+RUN NODE_ARCH="$(dpkg --print-architecture | sed 's/^amd64$/x64/')" \
+    && curl --retry 5 --retry-delay 5 --retry-connrefused -fsSL "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-${NODE_ARCH}.tar.xz" -o /tmp/node.tar.xz \
+    && tar -xJ -C /usr/local --strip-components=1 --no-same-owner -f /tmp/node.tar.xz \
+    && rm -f /tmp/node.tar.xz \
+    && node --version && npm --version
 
 # Bun (install to /usr/local so non-root users can access it)
 ENV BUN_INSTALL="/usr/local"
-RUN curl -fsSL https://bun.sh/install | BUN_VERSION=1.3.10 bash
+RUN curl --retry 5 --retry-delay 5 --retry-connrefused -fsSL https://bun.sh/install -o /tmp/bun-install.sh \
+    && BUN_VERSION=1.3.10 bash /tmp/bun-install.sh "bun-v1.3.10" && rm -f /tmp/bun-install.sh
 
 # Claude CLI
 RUN npm i -g @anthropic-ai/claude-code
 
 # Playwright system deps (Chromium) — needed for browse E2E tests
 RUN npx playwright install-deps chromium
 
+# Linux has neither Helvetica nor Arial. make-pdf's print CSS stacks fall back
+# to Liberation Sans (metric-compatible Arial clone, SIL OFL 1.1) so PDFs don't
+# render in DejaVu Sans. playwright install-deps happens to pull this in today,
+# but the dep is implicit and could change — install explicitly so upgrades can't
+# silently regress rendering. A failed third attempt fails the build right here.
+RUN for i in 1 2 3; do \
+      apt-get update && apt-get install -y --no-install-recommends fonts-liberation fontconfig && break || \
+      { echo "fonts-liberation install retry $i/3"; [ "$i" -lt 3 ] && sleep 10; }; \
+    done \
+    && fc-cache -f \
+    && rm -rf /var/lib/apt/lists/*
+
 # Pre-install dependencies (cached layer — only rebuilds when package.json changes)
 COPY package.json /workspace/
 WORKDIR /workspace
@@ -44,7 +96,9 @@ RUN npx playwright install chromium \
 
 # Verify everything works
 RUN bun --version && node --version && claude --version && jq --version && gh --version \
-    && npx playwright --version
+    && npx playwright --version \
+    && { fc-match "Liberation Sans" | grep -qi "Liberation" \
+        || { echo "ERROR: fonts-liberation not installed — make-pdf PDFs will render in DejaVu Sans"; exit 1; }; }
 
 # At runtime: checkout overwrites /workspace, but node_modules persists
 # if we move it out of the way and symlink back

diff --git a/.github/workflows/make-pdf-gate.yml b/.github/workflows/make-pdf-gate.yml
@@ -0,0 +1,80 @@
+name: make-pdf copy-paste gate
+on:  # PRs into main that touch the paths below, or a manual dispatch
+  pull_request:
+    branches: [main]
+    paths:
+      - 'make-pdf/**'
+      - 'browse/src/meta-commands.ts'
+      - 'browse/src/write-commands.ts'
+      - 'browse/src/commands.ts'
+      - 'browse/src/cli.ts'
+      - 'scripts/resolvers/make-pdf.ts'
+      - 'package.json'
+      - '.github/workflows/make-pdf-gate.yml'  # this workflow file itself
+  workflow_dispatch:  # manual trigger, no inputs
+
+concurrency:
+  group: make-pdf-gate-${{ github.head_ref || github.ref }}  # head_ref is empty outside pull_request events
+  cancel-in-progress: true
+
+jobs:
+  gate:
+    strategy:
+      fail-fast: false  # let the other OS finish even if one matrix leg fails
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        # Windows is tolerant-mode — Xpdf / Poppler-Windows extraction
+        # differs enough from the Linux/macOS baseline that the strict
+        # exact-diff gate is unreliable. Enable once the normalized
+        # comparator proves tolerant enough (Codex round 2 #18).
+        #
+        # include:
+        #   - os: windows-latest
+        #     tolerant: true
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest  # NOTE(review): Dockerfile.ci installs Bun 1.3.10 — confirm unpinned is intended here
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile  # fail rather than rewrite the lockfile
+
+      - name: Install poppler (macOS)
+        if: matrix.os == 'macos-latest'
+        run: brew install poppler
+
+      - name: Install poppler-utils (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        run: sudo apt-get update && sudo apt-get install -y poppler-utils
+
+      - name: Install Playwright Chromium
+        run: bunx playwright install chromium
+
+      - name: Build binaries
+        run: bun run build
+
+      - name: ad-hoc codesign (Apple Silicon)
+        if: matrix.os == 'macos-latest'
+        run: |
+          for bin in browse/dist/browse browse/dist/find-browse design/dist/design make-pdf/dist/pdf; do
+            codesign --remove-signature "$bin" 2>/dev/null || true
+            codesign -s - -f "$bin" || true
+          done
+
+      - name: Log toolchain versions
+        run: |
+          echo "OS: ${{ matrix.os }}"
+          bun --version
+          which pdftotext && pdftotext -v 2>&1 | head -1 || true
+
+      - name: Run make-pdf unit tests
+        run: bun test make-pdf/test/*.test.ts
+
+      - name: Run combined-features copy-paste gate (P0)
+        env:
+          BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse  # same path the codesign step signs on macOS
+        run: bun test make-pdf/test/e2e/combined-gate.test.ts
diff --git a/.github/workflows/windows-smoke.yml b/.github/workflows/windows-smoke.yml
@@ -0,0 +1,88 @@
+# Windows Smoke CI — Phase 1 of the phased rollout in docs/designs/WINDOWS_CI.md
+#
+# Answers one question per run: "does the code path through a Windows-critical
+# module actually run on Windows." That's deliberately a lower bar than "does
+# every test pass" — it catches the class of bugs where Linux/macOS CI runs
+# green but a Windows user immediately hits ENOENT / "browse binary not found"
+# / silent mislocations of ~/.gstack/ state.
+#
+# Coverage catch list (see RFC for full reasoning):
+#   - Build fails to produce .exe on Windows              (catches #1013 / #1024)
+#   - Binary-resolution probes wrong filename             (catches #1118 / #1094)
+#   - Shebang bash script spawn fails                     (catches #1119)
+#   - Sensitive files written without ACL restriction     (catches #1121)
+#   - { mode: 0o600 } silently ignored on Windows         (catches Pre-#1121 state)
+#
+# Miss: #1120-style home-directory fallback — no direct unit test. RFC
+# proposes adding one as a follow-on.
+name: windows-smoke
+on:  # PRs into main and pushes to main that touch the paths below, or a manual dispatch
+  pull_request:
+    branches: [main]
+    paths:
+      - 'browse/**'
+      - 'make-pdf/**'
+      - 'design/**'
+      - 'scripts/**'
+      - 'bin/**'
+      - 'package.json'
+      - 'bun.lockb'  # NOTE(review): Bun >= 1.2 writes a text bun.lock by default — confirm which lockfile the repo commits
+      - '.github/workflows/windows-smoke.yml'  # this workflow file itself
+  push:
+    branches: [main]
+    paths:
+      - 'browse/**'
+      - 'make-pdf/**'
+      - 'design/**'
+      - 'scripts/**'
+      - 'bin/**'
+      - 'package.json'
+      - 'bun.lockb'
+  workflow_dispatch:  # manual trigger, no inputs
+
+concurrency:
+  group: windows-smoke-${{ github.head_ref || github.ref }}  # falls back to the ref when head_ref is empty (push / manual runs)
+  cancel-in-progress: true
+
+jobs:
+  smoke:
+    runs-on: windows-latest
+    timeout-minutes: 10  # hard cap on the whole job
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest  # unpinned: resolves to the newest Bun release at run time
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile  # fail rather than rewrite the lockfile
+
+      - name: Build binaries
+        run: bun run build
+
+      - name: Assert Windows binary layout
+        shell: pwsh
+        run: |
+          $missing = @()
+          foreach ($p in @(
+            'browse/dist/browse.exe',
+            'browse/dist/find-browse.exe',
+            'browse/dist/server-node.mjs',
+            'make-pdf/dist/pdf.exe',
+            'design/dist/design.exe'
+          )) { if (-not (Test-Path $p)) { $missing += $p } }
+          if ($missing.Count -gt 0) {
+            Write-Error "Missing build artifacts: $($missing -join ', ')"
+            exit 1
+          }
+
+
+      - name: Windows-specific unit tests
+        # Single bun test invocation with all files so a failure in any
+        # file correctly fails the step. Separate invocations + default
+        # PowerShell error-handling would mask all-but-the-last failure.
+        run: bun test browse/test/security.test.ts browse/test/file-permissions.test.ts browse/test/home-dir-resolution.test.ts make-pdf/test/browseClient.test.ts make-pdf/test/pdftotext.test.ts
+
+      - name: make-pdf render smoke
+        run: bun test make-pdf/test/render.test.ts  # kept as its own step, separate from the unit tests above
diff --git a/.gitignore b/.gitignore
@@ -3,16 +3,21 @@ node_modules/
 dist/
 browse/dist/
 design/dist/
+make-pdf/dist/
 bin/gstack-global-discover
 .gstack/
 .claude/skills/
+.claude/scheduled_tasks.lock
+.claude/*.lock
 .agents/
 .factory/
 .kiro/
 .opencode/
 .slate/
 .cursor/
 .openclaw/
+.hermes/
+.gbrain/
 .context/
 extension/.auth.json
 .gstack-worktrees/
@@ -24,3 +29,6 @@ extension/.auth.json
 .env.*
 !.env.example
 supabase/.temp/
+
+# Throughput analysis — local-only, regenerate via scripts/garry-output-comparison.ts
+docs/throughput-*.json
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -109,6 +109,26 @@ Cookies are the most sensitive data gstack handles. The design:
 
 The browser registry (Comet, Chrome, Arc, Brave, Edge) is hardcoded. Database paths are constructed from known constants, never from user input. Keychain access uses `Bun.spawn()` with explicit argument arrays, not shell string interpolation.
 
+### Prompt injection defense (sidebar agent)
+
+The Chrome sidebar agent has tools (Bash, Read, Glob, Grep, WebFetch) and reads hostile web pages, so it's the part of gstack most exposed to prompt injection. Defense is layered, not single-point.
+
+1. **L1-L3 content security (`browse/src/content-security.ts`).** Runs on every page-content command and every tool output: datamarking, hidden-element strip, ARIA regex, URL blocklist, and a trust-boundary envelope wrapper. Applied at both the server and the agent.
+
+2. **L4 ML classifier — TestSavantAI (`browse/src/security-classifier.ts`).** A 22MB BERT-small ONNX model (int8 quantized) bundled with the agent. Runs locally, no network. Scans every user message and every Read/Glob/Grep/WebFetch tool output before Claude sees it. Opt-in 721MB DeBERTa-v3 ensemble via `GSTACK_SECURITY_ENSEMBLE=deberta`.
+
+3. **L4b transcript classifier.** A Claude Haiku pass that looks at the full conversation shape (user message, tool calls, tool output), not just text. Gated by `LOG_ONLY: 0.40` so most clean traffic skips the paid call.
+
+4. **L5 canary token (`browse/src/security.ts`).** A random token injected into the system prompt at session start. Rolling-buffer detection across `text_delta` and `input_json_delta` streams catches the token if it shows up anywhere in Claude's output, tool arguments, URLs, or file writes. Deterministic BLOCK — if the token leaks, the attacker convinced Claude to reveal the system prompt, and the session ends.
+
+5. **L6 ensemble combiner (`combineVerdict`).** BLOCK requires agreement from two ML classifiers at >= `WARN` (0.60), not a single confident hit. This mitigates false positives on benign instruction-style text, such as a Stack Overflow answer that tells the reader what to do. On tool-output scans, a single high-confidence layer BLOCKs directly — that content wasn't user-authored, so the false-positive concern doesn't apply.
+
+**Critical constraint:** `security-classifier.ts` runs only in the sidebar-agent process, never in the compiled browse binary. `@huggingface/transformers` v4 requires `onnxruntime-node`, which fails `dlopen` from Bun compile's temp extract directory. Only the pure-string pieces (canary inject/check, verdict combiner, attack log, status) are in `security.ts`, which is safe to import from `server.ts`.
+
+**Env knobs:** `GSTACK_SECURITY_OFF=1` is a real kill switch (skips ML scan, canary still injects). Model cache at `~/.gstack/models/testsavant-small/` (112MB, first run) and `~/.gstack/models/deberta-v3-injection/` (721MB, opt-in only). Attack log at `~/.gstack/security/attempts.jsonl` (salted sha256 + domain, rotates at 10MB, 5 generations). Per-device salt at `~/.gstack/security/device-salt` (0600), cached in-process to survive FS-unwritable environments.
+
+**Visibility.** The sidebar header shows a shield icon (green/amber/red) polled via `/sidebar-chat`. A centered banner appears on canary leak or BLOCK verdict with the exact layer scores. `bin/gstack-security-dashboard` aggregates local attempts; `supabase/functions/community-pulse` aggregates opt-in community telemetry across users.
+
 ## The ref system
 
 Refs (`@e1`, `@e2`, `@c1`) are how the agent addresses page elements without writing CSS selectors or XPath.
@@ -209,6 +229,8 @@ Templates contain the workflows, tips, and examples that require human judgment.
 | `{{DESIGN_SETUP}}` | `resolvers/design.ts` | Discovery pattern for `$D` design binary, mirrors `{{BROWSE_SETUP}}` |
 | `{{DESIGN_SHOTGUN_LOOP}}` | `resolvers/design.ts` | Shared comparison board feedback loop for /design-shotgun, /plan-design-review, /design-consultation |
 | `{{UX_PRINCIPLES}}` | `resolvers/design.ts` | User behavioral foundations (scanning, satisficing, goodwill reservoir, trunk test) for /design-html, /design-shotgun, /design-review, /plan-design-review |
+| `{{GBRAIN_CONTEXT_LOAD}}` | `resolvers/gbrain.ts` | Brain-first context search with keyword extraction, health awareness, and data-research routing. Injected into 10 brain-aware skills. Suppressed on non-brain hosts. |
+| `{{GBRAIN_SAVE_RESULTS}}` | `resolvers/gbrain.ts` | Post-skill brain persistence with entity enrichment, throttle handling, and per-skill save instructions. 8 skill-specific save formats. |
 
 This is structurally sound — if a command exists in code, it appears in docs. If it doesn't exist, it can't appear.