From e4ff714df73339a1d203b43f7217c4dcdfa8ab30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Thu, 28 May 2026 20:05:42 +0200 Subject: [PATCH 01/47] docs: add CodeQL integration implementation plan --- .project/codeql-integration-plan.md | 1106 +++++++++++++++++++++++++++ 1 file changed, 1106 insertions(+) create mode 100644 .project/codeql-integration-plan.md diff --git a/.project/codeql-integration-plan.md b/.project/codeql-integration-plan.md new file mode 100644 index 00000000..7ae04f86 --- /dev/null +++ b/.project/codeql-integration-plan.md @@ -0,0 +1,1106 @@ +# CodeQL Integration Plan + +Status: WIP planning document +Branch: `wip/codeql-integration-plan` +Scope: planning only; no implementation changes in this branch. + +## Goals + +Integrate CodeQL into CodeCome as a first-class static-analysis capability used by the normal workflow, not as a manual side tool. + +The integration should: + +- run automatically during Phase 1 unless explicitly disabled; +- run after the model has produced an initial target/build/language profile; +- enrich `itemdb/notes/file-risk-index.yml` and related reconnaissance notes; +- feed Phase 2 candidate hypothesis generation; +- inject per-file CodeQL context into `make sweep`; +- support official, GitHub Security Lab, Trail of Bits, coding standards, and local CodeCome query packs; +- keep the implementation simple and maintainable; +- avoid over-engineering such as external phase-definition YAMLs or a generic workflow engine. + +## Non-goals + +- Do not make CodeQL a replacement for model reasoning, counter-analysis, or validation. +- Do not confirm findings solely because CodeQL reported an alert. +- Do not require CodeQL for every target or make failures fatal by default. +- Do not introduce a `config/` directory just for CodeQL. +- Do not add declarative YAML phase orchestration. +- Do not keep the old raw `opencode run` bypass inside official phase targets. + +## Key design decisions + +### 1. 
Use `templates/codeql-packs.yml` + +Use a small, easy-to-maintain catalog at: + +```text +./templates/codeql-packs.yml +``` + +This avoids adding a new `config/` directory and keeps the pack mapping close to other CodeCome templates/schemas. + +The catalog should be a simple mapping from CodeQL language id to pack profile names and package references. + +### 2. Use `tools/codeql.py` as the dedicated CodeQL CLI + +Prefer: + +```bash +tools/codeql.py install +tools/codeql.py check +tools/codeql.py run --plan itemdb/notes/codeql-plan.yml +tools/codeql.py normalize +tools/codeql.py import-risk +tools/codeql.py create-candidates +tools/codeql.py context --file src/path/file.ext +tools/codeql.py check-artifacts +``` + +rather than: + +```bash +tools/codecome.py codeql ... +``` + +Rationale: + +- `tools/codecome.py` is currently a small workspace helper for `check`, `status`, and `next-id`. +- CodeQL will have enough subcommands and internal logic to deserve a focused CLI wrapper. +- The harness can call `tools/codeql.py` directly without bloating `tools/codecome.py`. +- This does not prevent a future CLI consolidation if/when `tools/codecome.py` becomes the single public entrypoint. + +Implementation shape: + +```text +tools/codeql.py # thin argparse CLI + +tools/codeql/ + __init__.py + config.py # env/config resolution + install.py # managed CodeQL CLI install + packs.py # templates/codeql-packs.yml resolver + runner.py # database create/analyze orchestration + sarif.py # SARIF loading/extraction helpers + normalize.py # SARIF -> normalized alerts + risk.py # normalized alerts -> file-risk-index enrichment + candidates.py # normalized alerts -> candidate findings/briefing + context.py # per-file sweep context + artifacts.py # manifest/check-artifact helpers +``` + +### 3. `run-agent.py` remains the CodeCome harness + +`run-agent.py` is not just a phase runner. It is the CodeCome harness used for phases and chat mode. 
+ +For this integration, extend the existing harness directly. Do not introduce a YAML workflow definition or a generic step engine. + +Phase orchestration should be explicit Python code, for example: + +```python +def run_phase_1(args: Args) -> int: + run_gate("1") + + run_agent_step( + phase="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + run_gate("1a") + + run_codeql_phase_1() + run_codeql_artifact_gate() + + run_agent_step( + phase="1b", + label="CodeQL-assisted Reconnaissance", + agent="recon", + prompt_file="prompts/phase-1b-codeql-recon.md", + ) + run_gate("1b") + + run_agent_step( + phase="1c", + label="Sandbox Bootstrap", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + run_gate("1c") + + return 0 +``` + +Chat mode should continue to use the existing chat path and should not be forced into phase semantics. + +### 4. Remove `CODECOME_USE_WRAPPER` + +Remove the raw `opencode run` bypass from official phase targets immediately. + +Official phases must always pass through the CodeCome harness because the harness is now responsible for: + +- subphase orchestration; +- CodeQL execution; +- deterministic gates; +- candidate briefing/precreation; +- prompt enrichment; +- run logs and artifacts; +- future deterministic tooling. + +If a raw debug path is useful, add an explicit non-workflow target such as: + +```bash +make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md +``` + +but do not keep raw mode as an alternative implementation of `make phase-*`. + +## Updated Phase 1 flow + +Use clear subphase names: + +```text +Phase 1a — Target profile +Phase 1b — CodeQL-assisted reconnaissance +Phase 1c — Sandbox bootstrap +``` + +CodeQL runs between Phase 1a and Phase 1b. + +```text +make phase-1 + -> tools/run-agent.py --phase 1 + + 1. gate-check phase 1 + + 2. 
model: Phase 1a target profile + outputs: + itemdb/notes/target-profile.md + itemdb/notes/build-model.md + itemdb/notes/codeql-plan.yml + + 3. gate-check phase 1a + verifies: + - required 1a outputs exist + - codeql-plan.yml is valid YAML + - codeql-plan.yml has the required fields + - no accidental findings were created + + 4. deterministic CodeQL step + command: + tools/codeql.py run --plan itemdb/notes/codeql-plan.yml + outputs: + itemdb/evidence/codeql/run-manifest.yml + itemdb/evidence/codeql/selected-query-packs.yml + itemdb/evidence/codeql/sarif/*.sarif + itemdb/evidence/codeql/normalized/alerts.yml + itemdb/evidence/codeql/normalized/file-signals.yml + itemdb/evidence/codeql/codeql-summary.md + + 5. CodeQL artifact gate + verifies: + - completed/skipped/soft-failed/failed outcome is recorded clearly + - normalized artifacts exist when analysis succeeded + - run-manifest.yml exists even on skip/failure + + 6. model: Phase 1b CodeQL-assisted reconnaissance + reads: + - 1a notes + - CodeQL artifacts + outputs: + itemdb/notes/attack-surface.md + itemdb/notes/execution-model.md + itemdb/notes/trust-boundaries.md + itemdb/notes/data-flow.md + itemdb/notes/validation-model.md + itemdb/notes/interesting-files.md + itemdb/notes/file-risk-index.yml + itemdb/notes/security-assumptions.md + + 7. gate-check phase 1b + verifies: + - required recon notes exist + - file-risk-index.yml is valid + - scores are 1..5 + - paths are workspace-relative and under src/ + - no template placeholder entries remain + - no accidental findings were created + + 8. model: Phase 1c sandbox bootstrap + outputs: + sandbox/ + itemdb/notes/sandbox-plan.md + + 9. 
gate-check phase 1c + verifies: + - sandbox status/provenance + - sandbox validation result + - final frontmatter/checks +``` + +## Phase 1a prompt + +Create: + +```text +prompts/phase-1a-profile.md +``` + +Responsibilities: + +- broad source tree mapping; +- language/framework detection; +- build model detection; +- primary/secondary target identification; +- preliminary attack-surface hints; +- generate `itemdb/notes/codeql-plan.yml`; +- do not create vulnerability findings; +- do not bootstrap sandbox; +- do not run CodeQL manually. + +Required outputs: + +```text +itemdb/notes/target-profile.md +itemdb/notes/build-model.md +itemdb/notes/codeql-plan.yml +``` + +## Phase 1b prompt + +Create: + +```text +prompts/phase-1b-codeql-recon.md +``` + +Responsibilities: + +- read the Phase 1a outputs; +- read CodeQL artifacts if present; +- treat CodeQL results as reconnaissance evidence, not proof of vulnerability; +- complete the Phase 1 reconnaissance notes; +- enrich `file-risk-index.yml` with CodeQL file signals; +- prepare Phase 2 and sweep focus. + +Required outputs: + +```text +itemdb/notes/attack-surface.md +itemdb/notes/execution-model.md +itemdb/notes/trust-boundaries.md +itemdb/notes/data-flow.md +itemdb/notes/validation-model.md +itemdb/notes/interesting-files.md +itemdb/notes/file-risk-index.yml +itemdb/notes/security-assumptions.md +``` + +## Phase 1c prompt + +Create: + +```text +prompts/phase-1c-sandbox.md +``` + +This should contain the sandbox bootstrap portion currently embedded in `prompts/phase-1-recon.md`. + +Responsibilities: + +- inspect current sandbox state; +- select/apply/adapt a sandbox template; +- author missing helper scripts; +- run sandbox validation; +- write `itemdb/notes/sandbox-plan.md`; +- leave `sandbox/` ready for Phase 2/4/5 where possible. 
+ +## `codeql-plan.yml` template + +Add: + +```text +templates/codeql-plan.yml +``` + +Example: + +```yaml +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +languages: + - id: "python" + confidence: "HIGH" + build_mode: "none" + build_command: null + packs: + - "official" + - "github-security-lab" + + - id: "javascript-typescript" + confidence: "MEDIUM" + build_mode: "none" + build_command: null + packs: + - "official" + +exclude: + - "src/**/tests/**" + - "src/**/fixtures/**" + - "src/**/vendor/**" + - "src/**/node_modules/**" + +notes: + - "Primary target appears to be a Python API service." +``` + +C/C++ example: + +```yaml +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +languages: + - id: "c-cpp" + confidence: "HIGH" + build_mode: "manual" + build_command: "make -C src" + packs: + - "official" + - "github-security-lab" + - "trailofbits" + - "coding-standards" + +exclude: + - "src/**/tests/**" + - "src/**/vendor/**" +``` + +Allowed pack profile names: + +```text +official +github-security-lab +trailofbits +coding-standards +local +``` + +The model chooses profiles, not exact package names. The harness resolves profiles via `templates/codeql-packs.yml`. 
+ +## `templates/codeql-packs.yml` + +Add: + +```text +templates/codeql-packs.yml +``` + +Keep it intentionally simple: + +```yaml +schema_version: 1 + +packs: + python: + official: + - "codeql/python-queries" + github-security-lab: + - "githubsecuritylab/codeql-python-queries" + local: + - "./queries/codeql/python" + + javascript-typescript: + official: + - "codeql/javascript-queries" + github-security-lab: + - "githubsecuritylab/codeql-javascript-queries" + local: + - "./queries/codeql/javascript" + + c-cpp: + official: + - "codeql/cpp-queries" + github-security-lab: + - "githubsecuritylab/codeql-cpp-queries" + trailofbits: + - "trailofbits/cpp-queries" + coding-standards: + - "codeql/coding-standards-cpp" + local: + - "./queries/codeql/cpp" + + go: + official: + - "codeql/go-queries" + github-security-lab: + - "githubsecuritylab/codeql-go-queries" + trailofbits: + - "trailofbits/go-queries" + local: + - "./queries/codeql/go" + + csharp: + official: + - "codeql/csharp-queries" + github-security-lab: + - "githubsecuritylab/codeql-csharp-queries" + local: + - "./queries/codeql/csharp" + + java-kotlin: + official: + - "codeql/java-queries" + github-security-lab: + - "githubsecuritylab/codeql-java-queries" + local: + - "./queries/codeql/java" + +candidate_policy: + official: + allow_precreate: true + github-security-lab: + allow_precreate: true + trailofbits: + allow_precreate: true + coding-standards: + allow_precreate: false + local: + allow_precreate: true +``` + +Notes: + +- Some package names may require verification during implementation with `codeql pack download` / `codeql resolve packs`. +- Missing/unavailable packs should be recorded as warnings in `run-manifest.yml`, not crash the phase under soft fail policy. +- `coding-standards` packs should enrich risk and sweep context by default, but should not precreate findings unless explicitly allowed later. 
+ +## CodeQL installation and `make init` + +Rename `make venv` to `make init`, keeping `venv` as an alias. + +```makefile +.PHONY: init venv venv-check + +init: + @python3 -m venv .venv + @$(PYTHON) -m pip install --upgrade pip + @$(PYTHON) -m pip install --no-input -r requirements.txt + @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ + $(PYTHON) tools/codeql.py install; \ + fi + +venv: init +``` + +Install location: + +```text +.tools/codeql/<version>/ +.tools/codeql/current -> <version> +.cache/codeql/ +``` + +Update `.gitignore`: + +```text +.tools/ +.cache/ +``` + +Environment controls: + +```bash +CODEQL=0 make init +CODEQL_SKIP_INSTALL=1 make init +CODEQL_VERSION=<version> make init +CODEQL_FORCE_INSTALL=1 make init +``` + +## CodeQL runtime controls + +Supported escape hatches: + +```bash +CODEQL=0 make phase-1 +CODEQL_SKIP=1 make phase-1 +CODEQL_FAIL_POLICY=hard make phase-1 +CODEQL_PACKS=0 make phase-1 +CODEQL_COMMUNITY_PACKS=0 make phase-1 +CODEQL_CANDIDATES=off make phase-2 +CODEQL_CANDIDATES=briefing make phase-2 +CODEQL_CANDIDATES=precreate make phase-2 +``` + +Resolution priority: + +```text +environment variables > codecome.yml > defaults +``` + +Default policy: + +```text +CodeQL enabled: yes +Failure policy: soft +Candidate mode: briefing +Community packs: enabled +``` + +## `codecome.yml` additions + +Keep this compact; do not embed the full pack map in `codecome.yml`. 
+ +```yaml +static_analysis: + codeql: + enabled: true + fail_policy: "soft" + pack_catalog: "./templates/codeql-packs.yml" + + install: + managed: true + version: "latest" + path: ".tools/codeql/current/codeql" + + output_dir: "./itemdb/evidence/codeql" + database_dir: "./itemdb/evidence/codeql/databases" + cache_dir: "./.cache/codeql" + + phase_1: + enabled: true + + phase_2: + enabled: true + candidate_mode: "briefing" + max_candidates: 10 + + sweep: + enabled: true + inject_context: true +``` + +## CodeQL artifacts + +Use this layout: + +```text +itemdb/evidence/codeql/ + run-manifest.yml + selected-query-packs.yml + codeql-summary.md + + databases/ + python/ + c-cpp/ + + sarif/ + python.official.sarif + python.github-security-lab.sarif + cpp.trailofbits.sarif + cpp.coding-standards.sarif + + normalized/ + alerts.yml + file-signals.yml + candidate-findings.yml +``` + +`run-manifest.yml` should always exist after a CodeQL step, even when CodeQL was skipped or soft-failed. + +Example: + +```yaml +schema_version: 1 +phase: "phase-1" +status: "completed" # completed | skipped | soft-failed | failed +codeql_enabled: true +codeql_version: "2.x.y" +started_at: "YYYY-MM-DDTHH:MM:SSZ" +finished_at: "YYYY-MM-DDTHH:MM:SSZ" +plan_file: "itemdb/notes/codeql-plan.yml" +pack_catalog: "templates/codeql-packs.yml" +languages: + - "python" +warnings: [] +failures: [] +``` + +## SARIF normalization + +Do not expose raw SARIF directly to model prompts. Normalize it first. + +`itemdb/evidence/codeql/normalized/alerts.yml`: + +```yaml +schema_version: 1 +generated_by: "codeql-normalize" +codeql_version: "2.x.y" +target: "codecome-target" + +alerts: + - id: "CQ-0001" + fingerprint: "..." 
+ language: "python" + pack_profile: "github-security-lab" + pack: "githubsecuritylab/codeql-python-queries" + rule_id: "py/path-injection" + rule_name: "Uncontrolled data used in path expression" + severity: "warning" + security_severity: "7.5" + precision: "high" + kind: "path-problem" + primary_location: + path: "src/api/upload.py" + start_line: 88 + end_line: 88 + flow: + source: + path: "src/api/routes.py" + line: 42 + label: "request file name" + sink: + path: "src/api/upload.py" + line: 88 + label: "filesystem write" + steps: + - path: "src/api/routes.py" + line: 42 + message: "..." + - path: "src/api/upload.py" + line: 88 + message: "..." + mapped: + category: "Path traversal" + suggested_validation_methods: + - "static_proof" + - "http_exploit" +``` + +`file-signals.yml`: + +```yaml +schema_version: 1 +files: + - path: "src/api/upload.py" + codeql_score_boost: 2 + suggested_sweep: true + alerts: + total: 3 + path_problems: 1 + high_precision: 1 + rules: + - "py/path-injection" +``` + +## File risk enrichment + +`tools/codeql.py import-risk` should enrich `itemdb/notes/file-risk-index.yml`. + +Rules: + +- Preserve existing entries and model-authored reasons. +- Do not duplicate file entries. +- Cap scores at 5. +- Explain every score boost in `reasons`. +- Add an optional `external_signals.codeql` block. + +Example: + +```yaml +- path: "src/api/upload.py" + score: 5 + confidence: "HIGH" + target_area: "file upload API" + reasons: + - "Handles attacker-controlled multipart upload data." + - "CodeQL signal: py/path-injection reports user-controlled path reaching filesystem sink." 
+ sources: + - "HTTP multipart filename" + sinks: + - "filesystem write" + trust_boundaries: + - "remote client -> server filesystem" + suggested_vulnerability_classes: + - "Path traversal" + - "File upload vulnerabilities" + suggested_skills: + - "web-security" + suggested_validation_methods: + - "static_proof" + - "http_exploit" + external_signals: + codeql: + alerts: 3 + path_problems: 1 + highest_precision: "high" + rules: + - "py/path-injection" +``` + +## Phase 2 candidate handling + +Before the Phase 2 model invocation, the harness should call: + +```bash +tools/codeql.py create-candidates +``` + +Inputs: + +```text +itemdb/evidence/codeql/normalized/alerts.yml +itemdb/evidence/codeql/normalized/file-signals.yml +itemdb/notes/file-risk-index.yml +itemdb/findings/**/CC-*.md +``` + +Outputs: + +```text +itemdb/evidence/codeql/normalized/candidate-findings.yml +itemdb/notes/codeql-candidate-findings.md +``` + +Candidate modes: + +```text +off -> do nothing +briefing -> write candidate briefing only +precreate -> create filtered PENDING findings before model runs +``` + +Default: `briefing`. + +Precreate only when: + +- candidate is not under ignored/test/vendor/generated paths; +- a CodeCome category can be inferred; +- affected files are concrete; +- there is a plausible sink or security decision; +- the candidate is from an allowed pack profile; +- max candidate limit is not exceeded. + +Phase 2 prompt must require candidate disposition. + +Add to `prompts/phase-2-audit.md`: + +```md +## CodeQL candidate handling + +If `itemdb/notes/codeql-candidate-findings.md` or +`itemdb/evidence/codeql/normalized/candidate-findings.yml` exists, you must +account for each candidate. + +For each candidate, choose one: + +- create or complete a PENDING finding, +- merge it into an existing finding, +- defer it to `make sweep` with a concrete file target, +- reject it as non-security-relevant or out of scope. 
+ +Write the decision table to: + + itemdb/notes/codeql-candidate-disposition.md +``` + +Add a Phase 2 gate: + +- if candidate findings exist, `itemdb/notes/codeql-candidate-disposition.md` must exist; +- each candidate id should appear in the disposition table; +- created findings must pass frontmatter validation. + +## Sweep context injection + +`tools/run-sweep.py` should request per-file CodeQL context before writing the temporary sweep prompt. + +Command: + +```bash +tools/codeql.py context --file src/path/file.ext +``` + +If context exists, inject a section like: + +```md +## CodeQL context for this file + +Relevant alerts: + +- `CQ-0001` / `py/path-injection` + - pack: `githubsecuritylab/codeql-python-queries` + - source: `src/api/routes.py:42` + - sink: `src/api/upload.py:88` + - summary: user-controlled path reaches filesystem write + +Treat this as a static-analysis hint, not proof. Verify attacker control, +reachability, sanitizers, authorization, and impact before creating a finding. +``` + +Add `SWEEP_ARGS` support to the Makefile: + +```makefile +sweep: venv-check + @if [ -n "$(FILE)" ]; then \ + $(PYTHON) tools/run-sweep.py --file "$(FILE)" $(SWEEP_ARGS); \ + else \ + $(PYTHON) tools/run-sweep.py $(SWEEP_ARGS); \ + fi +``` + +## Makefile changes + +### Remove raw wrapper mode + +Remove all `CODECOME_USE_WRAPPER` branches from phase targets. + +Phase targets become: + +```makefile +phase-1: venv-check + @$(PYTHON) tools/run-agent.py --phase 1 + +phase-2: venv-check + @$(PYTHON) tools/run-agent.py --phase 2 + +phase-3: venv-check + @$(PYTHON) tools/run-agent.py --phase 3 + +phase-4: venv-check + @test -n "$(FINDING)" || (...) + @$(PYTHON) tools/run-agent.py --phase 4 --finding "$(FINDING)" + +phase-5: venv-check + @test -n "$(FINDING)" || (...) 
+ @$(PYTHON) tools/run-agent.py --phase 5 --finding "$(FINDING)" + +phase-6: venv-check + @$(PYTHON) tools/run-agent.py --phase 6 +``` + +### Optional raw debug target + +```makefile +opencode-raw: + @test -n "$(AGENT)" || (echo "AGENT is required" && exit 1) + @test -n "$(PROMPT_FILE)" || (echo "PROMPT_FILE is required" && exit 1) + @opencode run --agent "$(AGENT)" "$$(cat "$(PROMPT_FILE)")" +``` + +## Gates + +Extend `tools/gate-check.py` with subphase gates. + +### `gate-check.py 1a` + +Checks: + +- `itemdb/notes/target-profile.md` exists; +- `itemdb/notes/build-model.md` exists; +- `itemdb/notes/codeql-plan.yml` exists; +- `codeql-plan.yml` is valid YAML; +- if `recommended: true`, at least one language entry exists; +- each language entry has `id`, `confidence`, `build_mode`, `packs`; +- no new findings were created during 1a. + +### `gate-check.py 1b` + +Checks: + +- all required recon notes exist; +- `itemdb/notes/file-risk-index.yml` exists; +- YAML is valid; +- `schema_version` is present; +- `files` is a list; +- all file paths are workspace-relative; +- all scores are integers 1..5; +- template placeholder entry is gone; +- no new findings were created during 1b. + +### `gate-check.py 1c` + +Checks: + +- `itemdb/notes/sandbox-plan.md` exists; +- sandbox status/provenance exists or clear halt protocol exists; +- sandbox validation was attempted or static-only/nested-virt justification exists; +- frontmatter check passes. + +### CodeQL artifact gate + +Can live in `tools/codeql.py check-artifacts` rather than `gate-check.py`. + +Checks: + +- `run-manifest.yml` exists after a CodeQL step; +- manifest status is one of `completed`, `skipped`, `soft-failed`, `failed`; +- if completed, normalized outputs exist; +- if skipped/soft-failed, reason is recorded; +- no raw exception trace is left as the only diagnostic. 
+ +## Candidate finding frontmatter + +If precreate mode is used, generated findings should include the normal finding frontmatter plus optional origin/static-analysis metadata if the current frontmatter checker allows it. + +Preferred fields if allowed: + +```yaml +origin: + - "codeql" + +static_analysis: + codeql: + alerts: + - "CQ-0001" + rules: + - "py/path-injection" + packs: + - "githubsecuritylab/codeql-python-queries" + sarif: + - "itemdb/evidence/codeql/sarif/python.github-security-lab.sarif" +``` + +If the frontmatter checker rejects extra fields, place this information in the finding body under: + +```md +# Static-analysis evidence +``` + +Do not weaken the frontmatter gate to accept arbitrary fields without a deliberate schema update. + +## Testing plan + +Add fixtures: + +```text +tests/fixtures/codeql/ + sarif-path-problem.json + sarif-local-problem.json + sarif-multiple-packs.json + file-risk-index.base.yml + codeql-plan.python.yml + codeql-plan.cpp.yml + codeql-packs.yml +``` + +Add tests: + +```text +tests/test_codeql_packs.py +tests/test_codeql_normalize.py +tests/test_codeql_risk.py +tests/test_codeql_candidates.py +tests/test_codeql_context.py +tests/test_phase1_subphase_gates.py +``` + +Required cases: + +- pack catalog resolves requested profiles by language; +- unavailable pack profile is reported clearly; +- SARIF path-problem extracts source/sink/steps; +- local SARIF problem without flow is normalized without crashing; +- file risk enrichment preserves existing entries; +- file risk enrichment caps score at 5; +- ignored paths do not create candidates; +- coding-standards alerts enrich risk but do not precreate candidates by default; +- context lookup returns alerts where file is primary or related location; +- Phase 1a gate rejects missing/invalid `codeql-plan.yml`; +- Phase 1b gate rejects placeholder file-risk-index entries. 
+ +## Implementation PR sequence + +### PR 1 — Harness simplification and init rename + +- Remove `CODECOME_USE_WRAPPER` branches from Makefile. +- Make all `phase-*` targets call `tools/run-agent.py`. +- Add optional `opencode-raw` debug target. +- Rename `make venv` to `make init`. +- Keep `venv: init` alias. +- Update help text. + +### PR 2 — Split Phase 1 into 1a/1b/1c + +- Add prompts: + - `prompts/phase-1a-profile.md` + - `prompts/phase-1b-codeql-recon.md` + - `prompts/phase-1c-sandbox.md` +- Add `templates/codeql-plan.yml`. +- Extend `run-agent.py` with explicit Phase 1 orchestration. +- Add `gate-check.py 1a`, `1b`, `1c`. + +### PR 3 — CodeQL CLI and install/check + +- Add `tools/codeql.py`. +- Add `tools/codeql/` modules. +- Implement `install` and `check`. +- Install CodeQL into `.tools/codeql/`. +- Add `.tools/` and `.cache/` to `.gitignore`. +- Respect `CODEQL=0` and `CODEQL_SKIP_INSTALL=1`. + +### PR 4 — Pack catalog and resolver + +- Add `templates/codeql-packs.yml`. +- Implement pack resolver. +- Support profiles: + - `official` + - `github-security-lab` + - `trailofbits` + - `coding-standards` + - `local` +- Write `selected-query-packs.yml`. +- Validate pack catalog schema. + +### PR 5 — CodeQL run and SARIF normalization + +- Implement `tools/codeql.py run`. +- Read `itemdb/notes/codeql-plan.yml`. +- Create databases per language. +- Analyze with selected packs. +- Normalize SARIF. +- Write: + - `run-manifest.yml` + - `alerts.yml` + - `file-signals.yml` + - `codeql-summary.md` +- Implement soft/hard fail policy. + +### PR 6 — Phase 1 CodeQL integration + +- Call CodeQL between Phase 1a and Phase 1b. +- Add CodeQL artifact gate. +- Ensure Phase 1b prompt reads CodeQL artifacts. +- Enrich file-risk-index from CodeQL signals. + +### PR 7 — Phase 2 candidates + +- Implement `tools/codeql.py create-candidates`. +- Generate `candidate-findings.yml`. +- Generate `codeql-candidate-findings.md`. +- Support `off`, `briefing`, and `precreate` modes. 
+- Update Phase 2 prompt with candidate disposition requirement. +- Add gate for candidate disposition. + +### PR 8 — Sweep context + +- Implement `tools/codeql.py context --file`. +- Inject context into `tools/run-sweep.py` prompts. +- Add `SWEEP_ARGS` to Makefile. +- Update sweep prompt with CodeQL context rules. + +## Review checklist before implementation + +- Confirm `tools/codeql.py` vs `tools/codecome.py codeql` decision. +- Confirm exact CodeQL install source/version policy. +- Verify package names in `templates/codeql-packs.yml`. +- Confirm default `CODEQL_CANDIDATES` mode: `briefing` vs `precreate`. +- Confirm whether finding frontmatter schema should accept `origin` / `static_analysis`. +- Confirm whether `coding-standards` should ever precreate findings by default. +- Confirm whether Phase 1c sandbox prompt should be copied from current `phase-1-recon.md` or rewritten tighter. From dd010092c6dc8e25dd7f046c6b4721d352d20032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 18:07:57 +0200 Subject: [PATCH 02/47] refactor: remove CODECOME_USE_WRAPPER bypass, rename venv -> init, add opencode-raw target - Remove CODECOME_USE_WRAPPER branches from all phase targets; always use harness - Remove CHAT/WRAPPER_ARGS and CODECOME_OPENCODE_ENV_EXPORT dead vars - Add opencode-raw debug target for non-workflow opencode runs - Rename make venv -> make init with CodeQL install stub (kept venv alias) - Update help text, .PHONY declarations, and venv-check error messages --- Makefile | 83 ++++++++++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 2b7c1260..8a02727b 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -.PHONY: help venv venv-check check status next-id frontmatter tests test-parity itemdb-reset index report +.PHONY: help 
init venv venv-check check status next-id frontmatter tests test-parity itemdb-reset index report .PHONY: findings findings-create findings-move findings-evidence findings-package -.PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all +.PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all opencode-raw .PHONY: sandbox-setup sandbox-check sandbox-up sandbox-down sandbox-shell sandbox-logs sandbox-clean sandbox-reset sandbox-build sandbox-test .PHONY: sandbox-list sandbox-inspect sandbox-detect sandbox-bootstrap sandbox-validate sandbox-regenerate sandbox-status show-model @@ -12,14 +12,6 @@ export PATH := $(CURDIR)/.venv/bin:$(PATH) export PROMPT_EXTRA export PROMPT_EXTRA_FILE -CHAT ?= 0 -ifeq ($(CHAT),1) -WRAPPER_ARGS += --chat -endif - -# Env vars injected into opencode serve (wrapper mode) and opencode run (raw mode) -CODECOME_OPENCODE_ENV_EXPORT := OPENCODE_ENABLE_EXA=1 - # Pass --thinking to raw opencode run when CODECOME_THINKING=1 OPENCODE_THINKING_FLAG := $(if $(filter 1,$(CODECOME_THINKING)),--thinking,) @@ -44,7 +36,7 @@ help: @printf "\n" @printf " $(BOLD)$(CYAN)Workflow phases:$(RESET)\n" @printf "\n" - @printf " $(BOLD)make venv$(RESET) Create/update repo-local virtualenv\n" + @printf " $(BOLD)make init$(RESET) Create/update repo-local virtualenv\n" @printf " $(BOLD)make phase-1$(RESET) Run reconnaissance\n" @printf " $(BOLD)make phase-2$(RESET) Run hypothesis generation\n" @printf " $(BOLD)make phase-3$(RESET) Run counter-analysis\n" @@ -60,11 +52,9 @@ help: @printf " $(BOLD)make sweep$(RESET) Run deep sweep on top-scoring files\n" @printf " $(BOLD)make sweep FILE=\"src/foo.*\"$(RESET) Run deep sweep on specific file(s)\n" @printf "\n" - @printf " $(BOLD)$(CYAN)Wrapper controls:$(RESET)\n" + @printf " $(BOLD)$(CYAN)Phase controls:$(RESET)\n" @printf "\n" - @printf " $(BOLD)CODECOME_USE_WRAPPER=0$(RESET) Bypass styled wrapper and use raw opencode run\n" - @printf " $(BOLD)CODECOME_THINKING=1$(RESET) 
Show model reasoning/thinking blocks in output\n" - @printf " $(BOLD)OPENCODE_ARGS='...'$(RESET) Extra flags for opencode run (forwarded directly when CODECOME_USE_WRAPPER=0; in wrapper mode only --model, --variant and --thinking are used)\n" + @printf " $(BOLD)CODECOME_THINKING=1$(RESET) Show model reasoning/thinking blocks in output\n" @printf " $(BOLD)CODECOME_MODEL=$(RESET) Pin the model per phase (e.g. anthropic/claude-opus-4-7)\n" @printf " $(BOLD)CODECOME_MODEL_VARIANT=$(RESET) Pin the model variant (e.g. high, max)\n" @printf " $(BOLD)PROMPT_EXTRA=\"...\"$(RESET) Append extra instructions to phase prompt\n" @@ -73,6 +63,13 @@ help: @printf " $(BOLD)make show-model$(RESET) Print the model resolution table for an agent\n" @printf " $(BOLD)make show-model AGENT=auditor$(RESET)\n" @printf "\n" + @printf " $(BOLD)$(CYAN)Raw debug (non-workflow):$(RESET)\n" + @printf "\n" + @printf " $(BOLD)make opencode-raw$(RESET) Run opencode directly (bypasses harness)\n" + @printf " $(BOLD)AGENT=$(RESET) Required. Agent to run (e.g. auditor)\n" + @printf " $(BOLD)PROMPT_FILE=path$(RESET) Required. 
Prompt file to send\n" + @printf " $(BOLD)CODECOME_THINKING=1$(RESET) Show reasoning/thinking blocks\n" + @printf "\n" @printf " $(BOLD)$(CYAN)Workspace tools:$(RESET)\n" @printf "\n" @printf " $(BOLD)make check$(RESET) Validate workspace structure and config\n" @@ -127,14 +124,19 @@ help: # Python environment # --------------------------------------------------------------------------- -venv: +init: @python3 -m venv .venv @$(PYTHON) -m pip install --upgrade pip @$(PYTHON) -m pip install --no-input -r requirements.txt + @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ + printf "$(BOLD)$(CYAN)[CodeQL]$(RESET) Managed CodeQL install not yet implemented — coming in a future PR.\n"; \ + fi + +venv: init venv-check: - @test -x "$(PYTHON)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing repo virtualenv at .venv\n\nRun:\n\n make venv\n\n" && exit 1) - @$(PYTHON) -c "import yaml, rich" >/dev/null 2>&1 || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) .venv is missing required Python packages\n\nRun:\n\n make venv\n\nIf you updated requirements, rerun the same command to resync .venv.\n\n" && exit 1) + @test -x "$(PYTHON)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing repo virtualenv at .venv\n\nRun:\n\n make init\n\n" && exit 1) + @$(PYTHON) -c "import yaml, rich" >/dev/null 2>&1 || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) .venv is missing required Python packages\n\nRun:\n\n make init\n\nIf you updated requirements, rerun the same command to resync .venv.\n\n" && exit 1) # --------------------------------------------------------------------------- # Workflow phases @@ -142,11 +144,7 @@ venv-check: phase-1: venv-check @$(PYTHON) tools/gate-check.py 1 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent recon $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-1-recon.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon 
--prompt-file prompts/phase-1-recon.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon --prompt-file prompts/phase-1-recon.md phase-2: venv-check @$(PYTHON) tools/gate-check.py 2 @@ -155,45 +153,25 @@ phase-2: venv-check printf "Run: make sandbox-status\n" ; \ printf "Or override (not recommended): CODECOME_ALLOW_NO_SANDBOX=1 make phase-2\n\n" ; \ exit 1 ) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent auditor $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-2-audit.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md phase-3: venv-check @$(PYTHON) tools/gate-check.py 3 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent reviewer $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-3-review.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md phase-4: venv-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 4 (Validation).\n\nSpecify which finding you want to validate:\n\n $(BOLD)make phase-4 FINDING=CC-0001$(RESET)\n\nTo list available pending findings: $(BOLD)make findings STATUS=PENDING$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 4 $(FINDING) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent validator $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' 
prompts/phase-4-validate.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)"; \ - fi + @$(PYTHON) tools/run-agent.py --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)" phase-5: venv-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 5 (Exploitation).\n\nSpecify which finding you want to exploit:\n\n $(BOLD)make phase-5 FINDING=CC-0001$(RESET)\n\nTo list available confirmed findings: $(BOLD)make findings STATUS=CONFIRMED$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 5 $(FINDING) - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent exploiter $(OPENCODE_THINKING_FLAG) "$$(sed 's#FINDING_PATH_OR_ID#$(FINDING)#g' prompts/phase-5-exploit.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)"; \ - fi + @$(PYTHON) tools/run-agent.py --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)" phase-6: venv-check @$(PYTHON) tools/gate-check.py 6 - @if [ "$$CODECOME_USE_WRAPPER" = "0" ]; then \ - $(CODECOME_OPENCODE_ENV_EXPORT) opencode run --agent reporter $(OPENCODE_THINKING_FLAG) "$$(cat prompts/phase-6-report.md)"; \ - else \ - $(PYTHON) tools/run-agent.py $(WRAPPER_ARGS) --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md; \ - fi + @$(PYTHON) tools/run-agent.py --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md chat: venv-check @$(PYTHON) tools/run-agent.py --chat --label "Interactive Chat" --agent $(or $(AGENT),chat) --prompt-file prompts/chat-initial.md $(if $(DEBUG),--debug,) @@ -208,6 +186,15 @@ sweep: 
venv-check $(PYTHON) tools/run-sweep.py; \ fi +# --------------------------------------------------------------------------- +# Raw opencode debug target (non-workflow) +# --------------------------------------------------------------------------- + +opencode-raw: + @test -n "$(AGENT)" || (echo "AGENT is required. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) + @test -n "$(PROMPT_FILE)" || (echo "PROMPT_FILE is required. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) + @opencode run --agent "$(AGENT)" $(OPENCODE_THINKING_FLAG) "$$(cat "$(PROMPT_FILE)")" + validate-all: venv-check @ids=$$($(PYTHON) tools/list-findings.py --status PENDING --format ids 2>/dev/null); \ if [ -z "$$ids" ]; then \ From a209c982301a9536c0edb66411f652082c17d116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 18:36:39 +0200 Subject: [PATCH 03/47] feat: split Phase 1 into subphases 1a/1b/1c with gates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add prompts/phase-1a-profile.md (target profile + codeql plan) - Add prompts/phase-1b-codeql-recon.md (CodeQL-assisted recon notes) - Add prompts/phase-1c-sandbox.md (sandbox bootstrap, extracted from old prompt) - Add templates/codeql-plan.yml template for model to fill - Add codecome/phase_1.py subphase orchestration module (1a→1b→1c) - Add check_phase_1a/1b/1c gate functions to gate-check.py - Update harness.py: Phase 1 dispatches to phase_1 module with own server lifecycle - Update cli.py: --prompt-file not required for Phase 1 - Update Makefile: phase-1 target simplified --- Makefile | 2 +- prompts/phase-1a-profile.md | 95 +++++++ prompts/phase-1b-codeql-recon.md | 202 ++++++++++++++ prompts/phase-1c-sandbox.md | 125 +++++++++ templates/codeql-plan.yml | 44 +++ tools/codecome/cli.py | 6 +- tools/codecome/harness.py | 31 +++ tools/codecome/phase_1.py | 456 +++++++++++++++++++++++++++++++ 
tools/gate-check.py | 296 +++++++++++++++++++- 9 files changed, 1248 insertions(+), 9 deletions(-) create mode 100644 prompts/phase-1a-profile.md create mode 100644 prompts/phase-1b-codeql-recon.md create mode 100644 prompts/phase-1c-sandbox.md create mode 100644 templates/codeql-plan.yml create mode 100644 tools/codecome/phase_1.py diff --git a/Makefile b/Makefile index 8a02727b..638b61c1 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,7 @@ venv-check: phase-1: venv-check @$(PYTHON) tools/gate-check.py 1 - @$(PYTHON) tools/run-agent.py --phase 1 --label "Target Reconnaissance + Sandbox Bootstrap" --agent recon --prompt-file prompts/phase-1-recon.md + @$(PYTHON) tools/run-agent.py --phase 1 --label "Phase 1: Reconnaissance" --agent recon phase-2: venv-check @$(PYTHON) tools/gate-check.py 2 diff --git a/prompts/phase-1a-profile.md b/prompts/phase-1a-profile.md new file mode 100644 index 00000000..412369bc --- /dev/null +++ b/prompts/phase-1a-profile.md @@ -0,0 +1,95 @@ +# CodeCome Phase 1a: Target Profile + +You are performing CodeCome **Phase 1a** — the first sub-stage of Phase 1. + +This sub-stage is scoped to: broad source tree mapping, language/framework detection, build model identification, and CodeQL plan generation. Do not produce full reconnaissance notes, file-risk-index, or sandbox artifacts here. Those are handled by Phase 1b and 1c. + +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `templates/target-recon.md` +- `templates/codeql-plan.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/source-recon/SKILL.md` + +Do not load target-specific security skills or vulnerability-family skills during Phase 1a. Keep the scope structural. 
+ +## Target + +Analyze the source tree under: + + ./src + +## Required outputs + +Create these files under `itemdb/notes/`: + +- `target-profile.md` +- `build-model.md` +- `codeql-plan.yml` + +### `target-profile.md` + +Document: + +- **Target type**: web application, CLI tool, library, service, firmware, IaC, mobile app, desktop app, benchmark corpus, or mixed repository. +- **Primary languages and frameworks**: detected language, version indicators, major frameworks. +- **Secondary languages**: tooling, scripting, configuration DSLs. +- **Repository structure**: top-level layout, key directories, monorepo vs single-project. +- **Primary target component**: the main application, service, or library. If multiple, identify the primary and note secondary surfaces as optional follow-up. + +Do not yet produce detailed attack surface, trust boundary, data flow, or validation notes. Those are Phase 1b. + +### `build-model.md` + +Document: + +- **Build system**: Make, CMake, Maven, Gradle, npm, pip, Cargo, Go modules, etc. +- **Build commands**: how to compile/build the target from source. +- **Dependencies**: package manager files, vendored dependencies, external dependencies. +- **Build prerequisites**: toolchain versions, system packages, Docker images. +- **Whether the target can be built** within the workspace. Be honest about blockers. + +### `codeql-plan.yml` + +Create `itemdb/notes/codeql-plan.yml` by filling in the template from `templates/codeql-plan.yml`. + +Rules: + +- Only include languages you have detected with **HIGH** or **MEDIUM** confidence. +- For each language, select the appropriate pack profiles: + - `official` — always include for languages with CodeQL support. + - `github-security-lab` — include for security-focused audits. + - `trailofbits` — include for C/C++ and Go targets. + - `coding-standards` — include for C/C++ targets where coding standards queries apply. + - `local` — include if custom queries exist under `queries/codeql/<language>/`. 
+- Set `build_mode` to `none` for interpreted languages, `manual` for compiled languages with a known build command, or `autobuild` if CodeQL autobuild should be attempted. +- Fill in `build_command` when `build_mode` is `manual`. +- Set `recommended: false` if you cannot confidently profile any language. +- Add relevant `notes` explaining your language choices and any uncertainties. +- Update `exclude` patterns to match the target's test, fixture, vendor, and generated code directories if different from the defaults. + +## Important rules + +- Do not assume the target is a web application. +- Do not modify files under `src/`. +- Do not generate vulnerability findings. +- Do not produce full reconnaissance notes (attack-surface, trust-boundaries, etc.) — those are Phase 1b. +- Do not bootstrap the sandbox — that is Phase 1c. +- Do not run CodeQL manually. The harness runs it after this sub-stage. +- Be explicit about uncertainty. +- Prefer useful notes over exhaustive dumps. +- Focus on what later sub-stages need. + +## Final response + +At the end, summarize: + +- Target type and primary language(s) +- Build system and buildability assessment +- Languages selected for CodeQL analysis and their confidence levels +- Files created: `target-profile.md`, `build-model.md`, `codeql-plan.yml` +- Key uncertainties or blockers diff --git a/prompts/phase-1b-codeql-recon.md b/prompts/phase-1b-codeql-recon.md new file mode 100644 index 00000000..ca884a38 --- /dev/null +++ b/prompts/phase-1b-codeql-recon.md @@ -0,0 +1,202 @@ +# CodeCome Phase 1b: CodeQL-assisted Reconnaissance + +You are performing CodeCome **Phase 1b** — the second sub-stage of Phase 1. + +This sub-stage produces the detailed reconnaissance notes. Phase 1a already created the target profile, build model, and CodeQL plan. If CodeQL analysis has completed (it may not have — treat it as optional), you now have normalized CodeQL artifacts to incorporate as reconnaissance evidence. 
+ +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `templates/target-recon.md` +- `templates/file-risk-index.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/source-recon/SKILL.md` + +Also read the Phase 1a outputs: + +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` +- `itemdb/notes/codeql-plan.yml` + +## CodeQL artifacts (conditional) + +If CodeQL analysis was performed, the following artifacts may exist. Treat them as reconnaissance evidence, not proof of vulnerability: + +- `itemdb/evidence/codeql/run-manifest.yml` — CodeQL run outcome and metadata. +- `itemdb/evidence/codeql/normalized/alerts.yml` — Normalized CodeQL alerts with source/sink/flow. +- `itemdb/evidence/codeql/normalized/file-signals.yml` — Per-file CodeQL signal scores. +- `itemdb/evidence/codeql/codeql-summary.md` — Human-readable CodeQL summary. + +If these files exist: + +1. Read them and extract relevant signals. +2. Use alert data to enrich your understanding of potential sources, sinks, and trust-boundary crossings. +3. Use file-signals to prioritize files for the file-risk-index. +4. Do not treat CodeQL alerts as confirmed vulnerabilities. They are static-analysis hints. + +If these files do not exist, proceed with reconnaissance based on source analysis alone. Phase 1b must complete regardless of CodeQL availability. + +## Target + +Analyze the source tree under: + + ./src + +## Required outputs + +Create these files under `itemdb/notes/`: + +- `attack-surface.md` +- `execution-model.md` +- `trust-boundaries.md` +- `data-flow.md` +- `validation-model.md` +- `interesting-files.md` +- `file-risk-index.yml` +- `security-assumptions.md` + +### `attack-surface.md` + +Document: + +- **Network-facing attack surfaces**: HTTP endpoints, RPC services, WebSocket handlers, TCP/UDP listeners, message queue consumers. 
+- **Local attack surfaces**: CLI argument parsing, config file loading, environment variable consumption, file I/O, IPC. +- **API surface**: routes, controllers, handlers, middleware, GraphQL schemas, gRPC service definitions. +- **Input vectors**: query parameters, request bodies, file uploads, headers, cookies, WebSocket frames, serialized objects. +- **Output vectors**: response bodies, rendered templates, log emissions, file writes. + +### `execution-model.md` + +Document: + +- **Runtime environment**: interpreter, JVM, CLR, native binary, container, serverless. +- **Process model**: single-process, multi-process, worker pool, event loop, thread pool. +- **Startup and lifecycle**: initialization, configuration loading, connection pooling, shutdown. +- **Concurrency model**: async/await, threads, multiprocessing, greenlets, coroutines. + +### `trust-boundaries.md` + +Document: + +- **Network boundary**: remote client ↔ server. +- **Process boundary**: separate processes or containers. +- **User boundary**: authenticated vs unauthenticated, role-based. +- **Data boundary**: tenant isolation, database per tenant, shared database. +- **Component boundary**: plugin system, library interfaces, IPC channels. + +### `data-flow.md` + +Document key data flows from entry points to dangerous sinks: + +- Source (entry point) → transformation/validation → sink (filesystem, DB, network, command execution). +- For each flow, note whether input is attacker-controlled, partially controlled, or trusted. +- Flag missing or weak validation points. + +### `validation-model.md` + +Document: + +- How the target is tested (unit, integration, E2E, fuzzing). +- Whether a sandbox runtime is achievable. +- Recommended validation methods for each vulnerability class identified in `attack-surface.md`. +- Whether static-only or nested-virt validation models apply (requires explicit justification). 
+ +### `interesting-files.md` + +List files that warrant deeper Phase 2 or sweep attention: + +- Files containing authentication/authorization logic. +- Files with dangerous sink usage (exec, eval, SQL construction, file I/O, crypto). +- Files handling deserialization, parsing, or format conversion. +- Files at trust boundaries. +- Files with high CodeQL alert density (if CodeQL artifacts exist). +- Configuration files affecting security behavior. + +### `file-risk-index.yml` + +Create `itemdb/notes/file-risk-index.yml` using the schema in `templates/file-risk-index.yml`. + +This is a structured, machine-readable companion to `interesting-files.md`. It is consumed by optional file-scoped Phase 2 sweeps. + +Score files from 1 to 5 using the scoring scale in the template: + +- `1`: low security interest, +- `2`: weak or indirect security relevance, +- `3`: moderate security interest, +- `4`: high security interest, +- `5`: very high security interest. + +Prioritize files that contain or strongly influence: + +- attacker-controlled or externally influenced input, +- trust-boundary crossings, +- authentication or authorization decisions, +- dangerous sinks, +- parsers and decoders, +- file upload or archive handling, +- cryptographic or secret-handling logic, +- privilege boundaries, +- tenant/account/resource isolation, +- network-facing protocol handlers, +- sandbox, policy, or permission enforcement. + +For each high-risk file, include concrete reasons, likely entry points, sources, sinks, trust boundaries, suggested vulnerability classes, suggested skills, and suggested validation methods when inferable. + +If CodeQL file signals exist (`itemdb/evidence/codeql/normalized/file-signals.yml`), incorporate them: +- Add `external_signals.codeql` blocks to file entries with CodeQL alerts. +- Boost scores where CodeQL reports high-precision alerts, but cap at 5. +- Explain every CodeQL-driven score boost in the `reasons` field. + +Do not include every source file. 
Prefer a concise ranked set that Phase 2 can act on. + +### `security-assumptions.md` + +Document: + +- Assumptions the codebase appears to make about its environment, inputs, and callers. +- Implicit trust relationships (e.g., "this internal API assumes the caller is already authorized"). +- Cryptographic assumptions. +- Assumptions about input validation performed by upstream components. + +## Additional reconnaissance + +Recursively scan `src/` for high-signal documentation such as `README*`, `SECURITY*`, `THREAT_MODEL*`, `CONTRIBUTING*`, `docs/`, and similar. Also inspect `CHANGELOG*`, `HISTORY*`, and `NEWS*`, but prefer top-level or component-relevant files. + +If the repository has dozens of changelog/history/news files, do not process them exhaustively. Summarize the pattern, prioritize files near the primary target or security-relevant components, and record that scope decision. + +Review external public context for prior security advisories, CVE references, historical security fixes, release notes, and recurring bug classes affecting this project or closely related upstream components. Prefer project advisories, GitHub Security Advisories, NVD/CVE entries, issue trackers, release notes, and distribution advisories. + +Use external context only as reconnaissance input: distill affected components, historical bug patterns, trust boundaries, and fixed attack surfaces into the notes. Do not treat external claims as proof that the current source tree is affected; verify everything against `src/` before creating findings. + +Distill declared threat model, past CVEs, trust boundaries, and third-party components into the relevant notes; treat author claims as input to verify, not facts. + +## Important rules + +- Do not assume the target is a web application. +- Do not assume the target can be built. +- Do not assume the target can be executed. +- Do not modify files under `src/`. +- Do not generate low-confidence vulnerability findings during reconnaissance. 
+- Do not rely only on filenames, comments, or labels. +- Be explicit about uncertainty. +- Prefer useful notes over exhaustive dumps. +- Focus on what later phases need. +- Do not let any target-specific skill narrow the target model before broad mapping is complete. +- Do not ask the user to choose Phase 2 scope when a reasonable default can be inferred. Pick the primary target from repository evidence, document secondary surfaces as optional follow-up, and continue. +- Do not phrase optional preferences as "User input requested". Use "Optional follow-up" unless Phase 1 is blocked. +- Reading `.env` files is allowed only in two places during reconnaissance: target inputs under `src/**` and CodeCome-generated sandbox metadata in `sandbox/.env`. Avoid unrelated `.env` files elsewhere in the workspace. + +## Final response + +At the end, summarize: + +- Target type (from Phase 1a), +- Most important attack surfaces identified, +- Recommended Phase 2 focus, +- Highest-risk files from `file-risk-index.yml`, +- CodeQL signals incorporated (if any), +- Files created in this sub-stage, +- Key limitations and uncertainties. diff --git a/prompts/phase-1c-sandbox.md b/prompts/phase-1c-sandbox.md new file mode 100644 index 00000000..531306b0 --- /dev/null +++ b/prompts/phase-1c-sandbox.md @@ -0,0 +1,125 @@ +# CodeCome Phase 1c: Sandbox Bootstrap + +You are performing CodeCome **Phase 1c** — the third and final sub-stage of Phase 1. + +This sub-stage bootstraps the sandbox environment. Phase 1a produced the target profile and build model. Phase 1b produced the full reconnaissance notes. Your job is to leave `sandbox/` in a state where Phase 2 can run. 
+ +## Required reading + +Read the following files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- `.opencode/agents/recon.md` +- `.opencode/skills/sandbox-bootstrap/SKILL.md` +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` + +## Required output + +- `itemdb/notes/sandbox-plan.md` + +## Workflow + +1. Inspect current sandbox state: + + make sandbox-status + +2. Inspect target runtime artifacts under `src/`. At minimum consider: + + src/Dockerfile + src/docker-compose.yml + src/docker-compose.yaml + src/compose.yml + src/compose.yaml + src/Makefile + src/scripts/ + src/README* + src/INSTALL* + src/CONTRIBUTING* + src/RUN* + src/docs/ + + Decide what to honor. Document the decision in `sandbox-plan.md`. + +3. Detect candidates: + + make sandbox-detect + +4. Inspect the chosen example: + + make sandbox-inspect ID=<id> + +5. Apply the example: + + BOOTSTRAP_ARGS='--var KEY1=VAL1 --var KEY2=VAL2' \ + make sandbox-bootstrap ID=<id> + + Or, for a preview without writing: + + BOOTSTRAP_ARGS='--dry-run --var KEY=VAL' \ + make sandbox-bootstrap ID=<id> + + `apply` refuses to overwrite a user-managed `sandbox/` (one without `CODECOME-GENERATED.md`). If the user has accepted the loss, re-run with `--force` and the prior content is moved to `sandbox/.backup-<timestamp>/`. + +5b. Implement the required sandbox capabilities. + + Templates are seeds, not finished sandboxes. Each `templates/sandboxes/<id>/` ships only `Dockerfile`, `docker-compose.yml`, a starter `build.sh`, and a starter `test.sh`. 
After `apply`, you must leave `sandbox/` with working mechanisms for: + + sandbox setup + sandbox start + sandbox sanity + target build + target test + sandbox stop + + Prefer helper scripts under `sandbox/scripts/` such as: + + setup.sh up.sh check.sh build.sh test.sh + + Add operational helpers when they make sense for the target: + + down.sh shell.sh logs.sh clean.sh reset.sh + + Prefer a realistic runtime environment when it is reasonably derivable from the repository. For web apps, APIs, and other services, Phase 1c should attempt to start the real application stack, not just compile it. If the target appears to need a database, cache, queue, reverse proxy, migrations, seed data, or health checks, include those when the source tree or docs make them inferable. + + Do not stop at a toolchain-only or build-only sandbox when later Phase 4 or Phase 5 validation would realistically require a running application. If full runtime is not feasible, document the closest achievable runtime model and the blocker in `itemdb/notes/sandbox-plan.md`. + + Adapt `build.sh` and `test.sh` to the actual project layout (the source may be nested under `src/<project>/`, not directly under `src/`). Author additional scripts when they help the target (sanitizer build, fuzzing harness, debugger attach, target-specific reset, etc.). Make every script executable. Document any extras in `itemdb/notes/sandbox-plan.md` under "Extra scripts authored". + + Do not record any validation tier as `skipped` because the required capability is missing. Either implement the helper and run the tier, or accept the `failed` outcome the validator emits. + + Do not replace authoring a script with an in-chat manual spot-check. Manual checks do not survive future runs. + + See `.opencode/skills/sandbox-bootstrap/SKILL.md` for authoring conventions and the sandbox capability contract. + +6. 
Validate: + + make sandbox-validate + + Use `BOOTSTRAP_ARGS='--keep-going'` to run all tiers even after a failure, or `--scripts-only` / `--docker-only` to constrain which mode is used. + + `validate` appends a "Validation run <timestamp>" table to `sandbox/CODECOME-GENERATED.md` and returns JSON with `--format json`. Capture per-tier outcomes (passed / failed / skipped, exit code, last 50 lines of stderr) into the validation matrix in `sandbox-plan.md`. A missing required capability makes the tier `failed`; that means you still need to complete step 5b. + +7. If validation fails, attempt automatic remediations within the retry budget (`CODECOME_BOOTSTRAP_MAX_RETRIES`, default 3). Each attempt must be logged in `sandbox-plan.md`. When the budget is exhausted, write the halt protocol in `sandbox-plan.md` and stop Phase 1c. + +8. Special validation models: + + - `static-only`: requires explicit justification in `sandbox-plan.md`. + - `nested-virt`: requires explicit justification and arch declaration. + +## Important rules + +- Do not modify files under `src/`. +- Do not silently overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. Validate first; if it works, move on; if it does not, halt with the halt protocol. +- Do not generate vulnerability findings. + +## Final response + +At the end, summarize: + +- Chosen sandbox example and `validation_model`, +- Validation outcome (`passed`, `passed-with-warnings`, `halted`), +- `itemdb/notes/sandbox-plan.md` created, +- Key limitations, +- Halt requirements if sandbox bootstrap is blocked. diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml new file mode 100644 index 00000000..9c6c58b0 --- /dev/null +++ b/templates/codeql-plan.yml @@ -0,0 +1,44 @@ +# CodeQL analysis plan generated by Phase 1a target profiling. +# The model fills in language entries based on source tree analysis. +# This template is read by tools/codeql.py run --plan. 
+ +schema_version: 1 +generated_by: "phase-1a-profile" + +source_path: "./src" +recommended: true + +languages: [] +# Example language entries: +# +# languages: +# - id: "python" +# confidence: "HIGH" +# build_mode: "none" +# build_command: null +# packs: +# - "official" +# - "github-security-lab" +# +# - id: "c-cpp" +# confidence: "HIGH" +# build_mode: "manual" +# build_command: "make -C src" +# packs: +# - "official" +# - "github-security-lab" +# - "trailofbits" +# - "coding-standards" +# +# Allowed language IDs: python, javascript-typescript, c-cpp, go, csharp, java-kotlin +# Allowed confidence values: HIGH, MEDIUM, LOW +# Allowed build_mode values: none, manual, autobuild +# Allowed pack profile names: official, github-security-lab, trailofbits, coding-standards, local + +exclude: + - "src/**/tests/**" + - "src/**/fixtures/**" + - "src/**/vendor/**" + - "src/**/node_modules/**" + +notes: [] diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 84c1869e..6f75dfa7 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -66,7 +66,11 @@ def main() -> int: from chat.harness import run_harness return run_harness(parser, args) - missing = [n for n in ("phase", "label", "agent", "prompt_file") if getattr(args, n) is None] + # Phase 1 handles its own prompt files via subphase orchestration. 
+ required = ["phase", "label", "agent"] + if str(args.phase) not in ("1", "None"): + required.append("prompt_file") + missing = [n for n in required if getattr(args, n) is None] if missing: parser.error( "the following arguments are required when not using --show-model or --chat: " diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py index 1cd4633e..f02b08e6 100644 --- a/tools/codecome/harness.py +++ b/tools/codecome/harness.py @@ -59,6 +59,37 @@ def run_phase_mode(args: argparse.Namespace) -> int: if _overrides: _rendering_ctx.settings = dataclasses.replace(_rendering_ctx.settings, **_overrides) + # ── Phase 1: subphase orchestration with own server lifecycle ── + if str(args.phase) == "1": + os.environ["_CODECOME_INSIDE_HARNESS"] = "1" + _p1_runner = ServerRunner() + try: + _p1_server_info = _p1_runner.start(hostname="127.0.0.1", log_level=args.log_level) + except ServerRunnerError as exc: + _emit_fatal_error(console, "Server Error", str(exc)) + return 1 + + def _p1_forward_signal(signum: int, _frame: Any) -> None: + info = _p1_runner.info + if info is not None: + try: + os.killpg(info.pid, signum) + except ProcessLookupError: + pass + signal.signal(signum, signal.SIG_DFL) + os.kill(os.getpid(), signum) + + _p1_prev_sigint = signal.signal(signal.SIGINT, _p1_forward_signal) + _p1_prev_sigterm = signal.signal(signal.SIGTERM, _p1_forward_signal) + try: + from codecome.phase_1 import run_phase_1 as _run_phase_1 + return _run_phase_1(args, console, _rendering_ctx, _p1_runner, _p1_server_info.base_url) + finally: + signal.signal(signal.SIGINT, _p1_prev_sigint) + signal.signal(signal.SIGTERM, _p1_prev_sigterm) + _p1_runner.stop() + + # ── Phases 2-6 below this point ── prompt_file = ROOT / args.prompt_file prompt = load_prompt(prompt_file, args.finding, phase=args.phase) rc = resolve_runtime_config(args.agent) diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py new file mode 100644 index 00000000..37e3afae --- /dev/null +++ 
b/tools/codecome/phase_1.py @@ -0,0 +1,456 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +""" +Phase 1 subphase orchestration. + +Runs Phase 1 as three subphases (1a / 1b / 1c) with gates and a CodeQL +placeholder between 1a and 1b. The opencode server is started once and +reused across all three subphase sessions. +""" + +from __future__ import annotations + +import importlib.util +import subprocess +import sys +import time +from pathlib import Path +from typing import Any + +from opencode.serve import ServerRunner, ServerRunnerError + +from codecome.console import build_console, _emit_fatal_error +from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config +from codecome.runner import _run_single_attempt +from rendering.dispatch import HAVE_RICH, _get_rendering_ctx, configure_rendering, render_event +from rendering.events import ( + _FINISH_TERMINAL_OK, + _FINISH_MID_TURN, + _FINISH_FAILURE, + _reset_subagent_state, +) +from phases.completion import ( + check_phase_graceful_completion, + build_phase_resume_prompt, + build_frontmatter_resume_prompt, +) + +# gate-check.py uses a hyphen and cannot be imported with a regular +# ``import`` statement. Load it via importlib. 
+_gc_spec = importlib.util.spec_from_file_location( + "gate_check", + str(ROOT / "tools" / "gate-check.py"), +) +_gate_check = importlib.util.module_from_spec(_gc_spec) +_gc_spec.loader.exec_module(_gate_check) + + +# --------------------------------------------------------------------------- +# CodeQL placeholder (no-op until PR 5) +# --------------------------------------------------------------------------- + +def _run_codeql_placeholder(console: Any) -> None: + """Log that CodeQL is not yet implemented.""" + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(title="CodeQL", style="yellow")) + console.print(Text( + "CodeQL analysis not yet implemented — coming in a future PR. " + "Proceeding to Phase 1b without CodeQL artifacts.", + style="yellow", + )) + else: + import _colors as C + print(C.header("CodeQL")) + print(C.warn( + "CodeQL analysis not yet implemented — coming in a future PR. " + "Proceeding to Phase 1b without CodeQL artifacts." + )) + print() + + +# --------------------------------------------------------------------------- +# Subphase runner +# --------------------------------------------------------------------------- + +def _run_subphase( + *, + args: Any, + console: Any, + rendering_ctx: Any, + runner: ServerRunner, + base_url: str, + phase_id: str, + label: str, + agent: str, + prompt_file: str, + finding: str | None = None, + findings_snapshot: dict[str, int] | None = None, +) -> tuple[int, dict[str, int] | None]: + """Run a single subphase agent session with retry/resume. + + Returns (exit_code, cumulative_findings_snapshot). The snapshot is + updated after the session completes so that gate functions can detect + unexpected finding creation. 
+ """ + prompt_path = ROOT / prompt_file + prompt = load_prompt(prompt_path, finding, phase=phase_id) + rc = resolve_runtime_config(agent) + model = rc.model + variant = rc.variant + thinking_on = rc.thinking_on + configure_rendering(console, render_reasoning=thinking_on) + + model_label = model or "(unknown)" + variant_label = variant or "(unknown)" + + parts = [f"agent={agent}", f"model={model_label}"] + if variant is not None: + parts.append(f"variant={variant_label}") + parts.append(f"thinking={'on' if thinking_on else 'off'}") + parts.append(f"prompt={prompt_file}") + + if variant is not None: + sources_tail = ( + f"(model source: {rc.model_source}, variant source: {rc.variant_source}, " + f"thinking source: {rc.thinking_source})" + ) + else: + sources_tail = f"(model source: {rc.model_source}, thinking source: {rc.thinking_source})" + + main_line = " ".join(parts) + " " + sources_tail + + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(title=f"Phase {phase_id}: {label}", style="bold cyan")) + console.print(Text(main_line, style="dim")) + if finding: + console.print(Text(f"finding={finding}", style="dim")) + else: + import _colors as C + print(C.header(f"Phase {phase_id}: {label}")) + print(C.info(main_line)) + if finding: + print(C.info(f"finding={finding}")) + + iteration_retry_count = 0 + frontmatter_retry_count = 0 + attempt_number = 0 + last_session_id: str = "" + last_finish_reason: str | None = None + last_finish_tokens: dict[str, Any] = {} + last_permission_error: str | None = None + any_step_finish_seen = False + step_finish_count = 0 + transcript_path: Path = Path() + finish_warning: str | None = None + subphase_start_time = time.time() + + password = runner.info.password if runner.info else "" + + # --- Retry loop (mirrors harness.run_phase_mode) --- + while True: + attempt_number += 1 + _reset_subagent_state() + + returncode, session_id, run_result, transcript_path = _run_single_attempt( + args, console, 
prompt, model, variant, base_url, + password, str(ROOT), + render_event_fn=render_event, + emit_fatal_error_fn=_emit_fatal_error, + existing_session_id=last_session_id or None, + ) + + if returncode != 0: + break + + last_session_id = session_id + last_finish_reason = run_result.last_finish_reason + last_finish_tokens = run_result.last_finish_tokens + last_permission_error = run_result.last_permission_error + any_step_finish_seen = run_result.any_step_finish_seen + step_finish_count = run_result.step_finish_count + + if not any_step_finish_seen: + finish_warning = ( + "CodeCome observed no step_finish events in the JSON stream, so the model/provider did not emit a " + "completion signal. Treating the run as incomplete." + ) + elif last_finish_reason is None: + finish_warning = ( + "CodeCome observed a step_finish event without a finish reason, so the model/provider completion " + "state is ambiguous. Treating the run as incomplete." + ) + elif last_finish_reason in _FINISH_FAILURE: + finish_warning = ( + f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped " + "before completing the subphase. Treating the run as incomplete rather than as a CodeCome logic error." + ) + elif last_finish_reason in _FINISH_MID_TURN: + if last_permission_error: + finish_warning = ( + f"{last_permission_error}; CodeCome observed the model/provider stop mid-turn with finish " + f"reason '{last_finish_reason}', so the subphase did not reach a final completion signal." + ) + else: + finish_warning = ( + f"CodeCome observed the model/provider stop mid-turn with finish reason '{last_finish_reason}' " + f"after {step_finish_count} completed loops, without a terminal completion signal. Treating the " + "subphase as incomplete because the model/provider cut off the response." + ) + elif last_finish_reason not in _FINISH_TERMINAL_OK: + finish_warning = ( + f"CodeCome observed an unrecognised model/provider finish reason '{last_finish_reason}'. 
Treating " + "the run as incomplete rather than assuming success." + ) + + if finish_warning is not None: + if ( + last_finish_reason in _FINISH_MID_TURN + and last_permission_error is None + and check_phase_graceful_completion(phase_id, finding, subphase_start_time) + ): + msg = ( + f"CodeCome observed a mid-turn model/provider cutoff for Phase {phase_id} after {step_finish_count} " + "completed loops, but the required durable artifacts were already written. Treating the subphase as complete." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold green")) + else: + import _colors as C + print(C.ok(msg)) + finish_warning = None + last_finish_reason = "graceful_forgiveness" + else: + returncode = 2 + + if returncode == 0: + validation_result = subprocess.run( + [sys.executable, "tools/check-frontmatter.py"], + cwd=ROOT, + capture_output=True, + text=True, + ) + if validation_result.returncode != 0: + max_frontmatter_retries = 2 + validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" + if frontmatter_retry_count < max_frontmatter_retries: + frontmatter_retry_count += 1 + msg = ( + "\n[Auto-Correction] The model completed a turn, but its output failed local frontmatter " + f"validation. CodeCome will resume the same session and ask for a minimal repair " + f"(retry {frontmatter_retry_count}/{max_frontmatter_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_frontmatter_resume_prompt(phase_id, finding, validation_output) + continue + else: + returncode = 2 + finish_warning = ( + "The model output failed local frontmatter validation, and CodeCome could not determine a " + "session ID to resume for repair. 
Treating the subphase as incomplete so the validator output " + "can be reported back with the saved transcript." + ) + else: + returncode = 2 + finish_warning = ( + f"The model output still fails local frontmatter validation after {max_frontmatter_retries} " + "auto-repair attempts. Treating the subphase as incomplete so the validation errors can be reported back." + ) + msg = f"\n[Warning] Frontmatter errors persist after {max_frontmatter_retries} auto-retries." + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + print(validation_output) + break + break + + if returncode == 2 and last_finish_reason in _FINISH_MID_TURN: + import os + max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1")) + if iteration_retry_count < max_iteration_retries: + iteration_retry_count += 1 + msg = ( + "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same " + f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_phase_resume_prompt( + phase_id, finding, last_finish_reason, step_finish_count, + ) + continue + else: + finish_warning = ( + "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine " + "a session ID for automatic continuation. Treating the subphase as incomplete." 
+ ) + if HAVE_RICH: + from rich.text import Text + console.print(Text("Could not determine session ID to resume.", style="red")) + else: + import _colors as C + print(C.fail("Could not determine session ID to resume.")) + break + + break + # --- end retry loop --- + + # Report subphase outcome + if returncode == 0: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="green")) + console.print(Text(f"{'OK' if not HAVE_RICH else ''}Phase {phase_id} completed successfully", style="green")) + console.print(Text( + f" finish reason: {last_finish_reason!r} " + f"transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}", + style="dim", + )) + else: + import _colors as C + print(C.ok(f"Phase {phase_id} completed successfully")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") + elif returncode == 130: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="yellow")) + console.print(Text(f"Phase {phase_id} interrupted", style="yellow")) + else: + import _colors as C + print(C.warn(f"Phase {phase_id} interrupted")) + else: + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="red")) + console.print(Text( + f"Phase {phase_id} did not complete cleanly (exit code {returncode})", + style="red", + )) + if finish_warning: + console.print(Text(f" reason: {finish_warning}", style="red")) + console.print(Text(f" transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}", style="dim")) + else: + import _colors as C + print(C.fail(f"Phase {phase_id} did not complete cleanly (exit code {returncode})")) + if finish_warning: + print(C.fail(f" reason: {finish_warning}")) + print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") + + # Update 
findings snapshot for gate check + if findings_snapshot is not None and returncode == 0: + from tools.gate_check import _count_findings_since + pass # snapshot is read by caller; we'll track in the orchestrator + + return returncode, findings_snapshot + + +# --------------------------------------------------------------------------- +# Phase 1 orchestration +# --------------------------------------------------------------------------- + +def run_phase_1( + args: Any, + console: Any, + rendering_ctx: Any, + runner: ServerRunner, + base_url: str, +) -> int: + """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" + # Snapshot findings before 1a + findings_snapshot = _gate_check._count_findings_since() + + # ---- Phase 1a: Target Profile ---- + rc, _ = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + if rc != 0: + return rc + + gate_rc = _gate_check.check_phase_1a(console) + if gate_rc != 0: + return gate_rc + + # ---- CodeQL placeholder ---- + _run_codeql_placeholder(console) + + # ---- Phase 1b: CodeQL-assisted Reconnaissance ---- + rc, _ = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1b", + label="CodeQL-assisted Reconnaissance", + agent="recon", + prompt_file="prompts/phase-1b-codeql-recon.md", + findings_snapshot=findings_snapshot, + ) + if rc != 0: + return rc + + gate_rc = _gate_check.check_phase_1b(console, findings_snapshot=findings_snapshot) + if gate_rc != 0: + return gate_rc + + # ---- Phase 1c: Sandbox Bootstrap ---- + rc, _ = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1c", + label="Sandbox Bootstrap", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + if rc != 0: + return rc + + gate_rc = 
_gate_check.check_phase_1c(console) + if gate_rc != 0: + return gate_rc + + # ---- Phase 1 complete ---- + if HAVE_RICH: + from rich.rule import Rule + from rich.text import Text + console.print(Rule(style="bold green")) + console.print(Text("Phase 1 complete — all subphases passed.", style="bold green")) + else: + import _colors as C + print() + print(C.ok("Phase 1 complete — all subphases passed.")) + + return 0 diff --git a/tools/gate-check.py b/tools/gate-check.py index cb3f9814..15b9cd2b 100755 --- a/tools/gate-check.py +++ b/tools/gate-check.py @@ -48,6 +48,61 @@ "attack-surface.md", ] +REQUIRED_NOTES_1B = [ + "attack-surface.md", + "execution-model.md", + "trust-boundaries.md", + "data-flow.md", + "validation-model.md", + "interesting-files.md", + "file-risk-index.yml", + "security-assumptions.md", +] + +# --------------------------------------------------------------------------- +# Conditional rich support: gate functions accept an optional Console. +# --------------------------------------------------------------------------- +try: + from rich.console import Console as _RichConsole + HAVE_RICH = True +except ImportError: # pragma: no cover + _RichConsole = None # type: ignore[assignment] + HAVE_RICH = False + + +def _emit(console, level: str, text: str) -> None: + """Emit a gate message through rich Console or plain print.""" + if console is not None and HAVE_RICH: + from rich.text import Text + style_map = { + "header": "bold cyan", + "ok": "green", + "fail": "bold red", + "warn": "yellow", + "info": "dim", + } + style = style_map.get(level, "") + console.print(Text(text, style=style)) + else: + fn_map = { + "header": header, + "ok": ok, + "fail": fail, + "warn": warn, + "info": info, + } + fn = fn_map.get(level, print) + fn(text) + + +def _emit_separator(console, style: str = "green") -> None: + """Emit a visual separator.""" + if console is not None and HAVE_RICH: + from rich.rule import Rule + console.print(Rule(style=style)) + else: + print() + 
FINDING_STATUS_DIRS = [ "PENDING", "CONFIRMED", @@ -294,11 +349,218 @@ def gate_phase_6() -> int: return 0 +# --------------------------------------------------------------------------- +# Phase 1 subphase gates (1a / 1b / 1c) +# --------------------------------------------------------------------------- + +def _notes_exist(*names: str) -> list[str]: + """Return names of note files that are missing from itemdb/notes/.""" + notes_dir = ROOT / "itemdb" / "notes" + return [n for n in names if not (notes_dir / n).exists()] + + +def _count_findings_since(snapshot: dict[str, int] | None = None) -> dict[str, int]: + """Return {status_dir: count} of CC-*.md files. If *snapshot* is given, + return the delta (current - snapshot) instead of absolute counts.""" + findings_root = ROOT / "itemdb" / "findings" + current: dict[str, int] = {} + for s in FINDING_STATUS_DIRS: + sd = findings_root / s + current[s] = len(list(sd.glob("CC-*.md"))) if sd.exists() else 0 + if snapshot is None: + return current + return {s: max(0, current[s] - snapshot.get(s, 0)) for s in FINDING_STATUS_DIRS} + + +def check_phase_1a(console=None) -> int: + """Gate 1a: Phase 1a outputs must exist; codeql-plan.yml must be valid.""" + _emit(console, "header", "Gate 1a: Target Profile") + _emit_separator(console, "cyan") + + notes_dir = ROOT / "itemdb" / "notes" + + required = ["target-profile.md", "build-model.md", "codeql-plan.yml"] + missing = [n for n in required if not (notes_dir / n).exists()] + if missing: + _emit(console, "fail", "Required Phase 1a outputs are missing:") + for m in missing: + _emit(console, "info", f" itemdb/notes/{m}") + _emit(console, "info", "Run Phase 1a first.") + return 1 + + _emit(console, "ok", "itemdb/notes/target-profile.md exists") + _emit(console, "ok", "itemdb/notes/build-model.md exists") + _emit(console, "ok", "itemdb/notes/codeql-plan.yml exists") + + # Validate codeql-plan.yml + plan_path = notes_dir / "codeql-plan.yml" + if yaml is None: + _emit(console, "warn", 
"Cannot validate codeql-plan.yml: PyYAML not available") + else: + try: + plan = yaml.safe_load(plan_path.read_text(encoding="utf-8")) + except Exception as exc: + _emit(console, "fail", f"codeql-plan.yml is not valid YAML: {exc}") + return 1 + + if not isinstance(plan, dict): + _emit(console, "fail", "codeql-plan.yml is not a mapping") + return 1 + + if plan.get("recommended") is True: + languages = plan.get("languages", []) + if not isinstance(languages, list) or len(languages) == 0: + _emit(console, "fail", "codeql-plan.yml: recommended=true but no language entries") + return 1 + + valid_build_modes = {"none", "manual", "autobuild"} + valid_confidences = {"HIGH", "MEDIUM", "LOW"} + for i, lang in enumerate(languages): + if not isinstance(lang, dict): + _emit(console, "fail", f"codeql-plan.yml: language entry {i} is not a mapping") + return 1 + if "id" not in lang: + _emit(console, "fail", f"codeql-plan.yml: language entry {i} missing 'id'") + return 1 + if lang.get("confidence") not in valid_confidences: + _emit(console, "warn", + f"codeql-plan.yml: language '{lang.get('id', '?')}' " + f"has unexpected confidence '{lang.get('confidence')}'") + if lang.get("build_mode") not in valid_build_modes: + _emit(console, "warn", + f"codeql-plan.yml: language '{lang.get('id', '?')}' " + f"has unexpected build_mode '{lang.get('build_mode')}'") + if "packs" not in lang: + _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' missing 'packs'") + return 1 + if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: + _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' has empty packs list") + return 1 + + _emit(console, "ok", f"codeql-plan.yml: {len(languages)} language(s) configured") + + _emit_separator(console, "green") + _emit(console, "ok", "Ready to run Phase 1b (CodeQL-assisted Reconnaissance).") + return 0 + + +def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None) -> int: + """Gate 1b: all recon notes must 
exist; file-risk-index.yml must be valid.""" + _emit(console, "header", "Gate 1b: CodeQL-assisted Reconnaissance") + _emit_separator(console, "cyan") + + missing = _notes_exist(*REQUIRED_NOTES_1B) + if missing: + _emit(console, "fail", "Required Phase 1b reconnaissance notes are missing:") + for m in missing: + _emit(console, "info", f" itemdb/notes/{m}") + _emit(console, "info", "Run Phase 1b first.") + return 1 + + for name in REQUIRED_NOTES_1B: + _emit(console, "ok", f"itemdb/notes/{name} exists") + + # Validate file-risk-index.yml + risk_path = ROOT / "itemdb" / "notes" / "file-risk-index.yml" + if yaml is not None: + try: + data = yaml.safe_load(risk_path.read_text(encoding="utf-8")) + except Exception as exc: + _emit(console, "fail", f"file-risk-index.yml is not valid YAML: {exc}") + return 1 + + if isinstance(data, dict): + if "schema_version" not in data: + _emit(console, "warn", "file-risk-index.yml: missing 'schema_version'") + files = data.get("files") + if files is None: + _emit(console, "fail", "file-risk-index.yml: missing 'files' key") + return 1 + if not isinstance(files, list): + _emit(console, "fail", "file-risk-index.yml: 'files' is not a list") + return 1 + + for entry in files: + if not isinstance(entry, dict): + continue + path_val = entry.get("path", "") + if "../" in str(path_val) or str(path_val).startswith("/"): + _emit(console, "warn", + f"file-risk-index.yml: path '{path_val}' is not workspace-relative") + score = entry.get("score") + if score is not None: + try: + s = int(score) + if s < 1 or s > 5: + _emit(console, "warn", + f"file-risk-index.yml: score {score} for '{path_val}' is not in 1..5") + except (TypeError, ValueError): + _emit(console, "warn", + f"file-risk-index.yml: non-integer score '{score}' for '{path_val}'") + + _emit(console, "ok", f"file-risk-index.yml: {len(files)} file(s) indexed") + + # Check no findings were created during 1b + if findings_snapshot is not None: + delta = _count_findings_since(findings_snapshot) + 
new_findings = sum(delta.values()) + if new_findings > 0: + _emit(console, "warn", + f"{new_findings} new finding(s) were created during Phase 1b. " + "Findings should not be created during reconnaissance.") + for status, count in delta.items(): + if count > 0: + _emit(console, "info", f" {status}: +{count}") + + _emit_separator(console, "green") + _emit(console, "ok", "Ready to run Phase 1c (Sandbox Bootstrap).") + return 0 + + +def check_phase_1c(console=None) -> int: + """Gate 1c: sandbox-plan.md must exist; sandbox must have provenance.""" + _emit(console, "header", "Gate 1c: Sandbox Bootstrap") + _emit_separator(console, "cyan") + + plan_path = ROOT / "itemdb" / "notes" / "sandbox-plan.md" + if not plan_path.exists(): + _emit(console, "fail", "itemdb/notes/sandbox-plan.md does not exist") + _emit(console, "info", "Run Phase 1c first.") + return 1 + + _emit(console, "ok", "itemdb/notes/sandbox-plan.md exists") + + # Check sandbox provenance + provenance = ROOT / "sandbox" / "CODECOME-GENERATED.md" + has_provenance = provenance.exists() + + generated_dir = ROOT / "sandbox" + has_sandbox = generated_dir.exists() and any( + f.name != ".gitkeep" + for f in generated_dir.iterdir() + ) + + if has_provenance: + _emit(console, "ok", "sandbox/CODECOME-GENERATED.md exists") + elif has_sandbox: + _emit(console, "warn", + "sandbox/ exists without CODECOME-GENERATED.md — may be user-managed") + else: + _emit(console, "warn", "sandbox/ is empty or does not exist") + + _emit_separator(console, "green") + _emit(console, "ok", "Phase 1 complete. 
Ready to run Phase 2.") + return 0 + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Check readiness gates for a CodeCome phase.", ) - parser.add_argument("phase", type=int, choices=[1, 2, 3, 4, 5, 6], help="Phase number.") + parser.add_argument( + "phase", + help="Phase number (1-6) or subphase (1a, 1b, 1c).", + ) parser.add_argument("finding_id", nargs="?", help="Finding ID or path (required for Phase 4 and 5).") return parser @@ -307,13 +569,30 @@ def main() -> int: parser = build_parser() args = parser.parse_args() - if args.phase == 1: + phase_str = str(args.phase) + + if phase_str == "1a": + return check_phase_1a() + elif phase_str == "1b": + return check_phase_1b() + elif phase_str == "1c": + return check_phase_1c() + + try: + phase_int = int(phase_str) + except ValueError: + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) + return 1 + + if phase_int == 1: return gate_phase_1() - elif args.phase == 2: + elif phase_int == 2: return gate_phase_2() - elif args.phase == 3: + elif phase_int == 3: return gate_phase_3() - elif args.phase == 4: + elif phase_int == 4: if not args.finding_id: print(fail("Phase 4 requires a finding ID.")) print() @@ -321,7 +600,7 @@ def main() -> int: print(info(" or: ./tools/gate-check.py 4 itemdb/findings/PENDING/CC-0001-test.md")) return 1 return gate_phase_4(args.finding_id) - elif args.phase == 5: + elif phase_int == 5: if not args.finding_id: print(fail("Phase 5 requires a finding ID.")) print() @@ -329,9 +608,12 @@ def main() -> int: print(info(" or: ./tools/gate-check.py 5 itemdb/findings/CONFIRMED/CC-0001-test.md")) return 1 return gate_phase_5(args.finding_id) - elif args.phase == 6: + elif phase_int == 6: return gate_phase_6() + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) return 1 From 00a113a39968dfe8d2b59d3ae1df6f9521f6d106 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 22:26:58 +0200 Subject: [PATCH 04/47] feat: add CodeQL install scaffold --- .gitignore | 4 + Makefile | 4 +- codecome.yml | 29 +++- tools/codecome/cli.py | 2 +- tools/codecome/phase_1.py | 32 ++-- tools/codeql.py | 105 +++++++++++++ tools/codeql/__init__.py | 1 + tools/codeql/config.py | 235 ++++++++++++++++++++++++++++ tools/codeql/install.py | 263 ++++++++++++++++++++++++++++++++ tools/gate-check.py | 264 +------------------------------- tools/phases/phase_1_gates.py | 277 ++++++++++++++++++++++++++++++++++ 11 files changed, 932 insertions(+), 284 deletions(-) create mode 100644 tools/codeql.py create mode 100644 tools/codeql/__init__.py create mode 100644 tools/codeql/config.py create mode 100644 tools/codeql/install.py create mode 100644 tools/phases/phase_1_gates.py diff --git a/.gitignore b/.gitignore index 50a050ba..ca566b9a 100644 --- a/.gitignore +++ b/.gitignore @@ -220,6 +220,10 @@ __marimo__/ tmp/* !tmp/.gitkeep +# CodeQL managed install and cache +.tools/ +.cache/codeql/ + # CodeCome runtime artifacts itemdb/index.md diff --git a/Makefile b/Makefile index 638b61c1..1638da31 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,7 @@ init: @$(PYTHON) -m pip install --upgrade pip @$(PYTHON) -m pip install --no-input -r requirements.txt @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ - printf "$(BOLD)$(CYAN)[CodeQL]$(RESET) Managed CodeQL install not yet implemented — coming in a future PR.\n"; \ + $(PYTHON) tools/codeql.py install; \ fi venv: init @@ -192,7 +192,7 @@ sweep: venv-check opencode-raw: @test -n "$(AGENT)" || (echo "AGENT is required. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) - @test -n "$(PROMPT_FILE)" || (echo "PROMPT_FILE is required. Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) + @test -r "$(PROMPT_FILE)" || (echo "PROMPT_FILE must be a readable file. 
Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) @opencode run --agent "$(AGENT)" $(OPENCODE_THINKING_FLAG) "$$(cat "$(PROMPT_FILE)")" validate-all: venv-check diff --git a/codecome.yml b/codecome.yml index a665eeac..4031936e 100644 --- a/codecome.yml +++ b/codecome.yml @@ -85,6 +85,34 @@ audit: - "Race conditions" - "Privilege escalation" + static_analysis: + codeql: + enabled: true + fail_policy: "soft" + + pack_catalog: "./templates/codeql-packs.yml" + + install: + managed: true + version: "latest" + path: ".tools/codeql/current/codeql/codeql" + + output_dir: "./itemdb/evidence/codeql" + database_dir: "./itemdb/evidence/codeql/databases" + cache_dir: "./.cache/codeql" + + phase_1: + enabled: true + + phase_2: + enabled: true + candidate_mode: "precreate" + max_candidates: 10 + + sweep: + enabled: true + inject_context: true + environment: type: "sandbox" path: "./sandbox" @@ -191,4 +219,3 @@ agents: # model: "anthropic/claude-opus-4-7" # reporter: # model: "anthropic/claude-opus-4-7" - diff --git a/tools/codecome/cli.py b/tools/codecome/cli.py index 6f75dfa7..89c62998 100644 --- a/tools/codecome/cli.py +++ b/tools/codecome/cli.py @@ -68,7 +68,7 @@ def main() -> int: # Phase 1 handles its own prompt files via subphase orchestration. 
required = ["phase", "label", "agent"] - if str(args.phase) not in ("1", "None"): + if str(args.phase) != "1": required.append("prompt_file") missing = [n for n in required if getattr(args, n) is None] if missing: diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 37e3afae..d89dc795 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -11,7 +11,6 @@ from __future__ import annotations -import importlib.util import subprocess import sys import time @@ -23,6 +22,12 @@ from codecome.console import build_console, _emit_fatal_error from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config from codecome.runner import _run_single_attempt +from phases.phase_1_gates import ( + check_phase_1a, + check_phase_1b, + check_phase_1c, + count_findings_snapshot, +) from rendering.dispatch import HAVE_RICH, _get_rendering_ctx, configure_rendering, render_event from rendering.events import ( _FINISH_TERMINAL_OK, @@ -35,17 +40,6 @@ build_phase_resume_prompt, build_frontmatter_resume_prompt, ) - -# gate-check.py uses a hyphen and cannot be imported with a regular -# ``import`` statement. Load it via importlib. 
-_gc_spec = importlib.util.spec_from_file_location( - "gate_check", - str(ROOT / "tools" / "gate-check.py"), -) -_gate_check = importlib.util.module_from_spec(_gc_spec) -_gc_spec.loader.exec_module(_gate_check) - - # --------------------------------------------------------------------------- # CodeQL placeholder (no-op until PR 5) # --------------------------------------------------------------------------- @@ -155,6 +149,7 @@ def _run_subphase( while True: attempt_number += 1 _reset_subagent_state() + finish_warning = None returncode, session_id, run_result, transcript_path = _run_single_attempt( args, console, prompt, model, variant, base_url, @@ -358,11 +353,6 @@ def _run_subphase( print(C.fail(f" reason: {finish_warning}")) print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") - # Update findings snapshot for gate check - if findings_snapshot is not None and returncode == 0: - from tools.gate_check import _count_findings_since - pass # snapshot is read by caller; we'll track in the orchestrator - return returncode, findings_snapshot @@ -379,7 +369,7 @@ def run_phase_1( ) -> int: """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" # Snapshot findings before 1a - findings_snapshot = _gate_check._count_findings_since() + findings_snapshot = count_findings_snapshot() # ---- Phase 1a: Target Profile ---- rc, _ = _run_subphase( @@ -396,7 +386,7 @@ def run_phase_1( if rc != 0: return rc - gate_rc = _gate_check.check_phase_1a(console) + gate_rc = check_phase_1a(console) if gate_rc != 0: return gate_rc @@ -419,7 +409,7 @@ def run_phase_1( if rc != 0: return rc - gate_rc = _gate_check.check_phase_1b(console, findings_snapshot=findings_snapshot) + gate_rc = check_phase_1b(console, findings_snapshot=findings_snapshot) if gate_rc != 0: return gate_rc @@ -438,7 +428,7 @@ def run_phase_1( if rc != 0: return rc - gate_rc = _gate_check.check_phase_1c(console) + gate_rc = 
check_phase_1c(console) if gate_rc != 0: return gate_rc diff --git a/tools/codeql.py b/tools/codeql.py new file mode 100644 index 00000000..6f38a750 --- /dev/null +++ b/tools/codeql.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL CLI wrapper for CodeCome. + +Usage:: + + tools/codeql.py install + tools/codeql.py check +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from codeql.config import resolve_config + + +def _cmd_install() -> int: + """Install the managed CodeQL CLI.""" + from codeql.install import install + return install() + + +def _cmd_check() -> int: + """Check that CodeQL CLI is available and working.""" + config = resolve_config() + + binary_path = config.abs_install_path + + # 1. Binary check + if not binary_path.is_file(): + print(f"FAIL: CodeQL binary not found at {binary_path}") + print("Run 'tools/codeql.py install' to install the managed CodeQL CLI.") + return 1 + + try: + result = subprocess.run( + [str(binary_path), "--version"], + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0: + print(f"FAIL: codeql --version failed: {result.stderr}") + return 1 + version_line = result.stdout.strip().split("\n")[0] + print(f"CodeQL CLI: {version_line}") + except Exception as exc: + print(f"FAIL: {exc}") + return 1 + + # 2. 
Pack resolve check + print("Checking pack resolution …") + try: + result = subprocess.run( + [str(binary_path), "resolve", "qlpacks"], + capture_output=True, + text=True, + timeout=60, + ) + if result.returncode != 0: + print(f"WARN: codeql resolve qlpacks failed: {result.stderr}") + # Soft-fail: the binary works, packs might need downloading later + else: + print("Pack resolution OK.") + except Exception as exc: + print(f"WARN: pack resolution check failed: {exc}") + + print("CodeQL CLI check passed.") + return 0 + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="CodeQL CLI wrapper for CodeCome.", + ) + sub = parser.add_subparsers(dest="command", required=True) + + sub.add_parser("install", help="Install the managed CodeQL CLI.") + sub.add_parser("check", help="Verify the CodeQL CLI is installed and working.") + + return parser + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + + if args.command == "install": + return _cmd_install() + elif args.command == "check": + return _cmd_check() + + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/codeql/__init__.py b/tools/codeql/__init__.py new file mode 100644 index 00000000..61f74eac --- /dev/null +++ b/tools/codeql/__init__.py @@ -0,0 +1 @@ +# CodeQL integration package. diff --git a/tools/codeql/config.py b/tools/codeql/config.py new file mode 100644 index 00000000..b8758b22 --- /dev/null +++ b/tools/codeql/config.py @@ -0,0 +1,235 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL configuration resolution. + +Priority: environment variables > codecome.yml > hard-coded defaults. 
try:
    import yaml
except ImportError:  # pragma: no cover
    yaml = None  # type: ignore[assignment]


# ---------------------------------------------------------------------------
# Resolve the workspace root. When imported from the tools/codeql/ package,
# three levels above __file__ gives the repo root.
# ---------------------------------------------------------------------------
ROOT = Path(__file__).resolve().parents[2]


# ---------------------------------------------------------------------------
# Defaults (lowest priority)
# ---------------------------------------------------------------------------

DEFAULTS: dict[str, Any] = {
    "enabled": True,
    "fail_policy": "soft",
    "pack_catalog": "./templates/codeql-packs.yml",
    "install_managed": True,
    "install_version": "latest",
    "install_path": ".tools/codeql/current/codeql/codeql",
    "output_dir": "./itemdb/evidence/codeql",
    "database_dir": "./itemdb/evidence/codeql/databases",
    "cache_dir": "./.cache/codeql",
    "phase_1_enabled": True,
    "phase_2_enabled": True,
    "candidate_mode": "precreate",
    "max_candidates": 10,
    "sweep_enabled": True,
    "sweep_inject_context": True,
}

# Spellings accepted as "true" for boolean options; everything else is false.
_TRUTHY = frozenset({"1", "true", "yes"})

# Flat option names with one of these prefixes live in a nested codecome.yml
# section, e.g. ``install_version`` is read from ``install: {version: ...}``.
_YAML_SECTIONS = ("install", "phase_1", "phase_2", "sweep")


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _load_codecome_yml() -> dict[str, Any] | None:
    """Load codecome.yml and return the ``audit.static_analysis.codeql`` block.

    Returns ``None`` when PyYAML is unavailable, the file is missing or
    unreadable, or any level of the nesting is not a mapping.
    """
    if yaml is None:
        return None
    path = ROOT / "codecome.yml"
    if not path.is_file():
        return None
    try:
        node: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, yaml.YAMLError):
        # Best effort: an unreadable config behaves like an absent one.
        return None
    for key in ("audit", "static_analysis", "codeql"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node if isinstance(node, dict) else None


def _bool_env(name: str) -> bool | None:
    """Return a tri-state bool from an env var.

    Unset or blank → ``None``; ``1``/``true``/``yes`` (case-insensitive) →
    ``True``; any other value → ``False``.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        # A blank value (e.g. ``CODEQL=``) counts as "not set", matching how
        # the other environment lookups in this module treat empty strings.
        return None
    return raw.strip().lower() in _TRUTHY


def _str_env(name: str) -> str | None:
    """Return the stripped value of env var *name*, or ``None`` when unset/empty."""
    raw = os.environ.get(name)
    return raw.strip() if raw else None


def _coerce(value: Any, kind: Any) -> Any:
    """Convert *value* to *kind* (``bool``, ``int``, or ``None`` for as-is).

    Returns ``None`` when the value cannot be interpreted, so the caller can
    fall through to the next configuration source.
    """
    if kind is bool:
        if isinstance(value, str):
            return value.strip().lower() in _TRUTHY
        return bool(value)
    if kind is int:
        try:
            return int(value)
        except (TypeError, ValueError):
            return None
    return value


def _yaml_lookup(yml: dict[str, Any], key: str) -> Any:
    """Return the codecome.yml value for flat option *key*, or ``None`` if absent."""
    for section in _YAML_SECTIONS:
        prefix = f"{section}_"
        if key.startswith(prefix):
            node = yml.get(section)
            return node.get(key[len(prefix):]) if isinstance(node, dict) else None
    return yml.get(key)


# ---------------------------------------------------------------------------
# Resolution
# ---------------------------------------------------------------------------

@dataclass
class CodeQLConfig:
    """Resolved CodeQL configuration."""

    enabled: bool = True
    fail_policy: str = "soft"

    pack_catalog: str = "./templates/codeql-packs.yml"

    install_managed: bool = True
    install_version: str = "latest"
    install_path: str = ".tools/codeql/current/codeql/codeql"

    output_dir: str = "./itemdb/evidence/codeql"
    database_dir: str = "./itemdb/evidence/codeql/databases"
    cache_dir: str = "./.cache/codeql"

    phase_1_enabled: bool = True
    phase_2_enabled: bool = True
    candidate_mode: str = "precreate"
    max_candidates: int = 10

    sweep_enabled: bool = True
    sweep_inject_context: bool = True

    # Absolute paths (resolved from ROOT)
    abs_pack_catalog: Path = field(default_factory=Path)
    abs_install_path: Path = field(default_factory=Path)
    abs_output_dir: Path = field(default_factory=Path)
    abs_database_dir: Path = field(default_factory=Path)
    abs_cache_dir: Path = field(default_factory=Path)


def _resolve_paths(config: CodeQLConfig) -> CodeQLConfig:
    """Fill the ``abs_*`` fields of *config* from its workspace-relative paths."""
    config.abs_pack_catalog = (ROOT / config.pack_catalog).resolve()
    config.abs_install_path = (ROOT / config.install_path).resolve()
    config.abs_output_dir = (ROOT / config.output_dir).resolve()
    config.abs_database_dir = (ROOT / config.database_dir).resolve()
    config.abs_cache_dir = (ROOT / config.cache_dir).resolve()
    return config


def resolve_config() -> CodeQLConfig:
    """Resolve the CodeQL configuration.

    Priority: env vars > codecome.yml > defaults. A value that cannot be
    interpreted (for example a non-numeric ``CODEQL_MAX_CANDIDATES``) is
    ignored and the next source is consulted.

    When CodeQL is disabled (``CODEQL=0``, ``CODEQL_SKIP=1`` or
    ``enabled: false``) the result carries default settings with
    ``enabled=False``; its ``abs_*`` paths are still resolved.
    """
    yml = _load_codecome_yml() or {}

    def _get(key: str, env: str | None = None, coerce: Any = None) -> Any:
        """Pick the highest-priority usable value for option *key*."""
        candidates: list[Any] = []
        if env is not None:
            raw = os.environ.get(env)
            if raw is not None and raw.strip():
                candidates.append(raw.strip())
        candidates.append(_yaml_lookup(yml, key))
        for candidate in candidates:
            if candidate is None:
                continue
            value = _coerce(candidate, coerce)
            if value is not None:
                return value
        return DEFAULTS[key]

    # CODEQL=0 → disabled, CODEQL=1 → enabled; otherwise ask codecome.yml.
    enabled = _bool_env("CODEQL")
    if enabled is None:
        enabled = _get("enabled", coerce=bool)

    # CODEQL_SKIP=1 disables CodeQL just like CODEQL=0.
    if not enabled or _bool_env("CODEQL_SKIP"):
        return _resolve_paths(CodeQLConfig(enabled=False))

    return _resolve_paths(
        CodeQLConfig(
            enabled=True,
            fail_policy=_get("fail_policy", env="CODEQL_FAIL_POLICY"),
            pack_catalog=_get("pack_catalog"),
            install_managed=_get("install_managed", env="CODEQL_MANAGED_INSTALL", coerce=bool),
            install_version=_get("install_version", env="CODEQL_VERSION"),
            install_path=_get("install_path"),
            output_dir=_get("output_dir"),
            database_dir=_get("database_dir"),
            cache_dir=_get("cache_dir"),
            phase_1_enabled=_get("phase_1_enabled", env="CODEQL_PHASE_1", coerce=bool),
            phase_2_enabled=_get("phase_2_enabled", env="CODEQL_PHASE_2", coerce=bool),
            candidate_mode=_get("candidate_mode", env="CODEQL_CANDIDATES"),
            max_candidates=_get("max_candidates", env="CODEQL_MAX_CANDIDATES", coerce=int),
            sweep_enabled=_get("sweep_enabled", env="CODEQL_SWEEP", coerce=bool),
            sweep_inject_context=_get("sweep_inject_context", coerce=bool),
        )
    )
GITHUB_API_RELEASES = "https://api.github.com/repos/github/codeql-cli-binaries/releases"


# ---------------------------------------------------------------------------
# Platform detection
# ---------------------------------------------------------------------------

def _detect_platform() -> str:
    """Return the CodeQL bundle platform tag for the running OS.

    macOS maps to ``osx64``; Linux and every other system map to ``linux64``.
    """
    # NOTE(review): non-Linux, non-macOS systems also get linux64 — confirm
    # that this fallback is intended.
    return "osx64" if platform.system().lower() == "darwin" else "linux64"


def _bundle_suffix(plat: str) -> str:
    """Return the asset name suffix for a given platform."""
    return f"{plat}.zip"


# ---------------------------------------------------------------------------
# Release discovery
# ---------------------------------------------------------------------------

def _fetch_json(url: str, what: str) -> dict:
    """GET *url* from the GitHub API and return the decoded JSON object.

    Raises:
        RuntimeError: on any network or decoding failure, or when the
            response is not a JSON object. *what* names the resource in the
            error message.
    """
    import json

    req = Request(url, headers={"Accept": "application/vnd.github+json"})
    try:
        with urlopen(req, timeout=30) as resp:
            data = json.loads(resp.read().decode())
    except Exception as exc:
        # Callers only handle RuntimeError, so every failure is funnelled
        # into it; the original exception stays attached as the cause.
        raise RuntimeError(f"Failed to fetch {what}: {exc}") from exc
    if not isinstance(data, dict):
        raise RuntimeError(f"Failed to fetch {what}: unexpected response format")
    return data


def _fetch_latest_version() -> str:
    """Fetch the latest CodeQL CLI version tag from the GitHub API.

    Raises:
        RuntimeError: if the request fails or the release has no tag name.
    """
    what = "latest CodeQL CLI release"
    data = _fetch_json(f"{GITHUB_API_RELEASES}/latest", what)

    tag = str(data.get("tag_name") or "")
    # tag_name looks like "v2.20.4" — strip exactly one leading "v"
    version = tag[1:] if tag.startswith("v") else tag
    if not version:
        # An empty version would later be used as a directory name.
        raise RuntimeError(f"Failed to fetch {what}: response has no tag_name")
    return version


def _fetch_release_assets(version: str) -> list[dict]:
    """Fetch the assets for a specific release version.

    Raises:
        RuntimeError: if the release cannot be fetched.
    """
    data = _fetch_json(
        f"{GITHUB_API_RELEASES}/tags/v{version}",
        f"CodeQL CLI release v{version}",
    )
    assets = data.get("assets")
    return assets if isinstance(assets, list) else []


def _find_download_url(assets: list[dict], plat: str) -> Optional[str]:
    """Find the browser_download_url for the platform-specific bundle."""
    suffix = _bundle_suffix(plat)
    return next(
        (
            asset.get("browser_download_url")
            for asset in assets
            if str(asset.get("name") or "").endswith(suffix)
        ),
        None,
    )


# ---------------------------------------------------------------------------
# Download and extract
# ---------------------------------------------------------------------------

def _download(url: str, dest: Path) -> None:
    """Download a file from *url* to *dest*.

    Raises:
        RuntimeError: if the transfer fails; a partial *dest* is removed.
    """
    print(f"Downloading {url} …")
    req = Request(url, headers={"User-Agent": "CodeCome-CodeQL-Installer/1.0"})
    try:
        with urlopen(req, timeout=300) as resp, open(dest, "wb") as out:
            shutil.copyfileobj(resp, out)
    except Exception as exc:
        if dest.exists():
            dest.unlink()
        raise RuntimeError(f"Download failed: {exc}") from exc


def _extract(zip_path: Path, dest_dir: Path) -> None:
    """Extract a zip archive to *dest_dir*, restoring Unix permission bits."""
    import zipfile

    dest_dir.mkdir(parents=True, exist_ok=True)
    print(f"Extracting to {dest_dir} …")
    with zipfile.ZipFile(zip_path, "r") as zf:
        for member in zf.infolist():
            extracted = zf.extract(member, dest_dir)
            # zipfile does not apply the mode stored in the archive, which
            # would leave the bundled executables without their execute
            # bit. The Unix mode lives in the high 16 bits of external_attr.
            mode = (member.external_attr >> 16) & 0o777
            if mode and not member.is_dir():
                os.chmod(extracted, mode)


# ---------------------------------------------------------------------------
# Install
# ---------------------------------------------------------------------------

def _codeql_binary(base_dir: Path) -> Path:
    """Return the path to the codeql executable inside an extracted bundle."""
    nested = base_dir / "codeql"
    # The bundle normally extracts to a codeql/ subdirectory; otherwise the
    # executable is expected directly under *base_dir*.
    return nested / "codeql" if nested.is_dir() else nested
def install(config: Optional[CodeQLConfig] = None) -> int:
    """Install (or reinstall) the managed CodeQL CLI.

    The bundle is extracted to ``.tools/codeql/<version>`` under the
    workspace root and ``.tools/codeql/current`` is pointed at it.

    Args:
        config: resolved configuration; resolved on demand when omitted.

    Returns 0 on success (including every "skipped" case), 1 on failure.
    """
    import zipfile

    if config is None:
        config = resolve_config()

    if not config.enabled:
        print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1). Skipping install.")
        return 0

    if os.environ.get("CODEQL_SKIP_INSTALL") == "1":
        print("CODEQL_SKIP_INSTALL=1 — skipping managed install.")
        return 0

    if not config.install_managed:
        print("Managed install disabled in config. Skipping.")
        return 0

    # --- Determine version ---
    version = config.install_version
    if version == "latest":
        print("Determining latest CodeQL CLI version …")
        try:
            version = _fetch_latest_version()
        except RuntimeError as exc:
            print(f"ERROR: {exc}", file=sys.stderr)
            return 1
        print(f"Latest version: {version}")

    # The version becomes a directory name that may be deleted below, so it
    # must be a single, non-empty path component.
    if not version or version in (".", "..") or "/" in version or "\\" in version:
        print(f"ERROR: invalid CodeQL CLI version {version!r}", file=sys.stderr)
        return 1

    # --- Determine target directories ---
    tools_dir = ROOT / ".tools" / "codeql"
    version_dir = tools_dir / version
    current_link = tools_dir / "current"

    # Check if already installed
    force = os.environ.get("CODEQL_FORCE_INSTALL") == "1"
    installed_binary = _codeql_binary(version_dir)
    if not force and installed_binary.is_file():
        print(f"CodeQL CLI v{version} already installed at {version_dir}")
        # Ensure the 'current' symlink points to this version
        _ensure_symlink(version_dir, current_link)
        return _verify(installed_binary)

    # --- Locate the bundle ---
    plat = _detect_platform()
    print(f"Platform: {plat}")

    try:
        assets = _fetch_release_assets(version)
    except RuntimeError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    url = _find_download_url(assets, plat)
    if url is None:
        print(f"ERROR: No CodeQL CLI bundle found for platform '{plat}' in release v{version}",
              file=sys.stderr)
        print("Available assets:", file=sys.stderr)
        for asset in assets:
            print(f"  - {asset.get('name', '?')}", file=sys.stderr)
        return 1

    # --- Download and extract ---
    tmp_root = ROOT / "tmp"
    tmp_root.mkdir(parents=True, exist_ok=True)
    tmp_dir = Path(tempfile.mkdtemp(prefix="codecome-codeql-", dir=tmp_root))
    try:
        zip_path = tmp_dir / f"codeql-{version}-{plat}.zip"
        _download(url, zip_path)

        # Reaching this point means a forced reinstall or a directory
        # without a usable binary; either way, start from a clean slate.
        if version_dir.exists():
            shutil.rmtree(version_dir)

        _extract(zip_path, version_dir)
    except (RuntimeError, OSError, zipfile.BadZipFile) as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # --- Create current symlink ---
    _ensure_symlink(version_dir, current_link)

    # --- Verify ---
    # The binary location depends on the extracted layout, so it has to be
    # looked up again now that the bundle is on disk.
    return _verify(_codeql_binary(version_dir))


def _ensure_symlink(target: Path, link: Path) -> None:
    """Create or update ``link -> target``.

    Whatever currently sits at *link* (symlink, file or directory) is
    removed. The link is relative (``target.name``), so *target* and *link*
    are expected to share the same parent directory.
    """
    link.parent.mkdir(parents=True, exist_ok=True)
    if link.is_symlink() or link.is_file():
        link.unlink()
    elif link.is_dir():
        shutil.rmtree(link)
    link.symlink_to(target.name, target_is_directory=True)


def _verify(binary_path: Path) -> int:
    """Verify the CodeQL binary works.

    Returns 0 when ``<binary> --version`` succeeds, 1 otherwise.
    """
    not_found = f"ERROR: CodeQL binary not found at {binary_path}"
    if not binary_path.is_file():
        print(not_found, file=sys.stderr)
        return 1

    try:
        result = subprocess.run(
            [str(binary_path), "--version"],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except FileNotFoundError:
        print(not_found, file=sys.stderr)
        return 1
    except (OSError, subprocess.SubprocessError) as exc:
        # OSError: e.g. not executable; SubprocessError: timeout.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    if result.returncode != 0:
        print(f"ERROR: codeql --version failed: {result.stderr}", file=sys.stderr)
        return 1
    version_line = result.stdout.strip().split("\n")[0]
    print(f"CodeQL CLI ready: {version_line}")
    return 0
Path(__file__).resolve().parents[1] FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) @@ -48,61 +53,6 @@ "attack-surface.md", ] -REQUIRED_NOTES_1B = [ - "attack-surface.md", - "execution-model.md", - "trust-boundaries.md", - "data-flow.md", - "validation-model.md", - "interesting-files.md", - "file-risk-index.yml", - "security-assumptions.md", -] - -# --------------------------------------------------------------------------- -# Conditional rich support: gate functions accept an optional Console. -# --------------------------------------------------------------------------- -try: - from rich.console import Console as _RichConsole - HAVE_RICH = True -except ImportError: # pragma: no cover - _RichConsole = None # type: ignore[assignment] - HAVE_RICH = False - - -def _emit(console, level: str, text: str) -> None: - """Emit a gate message through rich Console or plain print.""" - if console is not None and HAVE_RICH: - from rich.text import Text - style_map = { - "header": "bold cyan", - "ok": "green", - "fail": "bold red", - "warn": "yellow", - "info": "dim", - } - style = style_map.get(level, "") - console.print(Text(text, style=style)) - else: - fn_map = { - "header": header, - "ok": ok, - "fail": fail, - "warn": warn, - "info": info, - } - fn = fn_map.get(level, print) - fn(text) - - -def _emit_separator(console, style: str = "green") -> None: - """Emit a visual separator.""" - if console is not None and HAVE_RICH: - from rich.rule import Rule - console.print(Rule(style=style)) - else: - print() - FINDING_STATUS_DIRS = [ "PENDING", "CONFIRMED", @@ -349,210 +299,6 @@ def gate_phase_6() -> int: return 0 -# --------------------------------------------------------------------------- -# Phase 1 subphase gates (1a / 1b / 1c) -# --------------------------------------------------------------------------- - -def _notes_exist(*names: str) -> list[str]: - """Return names of note files that are missing from itemdb/notes/.""" - notes_dir = ROOT / "itemdb" / 
"notes" - return [n for n in names if not (notes_dir / n).exists()] - - -def _count_findings_since(snapshot: dict[str, int] | None = None) -> dict[str, int]: - """Return {status_dir: count} of CC-*.md files. If *snapshot* is given, - return the delta (current - snapshot) instead of absolute counts.""" - findings_root = ROOT / "itemdb" / "findings" - current: dict[str, int] = {} - for s in FINDING_STATUS_DIRS: - sd = findings_root / s - current[s] = len(list(sd.glob("CC-*.md"))) if sd.exists() else 0 - if snapshot is None: - return current - return {s: max(0, current[s] - snapshot.get(s, 0)) for s in FINDING_STATUS_DIRS} - - -def check_phase_1a(console=None) -> int: - """Gate 1a: Phase 1a outputs must exist; codeql-plan.yml must be valid.""" - _emit(console, "header", "Gate 1a: Target Profile") - _emit_separator(console, "cyan") - - notes_dir = ROOT / "itemdb" / "notes" - - required = ["target-profile.md", "build-model.md", "codeql-plan.yml"] - missing = [n for n in required if not (notes_dir / n).exists()] - if missing: - _emit(console, "fail", "Required Phase 1a outputs are missing:") - for m in missing: - _emit(console, "info", f" itemdb/notes/{m}") - _emit(console, "info", "Run Phase 1a first.") - return 1 - - _emit(console, "ok", "itemdb/notes/target-profile.md exists") - _emit(console, "ok", "itemdb/notes/build-model.md exists") - _emit(console, "ok", "itemdb/notes/codeql-plan.yml exists") - - # Validate codeql-plan.yml - plan_path = notes_dir / "codeql-plan.yml" - if yaml is None: - _emit(console, "warn", "Cannot validate codeql-plan.yml: PyYAML not available") - else: - try: - plan = yaml.safe_load(plan_path.read_text(encoding="utf-8")) - except Exception as exc: - _emit(console, "fail", f"codeql-plan.yml is not valid YAML: {exc}") - return 1 - - if not isinstance(plan, dict): - _emit(console, "fail", "codeql-plan.yml is not a mapping") - return 1 - - if plan.get("recommended") is True: - languages = plan.get("languages", []) - if not isinstance(languages, 
list) or len(languages) == 0: - _emit(console, "fail", "codeql-plan.yml: recommended=true but no language entries") - return 1 - - valid_build_modes = {"none", "manual", "autobuild"} - valid_confidences = {"HIGH", "MEDIUM", "LOW"} - for i, lang in enumerate(languages): - if not isinstance(lang, dict): - _emit(console, "fail", f"codeql-plan.yml: language entry {i} is not a mapping") - return 1 - if "id" not in lang: - _emit(console, "fail", f"codeql-plan.yml: language entry {i} missing 'id'") - return 1 - if lang.get("confidence") not in valid_confidences: - _emit(console, "warn", - f"codeql-plan.yml: language '{lang.get('id', '?')}' " - f"has unexpected confidence '{lang.get('confidence')}'") - if lang.get("build_mode") not in valid_build_modes: - _emit(console, "warn", - f"codeql-plan.yml: language '{lang.get('id', '?')}' " - f"has unexpected build_mode '{lang.get('build_mode')}'") - if "packs" not in lang: - _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' missing 'packs'") - return 1 - if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: - _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' has empty packs list") - return 1 - - _emit(console, "ok", f"codeql-plan.yml: {len(languages)} language(s) configured") - - _emit_separator(console, "green") - _emit(console, "ok", "Ready to run Phase 1b (CodeQL-assisted Reconnaissance).") - return 0 - - -def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None) -> int: - """Gate 1b: all recon notes must exist; file-risk-index.yml must be valid.""" - _emit(console, "header", "Gate 1b: CodeQL-assisted Reconnaissance") - _emit_separator(console, "cyan") - - missing = _notes_exist(*REQUIRED_NOTES_1B) - if missing: - _emit(console, "fail", "Required Phase 1b reconnaissance notes are missing:") - for m in missing: - _emit(console, "info", f" itemdb/notes/{m}") - _emit(console, "info", "Run Phase 1b first.") - return 1 - - for name in REQUIRED_NOTES_1B: - 
_emit(console, "ok", f"itemdb/notes/{name} exists") - - # Validate file-risk-index.yml - risk_path = ROOT / "itemdb" / "notes" / "file-risk-index.yml" - if yaml is not None: - try: - data = yaml.safe_load(risk_path.read_text(encoding="utf-8")) - except Exception as exc: - _emit(console, "fail", f"file-risk-index.yml is not valid YAML: {exc}") - return 1 - - if isinstance(data, dict): - if "schema_version" not in data: - _emit(console, "warn", "file-risk-index.yml: missing 'schema_version'") - files = data.get("files") - if files is None: - _emit(console, "fail", "file-risk-index.yml: missing 'files' key") - return 1 - if not isinstance(files, list): - _emit(console, "fail", "file-risk-index.yml: 'files' is not a list") - return 1 - - for entry in files: - if not isinstance(entry, dict): - continue - path_val = entry.get("path", "") - if "../" in str(path_val) or str(path_val).startswith("/"): - _emit(console, "warn", - f"file-risk-index.yml: path '{path_val}' is not workspace-relative") - score = entry.get("score") - if score is not None: - try: - s = int(score) - if s < 1 or s > 5: - _emit(console, "warn", - f"file-risk-index.yml: score {score} for '{path_val}' is not in 1..5") - except (TypeError, ValueError): - _emit(console, "warn", - f"file-risk-index.yml: non-integer score '{score}' for '{path_val}'") - - _emit(console, "ok", f"file-risk-index.yml: {len(files)} file(s) indexed") - - # Check no findings were created during 1b - if findings_snapshot is not None: - delta = _count_findings_since(findings_snapshot) - new_findings = sum(delta.values()) - if new_findings > 0: - _emit(console, "warn", - f"{new_findings} new finding(s) were created during Phase 1b. 
" - "Findings should not be created during reconnaissance.") - for status, count in delta.items(): - if count > 0: - _emit(console, "info", f" {status}: +{count}") - - _emit_separator(console, "green") - _emit(console, "ok", "Ready to run Phase 1c (Sandbox Bootstrap).") - return 0 - - -def check_phase_1c(console=None) -> int: - """Gate 1c: sandbox-plan.md must exist; sandbox must have provenance.""" - _emit(console, "header", "Gate 1c: Sandbox Bootstrap") - _emit_separator(console, "cyan") - - plan_path = ROOT / "itemdb" / "notes" / "sandbox-plan.md" - if not plan_path.exists(): - _emit(console, "fail", "itemdb/notes/sandbox-plan.md does not exist") - _emit(console, "info", "Run Phase 1c first.") - return 1 - - _emit(console, "ok", "itemdb/notes/sandbox-plan.md exists") - - # Check sandbox provenance - provenance = ROOT / "sandbox" / "CODECOME-GENERATED.md" - has_provenance = provenance.exists() - - generated_dir = ROOT / "sandbox" - has_sandbox = generated_dir.exists() and any( - f.name != ".gitkeep" - for f in generated_dir.iterdir() - ) - - if has_provenance: - _emit(console, "ok", "sandbox/CODECOME-GENERATED.md exists") - elif has_sandbox: - _emit(console, "warn", - "sandbox/ exists without CODECOME-GENERATED.md — may be user-managed") - else: - _emit(console, "warn", "sandbox/ is empty or does not exist") - - _emit_separator(console, "green") - _emit(console, "ok", "Phase 1 complete. Ready to run Phase 2.") - return 0 - - def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Check readiness gates for a CodeCome phase.", diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py new file mode 100644 index 00000000..1f2268ef --- /dev/null +++ b/tools/phases/phase_1_gates.py @@ -0,0 +1,277 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Reusable Phase 1 subphase gate logic. 
# Notes that Phase 1b (reconnaissance) must have produced in itemdb/notes/.
REQUIRED_NOTES_1B = [
    "attack-surface.md",
    "execution-model.md",
    "trust-boundaries.md",
    "data-flow.md",
    "validation-model.md",
    "interesting-files.md",
    "file-risk-index.yml",
    "security-assumptions.md",
]

# Status sub-directories of itemdb/findings/ that hold CC-*.md finding files.
FINDING_STATUS_DIRS = [
    "PENDING",
    "CONFIRMED",
    "EXPLOITED",
    "REJECTED",
    "DUPLICATE",
]

# rich is optional: without it every message goes through the plain helpers.
try:
    from rich.console import Console as _RichConsole

    HAVE_RICH = True
except ImportError:  # pragma: no cover
    _RichConsole = None  # type: ignore[assignment]
    HAVE_RICH = False


def _emit(console, level: str, text: str) -> None:
    """Print one gate message at *level* via rich when possible, else plainly.

    *level* is one of ``header``, ``ok``, ``fail``, ``warn`` or ``info``;
    an unknown level is printed unstyled.
    """
    rich_available = HAVE_RICH and console is not None
    if not rich_available:
        plain_printers = {
            "header": C.header,
            "ok": C.ok,
            "fail": C.fail,
            "warn": C.warn,
            "info": C.info,
        }
        printer = plain_printers.get(level, print)
        printer(text)
        return

    from rich.text import Text

    rich_styles = {
        "header": "bold cyan",
        "ok": "green",
        "fail": "bold red",
        "warn": "yellow",
        "info": "dim",
    }
    console.print(Text(text, style=rich_styles.get(level, "")))


def _emit_separator(console, style: str = "green") -> None:
    """Draw a rich rule in *style*, or print a blank line in plain mode."""
    if console is None or not HAVE_RICH:
        print()
        return

    from rich.rule import Rule

    console.print(Rule(style=style))


def _notes_exist(*names: str) -> list[str]:
    """Return the subset of *names* that is absent from ``itemdb/notes``."""
    notes_dir = ROOT / "itemdb" / "notes"
    absent: list[str] = []
    for note in names:
        if not (notes_dir / note).exists():
            absent.append(note)
    return absent
# Accepted values for the per-language fields of codeql-plan.yml.
_VALID_BUILD_MODES = frozenset({"none", "manual", "autobuild"})
_VALID_CONFIDENCES = frozenset({"HIGH", "MEDIUM", "LOW"})


def count_findings_snapshot(snapshot: dict[str, int] | None = None) -> dict[str, int]:
    """Return finding counts, or deltas from a previous snapshot.

    Without *snapshot* the result maps every status directory to its number
    of ``CC-*.md`` files. With *snapshot* it maps every status to the
    increase since that snapshot, never below zero.
    """
    findings_root = ROOT / "itemdb" / "findings"
    current: dict[str, int] = {}
    for status in FINDING_STATUS_DIRS:
        status_dir = findings_root / status
        current[status] = len(list(status_dir.glob("CC-*.md"))) if status_dir.exists() else 0
    if snapshot is None:
        return current
    return {status: max(0, count - snapshot.get(status, 0)) for status, count in current.items()}


def _read_yaml(console, path, label: str):
    """Parse *path* as YAML and return ``(ok, data)``; emit a failure when not ok."""
    try:
        return True, yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, yaml.YAMLError) as exc:
        _emit(console, "fail", f"{label} is not valid YAML: {exc}")
        return False, None


def _check_plan_language(console, index: int, lang) -> bool:
    """Validate one ``languages`` entry of codeql-plan.yml; False on a hard failure."""
    if not isinstance(lang, dict):
        _emit(console, "fail", f"codeql-plan.yml: language entry {index} is not a mapping")
        return False
    if "id" not in lang:
        _emit(console, "fail", f"codeql-plan.yml: language entry {index} missing 'id'")
        return False

    lang_id = lang["id"]
    for field_name, allowed in (("confidence", _VALID_CONFIDENCES), ("build_mode", _VALID_BUILD_MODES)):
        value = lang.get(field_name)
        # The isinstance guard keeps an unhashable YAML value (list/mapping)
        # from raising TypeError in the set lookup.
        if not isinstance(value, str) or value not in allowed:
            _emit(
                console,
                "warn",
                f"codeql-plan.yml: language '{lang_id}' has unexpected {field_name} '{value}'",
            )

    if "packs" not in lang:
        _emit(console, "fail", f"codeql-plan.yml: language '{lang_id}' missing 'packs'")
        return False
    packs = lang["packs"]
    if not isinstance(packs, list) or not packs:
        _emit(console, "fail", f"codeql-plan.yml: language '{lang_id}' has empty packs list")
        return False
    return True


def _check_codeql_plan(console, plan_path) -> bool:
    """Validate codeql-plan.yml; language entries are checked only when recommended."""
    loaded, plan = _read_yaml(console, plan_path, "codeql-plan.yml")
    if not loaded:
        return False
    if not isinstance(plan, dict):
        _emit(console, "fail", "codeql-plan.yml is not a mapping")
        return False
    if plan.get("recommended") is not True:
        return True

    languages = plan.get("languages", [])
    if not isinstance(languages, list) or not languages:
        _emit(console, "fail", "codeql-plan.yml: recommended=true but no language entries")
        return False
    for index, lang in enumerate(languages):
        if not _check_plan_language(console, index, lang):
            return False

    _emit(console, "ok", f"codeql-plan.yml: {len(languages)} language(s) configured")
    return True


def _check_risk_index(console, risk_path) -> bool:
    """Validate file-risk-index.yml; structural problems fail, odd entries only warn."""
    loaded, data = _read_yaml(console, risk_path, "file-risk-index.yml")
    if not loaded:
        return False
    if not isinstance(data, dict):
        _emit(console, "fail", "file-risk-index.yml: must be a mapping")
        return False

    if "schema_version" not in data:
        _emit(console, "warn", "file-risk-index.yml: missing 'schema_version'")
    files = data.get("files")
    if files is None:
        _emit(console, "fail", "file-risk-index.yml: missing 'files' key")
        return False
    if not isinstance(files, list):
        _emit(console, "fail", "file-risk-index.yml: 'files' is not a list")
        return False

    for index, entry in enumerate(files):
        if not isinstance(entry, dict):
            # Still non-fatal, but no longer skipped silently.
            _emit(console, "warn", f"file-risk-index.yml: entry {index} is not a mapping")
            continue
        path_val = entry.get("path", "")
        if "../" in str(path_val) or str(path_val).startswith("/"):
            _emit(console, "warn", f"file-risk-index.yml: path '{path_val}' is not workspace-relative")
        score = entry.get("score")
        if score is None:
            continue
        try:
            score_int = int(score)
        except (TypeError, ValueError):
            _emit(console, "warn", f"file-risk-index.yml: non-integer score '{score}' for '{path_val}'")
            continue
        if score_int < 1 or score_int > 5:
            _emit(console, "warn", f"file-risk-index.yml: score {score} for '{path_val}' is not in 1..5")

    _emit(console, "ok", f"file-risk-index.yml: {len(files)} file(s) indexed")
    return True


def check_phase_1a(console=None) -> int:
    """Gate 1a: target-profile/build-model/codeql-plan outputs must exist.

    Returns 0 when the gate passes, 1 when an output is missing or
    codeql-plan.yml is invalid. Without PyYAML the plan is not validated.
    """
    _emit(console, "header", "Gate 1a: Target Profile")
    _emit_separator(console, "cyan")

    notes_dir = ROOT / "itemdb" / "notes"
    required = ["target-profile.md", "build-model.md", "codeql-plan.yml"]
    missing = [name for name in required if not (notes_dir / name).exists()]
    if missing:
        _emit(console, "fail", "Required Phase 1a outputs are missing:")
        for name in missing:
            _emit(console, "info", f"  itemdb/notes/{name}")
        _emit(console, "info", "Run Phase 1a first.")
        return 1

    for name in required:
        _emit(console, "ok", f"itemdb/notes/{name} exists")

    if yaml is None:
        _emit(console, "warn", "Cannot validate codeql-plan.yml: PyYAML not available")
    elif not _check_codeql_plan(console, notes_dir / "codeql-plan.yml"):
        return 1

    _emit_separator(console, "green")
    _emit(console, "ok", "Ready to run Phase 1b (CodeQL-assisted Reconnaissance).")
    return 0


def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None) -> int:
    """Gate 1b: recon notes and file-risk-index.yml must be valid.

    Args:
        console: optional rich console; plain output is used when ``None``.
        findings_snapshot: finding counts taken before Phase 1b. When given,
            findings created since then are reported as a warning.

    Returns 0 when the gate passes, 1 when a note is missing or
    file-risk-index.yml is structurally invalid.
    """
    _emit(console, "header", "Gate 1b: CodeQL-assisted Reconnaissance")
    _emit_separator(console, "cyan")

    missing = _notes_exist(*REQUIRED_NOTES_1B)
    if missing:
        _emit(console, "fail", "Required Phase 1b reconnaissance notes are missing:")
        for name in missing:
            _emit(console, "info", f"  itemdb/notes/{name}")
        _emit(console, "info", "Run Phase 1b first.")
        return 1

    for name in REQUIRED_NOTES_1B:
        _emit(console, "ok", f"itemdb/notes/{name} exists")

    if yaml is None:
        # Same treatment as gate 1a: say that validation was skipped.
        _emit(console, "warn", "Cannot validate file-risk-index.yml: PyYAML not available")
    elif not _check_risk_index(console, ROOT / "itemdb" / "notes" / "file-risk-index.yml"):
        return 1

    if findings_snapshot is not None:
        delta = count_findings_snapshot(findings_snapshot)
        new_findings = sum(delta.values())
        if new_findings > 0:
            _emit(
                console,
                "warn",
                f"{new_findings} new finding(s) were created during Phase 1b. Findings should not be created during reconnaissance.",
            )
            for status, count in delta.items():
                if count > 0:
                    _emit(console, "info", f"  {status}: +{count}")

    _emit_separator(console, "green")
    _emit(console, "ok", "Ready to run Phase 1c (Sandbox Bootstrap).")
    return 0


def check_phase_1c(console=None) -> int:
    """Gate 1c: sandbox-plan.md must exist and sandbox provenance is checked.

    Returns 1 only when sandbox-plan.md is missing; a sandbox without the
    provenance marker, or an empty one, produces a warning.
    """
    _emit(console, "header", "Gate 1c: Sandbox Bootstrap")
    _emit_separator(console, "cyan")

    plan_path = ROOT / "itemdb" / "notes" / "sandbox-plan.md"
    if not plan_path.exists():
        _emit(console, "fail", "itemdb/notes/sandbox-plan.md does not exist")
        _emit(console, "info", "Run Phase 1c first.")
        return 1

    _emit(console, "ok", "itemdb/notes/sandbox-plan.md exists")

    sandbox_dir = ROOT / "sandbox"
    has_provenance = (sandbox_dir / "CODECOME-GENERATED.md").exists()
    # is_dir() rather than exists(): iterating a plain file named "sandbox"
    # would raise NotADirectoryError.
    has_sandbox = sandbox_dir.is_dir() and any(
        entry.name != ".gitkeep" for entry in sandbox_dir.iterdir()
    )

    if has_provenance:
        _emit(console, "ok", "sandbox/CODECOME-GENERATED.md exists")
    elif has_sandbox:
        _emit(console, "warn", "sandbox/ exists without CODECOME-GENERATED.md - may be user-managed")
    else:
        _emit(console, "warn", "sandbox/ is empty or does not exist")

    _emit_separator(console, "green")
    _emit(console, "ok", "Phase 1 complete. Ready to run Phase 2.")
    return 0
"$$CODEQL_SKIP_INSTALL" != "1" ]; then \ - $(PYTHON) tools/codeql.py install; \ + $(PYTHON) tools/codeql.py install || { printf "$(BOLD)$(RED)[FAIL]$(RESET) managed CodeQL install failed\n"; exit 1; }; \ + printf "$(BOLD)$(GREEN)[OK]$(RESET) Managed CodeQL CLI ready\n"; \ + else \ + printf "$(BOLD)$(YELLOW)[SKIP]$(RESET) Managed CodeQL install skipped (CODEQL=0 or CODEQL_SKIP_INSTALL=1)\n"; \ fi + @printf "\n$(BOLD)$(GREEN)Setup complete.$(RESET)\n" venv: init diff --git a/codecome.yml b/codecome.yml index 4031936e..61aa5a75 100644 --- a/codecome.yml +++ b/codecome.yml @@ -95,7 +95,7 @@ audit: install: managed: true version: "latest" - path: ".tools/codeql/current/codeql/codeql" + path: ".tools/codeql/current/codeql" output_dir: "./itemdb/evidence/codeql" database_dir: "./itemdb/evidence/codeql/databases" diff --git a/tools/codeql/config.py b/tools/codeql/config.py index b8758b22..18373051 100644 --- a/tools/codeql/config.py +++ b/tools/codeql/config.py @@ -36,7 +36,7 @@ "pack_catalog": "./templates/codeql-packs.yml", "install_managed": True, "install_version": "latest", - "install_path": ".tools/codeql/current/codeql/codeql", + "install_path": ".tools/codeql/current/codeql", "output_dir": "./itemdb/evidence/codeql", "database_dir": "./itemdb/evidence/codeql/databases", "cache_dir": "./.cache/codeql", @@ -104,7 +104,7 @@ class CodeQLConfig: install_managed: bool = True install_version: str = "latest" - install_path: str = ".tools/codeql/current/codeql/codeql" + install_path: str = ".tools/codeql/current/codeql" output_dir: str = "./itemdb/evidence/codeql" database_dir: str = "./itemdb/evidence/codeql/databases" diff --git a/tools/codeql/install.py b/tools/codeql/install.py index f5ede7e0..9bec0f17 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -117,7 +117,13 @@ def _download(url: str, dest: Path) -> None: def _extract(zip_path: Path, dest_dir: Path) -> None: - """Extract a zip archive to *dest_dir*.""" + """Extract a zip archive to 
*dest_dir*, flattening the inner codeql/ dir. + + The GitHub release bundle contains a top-level ``codeql/`` directory. + After extraction we move its contents up one level into *dest_dir* + so the binary lands directly under the versioned directory. + The now-empty inner ``codeql/`` directory is removed. + """ import zipfile dest_dir.mkdir(parents=True, exist_ok=True) @@ -125,19 +131,34 @@ def _extract(zip_path: Path, dest_dir: Path) -> None: with zipfile.ZipFile(zip_path, "r") as zf: zf.extractall(dest_dir) + # Flatten: the bundle creates an inner "codeql/" subdir; move everything + # inside it up into dest_dir so the binary sits at dest_dir/codeql. + inner = dest_dir / "codeql" + if inner.is_dir(): + for item in inner.iterdir(): + target = dest_dir / item.name + if target.exists(): + if target.is_dir(): + shutil.rmtree(target) + else: + target.unlink() + item.rename(target) + shutil.rmtree(inner) + # --------------------------------------------------------------------------- # Install # --------------------------------------------------------------------------- def _codeql_binary(base_dir: Path) -> Path: - """Return the path to the codeql executable inside an extracted bundle.""" - # The bundle extracts to codeql/ subdirectory - codeql_dir = base_dir / "codeql" - if codeql_dir.is_dir(): - return codeql_dir / "codeql" - # Some older bundles extract directly - return base_dir / "codeql" + """Return the path to the codeql executable inside an extracted bundle. + + After flattening, the binary sits directly at ``base_dir/codeql``. 
+ """ + binary = base_dir / "codeql" + if binary.is_file(): + return binary + return binary # fall back; will fail usefully in _verify if missing def install(config: Optional[CodeQLConfig] = None) -> int: From 2b133adb9a83c5f814a4afdb67d7aa63101b84c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 22:53:21 +0200 Subject: [PATCH 06/47] fix: use shutil.move in _extract to correctly flatten CodeQL bundle Also add 'make init' step to CI test job so managed CodeQL install is exercised on every run. --- .github/workflows/tests.yml | 3 +++ tools/codeql/install.py | 11 +++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 25466445..a12769f9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,6 +46,9 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt + - name: Run make init (includes managed CodeQL install) + run: make init + - name: Run pytest with coverage id: pytest run: | diff --git a/tools/codeql/install.py b/tools/codeql/install.py index 9bec0f17..a3107941 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -137,12 +137,11 @@ def _extract(zip_path: Path, dest_dir: Path) -> None: if inner.is_dir(): for item in inner.iterdir(): target = dest_dir / item.name - if target.exists(): - if target.is_dir(): - shutil.rmtree(target) - else: - target.unlink() - item.rename(target) + if target.is_dir(): + shutil.rmtree(target) + elif target.is_file(): + target.unlink() + shutil.move(str(item), str(target)) shutil.rmtree(inner) From e2caf7ad6b97dc2104fa0a502cc73324400da221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 23:21:13 +0200 Subject: [PATCH 07/47] fix: extract CodeQL bundle without post-processing --- tests/test_codeql_install.py | 40 +++++++++++++++++++++++ tools/codeql/install.py | 62 +++++++++++++++++++++++------------- 2 
files changed, 80 insertions(+), 22 deletions(-) create mode 100644 tests/test_codeql_install.py diff --git a/tests/test_codeql_install.py b/tests/test_codeql_install.py new file mode 100644 index 00000000..c3a5b930 --- /dev/null +++ b/tests/test_codeql_install.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import sys +import zipfile +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.install import _codeql_binary, _extract + + +def test_extract_strips_leading_codeql_prefix(tmp_path: Path) -> None: + zip_path = tmp_path / "codeql-test.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("codeql/", "") + zf.writestr("codeql/codeql", "#!/bin/sh\necho codeql\n") + zf.writestr("codeql/codeql.cmd", "@echo off\r\n") + zf.writestr("codeql/cpp/extractor.txt", "cpp") + zf.writestr("codeql/LICENSE.md", "license") + + dest_dir = tmp_path / "install" + _extract(zip_path, dest_dir) + + assert (dest_dir / "codeql").is_file() + assert (dest_dir / "codeql.cmd").is_file() + assert (dest_dir / "cpp" / "extractor.txt").read_text(encoding="utf-8") == "cpp" + assert (dest_dir / "LICENSE.md").read_text(encoding="utf-8") == "license" + assert not (dest_dir / "codeql" / "codeql").exists() + assert _codeql_binary(dest_dir) == dest_dir / "codeql" + + +def test_codeql_binary_supports_legacy_nested_layout(tmp_path: Path) -> None: + legacy = tmp_path / "legacy" / "codeql" + legacy.mkdir(parents=True) + binary = legacy / "codeql" + binary.write_text("#!/bin/sh\n", encoding="utf-8") + + assert _codeql_binary(tmp_path / "legacy") == binary diff --git a/tools/codeql/install.py b/tools/codeql/install.py index a3107941..56c66731 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -13,6 +13,7 @@ import os import platform import shutil +import stat import subprocess import sys import tempfile @@ -117,32 +118,45 @@ def _download(url: str, dest: Path) -> None: def _extract(zip_path: 
Path, dest_dir: Path) -> None: - """Extract a zip archive to *dest_dir*, flattening the inner codeql/ dir. + """Extract a zip archive to *dest_dir*, stripping the leading ``codeql/``. - The GitHub release bundle contains a top-level ``codeql/`` directory. - After extraction we move its contents up one level into *dest_dir* - so the binary lands directly under the versioned directory. - The now-empty inner ``codeql/`` directory is removed. + GitHub's CodeQL bundles contain a single top-level ``codeql/`` directory. + We strip that prefix during extraction so the launcher ends up at + ``dest_dir/codeql`` and the rest of the bundle contents sit directly under + the version directory. """ import zipfile + prefix = "codeql/" dest_dir.mkdir(parents=True, exist_ok=True) print(f"Extracting to {dest_dir} …") with zipfile.ZipFile(zip_path, "r") as zf: - zf.extractall(dest_dir) - - # Flatten: the bundle creates an inner "codeql/" subdir; move everything - # inside it up into dest_dir so the binary sits at dest_dir/codeql. 
- inner = dest_dir / "codeql" - if inner.is_dir(): - for item in inner.iterdir(): - target = dest_dir / item.name - if target.is_dir(): - shutil.rmtree(target) - elif target.is_file(): - target.unlink() - shutil.move(str(item), str(target)) - shutil.rmtree(inner) + for info in zf.infolist(): + if not info.filename.startswith(prefix): + raise RuntimeError( + f"Unexpected CodeQL bundle layout: {info.filename!r} does not start with {prefix!r}" + ) + + relative_name = info.filename[len(prefix):] + if not relative_name: + continue + + target = dest_dir / relative_name + if info.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + + target.parent.mkdir(parents=True, exist_ok=True) + with zf.open(info, "r") as src, target.open("wb") as dst: + shutil.copyfileobj(src, dst) + + mode = (info.external_attr >> 16) & 0o777 + if mode: + target.chmod(mode) + + launcher = dest_dir / "codeql" + if launcher.is_file(): + launcher.chmod(launcher.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) # --------------------------------------------------------------------------- @@ -152,11 +166,15 @@ def _extract(zip_path: Path, dest_dir: Path) -> None: def _codeql_binary(base_dir: Path) -> Path: """Return the path to the codeql executable inside an extracted bundle. - After flattening, the binary sits directly at ``base_dir/codeql``. + New installs place the binary at ``base_dir/codeql``. Keep a temporary + fallback for older nested local installs. 
""" binary = base_dir / "codeql" if binary.is_file(): return binary + legacy_binary = base_dir / "codeql" / "codeql" + if legacy_binary.is_file(): + return legacy_binary return binary # fall back; will fail usefully in _verify if missing @@ -231,8 +249,8 @@ def install(config: Optional[CodeQLConfig] = None) -> int: zip_path = tmp_dir / f"codeql-{version}-{plat}.zip" _download(url, zip_path) - # Remove previous version dir if force-reinstalling - if force and version_dir.exists(): + # Replace stale partial installs before extracting a fresh bundle. + if version_dir.exists(): shutil.rmtree(version_dir) _extract(zip_path, version_dir) From a9d5d428400c41912220f5bb10aca0c1b3b6140b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 29 May 2026 23:43:31 +0200 Subject: [PATCH 08/47] refactor: move gate checks into phase modules --- tests/test_codeql_config.py | 31 ++++ tests/test_codeql_install.py | 14 ++ tests/test_gate_check.py | 100 +++++++---- tools/codeql/config.py | 38 ++-- tools/codeql/install.py | 16 +- tools/gate-check.py | 320 +--------------------------------- tools/phases/gates.py | 316 +++++++++++++++++++++++++++++++++ tools/phases/phase_1_gates.py | 4 +- 8 files changed, 470 insertions(+), 369 deletions(-) create mode 100644 tests/test_codeql_config.py create mode 100644 tools/phases/gates.py diff --git a/tests/test_codeql_config.py b/tests/test_codeql_config.py new file mode 100644 index 00000000..78b2a0b1 --- /dev/null +++ b/tests/test_codeql_config.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql import config as config_module + + +def test_load_codecome_yml_supports_top_level_static_analysis(tmp_path: Path, monkeypatch) -> None: + config_path = tmp_path / "codecome.yml" + config_path.write_text( + "static_analysis:\n codeql:\n candidate_mode: top-level\n", + encoding="utf-8", 
+ ) + monkeypatch.setattr(config_module, "ROOT", tmp_path) + + data = config_module._load_codecome_yml() + assert data == {"candidate_mode": "top-level"} + + +def test_resolve_config_falls_back_on_invalid_max_candidates(monkeypatch) -> None: + monkeypatch.delenv("CODEQL", raising=False) + monkeypatch.delenv("CODEQL_SKIP", raising=False) + monkeypatch.setenv("CODEQL_MAX_CANDIDATES", "not-a-number") + + config = config_module.resolve_config() + assert config.max_candidates == config_module.DEFAULTS["max_candidates"] diff --git a/tests/test_codeql_install.py b/tests/test_codeql_install.py index c3a5b930..97222692 100644 --- a/tests/test_codeql_install.py +++ b/tests/test_codeql_install.py @@ -31,6 +31,20 @@ def test_extract_strips_leading_codeql_prefix(tmp_path: Path) -> None: assert _codeql_binary(dest_dir) == dest_dir / "codeql" +def test_extract_rejects_path_traversal(tmp_path: Path) -> None: + zip_path = tmp_path / "codeql-traversal.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("codeql/../../escape.txt", "boom") + + dest_dir = tmp_path / "install" + try: + _extract(zip_path, dest_dir) + except RuntimeError as exc: + assert "outside target dir" in str(exc) + else: + raise AssertionError("expected traversal-protection RuntimeError") + + def test_codeql_binary_supports_legacy_nested_layout(tmp_path: Path) -> None: legacy = tmp_path / "legacy" / "codeql" legacy.mkdir(parents=True) diff --git a/tests/test_gate_check.py b/tests/test_gate_check.py index 2e715345..7152ec39 100644 --- a/tests/test_gate_check.py +++ b/tests/test_gate_check.py @@ -1,98 +1,130 @@ from __future__ import annotations -from conftest import load_tool_module +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codecome.config import ROOT as CONFIG_ROOT +from phases import gates as gates_module def test_has_meaningful_evidence_detects_template_only_readme(tmp_path): - module = 
load_tool_module("gate_check", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path evidence_dir = tmp_path / "itemdb" / "evidence" / "CC-0001" evidence_dir.mkdir(parents=True) readme = evidence_dir / "README.md" readme.write_text("Briefly summarize what this evidence proves or disproves.", encoding="utf-8") - assert module.has_meaningful_evidence("CC-0001") is False + try: + assert gates_module.has_meaningful_evidence("CC-0001") is False + finally: + gates_module.ROOT = original_root def test_has_meaningful_evidence_detects_non_readme_artifact(tmp_path): - module = load_tool_module("gate_check_artifact", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path evidence_dir = tmp_path / "itemdb" / "evidence" / "CC-0002" evidence_dir.mkdir(parents=True) (evidence_dir / "output.txt").write_text("proof", encoding="utf-8") - assert module.has_meaningful_evidence("CC-0002") is True + try: + assert gates_module.has_meaningful_evidence("CC-0002") is True + finally: + gates_module.ROOT = original_root def test_find_finding_exact_match_bare_cc_xxxx(tmp_path): - module = load_tool_module("gate_check_exact", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003.md" + try: + result = gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003.md" + finally: + gates_module.ROOT = original_root def test_find_finding_slug_match(tmp_path): - module = load_tool_module("gate_check_slug", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path 
pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003-some-finding.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003-some-finding.md" + try: + result = gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003-some-finding.md" + finally: + gates_module.ROOT = original_root def test_find_finding_exact_wins_over_slug(tmp_path): - module = load_tool_module("gate_check_priority", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") (pending / "CC-0003-other-finding.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - result = module.find_finding("CC-0003") - assert result is not None - assert result.name == "CC-0003.md" + try: + result = gates_module.find_finding("CC-0003") + assert result is not None + assert result.name == "CC-0003.md" + finally: + gates_module.ROOT = original_root def test_find_finding_returns_none_for_missing(tmp_path): - module = load_tool_module("gate_check_missing", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path - result = module.find_finding("CC-9999") - assert result is None + try: + result = gates_module.find_finding("CC-9999") + assert result is None + finally: + gates_module.ROOT = original_root def test_gate_phase_4_accepts_bare_id(tmp_path, monkeypatch): - module = load_tool_module("gate_check_phase4", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path pending = tmp_path / "itemdb" / "findings" / "PENDING" pending.mkdir(parents=True) (pending / "CC-0003.md").write_text("---\nid: 
CC-0003\n---\n", encoding="utf-8") - exit_code = module.gate_phase_4("CC-0003") - assert exit_code == 0 + try: + exit_code = gates_module.gate_phase_4("CC-0003") + assert exit_code == 0 + finally: + gates_module.ROOT = original_root def test_gate_phase_4_rejects_wrong_status(tmp_path, monkeypatch): - module = load_tool_module("gate_check_phase4_wrong", "tools/gate-check.py") - module.ROOT = tmp_path + original_root = gates_module.ROOT + gates_module.ROOT = tmp_path confirmed = tmp_path / "itemdb" / "findings" / "CONFIRMED" confirmed.mkdir(parents=True) (confirmed / "CC-0003.md").write_text("---\nid: CC-0003\n---\n", encoding="utf-8") - exit_code = module.gate_phase_4("CC-0003") - assert exit_code == 1 + try: + exit_code = gates_module.gate_phase_4("CC-0003") + assert exit_code == 1 + finally: + gates_module.ROOT = original_root diff --git a/tools/codeql/config.py b/tools/codeql/config.py index 18373051..0c05cd01 100644 --- a/tools/codeql/config.py +++ b/tools/codeql/config.py @@ -54,7 +54,7 @@ # --------------------------------------------------------------------------- def _load_codecome_yml() -> dict[str, Any] | None: - """Load codecome.yml and return the ``audit.static_analysis.codeql`` block.""" + """Load codecome.yml and return the configured CodeQL block.""" if yaml is None: return None path = ROOT / "codecome.yml" @@ -67,13 +67,19 @@ def _load_codecome_yml() -> dict[str, Any] | None: if not isinstance(data, dict): return None audit = data.get("audit") - if not isinstance(audit, dict): - return None - sa = audit.get("static_analysis") - if not isinstance(sa, dict): - return None - cq = sa.get("codeql") - return cq if isinstance(cq, dict) else None + if isinstance(audit, dict): + sa = audit.get("static_analysis") + if isinstance(sa, dict): + cq = sa.get("codeql") + if isinstance(cq, dict): + return cq + + sa = data.get("static_analysis") + if isinstance(sa, dict): + cq = sa.get("codeql") + if isinstance(cq, dict): + return cq + return None def _bool_env(name: 
str) -> bool | None: @@ -89,6 +95,13 @@ def _str_env(name: str) -> str | None: return raw.strip() if raw else None +def _safe_int(value: Any, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + # --------------------------------------------------------------------------- # Resolution # --------------------------------------------------------------------------- @@ -202,8 +215,11 @@ def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> phase_2_enabled = _get("phase_2_enabled", DEFAULTS["phase_2_enabled"], env="CODEQL_PHASE_2", coerce=bool) candidate_mode = _str_env("CODEQL_CANDIDATES") or _get("candidate_mode", DEFAULTS["candidate_mode"]) - max_candidates = _get("max_candidates", DEFAULTS["max_candidates"], - env="CODEQL_MAX_CANDIDATES", coerce=int) + max_candidates_raw = _str_env("CODEQL_MAX_CANDIDATES") + if max_candidates_raw is None: + max_candidates = _safe_int(_get("max_candidates", DEFAULTS["max_candidates"]), DEFAULTS["max_candidates"]) + else: + max_candidates = _safe_int(max_candidates_raw, DEFAULTS["max_candidates"]) # Sweep settings sweep_enabled = _get("sweep_enabled", DEFAULTS["sweep_enabled"], @@ -224,7 +240,7 @@ def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> phase_1_enabled=phase_1_enabled, phase_2_enabled=phase_2_enabled, candidate_mode=candidate_mode, - max_candidates=int(max_candidates), + max_candidates=max_candidates, sweep_enabled=sweep_enabled, sweep_inject_context=sweep_inject_context, abs_pack_catalog=(ROOT / pack_catalog).resolve(), diff --git a/tools/codeql/install.py b/tools/codeql/install.py index 56c66731..e77c386f 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -38,8 +38,9 @@ def _detect_platform() -> str: return "osx64" if system == "linux": return "linux64" - # Fallback: try linux64 for other unix-like systems - return "linux64" + if system == "windows": + return "win64" + raise 
RuntimeError(f"Unsupported platform: system={system} machine={machine}") def _bundle_suffix(plat: str) -> str: @@ -128,6 +129,7 @@ def _extract(zip_path: Path, dest_dir: Path) -> None: import zipfile prefix = "codeql/" + dest_root = dest_dir.resolve() dest_dir.mkdir(parents=True, exist_ok=True) print(f"Extracting to {dest_dir} …") with zipfile.ZipFile(zip_path, "r") as zf: @@ -141,7 +143,9 @@ def _extract(zip_path: Path, dest_dir: Path) -> None: if not relative_name: continue - target = dest_dir / relative_name + target = (dest_dir / relative_name).resolve() + if target != dest_root and dest_root not in target.parents: + raise RuntimeError(f"Refusing to extract CodeQL bundle member outside target dir: {info.filename!r}") if info.is_dir(): target.mkdir(parents=True, exist_ok=True) continue @@ -224,7 +228,11 @@ def install(config: Optional[CodeQLConfig] = None) -> int: return _verify(binary_path) # --- Download --- - plat = _detect_platform() + try: + plat = _detect_platform() + except RuntimeError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + return 1 print(f"Platform: {plat}") if version == "latest": diff --git a/tools/gate-check.py b/tools/gate-check.py index 5311cc24..9572e854 100755 --- a/tools/gate-check.py +++ b/tools/gate-check.py @@ -20,283 +20,13 @@ from __future__ import annotations import argparse -import re import sys from pathlib import Path -try: - import yaml -except ImportError: # pragma: no cover - yaml = None - # Allow importing sibling modules. 
sys.path.insert(0, str(Path(__file__).resolve().parent)) -from _colors import ok, fail, warn, header, info, GREEN, RESET, BOLD, SYM_OK -from phases.phase_1_gates import ( - check_phase_1a, - check_phase_1b, - check_phase_1c, -) - -ROOT = Path(__file__).resolve().parents[1] -FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) -EVIDENCE_TEMPLATE_MARKERS = [ - "Briefly summarize what this evidence proves or disproves.", - "Describe the validation method used.", - "command goes here", - "Describe what happened.", -] - -REQUIRED_NOTES = [ - "target-profile.md", - "attack-surface.md", -] - -FINDING_STATUS_DIRS = [ - "PENDING", - "CONFIRMED", - "EXPLOITED", - "REJECTED", - "DUPLICATE", -] - - -def has_source_files() -> bool: - """Return True if src/ contains at least one file (not just .gitkeep).""" - src_dir = ROOT / "src" - if not src_dir.exists(): - return False - for child in src_dir.rglob("*"): - if child.is_file() and child.name != ".gitkeep": - return True - return False - - -def has_notes(*names: str) -> list[str]: - """Return list of missing note files.""" - notes_dir = ROOT / "itemdb" / "notes" - missing = [] - for name in names: - if not (notes_dir / name).exists(): - missing.append(name) - return missing - - -def count_findings(status: str) -> int: - """Count finding files in a status directory.""" - status_dir = ROOT / "itemdb" / "findings" / status - if not status_dir.exists(): - return 0 - return len(list(status_dir.glob("CC-*.md"))) - - -def count_all_findings() -> int: - """Count finding files across all status directories.""" - return sum(count_findings(s) for s in FINDING_STATUS_DIRS) - - -def load_frontmatter(path: Path) -> dict[str, object]: - """Load YAML frontmatter from a finding file.""" - if yaml is None: - raise RuntimeError("PyYAML is not installed. 
Run: pip install -r requirements.txt") - - content = path.read_text(encoding="utf-8") - match = FRONTMATTER_RE.match(content) - if not match: - return {} - - data = yaml.safe_load(match.group(1)) - return data if isinstance(data, dict) else {} - - -def find_finding(identifier: str) -> Path | None: - """Locate a finding file by path or ID.""" - candidate = Path(identifier) - if candidate.is_absolute() and candidate.exists(): - return candidate.resolve() - - root_relative = ROOT / identifier - if root_relative.exists(): - return root_relative.resolve() - - findings_root = ROOT / "itemdb" / "findings" - for status in FINDING_STATUS_DIRS: - status_dir = findings_root / status - if not status_dir.exists(): - continue - # Exact match: CC-0003.md (no slug) - exact = status_dir / f"{identifier}.md" - if exact.exists(): - return exact.resolve() - # Slug match: CC-0003-some-title.md - matches = list(status_dir.glob(f"{identifier}-*.md")) - if matches: - return matches[0].resolve() - return None - - -def has_meaningful_evidence(finding_id: str) -> bool: - """Return True when the evidence directory contains more than scaffolding.""" - evidence_dir = ROOT / "itemdb" / "evidence" / finding_id - if not evidence_dir.exists(): - return False - - files = [path for path in evidence_dir.rglob("*") if path.is_file()] - if not files: - return False - - non_readme_files = [path for path in files if path.name != "README.md"] - if non_readme_files: - return True - - readme_path = evidence_dir / "README.md" - if not readme_path.exists(): - return False - - content = readme_path.read_text(encoding="utf-8") - return not any(marker in content for marker in EVIDENCE_TEMPLATE_MARKERS) - - -def gate_phase_1() -> int: - """Phase 1: src/ must contain target source code.""" - print(header("Phase 1: Target Reconnaissance")) - print() - - if not has_source_files(): - print(fail("src/ is empty or does not exist.")) - print() - print(info("Place target source code under src/ before running Phase 1.")) - 
print(info("See docs/target-setup.md for instructions.")) - return 1 - - print(ok("src/ contains source files.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 1.") - return 0 - - -def gate_phase_2() -> int: - """Phase 2: reconnaissance notes must exist.""" - print(header("Phase 2: Vulnerability Hypothesis Generation")) - print() - - missing = has_notes(*REQUIRED_NOTES) - if missing: - print(fail("Required reconnaissance notes are missing:")) - for name in missing: - print(f" {name}") - print() - print(info("Run Phase 1 first: make phase-1")) - return 1 - - print(ok("Required reconnaissance notes exist.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 2.") - return 0 - - -def gate_phase_3() -> int: - """Phase 3: at least one PENDING finding must exist.""" - print(header("Phase 3: Counter-analysis")) - print() - - nv_count = count_findings("PENDING") - if nv_count == 0: - print(fail("No findings in PENDING.")) - print() - print(info("Run Phase 2 first: make phase-2")) - return 1 - - print(ok(f"{nv_count} finding(s) in PENDING.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 3.") - return 0 - - -def gate_phase_4(identifier: str) -> int: - """Phase 4: finding must exist and be in PENDING.""" - print(header(f"Phase 4: Validate {identifier}")) - print() - - path = find_finding(identifier) - if path is None: - print(fail(f"Finding not found: {identifier}")) - print() - print(info("Check available findings: make status")) - return 1 - - if path.parent.name != "PENDING": - print(warn(f"{path.stem} is in {path.parent.name}, not PENDING.")) - print() - print(info("Only PENDING findings can be validated.")) - return 1 - - print(ok(f"Found: {path.relative_to(ROOT)}")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to validate {path.stem}.") - return 0 - - -def gate_phase_5(identifier: str) -> int: - """Phase 5: finding must be CONFIRMED with evidence.""" - print(header(f"Phase 5: Exploit Development for {identifier}")) - 
print() - - path = find_finding(identifier) - if path is None: - print(fail(f"Finding not found: {identifier}")) - print() - print(info("Check available findings: make status")) - return 1 - - if path.parent.name != "CONFIRMED": - print(warn(f"{path.stem} is in {path.parent.name}, not CONFIRMED.")) - print() - print(info("Only CONFIRMED findings can have exploits developed.")) - return 1 - - frontmatter = load_frontmatter(path) - validation = frontmatter.get("validation") - validation_status = validation.get("status") if isinstance(validation, dict) else None - if validation_status != "CONFIRMED": - print(warn(f"{path.stem} has validation.status={validation_status!r}, not 'CONFIRMED'.")) - print() - print(info("Only findings with confirmed validation evidence can enter Phase 5.")) - return 1 - - finding_id = str(frontmatter.get("id", "-".join(path.stem.split("-", 2)[:2]))) - - evidence_dir = ROOT / "itemdb" / "evidence" / finding_id - if not has_meaningful_evidence(finding_id): - print(warn(f"No meaningful validation evidence found under itemdb/evidence/{finding_id}/.")) - print() - print(info("Run Phase 4 first and record actual evidence before Phase 5.")) - return 1 - - print(ok(f"Found: {path.relative_to(ROOT)}")) - print(ok(f"Evidence exists: itemdb/evidence/{finding_id}/")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to develop exploit for {finding_id}.") - return 0 - - -def gate_phase_6() -> int: - """Phase 6: at least one finding must exist.""" - print(header("Phase 6: Reporting")) - print() - - total = count_all_findings() - if total == 0: - print(fail("No findings exist in any status directory.")) - print() - print(info("Run Phases 1-5 first to produce findings.")) - return 1 - - print(ok(f"{total} finding(s) across all status directories.")) - print() - print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 6.") - return 0 +from phases.gates import run_from_cli def build_parser() -> argparse.ArgumentParser: @@ -314,53 +44,7 @@ def build_parser() -> 
argparse.ArgumentParser: def main() -> int: parser = build_parser() args = parser.parse_args() - - phase_str = str(args.phase) - - if phase_str == "1a": - return check_phase_1a() - elif phase_str == "1b": - return check_phase_1b() - elif phase_str == "1c": - return check_phase_1c() - - try: - phase_int = int(phase_str) - except ValueError: - print(fail(f"Invalid phase: {phase_str}")) - print() - print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) - return 1 - - if phase_int == 1: - return gate_phase_1() - elif phase_int == 2: - return gate_phase_2() - elif phase_int == 3: - return gate_phase_3() - elif phase_int == 4: - if not args.finding_id: - print(fail("Phase 4 requires a finding ID.")) - print() - print(info("Usage: ./tools/gate-check.py 4 CC-0001")) - print(info(" or: ./tools/gate-check.py 4 itemdb/findings/PENDING/CC-0001-test.md")) - return 1 - return gate_phase_4(args.finding_id) - elif phase_int == 5: - if not args.finding_id: - print(fail("Phase 5 requires a finding ID.")) - print() - print(info("Usage: ./tools/gate-check.py 5 CC-0001")) - print(info(" or: ./tools/gate-check.py 5 itemdb/findings/CONFIRMED/CC-0001-test.md")) - return 1 - return gate_phase_5(args.finding_id) - elif phase_int == 6: - return gate_phase_6() - - print(fail(f"Invalid phase: {phase_str}")) - print() - print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) - return 1 + return run_from_cli(args) if __name__ == "__main__": diff --git a/tools/phases/gates.py b/tools/phases/gates.py new file mode 100644 index 00000000..4595d778 --- /dev/null +++ b/tools/phases/gates.py @@ -0,0 +1,316 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Phase gate implementation for the gate-check CLI.""" + +from __future__ import annotations + +import re +from pathlib import Path + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None + +from _colors import ok, fail, warn, header, info, GREEN, RESET, 
SYM_OK +from codecome.config import ROOT +from phases.phase_1_gates import check_phase_1a, check_phase_1b, check_phase_1c + + +FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) +EVIDENCE_TEMPLATE_MARKERS = [ + "Briefly summarize what this evidence proves or disproves.", + "Describe the validation method used.", + "command goes here", + "Describe what happened.", +] + +REQUIRED_NOTES = [ + "target-profile.md", + "attack-surface.md", +] + +FINDING_STATUS_DIRS = [ + "PENDING", + "CONFIRMED", + "EXPLOITED", + "REJECTED", + "DUPLICATE", +] + + +def has_source_files() -> bool: + """Return True if src/ contains at least one file (not just .gitkeep).""" + src_dir = ROOT / "src" + if not src_dir.exists(): + return False + for child in src_dir.rglob("*"): + if child.is_file() and child.name != ".gitkeep": + return True + return False + + +def has_notes(*names: str) -> list[str]: + """Return list of missing note files.""" + notes_dir = ROOT / "itemdb" / "notes" + missing = [] + for name in names: + if not (notes_dir / name).exists(): + missing.append(name) + return missing + + +def count_findings(status: str) -> int: + """Count finding files in a status directory.""" + status_dir = ROOT / "itemdb" / "findings" / status + if not status_dir.exists(): + return 0 + return len(list(status_dir.glob("CC-*.md"))) + + +def count_all_findings() -> int: + """Count finding files across all status directories.""" + return sum(count_findings(s) for s in FINDING_STATUS_DIRS) + + +def load_frontmatter(path: Path) -> dict[str, object]: + """Load YAML frontmatter from a finding file.""" + if yaml is None: + raise RuntimeError("PyYAML is not installed. 
Run: pip install -r requirements.txt") + + content = path.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(content) + if not match: + return {} + + data = yaml.safe_load(match.group(1)) + return data if isinstance(data, dict) else {} + + +def find_finding(identifier: str) -> Path | None: + """Locate a finding file by path or ID.""" + candidate = Path(identifier) + if candidate.is_absolute() and candidate.exists(): + return candidate.resolve() + + root_relative = ROOT / identifier + if root_relative.exists(): + return root_relative.resolve() + + findings_root = ROOT / "itemdb" / "findings" + for status in FINDING_STATUS_DIRS: + status_dir = findings_root / status + if not status_dir.exists(): + continue + exact = status_dir / f"{identifier}.md" + if exact.exists(): + return exact.resolve() + matches = list(status_dir.glob(f"{identifier}-*.md")) + if matches: + return matches[0].resolve() + return None + + +def has_meaningful_evidence(finding_id: str) -> bool: + """Return True when the evidence directory contains more than scaffolding.""" + evidence_dir = ROOT / "itemdb" / "evidence" / finding_id + if not evidence_dir.exists(): + return False + + files = [path for path in evidence_dir.rglob("*") if path.is_file()] + if not files: + return False + + non_readme_files = [path for path in files if path.name != "README.md"] + if non_readme_files: + return True + + readme_path = evidence_dir / "README.md" + if not readme_path.exists(): + return False + + content = readme_path.read_text(encoding="utf-8") + return not any(marker in content for marker in EVIDENCE_TEMPLATE_MARKERS) + + +def gate_phase_1() -> int: + print(header("Phase 1: Target Reconnaissance")) + print() + + if not has_source_files(): + print(fail("src/ is empty or does not exist.")) + print() + print(info("Place target source code under src/ before running Phase 1.")) + print(info("See docs/target-setup.md for instructions.")) + return 1 + + print(ok("src/ contains source files.")) + print() + 
print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 1.") + return 0 + + +def gate_phase_2() -> int: + print(header("Phase 2: Vulnerability Hypothesis Generation")) + print() + + missing = has_notes(*REQUIRED_NOTES) + if missing: + print(fail("Required reconnaissance notes are missing:")) + for name in missing: + print(f" {name}") + print() + print(info("Run Phase 1 first: make phase-1")) + return 1 + + print(ok("Required reconnaissance notes exist.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 2.") + return 0 + + +def gate_phase_3() -> int: + print(header("Phase 3: Counter-analysis")) + print() + + nv_count = count_findings("PENDING") + if nv_count == 0: + print(fail("No findings in PENDING.")) + print() + print(info("Run Phase 2 first: make phase-2")) + return 1 + + print(ok(f"{nv_count} finding(s) in PENDING.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 3.") + return 0 + + +def gate_phase_4(identifier: str) -> int: + print(header(f"Phase 4: Validate {identifier}")) + print() + + path = find_finding(identifier) + if path is None: + print(fail(f"Finding not found: {identifier}")) + print() + print(info("Check available findings: make status")) + return 1 + + if path.parent.name != "PENDING": + print(warn(f"{path.stem} is in {path.parent.name}, not PENDING.")) + print() + print(info("Only PENDING findings can be validated.")) + return 1 + + print(ok(f"Found: {path.relative_to(ROOT)}")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to validate {path.stem}.") + return 0 + + +def gate_phase_5(identifier: str) -> int: + print(header(f"Phase 5: Exploit Development for {identifier}")) + print() + + path = find_finding(identifier) + if path is None: + print(fail(f"Finding not found: {identifier}")) + print() + print(info("Check available findings: make status")) + return 1 + + if path.parent.name != "CONFIRMED": + print(warn(f"{path.stem} is in {path.parent.name}, not CONFIRMED.")) + print() + print(info("Only CONFIRMED findings can 
have exploits developed.")) + return 1 + + frontmatter = load_frontmatter(path) + validation = frontmatter.get("validation") + validation_status = validation.get("status") if isinstance(validation, dict) else None + if validation_status != "CONFIRMED": + print(warn(f"{path.stem} has validation.status={validation_status!r}, not 'CONFIRMED'.")) + print() + print(info("Only findings with confirmed validation evidence can enter Phase 5.")) + return 1 + + finding_id = str(frontmatter.get("id", "-".join(path.stem.split("-", 2)[:2]))) + if not has_meaningful_evidence(finding_id): + print(warn(f"No meaningful validation evidence found under itemdb/evidence/{finding_id}/.")) + print() + print(info("Run Phase 4 first and record actual evidence before Phase 5.")) + return 1 + + print(ok(f"Found: {path.relative_to(ROOT)}")) + print(ok(f"Evidence exists: itemdb/evidence/{finding_id}/")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to develop exploit for {finding_id}.") + return 0 + + +def gate_phase_6() -> int: + print(header("Phase 6: Reporting")) + print() + + total = count_all_findings() + if total == 0: + print(fail("No findings exist in any status directory.")) + print() + print(info("Run Phases 1-5 first to produce findings.")) + return 1 + + print(ok(f"{total} finding(s) across all status directories.")) + print() + print(f"{GREEN}{SYM_OK}{RESET} Ready to run Phase 6.") + return 0 + + +def run_from_cli(args) -> int: + phase_str = str(args.phase) + + if phase_str == "1a": + return check_phase_1a() + if phase_str == "1b": + return check_phase_1b() + if phase_str == "1c": + return check_phase_1c() + + try: + phase_int = int(phase_str) + except ValueError: + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) + return 1 + + if phase_int == 1: + return gate_phase_1() + if phase_int == 2: + return gate_phase_2() + if phase_int == 3: + return gate_phase_3() + if phase_int == 4: + if not args.finding_id: + 
print(fail("Phase 4 requires a finding ID.")) + print() + print(info("Usage: ./tools/gate-check.py 4 CC-0001")) + print(info(" or: ./tools/gate-check.py 4 itemdb/findings/PENDING/CC-0001-test.md")) + return 1 + return gate_phase_4(args.finding_id) + if phase_int == 5: + if not args.finding_id: + print(fail("Phase 5 requires a finding ID.")) + print() + print(info("Usage: ./tools/gate-check.py 5 CC-0001")) + print(info(" or: ./tools/gate-check.py 5 itemdb/findings/CONFIRMED/CC-0001-test.md")) + return 1 + return gate_phase_5(args.finding_id) + if phase_int == 6: + return gate_phase_6() + + print(fail(f"Invalid phase: {phase_str}")) + print() + print(info("Valid values: 1, 2, 3, 4, 5, 6, 1a, 1b, 1c")) + return 1 diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index 1f2268ef..e6f3c427 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -125,7 +125,7 @@ def check_phase_1a(console=None) -> int: else: try: plan = yaml.safe_load(plan_path.read_text(encoding="utf-8")) - except Exception as exc: + except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc: _emit(console, "fail", f"codeql-plan.yml is not valid YAML: {exc}") return 1 @@ -194,7 +194,7 @@ def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None if yaml is not None: try: data = yaml.safe_load(risk_path.read_text(encoding="utf-8")) - except Exception as exc: + except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc: _emit(console, "fail", f"file-risk-index.yml is not valid YAML: {exc}") return 1 From b9c55cf26ef090229ba6435e09d93146fb06c2e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 16:48:24 +0200 Subject: [PATCH 09/47] fix: authenticate CodeQL release API requests --- .github/workflows/tests.yml | 2 ++ tools/codeql/install.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 
a12769f9..f7d1c789 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -47,6 +47,8 @@ jobs: pip install -r requirements.txt - name: Run make init (includes managed CodeQL install) + env: + GITHUB_TOKEN: ${{ github.token }} run: make init - name: Run pytest with coverage diff --git a/tools/codeql/install.py b/tools/codeql/install.py index e77c386f..87c5d3c7 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -27,6 +27,19 @@ GITHUB_API_RELEASES = "https://api.github.com/repos/github/codeql-cli-binaries/releases" +def _github_headers() -> dict[str, str]: + """Return GitHub API headers, using a token when available.""" + headers = { + "Accept": "application/vnd.github+json", + "User-Agent": "CodeCome-CodeQL-Installer/1.0", + "X-GitHub-Api-Version": "2022-11-28", + } + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token: + headers["Authorization"] = f"Bearer {token}" + return headers + + # --------------------------------------------------------------------------- # Platform detection # --------------------------------------------------------------------------- @@ -57,7 +70,7 @@ def _fetch_latest_version() -> str: import json url = f"{GITHUB_API_RELEASES}/latest" - req = Request(url, headers={"Accept": "application/vnd.github+json"}) + req = Request(url, headers=_github_headers()) try: with urlopen(req, timeout=30) as resp: data = json.loads(resp.read().decode()) @@ -74,7 +87,7 @@ def _fetch_release_assets(version: str) -> list[dict]: import json url = f"{GITHUB_API_RELEASES}/tags/v{version}" - req = Request(url, headers={"Accept": "application/vnd.github+json"}) + req = Request(url, headers=_github_headers()) try: with urlopen(req, timeout=30) as resp: data = json.loads(resp.read().decode()) @@ -103,7 +116,7 @@ def _find_download_url(assets: list[dict], plat: str) -> Optional[str]: def _download(url: str, dest: Path) -> None: """Download a file from *url* to *dest*.""" print(f"Downloading {url} …") 
- req = Request(url, headers={"User-Agent": "CodeCome-CodeQL-Installer/1.0"}) + req = Request(url, headers=_github_headers()) try: with urlopen(req, timeout=300) as resp: with open(dest, "wb") as f: From 221bc5d6f518f8528cb068b8a5c3a2e7edbd1368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 17:04:31 +0200 Subject: [PATCH 10/47] refactor: keep one canonical CodeQL config path --- tests/test_codeql_config.py | 25 +++++++++++++++++++++++-- tools/codeql/config.py | 22 ++++++++-------------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/tests/test_codeql_config.py b/tests/test_codeql_config.py index 78b2a0b1..433f2cef 100644 --- a/tests/test_codeql_config.py +++ b/tests/test_codeql_config.py @@ -10,7 +10,19 @@ from codeql import config as config_module -def test_load_codecome_yml_supports_top_level_static_analysis(tmp_path: Path, monkeypatch) -> None: +def test_load_codecome_yml_reads_audit_static_analysis(tmp_path: Path, monkeypatch) -> None: + config_path = tmp_path / "codecome.yml" + config_path.write_text( + "audit:\n static_analysis:\n codeql:\n candidate_mode: audit\n", + encoding="utf-8", + ) + monkeypatch.setattr(config_module, "ROOT", tmp_path) + + data = config_module._load_codecome_yml() + assert data == {"candidate_mode": "audit"} + + +def test_load_codecome_yml_ignores_top_level_static_analysis(tmp_path: Path, monkeypatch) -> None: config_path = tmp_path / "codecome.yml" config_path.write_text( "static_analysis:\n codeql:\n candidate_mode: top-level\n", @@ -19,7 +31,16 @@ def test_load_codecome_yml_supports_top_level_static_analysis(tmp_path: Path, mo monkeypatch.setattr(config_module, "ROOT", tmp_path) data = config_module._load_codecome_yml() - assert data == {"candidate_mode": "top-level"} + assert data is None + + +def test_load_codecome_yml_returns_none_for_invalid_yaml(tmp_path: Path, monkeypatch) -> None: + config_path = tmp_path / "codecome.yml" + config_path.write_text("audit:\n 
static_analysis: [\n", encoding="utf-8") + monkeypatch.setattr(config_module, "ROOT", tmp_path) + + data = config_module._load_codecome_yml() + assert data is None def test_resolve_config_falls_back_on_invalid_max_candidates(monkeypatch) -> None: diff --git a/tools/codeql/config.py b/tools/codeql/config.py index 0c05cd01..17349a2b 100644 --- a/tools/codeql/config.py +++ b/tools/codeql/config.py @@ -62,24 +62,18 @@ def _load_codecome_yml() -> dict[str, Any] | None: return None try: data = yaml.safe_load(path.read_text(encoding="utf-8")) - except Exception: + except (yaml.YAMLError, OSError, UnicodeDecodeError): return None if not isinstance(data, dict): return None audit = data.get("audit") - if isinstance(audit, dict): - sa = audit.get("static_analysis") - if isinstance(sa, dict): - cq = sa.get("codeql") - if isinstance(cq, dict): - return cq - - sa = data.get("static_analysis") - if isinstance(sa, dict): - cq = sa.get("codeql") - if isinstance(cq, dict): - return cq - return None + if not isinstance(audit, dict): + return None + sa = audit.get("static_analysis") + if not isinstance(sa, dict): + return None + cq = sa.get("codeql") + return cq if isinstance(cq, dict) else None def _bool_env(name: str) -> bool | None: From b24099f3147e5914610a04b5536dc8db2f1df595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 17:36:29 +0200 Subject: [PATCH 11/47] fix: tighten CodeQL follow-up checks and CI --- .github/workflows/tests.yml | 9 ++++++++- templates/codeql-plan.yml | 2 +- tests/test_gate_check.py | 1 - tools/codecome/phase_1.py | 6 +++--- tools/codeql.py | 4 ++++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f7d1c789..7981e5fe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,11 +46,18 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt - - name: Run make init (includes managed CodeQL 
install) + - name: Run make init (full CodeQL install) + if: matrix.python-version == '3.14' env: GITHUB_TOKEN: ${{ github.token }} run: make init + - name: Run make init (skip CodeQL install) + if: matrix.python-version != '3.14' + env: + CODEQL_SKIP_INSTALL: 1 + run: make init + - name: Run pytest with coverage id: pytest run: | diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml index 9c6c58b0..a2a82f6f 100644 --- a/templates/codeql-plan.yml +++ b/templates/codeql-plan.yml @@ -1,6 +1,6 @@ # CodeQL analysis plan generated by Phase 1a target profiling. # The model fills in language entries based on source tree analysis. -# This template is read by tools/codeql.py run --plan. +# Reserved for future CodeQL run orchestration. schema_version: 1 generated_by: "phase-1a-profile" diff --git a/tests/test_gate_check.py b/tests/test_gate_check.py index 7152ec39..4ab99826 100644 --- a/tests/test_gate_check.py +++ b/tests/test_gate_check.py @@ -7,7 +7,6 @@ ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(ROOT / "tools")) -from codecome.config import ROOT as CONFIG_ROOT from phases import gates as gates_module diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index d89dc795..fc075abf 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -368,9 +368,6 @@ def run_phase_1( base_url: str, ) -> int: """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" - # Snapshot findings before 1a - findings_snapshot = count_findings_snapshot() - # ---- Phase 1a: Target Profile ---- rc, _ = _run_subphase( args=args, @@ -393,6 +390,9 @@ def run_phase_1( # ---- CodeQL placeholder ---- _run_codeql_placeholder(console) + # Snapshot findings immediately before 1b so the warning scope matches 1b. 
+ findings_snapshot = count_findings_snapshot() + # ---- Phase 1b: CodeQL-assisted Reconnaissance ---- rc, _ = _run_subphase( args=args, diff --git a/tools/codeql.py b/tools/codeql.py index 6f38a750..9a4f81a4 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -32,6 +32,10 @@ def _cmd_check() -> int: """Check that CodeQL CLI is available and working.""" config = resolve_config() + if not config.enabled: + print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1).") + return 0 + binary_path = config.abs_install_path # 1. Binary check From 396e6fec17ad262cdd39fc88581e156b6d4291d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 18:00:58 +0200 Subject: [PATCH 12/47] feat: add CodeQL pack catalog resolver --- templates/codeql-packs.yml | 70 +++++++++++++++ tests/test_codeql_packs.py | 132 +++++++++++++++++++++++++++++ tools/codeql.py | 43 ++++++++++ tools/codeql/packs.py | 169 +++++++++++++++++++++++++++++++++++++ 4 files changed, 414 insertions(+) create mode 100644 templates/codeql-packs.yml create mode 100644 tests/test_codeql_packs.py create mode 100644 tools/codeql/packs.py diff --git a/templates/codeql-packs.yml b/templates/codeql-packs.yml new file mode 100644 index 00000000..a9e8885a --- /dev/null +++ b/templates/codeql-packs.yml @@ -0,0 +1,70 @@ +# CodeQL pack catalog used to resolve model-selected pack profiles. 
+ +schema_version: 1 + +packs: + python: + official: + - "codeql/python-queries" + github-security-lab: + - "githubsecuritylab/codeql-python-queries" + local: + - "./queries/codeql/python" + + javascript-typescript: + official: + - "codeql/javascript-queries" + github-security-lab: + - "githubsecuritylab/codeql-javascript-queries" + local: + - "./queries/codeql/javascript" + + c-cpp: + official: + - "codeql/cpp-queries" + github-security-lab: + - "githubsecuritylab/codeql-cpp-queries" + trailofbits: + - "trailofbits/cpp-queries" + coding-standards: + - "codeql/coding-standards-cpp" + local: + - "./queries/codeql/cpp" + + go: + official: + - "codeql/go-queries" + github-security-lab: + - "githubsecuritylab/codeql-go-queries" + trailofbits: + - "trailofbits/go-queries" + local: + - "./queries/codeql/go" + + csharp: + official: + - "codeql/csharp-queries" + github-security-lab: + - "githubsecuritylab/codeql-csharp-queries" + local: + - "./queries/codeql/csharp" + + java-kotlin: + official: + - "codeql/java-queries" + github-security-lab: + - "githubsecuritylab/codeql-java-queries" + local: + - "./queries/codeql/java" + +candidate_policy: + official: + allow_precreate: true + github-security-lab: + allow_precreate: true + trailofbits: + allow_precreate: true + coding-standards: + allow_precreate: false + local: + allow_precreate: true diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py new file mode 100644 index 00000000..24fa528f --- /dev/null +++ b/tests/test_codeql_packs.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.packs import PackResolverError, load_codeql_plan, load_pack_catalog, resolve_pack_profiles, resolve_plan_packs + + +def _write_catalog(path: Path) -> None: + path.write_text( + ( + "schema_version: 1\n" + "packs:\n" + " python:\n" + " official:\n" + " - 
codeql/python-queries\n" + " github-security-lab:\n" + " - githubsecuritylab/codeql-python-queries\n" + " local:\n" + " - ./queries/codeql/python\n" + " c-cpp:\n" + " official:\n" + " - codeql/cpp-queries\n" + " trailofbits:\n" + " - trailofbits/cpp-queries\n" + " coding-standards:\n" + " - codeql/coding-standards-cpp\n" + "candidate_policy:\n" + " official:\n" + " allow_precreate: true\n" + " coding-standards:\n" + " allow_precreate: false\n" + ), + encoding="utf-8", + ) + + +def _write_plan(path: Path) -> None: + path.write_text( + ( + "schema_version: 1\n" + "languages:\n" + " - id: python\n" + " packs:\n" + " - official\n" + " - github-security-lab\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + " - coding-standards\n" + ), + encoding="utf-8", + ) + + +def test_load_pack_catalog_validates_schema(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + + catalog = load_pack_catalog(catalog_path) + assert catalog["schema_version"] == 1 + assert catalog["packs"]["python"]["official"] == ["codeql/python-queries"] + + +def test_resolve_pack_profiles_preserves_order_and_dedupes(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + catalog["packs"]["python"]["dup"] = ["codeql/python-queries"] + + resolved = resolve_pack_profiles("python", ["official", "dup", "github-security-lab"], catalog) + assert resolved == ["codeql/python-queries", "githubsecuritylab/codeql-python-queries"] + + +def test_resolve_pack_profiles_rejects_unknown_language(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + try: + resolve_pack_profiles("ruby", ["official"], catalog) + except PackResolverError as exc: + assert "Unsupported CodeQL language id" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def 
test_resolve_pack_profiles_rejects_unknown_profile(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + try: + resolve_pack_profiles("python", ["trailofbits"], catalog) + except PackResolverError as exc: + assert "Unknown CodeQL pack profile" in str(exc) + else: + raise AssertionError("expected PackResolverError") + + +def test_resolve_plan_packs_returns_candidate_policy(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + plan_path = tmp_path / "plan.yml" + _write_catalog(catalog_path) + _write_plan(plan_path) + + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + + assert resolved["languages"][0]["packs"] == [ + "codeql/python-queries", + "githubsecuritylab/codeql-python-queries", + ] + assert resolved["languages"][1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False + + +def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None: + plan_path = tmp_path / "bad-plan.yml" + plan_path.write_text("languages:\n - nope\n", encoding="utf-8") + + try: + load_codeql_plan(plan_path) + except PackResolverError as exc: + assert "non-mapping language entry" in str(exc) + else: + raise AssertionError("expected PackResolverError") diff --git a/tools/codeql.py b/tools/codeql.py index 9a4f81a4..62708b7d 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -13,6 +13,7 @@ from __future__ import annotations import argparse +import json import subprocess import sys from pathlib import Path @@ -20,6 +21,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent)) from codeql.config import resolve_config +from codeql.install import ROOT +from codeql.packs import PackResolverError, dump_yaml, load_codeql_plan, load_pack_catalog, resolve_plan_packs def _cmd_install() -> int: @@ -81,6 +84,36 @@ def _cmd_check() -> int: return 0 +def _cmd_resolve_packs(args: 
argparse.Namespace) -> int: + """Resolve CodeQL plan pack profiles to concrete pack references.""" + config = resolve_config() + + plan_path = ROOT / args.plan if not Path(args.plan).is_absolute() else Path(args.plan) + catalog_path = config.abs_pack_catalog + output_path = ROOT / args.output if not Path(args.output).is_absolute() else Path(args.output) + + try: + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + except PackResolverError as exc: + print(f"FAIL: {exc}") + return 1 + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(dump_yaml(resolved), encoding="utf-8") + + if args.format == "json": + print(json.dumps(resolved, indent=2)) + else: + print(f"Resolved CodeQL packs written to {output_path.relative_to(ROOT) if output_path.is_relative_to(ROOT) else output_path}") + for language in resolved["languages"]: + print(f"- {language['id']}: {', '.join(language['profiles'])}") + for pack in language["packs"]: + print(f" {pack}") + return 0 + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="CodeQL CLI wrapper for CodeCome.", @@ -89,6 +122,14 @@ def build_parser() -> argparse.ArgumentParser: sub.add_parser("install", help="Install the managed CodeQL CLI.") sub.add_parser("check", help="Verify the CodeQL CLI is installed and working.") + resolve = sub.add_parser("resolve-packs", help="Resolve plan pack profiles to concrete pack references.") + resolve.add_argument("--plan", default="itemdb/notes/codeql-plan.yml", help="Path to codeql-plan.yml") + resolve.add_argument( + "--output", + default="itemdb/evidence/codeql/selected-query-packs.yml", + help="Path to write resolved pack selections", + ) + resolve.add_argument("--format", choices=["text", "json"], default="text", help="Output format") return parser @@ -101,6 +142,8 @@ def main() -> int: return _cmd_install() elif args.command == "check": return _cmd_check() 
+ elif args.command == "resolve-packs": + return _cmd_resolve_packs(args) return 1 diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py new file mode 100644 index 00000000..d7fa271e --- /dev/null +++ b/tools/codeql/packs.py @@ -0,0 +1,169 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL pack catalog loading and plan resolution.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +try: + import yaml +except ImportError: # pragma: no cover + yaml = None # type: ignore[assignment] + + +class PackResolverError(RuntimeError): + """Raised when the pack catalog or plan is invalid.""" + + +def _require_yaml() -> None: + if yaml is None: + raise PackResolverError("PyYAML is required to load CodeQL pack catalogs and plans.") + + +def _load_yaml_mapping(path: Path, *, what: str) -> dict[str, Any]: + _require_yaml() + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + except (yaml.YAMLError, OSError, UnicodeDecodeError) as exc: + raise PackResolverError(f"Failed to read {what} at {path}: {exc}") from exc + if not isinstance(data, dict): + raise PackResolverError(f"{what} at {path} must be a YAML mapping.") + return data + + +def load_pack_catalog(path: Path) -> dict[str, Any]: + """Load and validate the CodeQL pack catalog.""" + data = _load_yaml_mapping(path, what="CodeQL pack catalog") + + if data.get("schema_version") != 1: + raise PackResolverError(f"CodeQL pack catalog at {path} must have schema_version: 1.") + + packs = data.get("packs") + if not isinstance(packs, dict) or not packs: + raise PackResolverError(f"CodeQL pack catalog at {path} must define a non-empty 'packs' mapping.") + + for language_id, profiles in packs.items(): + if not isinstance(language_id, str) or not language_id: + raise PackResolverError(f"CodeQL pack catalog at {path} contains an invalid language id: {language_id!r}.") + if not isinstance(profiles, dict) or 
not profiles: + raise PackResolverError( + f"CodeQL pack catalog at {path} must define non-empty profiles for language {language_id!r}." + ) + for profile_name, refs in profiles.items(): + if not isinstance(profile_name, str) or not profile_name: + raise PackResolverError( + f"CodeQL pack catalog at {path} contains an invalid profile name for {language_id!r}." + ) + if not isinstance(refs, list) or not all(isinstance(ref, str) and ref for ref in refs): + raise PackResolverError( + f"CodeQL pack catalog at {path} must define {language_id!r}/{profile_name!r} as a list of pack references." + ) + + candidate_policy = data.get("candidate_policy") + if candidate_policy is not None: + if not isinstance(candidate_policy, dict): + raise PackResolverError(f"CodeQL pack catalog at {path} has a non-mapping 'candidate_policy' section.") + for profile_name, policy in candidate_policy.items(): + if not isinstance(policy, dict): + raise PackResolverError( + f"CodeQL pack catalog at {path} has a non-mapping candidate policy for profile {profile_name!r}." + ) + allow_precreate = policy.get("allow_precreate") + if not isinstance(allow_precreate, bool): + raise PackResolverError( + f"CodeQL pack catalog at {path} must define candidate_policy.{profile_name}.allow_precreate as a boolean." 
+ ) + + return data + + +def load_codeql_plan(path: Path) -> dict[str, Any]: + """Load and validate a CodeQL plan file.""" + data = _load_yaml_mapping(path, what="CodeQL plan") + + languages = data.get("languages") + if not isinstance(languages, list): + raise PackResolverError(f"CodeQL plan at {path} must define 'languages' as a list.") + + for i, entry in enumerate(languages): + if not isinstance(entry, dict): + raise PackResolverError(f"CodeQL plan at {path} has non-mapping language entry at index {i}.") + language_id = entry.get("id") + if not isinstance(language_id, str) or not language_id: + raise PackResolverError(f"CodeQL plan at {path} has language entry {i} without a valid 'id'.") + profiles = entry.get("packs") + if not isinstance(profiles, list) or not all(isinstance(p, str) and p for p in profiles): + raise PackResolverError( + f"CodeQL plan at {path} must define language {language_id!r} packs as a list of profile names." + ) + + return data + + +def resolve_pack_profiles(language_id: str, profiles: list[str], catalog: dict[str, Any]) -> list[str]: + """Resolve pack profile names for one language to concrete pack references.""" + packs = catalog["packs"] + language_profiles = packs.get(language_id) + if not isinstance(language_profiles, dict): + raise PackResolverError(f"Unsupported CodeQL language id: {language_id!r}.") + + resolved: list[str] = [] + seen: set[str] = set() + for profile_name in profiles: + refs = language_profiles.get(profile_name) + if not isinstance(refs, list): + raise PackResolverError( + f"Unknown CodeQL pack profile {profile_name!r} for language {language_id!r}." 
+ ) + for ref in refs: + if ref not in seen: + resolved.append(ref) + seen.add(ref) + return resolved + + +def allow_precreate(profile_name: str, catalog: dict[str, Any]) -> bool: + """Return whether a profile allows precreating findings by default.""" + candidate_policy = catalog.get("candidate_policy") or {} + if not isinstance(candidate_policy, dict): + return True + policy = candidate_policy.get(profile_name) + if not isinstance(policy, dict): + return True + value = policy.get("allow_precreate") + return value if isinstance(value, bool) else True + + +def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]: + """Resolve all language entries in a CodeQL plan to concrete pack references.""" + languages_out: list[dict[str, Any]] = [] + + for entry in plan.get("languages", []): + language_id = entry["id"] + profiles = list(entry.get("packs", [])) + languages_out.append( + { + "id": language_id, + "profiles": profiles, + "packs": resolve_pack_profiles(language_id, profiles, catalog), + "candidate_policy": { + profile: {"allow_precreate": allow_precreate(profile, catalog)} + for profile in profiles + }, + } + ) + + return { + "schema_version": 1, + "generated_by": "codeql-pack-resolver", + "languages": languages_out, + } + + +def dump_yaml(data: dict[str, Any]) -> str: + """Serialize resolved pack data to YAML.""" + _require_yaml() + return yaml.safe_dump(data, sort_keys=False) From e900054c0236a7b5716321724d397b7ce7fa55c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 18:24:35 +0200 Subject: [PATCH 13/47] fix: record raw transcripts and split phase-1 logs --- tests/test_codecome_runner.py | 31 ++++++++++++++++++++++++++++++- tests/test_events_loops.py | 8 ++++++-- tests/test_rendering_events.py | 6 +++--- tools/chat/app.py | 1 + tools/codecome/phase_1.py | 1 + tools/codecome/runner.py | 8 ++++---- tools/codecome/transcript.py | 2 +- tools/events/chat_loop.py | 16 ++++++++++++++-- 
tools/events/phase_loop.py | 4 ++++ tools/rendering/events/text.py | 2 +- 10 files changed, 65 insertions(+), 14 deletions(-) diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index 9b8e7547..459f8974 100644 --- a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -29,8 +29,10 @@ def test_consume_events_renders_and_logs(mock_args, mock_console, monkeypatch): class FakePhaseEventLoop: def __init__(self, **kwargs): pass - def run(self, render_and_log_fn): + def run(self, render_and_log_fn, record_raw_event_fn=None): event = {"type": "text", "content": "hello"} + if record_raw_event_fn is not None: + record_raw_event_fn(event) render_and_log_fn(mock_console, "1", "Recon", event) return RunResult() @@ -52,6 +54,33 @@ def fake_render(console, phase, label, event): assert rendered_events[0]["content"] == "hello" fake_transcript.write_event.assert_called_once() + +def test_run_single_attempt_uses_explicit_transcript_phase(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") + monkeypatch.setattr(runner, "send_prompt_to_session", lambda *a, **kw: None) + monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: RunResult()) + + captured = {} + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake-1a.jsonl") + + def fake_for_phase(cls, phase, finding): + captured["phase"] = phase + captured["finding"] = finding + return fake_transcript + + monkeypatch.setattr(Transcript, "for_phase", classmethod(fake_for_phase)) + + code, session_id, res, path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", + "http://base", "auth", "dir", lambda *a: None, + transcript_phase="1a", + ) + + assert code == 0 + assert session_id == "new_session" + assert captured == {"phase": "1a", "finding": None} + def test_run_single_attempt_success(mock_args, mock_console, monkeypatch): monkeypatch.setattr(runner, "create_session", 
lambda *a, **kw: "new_session") diff --git a/tests/test_events_loops.py b/tests/test_events_loops.py index 28513c36..36d9cf4f 100644 --- a/tests/test_events_loops.py +++ b/tests/test_events_loops.py @@ -62,7 +62,8 @@ def stop(self): def render_fn(console, phase, label, event): rendered.append((phase, label, event)) - result = loop.run(render_fn) + raw_events = [] + result = loop.run(render_fn, raw_events.append) assert isinstance(result, RunResult) assert result.any_step_finish_seen is True @@ -70,6 +71,7 @@ def render_fn(console, phase, label, event): assert result.last_finish_reason == "stop" assert result.last_finish_tokens == {"output": 3} assert rendered[-1][2]["properties"]["status"]["type"] == "idle" + assert raw_events == events def test_chat_event_loop_recovery_sync_emits_synced_events(monkeypatch): @@ -96,13 +98,15 @@ def stop(self): monkeypatch.setattr(loop, "_sync_session_messages", lambda: [synced]) rendered = [] + raw_events = [] def render_fn(console, phase, label, event): rendered.append(event) - loop._consumer_worker(render_fn) + loop._consumer_worker(render_fn, raw_events.append) assert synced in rendered assert any(event.get("type") == "session.status" and event.get("properties", {}).get("status", {}).get("type") == "idle" for event in rendered) assert loop.get_state(timeout=0.1)[0] == ChatState.BUSY assert loop.get_state(timeout=0.1)[0] == ChatState.IDLE + assert raw_events == events diff --git a/tests/test_rendering_events.py b/tests/test_rendering_events.py index 56ed586b..66c18cfa 100644 --- a/tests/test_rendering_events.py +++ b/tests/test_rendering_events.py @@ -101,12 +101,12 @@ def test_renders_text_rich(self): def test_skips_empty_text(self): r = TextEventRenderer(_ctx("plain")) - assert r.render({"part": {"text": ""}}) is False - assert r.render({"part": {"text": " \n\t "}}) is False + assert r.render({"part": {"text": ""}}) is True + assert r.render({"part": {"text": " \n\t "}}) is True def test_skips_missing_text(self): r = 
TextEventRenderer(_ctx("plain")) - assert r.render({"part": {}}) is False + assert r.render({"part": {}}) is True # --------------------------------------------------------------------------- diff --git a/tools/chat/app.py b/tools/chat/app.py index cf8190c4..0e5d4283 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -493,6 +493,7 @@ def on_mount(self) -> None: workspace_dir=str(Path(__file__).resolve().parents[2]), debug=_chat_debug if self.args and self.args.debug else None, ) + self.chat_loop.set_raw_event_recorder(self.event_recorder.record) # Raw daemon thread — the SSE consumer. _chat_debug("on_mount: starting SSE consumer (raw daemon thread)") diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index fc075abf..0a6d1383 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -157,6 +157,7 @@ def _run_subphase( render_event_fn=render_event, emit_fatal_error_fn=_emit_fatal_error, existing_session_id=last_session_id or None, + transcript_phase=phase_id, ) if returncode != 0: diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 7f1de33c..c7ff9b59 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -47,10 +47,9 @@ def _consume_events( recorder = EventRecorder(transcript, debug=args.debug) def _handle_event(console_: Any, phase_: str, label_: str, event: dict[str, Any]) -> None: - recorder.record(event) render_event_fn(console_, phase_, label_, event) - return event_loop.run(_handle_event) + return event_loop.run(_handle_event, recorder.record) def _run_single_attempt( @@ -65,15 +64,16 @@ def _run_single_attempt( render_event_fn: Callable[..., None], emit_fatal_error_fn: Callable[..., None] | None = None, existing_session_id: str | None = None, + transcript_phase: str | None = None, ) -> tuple[int, str, RunResult, Path]: transcript: Transcript try: - transcript = Transcript.for_phase(str(args.phase), args.finding) + transcript = Transcript.for_phase(transcript_phase or str(args.phase), 
args.finding) except OSError as exc: finding_tag = (args.finding or "no-finding").replace("/", "_") transcript = Transcript.null() - transcript.path = ROOT / "tmp" / f"last-phase-{args.phase}-{finding_tag}-attempt-N.jsonl" + transcript.path = ROOT / "tmp" / f"last-phase-{transcript_phase or args.phase}-{finding_tag}-attempt-N.jsonl" try: console.print("warning: could not open transcript ", transcript.path, ": ", exc) except AttributeError: diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py index 1ae0af13..8c46c778 100644 --- a/tools/codecome/transcript.py +++ b/tools/codecome/transcript.py @@ -55,7 +55,7 @@ def for_phase(cls, phase: str, finding: str | None) -> Transcript: _ATTEMPT_COUNTER[key] = counter + 1 path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" - return cls(path, path.open("w", encoding="utf-8")) + return cls(path, path.open("w", encoding="utf-8", buffering=1)) @classmethod def for_chat(cls) -> Transcript: diff --git a/tools/events/chat_loop.py b/tools/events/chat_loop.py index af3d51d4..7b0fdc0c 100644 --- a/tools/events/chat_loop.py +++ b/tools/events/chat_loop.py @@ -72,12 +72,16 @@ def start_consumer(self, render_fn: Callable[[Any, str, str, dict[str, Any]], No """Start the SSE consumer in a background daemon thread.""" self._consumer_thread = threading.Thread( target=self._consumer_worker, - args=(render_fn,), + args=(render_fn, None), name="codecome-chat-consumer", daemon=True, ) self._consumer_thread.start() + def set_raw_event_recorder(self, recorder: Callable[[dict[str, Any]], None] | None) -> None: + """Install an optional raw-event recorder for transcript/debug capture.""" + self._raw_event_recorder = recorder + def send_prompt( self, text: str, @@ -131,7 +135,11 @@ def stop(self) -> None: # Internal # ------------------------------------------------------------------ - def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], None]) -> None: + def 
_consumer_worker( + self, + render_fn: Callable[[Any, str, str, dict[str, Any]], None], + record_raw_event_fn: Callable[[dict[str, Any]], None] | None, + ) -> None: """Background thread: consume SSE, render events, signal idle.""" if self.debug: self.debug("_consumer_worker: starting SSE client") @@ -157,6 +165,10 @@ def _consumer_worker(self, render_fn: Callable[[Any, str, str, dict[str, Any]], if not self._belongs_to_session(event): continue + recorder = record_raw_event_fn or getattr(self, "_raw_event_recorder", None) + if recorder is not None: + recorder(event) + event_count += 1 if self.debug and (event_count <= 5 or event_count % 20 == 0): self.debug(f"_consumer_worker: event #{event_count} type={event.get('type')}") diff --git a/tools/events/phase_loop.py b/tools/events/phase_loop.py index d4efb45f..aba10abb 100644 --- a/tools/events/phase_loop.py +++ b/tools/events/phase_loop.py @@ -59,6 +59,7 @@ def __init__( def run( self, render_fn: Callable[[Any, str, str, dict[str, Any]], None], + record_raw_event_fn: Callable[[dict[str, Any]], None] | None = None, ) -> RunResult: _any_step_finish_seen = False _step_finish_count = 0 @@ -83,6 +84,9 @@ def run( if not self._belongs_to_session(event): continue + if record_raw_event_fn is not None: + record_raw_event_fn(event) + if self._should_skip_message_updated(event): continue diff --git a/tools/rendering/events/text.py b/tools/rendering/events/text.py index fbbe0956..8f6cff3d 100644 --- a/tools/rendering/events/text.py +++ b/tools/rendering/events/text.py @@ -18,7 +18,7 @@ def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) text = str(part.get("text", "")).strip() if not text: - return False + return True _clear_hidden_reasoning_state(self.context) if self.rich: from rich.markdown import Markdown From bad575a42b8383f58571ff80cb1a1f054bceed27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 18:57:34 +0200 Subject: [PATCH 14/47] fix: review bot 
follow-up cleanups --- tests/test_chat_app.py | 5 ++-- tests/test_rendering_events.py | 6 ++-- tools/chat/app.py | 10 ++++--- tools/codecome/harness.py | 14 +++------ tools/codecome/phase_1.py | 33 +++++++-------------- tools/codeql/install.py | 4 +++ tools/findings/checks_entry.py | 53 ++++++++++++++++++++++------------ 7 files changed, 64 insertions(+), 61 deletions(-) diff --git a/tests/test_chat_app.py b/tests/test_chat_app.py index a6ea3d22..6f9a4448 100644 --- a/tests/test_chat_app.py +++ b/tests/test_chat_app.py @@ -36,7 +36,7 @@ class DummyThread(threading.Thread): mock_log.write.assert_not_called() mock_app.post_message.assert_called_once() -def test_chat_render_and_log(monkeypatch): +def test_chat_render(monkeypatch): mock_transcript = MagicMock() mock_args = MagicMock() mock_args.debug = True @@ -65,12 +65,11 @@ def fake_render(console, phase, label, event): event = {"type": "message.updated", "info": {"role": "assistant", "modelID": "gpt-5"}} - app._chat_render_and_log(fake_self, None, "1", "label", event) + app._chat_render(fake_self, None, "1", "label", event) assert len(rendered) == 1 assert "gpt-5" in fake_self._modeline_meta assert fake_self._modeline_state == "busy" - mock_transcript.write_event.assert_called() def test_chat_update_modeline_info(): class FakeSelf: diff --git a/tests/test_rendering_events.py b/tests/test_rendering_events.py index 66c18cfa..527b9d3d 100644 --- a/tests/test_rendering_events.py +++ b/tests/test_rendering_events.py @@ -99,14 +99,16 @@ def test_renders_text_rich(self): r = TextEventRenderer(_ctx("rich")) assert r.render({"part": {"text": "Hello world"}}) is True - def test_skips_empty_text(self): + def test_skips_empty_text(self, capsys): r = TextEventRenderer(_ctx("plain")) assert r.render({"part": {"text": ""}}) is True assert r.render({"part": {"text": " \n\t "}}) is True + assert capsys.readouterr().out == "" - def test_skips_missing_text(self): + def test_skips_missing_text(self, capsys): r = 
TextEventRenderer(_ctx("plain")) assert r.render({"part": {}}) is True + assert capsys.readouterr().out == "" # --------------------------------------------------------------------------- diff --git a/tools/chat/app.py b/tools/chat/app.py index 0e5d4283..29bff3ad 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -7,7 +7,7 @@ Provides: - TextualConsoleProxy: RichLog bridge for background-thread console output. - ChatApp / QuitScreen: module-level type hints (real classes set after try/except). - - _chat_render_and_log / _chat_update_modeline_info: standalone helpers, + - _chat_render / _chat_update_modeline_info: standalone helpers, callable without Textual (for testing parity). - _QuitScreen: quit confirmation modal. - _ChatApp: the Textual App. @@ -105,13 +105,15 @@ def _write(self, renderable): # launching a real TUI. # --------------------------------------------------------------------------- -def _chat_render_and_log(self, console, phase, label, event): +def _chat_render(self, console, phase, label, event): """Standalone version of _ChatApp._render_and_log. See the docstring on the class for the full contract. + Raw event recording is handled separately by the chat event loop; + this function only drives rendering and UI updates. 
+ When bound via ``__get__`` to a _ChatApp instance, ``self`` is guaranteed to carry the attributes accessed below.""" - self.event_recorder.record(event) render_event(console, phase, label, event) _chat_update_activity_state(self, event) if event.get("type") == "message.updated": @@ -608,7 +610,7 @@ def _on_render_message(self, message: RenderMessage) -> None: # --- Consumer-thread callback --- def _render_and_log(self, console, phase, label, event): - _chat_render_and_log(self, console, phase, label, event) + _chat_render(self, console, phase, label, event) def _update_modeline_info(self, event: dict[str, Any]) -> None: _chat_update_modeline_info(self, event) diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py index f02b08e6..e44f2edf 100644 --- a/tools/codecome/harness.py +++ b/tools/codecome/harness.py @@ -15,8 +15,6 @@ import dataclasses import os import signal -import subprocess -import sys import time from pathlib import Path from typing import Any, Optional @@ -254,15 +252,11 @@ def _forward_signal(signum: int, _frame: Any) -> None: returncode = 2 if returncode == 0: - validation_result = subprocess.run( - [sys.executable, "tools/check-frontmatter.py"], - cwd=ROOT, - capture_output=True, - text=True - ) - if validation_result.returncode != 0: + from findings.checks_entry import run_frontmatter_validation + + validation_rc, validation_output = run_frontmatter_validation() + if validation_rc != 0: max_frontmatter_retries = 2 - validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" if frontmatter_retry_count < max_frontmatter_retries: frontmatter_retry_count += 1 msg = ( diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 0a6d1383..598bd057 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -11,8 +11,6 @@ from __future__ import annotations -import subprocess -import sys import time from pathlib import Path from typing import Any @@ -81,14 +79,8 @@ 
def _run_subphase( agent: str, prompt_file: str, finding: str | None = None, - findings_snapshot: dict[str, int] | None = None, -) -> tuple[int, dict[str, int] | None]: - """Run a single subphase agent session with retry/resume. - - Returns (exit_code, cumulative_findings_snapshot). The snapshot is - updated after the session completes so that gate functions can detect - unexpected finding creation. - """ +) -> int: + """Run a single subphase agent session with retry/resume.""" prompt_path = ROOT / prompt_file prompt = load_prompt(prompt_path, finding, phase=phase_id) rc = resolve_runtime_config(agent) @@ -225,15 +217,11 @@ def _run_subphase( returncode = 2 if returncode == 0: - validation_result = subprocess.run( - [sys.executable, "tools/check-frontmatter.py"], - cwd=ROOT, - capture_output=True, - text=True, - ) - if validation_result.returncode != 0: + from findings.checks_entry import run_frontmatter_validation + + validation_rc, validation_output = run_frontmatter_validation() + if validation_rc != 0: max_frontmatter_retries = 2 - validation_output = (validation_result.stderr or validation_result.stdout).strip() or "(no validator output)" if frontmatter_retry_count < max_frontmatter_retries: frontmatter_retry_count += 1 msg = ( @@ -354,7 +342,7 @@ def _run_subphase( print(C.fail(f" reason: {finish_warning}")) print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") - return returncode, findings_snapshot + return returncode # --------------------------------------------------------------------------- @@ -370,7 +358,7 @@ def run_phase_1( ) -> int: """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" # ---- Phase 1a: Target Profile ---- - rc, _ = _run_subphase( + rc = _run_subphase( args=args, console=console, rendering_ctx=rendering_ctx, @@ -395,7 +383,7 @@ def run_phase_1( findings_snapshot = count_findings_snapshot() # ---- Phase 1b: CodeQL-assisted Reconnaissance ---- - rc, _ = 
_run_subphase( + rc = _run_subphase( args=args, console=console, rendering_ctx=rendering_ctx, @@ -405,7 +393,6 @@ def run_phase_1( label="CodeQL-assisted Reconnaissance", agent="recon", prompt_file="prompts/phase-1b-codeql-recon.md", - findings_snapshot=findings_snapshot, ) if rc != 0: return rc @@ -415,7 +402,7 @@ def run_phase_1( return gate_rc # ---- Phase 1c: Sandbox Bootstrap ---- - rc, _ = _run_subphase( + rc = _run_subphase( args=args, console=console, rendering_ctx=rendering_ctx, diff --git a/tools/codeql/install.py b/tools/codeql/install.py index 87c5d3c7..9b1cadea 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -278,6 +278,10 @@ def install(config: Optional[CodeQLConfig] = None) -> int: finally: shutil.rmtree(tmp_dir, ignore_errors=True) + # Recompute the binary path after extraction — a legacy nested install + # may have been replaced with the new flat layout during this run. + binary_path = _codeql_binary(version_dir) + # --- Create current symlink --- _ensure_symlink(version_dir, current_link) diff --git a/tools/findings/checks_entry.py b/tools/findings/checks_entry.py index 4925f8da..93fcae04 100644 --- a/tools/findings/checks_entry.py +++ b/tools/findings/checks_entry.py @@ -20,48 +20,63 @@ def build_parser(): return parser -def main() -> int: - import argparse - parser = build_parser() - parser.parse_args() +def run_frontmatter_validation() -> tuple[int, str]: + """Run frontmatter validation in-process and return (exit_code, output_text). - paths = iter_all_finding_files() + This is the reusable entrypoint for code paths that need to validate + frontmatter without shelling out to a subprocess (phase retry loops, + gate checks, etc.). 
+ """ + import io + out = io.StringIO() + + paths = iter_all_finding_files() total_errors = 0 index_errors = validate_file_risk_index() if index_errors: total_errors += len(index_errors) - print(C.fail(str(FILE_RISK_INDEX_REL))) + out.write(C.fail(str(FILE_RISK_INDEX_REL)) + "\n") for error in index_errors: - print(f" {C.SYM_BULLET} {error}") + out.write(f" {C.SYM_BULLET} {error}\n") else: if FILE_RISK_INDEX_PATH.exists(): - print(C.ok(str(FILE_RISK_INDEX_REL))) + out.write(C.ok(str(FILE_RISK_INDEX_REL)) + "\n") if not paths: if not FILE_RISK_INDEX_PATH.exists(): - print(C.info("No findings or index to validate.")) - return 0 if total_errors == 0 else 1 + out.write(C.info("No findings or index to validate.") + "\n") + return (0 if total_errors == 0 else 1, out.getvalue()) for path in paths: errors = validate_finding(path) - if not errors: - print(C.ok(str(path.relative_to(ROOT)))) + out.write(C.ok(str(path.relative_to(ROOT))) + "\n") continue - total_errors += len(errors) - print(C.fail(str(path.relative_to(ROOT)))) + out.write(C.fail(str(path.relative_to(ROOT))) + "\n") for error in errors: - print(f" {C.SYM_BULLET} {error}") + out.write(f" {C.SYM_BULLET} {error}\n") if total_errors: - print(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}", file=sys.stderr) - return 1 + out.write(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}\n") + else: + out.write(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}\n") + + return (1 if total_errors else 0, out.getvalue()) + + +def main() -> int: + import argparse + parser = build_parser() + parser.parse_args() - print(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}") - return 0 + exit_code, output = run_frontmatter_validation() + sys.stdout.write(output) + if exit_code != 0: + print(output.split("\n")[-2] if output.strip() else "", file=sys.stderr) + return exit_code if __name__ == "__main__": From 81c1eeee8f5a81f5d90a089ad870b3a016890fe3 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 19:22:39 +0200 Subject: [PATCH 15/47] fix: correct subphase phase label in step output + normalize unknown events + add PatchRenderer - runner._run_single_attempt: add phase_override/label_override params to thread subphase ids (1a/1b/1c) into the render context instead of the CLI-level phase number (1). - phase_1._run_subphase: pass phase_override=phase_id, label_override=label at the _run_single_attempt call site. - state_tracker: add explicit 'patch' branch that normalizes patch events (hash + files) from OpenCode. Normalize the unknown pass-through so UnknownEventRenderer receives a normalized envelope with a top-level 'part' key instead of the raw event. - rendering/events/patch.py: new PatchRenderer (mirrors ApplyPatchRenderer panel style) showing hash (8 chars), file count, and relative file list (truncated by CODECOME_APPLY_PATCH_MAX_FILES). Re-reads affected files in SnapshotCache so subsequent Write/Edit renderers see correct diffs. - Register PatchRenderer in dispatch.py and events/__init__.py.
--- tools/codecome/phase_1.py | 2 + tools/codecome/runner.py | 6 ++- tools/events/state_tracker.py | 18 ++++++- tools/rendering/dispatch.py | 2 + tools/rendering/events/__init__.py | 2 + tools/rendering/events/patch.py | 86 ++++++++++++++++++++++++++++++ 6 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 tools/rendering/events/patch.py diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 598bd057..83c5a3ed 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -150,6 +150,8 @@ def _run_subphase( emit_fatal_error_fn=_emit_fatal_error, existing_session_id=last_session_id or None, transcript_phase=phase_id, + phase_override=phase_id, + label_override=label, ) if returncode != 0: diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index c7ff9b59..0d0412c6 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -65,6 +65,8 @@ def _run_single_attempt( emit_fatal_error_fn: Callable[..., None] | None = None, existing_session_id: str | None = None, transcript_phase: str | None = None, + phase_override: str | None = None, + label_override: str | None = None, ) -> tuple[int, str, RunResult, Path]: transcript: Transcript @@ -92,7 +94,9 @@ def _consume() -> None: try: run_result_box["result"] = _consume_events( base_url, session_id, console, - str(args.phase), str(args.label), args, + phase_override or str(args.phase), + label_override or str(args.label), + args, transcript, auth_token, workspace_dir, render_event_fn=render_event_fn, diff --git a/tools/events/state_tracker.py b/tools/events/state_tracker.py index 566839a3..f8a44742 100644 --- a/tools/events/state_tracker.py +++ b/tools/events/state_tracker.py @@ -166,8 +166,22 @@ def _build_finalized_event(self, event: dict[str, Any]) -> dict[str, Any] | None } return None - # Pass through unknown part types as raw event. 
- return event + if part_type == "patch": + return { + "type": "patch", + "timestamp": event.get("timestamp", 0), + "sessionID": props.get("sessionID", ""), + "part": part, + } + + # Pass through unknown part types with a normalized envelope so that + # downstream renderers always receive a top-level "part" key. + return { + "type": "message.part.updated", + "timestamp": event.get("timestamp", 0), + "sessionID": props.get("sessionID", ""), + "part": part, + } def _map_session_diff(self, event: dict[str, Any]) -> dict[str, Any] | None: """Map non-empty session.diff into a compact compatibility event.""" diff --git a/tools/rendering/dispatch.py b/tools/rendering/dispatch.py index 88482e1f..b9fa9cf2 100644 --- a/tools/rendering/dispatch.py +++ b/tools/rendering/dispatch.py @@ -89,6 +89,7 @@ def _get_rendering_ctx(console: Any, *, root: Path | None = None) -> Any: SessionStatusRenderer, SessionDiffRenderer, SubagentStatusRenderer, + PatchRenderer, ) registry.register_event(ServerConnectedRenderer(ctx)) registry.register_event(ServerHeartbeatRenderer(ctx)) @@ -102,6 +103,7 @@ def _get_rendering_ctx(console: Any, *, root: Path | None = None) -> Any: registry.register_event(SessionStatusRenderer(ctx)) registry.register_event(SessionDiffRenderer(ctx)) registry.register_event(SubagentStatusRenderer(ctx)) + registry.register_event(PatchRenderer(ctx)) from rendering.tools import ( ApplyPatchRenderer, diff --git a/tools/rendering/events/__init__.py b/tools/rendering/events/__init__.py index b59f0427..54773382 100644 --- a/tools/rendering/events/__init__.py +++ b/tools/rendering/events/__init__.py @@ -32,6 +32,7 @@ from rendering.events.session_diff import SessionDiffRenderer from rendering.events.message import MessageUpdatedRenderer from rendering.events.subagent import SubagentStatusRenderer +from rendering.events.patch import PatchRenderer from rendering.events.unknown import UnknownEventRenderer __all__ = [ @@ -53,5 +54,6 @@ "SessionDiffRenderer", "MessageUpdatedRenderer", 
"SubagentStatusRenderer", + "PatchRenderer", "UnknownEventRenderer", ] diff --git a/tools/rendering/events/patch.py b/tools/rendering/events/patch.py new file mode 100644 index 00000000..1fe07fc7 --- /dev/null +++ b/tools/rendering/events/patch.py @@ -0,0 +1,86 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""PatchRenderer — renders session-level patch events (hash + file list).""" + +from __future__ import annotations + +from typing import Any + +from rendering.events.base import EventRenderer +from rendering.utils import relativize_path +import _colors as C + + +class PatchRenderer(EventRenderer): + event_types = ("patch",) + + def render(self, event: dict[str, Any]) -> bool: + part = event.get("part", {}) + hash_ = part.get("hash", "") + files: list[str] = part.get("files", []) + short_hash = hash_[:8] if hash_ else "" + + if not files and not short_hash: + return False + + if self.rich: + return self._render_rich(short_hash, files) + else: + return self._render_plain(short_hash, files) + + def _render_rich(self, hash_: str, files: list[str]) -> bool: + from rich.console import Group + from rich.panel import Panel + from rich.text import Text + + settings = self.context.settings + cache = self.context.cache + + sections: list[Any] = [] + shown = files[:settings.apply_patch_max_files] + for fpath in shown: + rel = relativize_path(fpath, self.context.root) + sections.append(Text(f" {rel}", style="dim")) + + remaining = len(files) - len(shown) + if remaining > 0: + sections.append(Text(f" ... 
and {remaining} more file(s)", style="dim")) + + title = f"Session patch hash={hash_}" if hash_ else "Session patch" + nfile = len(files) + if nfile: + title += f" {nfile} file{'s' if nfile != 1 else ''}" + + self.sink.write(Panel( + Group(*sections) if sections else Text(" (no files)"), + title=title, + border_style="green" if files else "yellow", + expand=True, + )) + + for fpath in files: + cache.reread(fpath) + return True + + def _render_plain(self, hash_: str, files: list[str]) -> bool: + settings = self.context.settings + cache = self.context.cache + + nfile = len(files) + hash_part = f" hash={hash_}" if hash_ else "" + file_part = f" {nfile} file{'s' if nfile != 1 else ''}" if nfile else "" + self.sink.write_text(C.header(f"patch{hash_part}{file_part}")) + + shown = files[:settings.apply_patch_max_files] + for fpath in shown: + rel = relativize_path(fpath, self.context.root) + self.sink.write_text(f" {rel}") + + remaining = len(files) - len(shown) + if remaining > 0: + self.sink.write_text(f" ... and {remaining} more file(s)") + + for fpath in files: + cache.reread(fpath) + return True From a5b4d67e4c7109cac18519aef2754cb3d7c0e819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 19:35:51 +0200 Subject: [PATCH 16/47] feat: add FileEditedRenderer and FileWatcherRenderer with inflight tracking - RenderContext: add inflight_write_files set to track tool_use write files that are currently running but not yet completed. - ToolUseEventRenderer: update inflight_write_files set on running/completed/ error status changes for write-like tool calls (has filePath + content or oldString). This allows FileEditedRenderer to distinguish tool-attributable file.edited events from agent-internal writes. - FileEditedRenderer: suppresses file.edited when the path is inflight (i.e. the corresponding write tool will render on completion). 
For non-inflight file.edited events (agent-internal writes), renders a compact 'edited' line and rereads the file in SnapshotCache. - FileWatcherRenderer: suppresses always in normal mode. In debug mode (debug_unknown_events), renders a dim 'watcher <event> <path>' line. - Register both renderers in dispatch.py and events/__init__.py. --- tools/rendering/context.py | 3 +- tools/rendering/dispatch.py | 4 ++ tools/rendering/events/__init__.py | 3 ++ tools/rendering/events/file_events.py | 64 +++++++++++++++++++++++++++ tools/rendering/events/tool_use.py | 26 +++++++++++ 5 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 tools/rendering/events/file_events.py diff --git a/tools/rendering/context.py b/tools/rendering/context.py index 4508ebfa..be342a1d 100644 --- a/tools/rendering/context.py +++ b/tools/rendering/context.py @@ -7,7 +7,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from rendering.cache import SnapshotCache @@ -37,3 +37,4 @@ class RenderContext: hidden_reasoning_active: bool = False hidden_reasoning_started_at: float = 0.0 last_hidden_reasoning_rendered_at: float = 0.0 + inflight_write_files: set[str] = field(default_factory=set) diff --git a/tools/rendering/dispatch.py b/tools/rendering/dispatch.py index b9fa9cf2..feca819b 100644 --- a/tools/rendering/dispatch.py +++ b/tools/rendering/dispatch.py @@ -90,6 +90,8 @@ def _get_rendering_ctx(console: Any, *, root: Path | None = None) -> Any: SessionDiffRenderer, SubagentStatusRenderer, PatchRenderer, + FileEditedRenderer, + FileWatcherRenderer, ) registry.register_event(ServerConnectedRenderer(ctx)) registry.register_event(ServerHeartbeatRenderer(ctx)) @@ -104,6 +106,8 @@ def _get_rendering_ctx(console: Any, *, root: Path | None = None) -> Any: registry.register_event(SessionDiffRenderer(ctx)) registry.register_event(SubagentStatusRenderer(ctx)) registry.register_event(PatchRenderer(ctx)) +
registry.register_event(FileEditedRenderer(ctx)) + registry.register_event(FileWatcherRenderer(ctx)) from rendering.tools import ( ApplyPatchRenderer, diff --git a/tools/rendering/events/__init__.py b/tools/rendering/events/__init__.py index 54773382..63bec010 100644 --- a/tools/rendering/events/__init__.py +++ b/tools/rendering/events/__init__.py @@ -33,6 +33,7 @@ from rendering.events.message import MessageUpdatedRenderer from rendering.events.subagent import SubagentStatusRenderer from rendering.events.patch import PatchRenderer +from rendering.events.file_events import FileEditedRenderer, FileWatcherRenderer from rendering.events.unknown import UnknownEventRenderer __all__ = [ @@ -55,5 +56,7 @@ "MessageUpdatedRenderer", "SubagentStatusRenderer", "PatchRenderer", + "FileEditedRenderer", + "FileWatcherRenderer", "UnknownEventRenderer", ] diff --git a/tools/rendering/events/file_events.py b/tools/rendering/events/file_events.py new file mode 100644 index 00000000..51d32d67 --- /dev/null +++ b/tools/rendering/events/file_events.py @@ -0,0 +1,64 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""File event renderers — file.edited and file.watcher.updated.""" + +from __future__ import annotations + +import os +from typing import Any + +from rendering.events.base import EventRenderer +from rendering.utils import relativize_path +import _colors as C + + +def _norm(path: str) -> str: + return os.path.normpath(os.path.abspath(path)) if path else path + + +class FileEditedRenderer(EventRenderer): + event_types = ("file.edited",) + + def render(self, event: dict[str, Any]) -> bool: + file_path = str(event.get("properties", {}).get("file", "")) + if not file_path: + return False + + normed = _norm(file_path) + + if normed in self.context.inflight_write_files: + return True + + rel = relativize_path(file_path, self.context.root) + if self.rich: + from rich.text import Text + self.sink.write(Text(f" edited 
{rel}", style="dim")) + else: + self.sink.write_text(C.info(f" edited {rel}")) + + self.context.cache.reread(file_path) + return True + + +class FileWatcherRenderer(EventRenderer): + event_types = ("file.watcher.updated",) + + def render(self, event: dict[str, Any]) -> bool: + file_path = str(event.get("properties", {}).get("file", "")) + watcher_event = str(event.get("properties", {}).get("event", "")) + + if not self.context.settings.debug_unknown_events: + return True + + if not file_path: + return False + + rel = relativize_path(file_path, self.context.root) + label = f" watcher {watcher_event} {rel}" + if self.rich: + from rich.text import Text + self.sink.write(Text(label, style="dim")) + else: + self.sink.write_text(C.info(label)) + return True diff --git a/tools/rendering/events/tool_use.py b/tools/rendering/events/tool_use.py index 13c8244d..80db603d 100644 --- a/tools/rendering/events/tool_use.py +++ b/tools/rendering/events/tool_use.py @@ -5,11 +5,27 @@ from __future__ import annotations +import os from typing import Any from rendering.events.base import EventRenderer, _clear_hidden_reasoning_state +def _is_write_like(inp: dict[str, Any]) -> bool: + """Return True if the tool input looks like a write or edit (has filePath + content/oldString).""" + fp = inp.get("filePath", "") + if not isinstance(fp, str) or not fp.strip(): + return False + return "content" in inp or "oldString" in inp + + +def _normalize_path(path: str) -> str: + """Normalize a file path for consistent set membership.""" + if not path: + return path + return os.path.normpath(os.path.abspath(path)) + + class ToolUseEventRenderer(EventRenderer): event_types = ("tool_use",) @@ -17,6 +33,16 @@ def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) tool = str(part.get("tool", "unknown")) state = part.get("state", {}) if isinstance(part.get("state"), dict) else {} + inp = state.get("input", {}) if isinstance(state.get("input"), dict) else {} + status = 
state.get("status", "") + + if _is_write_like(inp): + file_path = _normalize_path(str(inp["filePath"])) + if status == "running": + self.context.inflight_write_files.add(file_path) + elif status in ("completed", "error"): + self.context.inflight_write_files.discard(file_path) + _clear_hidden_reasoning_state(self.context) self.context.registry.dispatch_tool(tool, state) return True From 8345775af108ba5f2fcbeb11f64c470ef882e1d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 19:49:00 +0200 Subject: [PATCH 17/47] =?UTF-8?q?fix:=20review=20bot=20cleanups=20?= =?UTF-8?q?=E2=80=94=208=20fixes=20across=20codebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - patch.py: guard against non-list 'files' (null) with isinstance check - state_tracker.py: update stale _build_finalized_event docstring to reflect that unknown part types return a normalized envelope, not None - Makefile: fix stale Phase 1b→1c references in help text, SANDBOX_SCRIPT_HINT, and section comment - test_codecome_runner.py: rename unused res/path unpacked vars to _res/_path - codeql-integration-plan.md: update Scope header from 'planning only' to 'planning + phased implementation' - checks_entry.py: fix early return on empty paths skipping the 'Validated/Found N error(s)' footer when FILE_RISK_INDEX_PATH exists - install.py: add _validate_version() semver regex guard against path traversal via config install_version - phase-1c-sandbox.md: reconcile conflicting halt/--force instructions --- .project/codeql-integration-plan.md | 2 +- Makefile | 6 +++--- prompts/phase-1c-sandbox.md | 2 +- tests/test_codecome_runner.py | 8 ++++---- tools/codeql/install.py | 12 ++++++++++++ tools/events/state_tracker.py | 7 +++++-- tools/findings/checks_entry.py | 15 +++++++-------- tools/rendering/events/patch.py | 3 ++- 8 files changed, 35 insertions(+), 20 deletions(-) diff --git a/.project/codeql-integration-plan.md 
b/.project/codeql-integration-plan.md index 7ae04f86..9efb5e6a 100644 --- a/.project/codeql-integration-plan.md +++ b/.project/codeql-integration-plan.md @@ -2,7 +2,7 @@ Status: WIP planning document Branch: `wip/codeql-integration-plan` -Scope: planning only; no implementation changes in this branch. +Scope: planning + phased implementation (install, config, pack resolver, runner, SARIF normalization). ## Goals diff --git a/Makefile b/Makefile index a612e1aa..a6f46512 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,7 @@ help: @printf " $(BOLD)make sandbox-build$(RESET) Build the target inside the sandbox\n" @printf " $(BOLD)make sandbox-test$(RESET) Test the target inside the sandbox\n" @printf "\n" - @printf " $(BOLD)$(CYAN)Sandbox bootstrap (Phase 1b):$(RESET)\n" + @printf " $(BOLD)$(CYAN)Sandbox bootstrap (Phase 1c):$(RESET)\n" @printf "\n" @printf " $(BOLD)make sandbox-list$(RESET) List curated example sandboxes\n" @printf " $(BOLD)make sandbox-inspect ID=python$(RESET) Inspect one example\n" @@ -311,7 +311,7 @@ findings-package: # Sandbox # --------------------------------------------------------------------------- -SANDBOX_SCRIPT_HINT := "No sandbox helper script found. Run 'make phase-1' (sub-stage 1b) to bootstrap sandbox/ from templates/sandboxes/, or place the helper script under sandbox/scripts/ manually." +SANDBOX_SCRIPT_HINT := "No sandbox helper script found. Run 'make phase-1' (sub-stage 1c) to bootstrap sandbox/ from templates/sandboxes/, or place the helper script under sandbox/scripts/ manually." 
sandbox-setup: @if [ -x sandbox/scripts/setup.sh ]; then \ @@ -360,7 +360,7 @@ sandbox-test: ./sandbox/scripts/test.sh # --------------------------------------------------------------------------- -# Sandbox bootstrap (Phase 1b) +# Sandbox bootstrap (Phase 1c) # --------------------------------------------------------------------------- sandbox-list: venv-check diff --git a/prompts/phase-1c-sandbox.md b/prompts/phase-1c-sandbox.md index 531306b0..c7215a5a 100644 --- a/prompts/phase-1c-sandbox.md +++ b/prompts/phase-1c-sandbox.md @@ -111,7 +111,7 @@ Read the following files (all paths are relative to the project/workspace root): ## Important rules - Do not modify files under `src/`. -- Do not silently overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. Validate first; if it works, move on; if it does not, halt with the halt protocol. +- Do not silently overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. If the sandbox already works, move on; if it needs replacement, inform the user, halt with the halt protocol, and let them re-run with `--force` (which moves the prior content to `sandbox/.backup-/`). - Do not generate vulnerability findings. 
## Final response diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index 459f8974..850821d0 100644 --- a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -71,7 +71,7 @@ def fake_for_phase(cls, phase, finding): monkeypatch.setattr(Transcript, "for_phase", classmethod(fake_for_phase)) - code, session_id, res, path = runner._run_single_attempt( + code, session_id, _res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None, transcript_phase="1a", @@ -97,7 +97,7 @@ def fake_consume(*a, **kw): fake_transcript.path = Path("fake.jsonl") monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) - code, session_id, res, path = runner._run_single_attempt( + code, session_id, res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None ) @@ -124,7 +124,7 @@ def fake_consume(*a, **kw): def fake_fatal(console, title, msg): fatal_errors.append(msg) - code, session_id, res, path = runner._run_single_attempt( + code, session_id, _res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None, emit_fatal_error_fn=fake_fatal @@ -145,7 +145,7 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc fake_transcript.path = Path("fake.jsonl") monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) - code, session_id, res, path = runner._run_single_attempt( + code, session_id, _res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None, existing_session_id="existing_123" diff --git a/tools/codeql/install.py b/tools/codeql/install.py index 9b1cadea..2f268bfe 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -12,6 +12,7 
@@ import os import platform +import re import shutil import stat import subprocess @@ -26,6 +27,13 @@ GITHUB_API_RELEASES = "https://api.github.com/repos/github/codeql-cli-binaries/releases" +_VERSION_RE = re.compile(r"^v?\d+\.\d+\.\d+$") + + +def _validate_version(version: str) -> bool: + """Return True if *version* is a safe semver-like string (no path traversal).""" + return bool(_VERSION_RE.match(version)) + def _github_headers() -> dict[str, str]: """Return GitHub API headers, using a token when available.""" @@ -226,6 +234,10 @@ def install(config: Optional[CodeQLConfig] = None) -> int: return 1 print(f"Latest version: {version}") + if not _validate_version(version): + print(f"ERROR: invalid version '{version}' — must be semver-like (e.g. 2.25.5)", file=sys.stderr) + return 1 + # --- Determine target directories --- tools_dir = ROOT / ".tools" / "codeql" version_dir = tools_dir / version diff --git a/tools/events/state_tracker.py b/tools/events/state_tracker.py index f8a44742..aadd8565 100644 --- a/tools/events/state_tracker.py +++ b/tools/events/state_tracker.py @@ -110,9 +110,12 @@ def _handle_updated(self, event: dict[str, Any]) -> list[dict[str, Any]]: return [] def _build_finalized_event(self, event: dict[str, Any]) -> dict[str, Any] | None: - """ Convert a message.part.updated into the ND-JSON shape expected by render_event(). + """Convert a message.part.updated into the ND-JSON shape expected by render_event(). - Returns None for event types we don't translate yet (e.g. async progress). + Returns None only for parts that are not yet finalized: text and reasoning + parts without ``time.end``, and tool parts that are still pending/running. + Unknown part types are normalized into a ``message.part.updated`` envelope + with a top-level ``"part"`` key instead of returning None. 
""" props = event.get("properties", {}) part = props.get("part", {}) diff --git a/tools/findings/checks_entry.py b/tools/findings/checks_entry.py index 93fcae04..67b81ab7 100644 --- a/tools/findings/checks_entry.py +++ b/tools/findings/checks_entry.py @@ -44,10 +44,8 @@ def run_frontmatter_validation() -> tuple[int, str]: if FILE_RISK_INDEX_PATH.exists(): out.write(C.ok(str(FILE_RISK_INDEX_REL)) + "\n") - if not paths: - if not FILE_RISK_INDEX_PATH.exists(): - out.write(C.info("No findings or index to validate.") + "\n") - return (0 if total_errors == 0 else 1, out.getvalue()) + if not paths and not FILE_RISK_INDEX_PATH.exists(): + out.write(C.info("No findings or index to validate.") + "\n") for path in paths: errors = validate_finding(path) @@ -59,10 +57,11 @@ def run_frontmatter_validation() -> tuple[int, str]: for error in errors: out.write(f" {C.SYM_BULLET} {error}\n") - if total_errors: - out.write(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}\n") - else: - out.write(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}\n") + if paths or FILE_RISK_INDEX_PATH.exists(): + if total_errors: + out.write(f"\n{C.fail(f'Found {total_errors} frontmatter error(s).')}\n") + else: + out.write(f"\n{C.ok(f'Validated {len(paths)} finding(s).')}\n") return (1 if total_errors else 0, out.getvalue()) diff --git a/tools/rendering/events/patch.py b/tools/rendering/events/patch.py index 1fe07fc7..82441bd4 100644 --- a/tools/rendering/events/patch.py +++ b/tools/rendering/events/patch.py @@ -18,7 +18,8 @@ class PatchRenderer(EventRenderer): def render(self, event: dict[str, Any]) -> bool: part = event.get("part", {}) hash_ = part.get("hash", "") - files: list[str] = part.get("files", []) + raw_files = part.get("files") + files: list[str] = raw_files if isinstance(raw_files, list) else [] short_hash = hash_[:8] if hash_ else "" if not files and not short_hash: From 79d37d3cb6ee4e431d46b0be1a243b31cb22927f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 21:22:06 +0200 Subject: [PATCH 18/47] refactor: move CodeQL artifacts from itemdb/evidence/codeql to itemdb/codeql - Update config.py DEFAULTS and field defaults - Update codecome.yml audit.static_analysis.codeql paths - Update .gitignore to cover itemdb/codeql/* - Update tools/codeql.py resolve-packs --output default - Update integration plan document throughout --- .gitignore | 3 +++ .project/codeql-integration-plan.md | 30 ++++++++++++++--------------- codecome.yml | 4 ++-- tools/codeql.py | 2 +- tools/codeql/config.py | 8 ++++---- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index ca566b9a..47faff15 100644 --- a/.gitignore +++ b/.gitignore @@ -224,6 +224,9 @@ tmp/* .tools/ .cache/codeql/ +# CodeQL runtime artifacts +itemdb/codeql/* + # CodeCome runtime artifacts itemdb/index.md diff --git a/.project/codeql-integration-plan.md b/.project/codeql-integration-plan.md index 9efb5e6a..377e5269 100644 --- a/.project/codeql-integration-plan.md +++ b/.project/codeql-integration-plan.md @@ -190,12 +190,12 @@ make phase-1 command: tools/codeql.py run --plan itemdb/notes/codeql-plan.yml outputs: - itemdb/evidence/codeql/run-manifest.yml - itemdb/evidence/codeql/selected-query-packs.yml - itemdb/evidence/codeql/sarif/*.sarif - itemdb/evidence/codeql/normalized/alerts.yml - itemdb/evidence/codeql/normalized/file-signals.yml - itemdb/evidence/codeql/codeql-summary.md + itemdb/codeql/run-manifest.yml + itemdb/codeql/selected-query-packs.yml + itemdb/codeql/sarif/*.sarif + itemdb/codeql/normalized/alerts.yml + itemdb/codeql/normalized/file-signals.yml + itemdb/codeql/codeql-summary.md 5. 
CodeQL artifact gate verifies: @@ -570,8 +570,8 @@ static_analysis: version: "latest" path: ".tools/codeql/current/codeql" - output_dir: "./itemdb/evidence/codeql" - database_dir: "./itemdb/evidence/codeql/databases" + output_dir: "./itemdb/codeql" + database_dir: "./itemdb/codeql/databases" cache_dir: "./.cache/codeql" phase_1: @@ -592,7 +592,7 @@ static_analysis: Use this layout: ```text -itemdb/evidence/codeql/ +itemdb/codeql/ run-manifest.yml selected-query-packs.yml codeql-summary.md @@ -637,7 +637,7 @@ failures: [] Do not expose raw SARIF directly to model prompts. Normalize it first. -`itemdb/evidence/codeql/normalized/alerts.yml`: +`itemdb/codeql/normalized/alerts.yml`: ```yaml schema_version: 1 @@ -756,8 +756,8 @@ tools/codeql.py create-candidates Inputs: ```text -itemdb/evidence/codeql/normalized/alerts.yml -itemdb/evidence/codeql/normalized/file-signals.yml +itemdb/codeql/normalized/alerts.yml +itemdb/codeql/normalized/file-signals.yml itemdb/notes/file-risk-index.yml itemdb/findings/**/CC-*.md ``` @@ -765,7 +765,7 @@ itemdb/findings/**/CC-*.md Outputs: ```text -itemdb/evidence/codeql/normalized/candidate-findings.yml +itemdb/codeql/normalized/candidate-findings.yml itemdb/notes/codeql-candidate-findings.md ``` @@ -796,7 +796,7 @@ Add to `prompts/phase-2-audit.md`: ## CodeQL candidate handling If `itemdb/notes/codeql-candidate-findings.md` or -`itemdb/evidence/codeql/normalized/candidate-findings.yml` exists, you must +`itemdb/codeql/normalized/candidate-findings.yml` exists, you must account for each candidate. 
For each candidate, choose one: @@ -964,7 +964,7 @@ static_analysis: packs: - "githubsecuritylab/codeql-python-queries" sarif: - - "itemdb/evidence/codeql/sarif/python.github-security-lab.sarif" + - "itemdb/codeql/sarif/python.github-security-lab.sarif" ``` If the frontmatter checker rejects extra fields, place this information in the finding body under: diff --git a/codecome.yml b/codecome.yml index 61aa5a75..59841b1b 100644 --- a/codecome.yml +++ b/codecome.yml @@ -97,8 +97,8 @@ audit: version: "latest" path: ".tools/codeql/current/codeql" - output_dir: "./itemdb/evidence/codeql" - database_dir: "./itemdb/evidence/codeql/databases" + output_dir: "./itemdb/codeql" + database_dir: "./itemdb/codeql/databases" cache_dir: "./.cache/codeql" phase_1: diff --git a/tools/codeql.py b/tools/codeql.py index 62708b7d..33ca55eb 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -126,7 +126,7 @@ def build_parser() -> argparse.ArgumentParser: resolve.add_argument("--plan", default="itemdb/notes/codeql-plan.yml", help="Path to codeql-plan.yml") resolve.add_argument( "--output", - default="itemdb/evidence/codeql/selected-query-packs.yml", + default="itemdb/codeql/selected-query-packs.yml", help="Path to write resolved pack selections", ) resolve.add_argument("--format", choices=["text", "json"], default="text", help="Output format") diff --git a/tools/codeql/config.py b/tools/codeql/config.py index 17349a2b..64641184 100644 --- a/tools/codeql/config.py +++ b/tools/codeql/config.py @@ -37,8 +37,8 @@ "install_managed": True, "install_version": "latest", "install_path": ".tools/codeql/current/codeql", - "output_dir": "./itemdb/evidence/codeql", - "database_dir": "./itemdb/evidence/codeql/databases", + "output_dir": "./itemdb/codeql", + "database_dir": "./itemdb/codeql/databases", "cache_dir": "./.cache/codeql", "phase_1_enabled": True, "phase_2_enabled": True, @@ -113,8 +113,8 @@ class CodeQLConfig: install_version: str = "latest" install_path: str = ".tools/codeql/current/codeql" 
- output_dir: str = "./itemdb/evidence/codeql" - database_dir: str = "./itemdb/evidence/codeql/databases" + output_dir: str = "./itemdb/codeql" + database_dir: str = "./itemdb/codeql/databases" cache_dir: str = "./.cache/codeql" phase_1_enabled: bool = True From 97002c17c58c14de4a381b332f697b484935fba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 21:32:02 +0200 Subject: [PATCH 19/47] feat: implement CodeQL run, SARIF normalization, and file-risk enrichment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR 5 — CodeQL run and SARIF normalization. New modules: - tools/codeql/runner.py: database create + analyze per language; writes run-manifest.yml with soft/hard fail policy support - tools/codeql/normalize.py: parse CodeQL SARIF files into normalized alerts.yml and file-signals.yml - tools/codeql/import_risk.py: enrich file-risk-index.yml from CodeQL file-signals (score caps, codeql external_signals) CLI additions: - tools/codeql.py run: full analysis pipeline - tools/codeql.py import-risk: risk index enrichment - tools/codeql.py resolve-packs output default updated to itemdb/codeql/ Pack resolver enhancement: - resolve_plan_packs now includes per-profile 'profile_packs' mapping for per-profile SARIF generation Tests: 27 new tests across 4 test files (469 total, all passing) --- tests/test_codeql_import_risk.py | 144 ++++++++++++ tests/test_codeql_normalize.py | 323 +++++++++++++++++++++++++++ tests/test_codeql_packs.py | 56 ++++- tests/test_codeql_runner.py | 120 ++++++++++ tools/codeql.py | 152 ++++++++++++- tools/codeql/import_risk.py | 130 +++++++++++ tools/codeql/normalize.py | 371 +++++++++++++++++++++++++++++++ tools/codeql/packs.py | 19 ++ tools/codeql/runner.py | 235 ++++++++++++++++++++ 9 files changed, 1543 insertions(+), 7 deletions(-) create mode 100644 tests/test_codeql_import_risk.py create mode 100644 tests/test_codeql_normalize.py create mode 100644 
tests/test_codeql_runner.py create mode 100644 tools/codeql/import_risk.py create mode 100644 tools/codeql/normalize.py create mode 100644 tools/codeql/runner.py diff --git a/tests/test_codeql_import_risk.py b/tests/test_codeql_import_risk.py new file mode 100644 index 00000000..3b19bc2f --- /dev/null +++ b/tests/test_codeql_import_risk.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.import_risk import import_risk + + +def _write_yaml(path: Path, data: dict) -> None: + import yaml + path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8") + + +def test_import_risk_no_signals_file(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + risk_path.write_text("files: []\n") + status, warnings = import_risk(tmp_path / "missing.yml", risk_path) + assert status is None + assert any("not found" in w for w in warnings) + + +def test_import_risk_no_risk_index(tmp_path: Path) -> None: + signals_path = tmp_path / "signals.yml" + _write_yaml(signals_path, {"files": []}) + status, warnings = import_risk(signals_path, tmp_path / "missing.yml") + assert status == "skipped" + assert any("not found" in w for w in warnings) + + +def test_import_risk_adds_new_entry(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [{"path": "src/existing.py", "score": 3, "reasons": ["old"]}], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/new.py", + "codeql_score_boost": 2, + "alerts": {"total": 2, "path_problems": 1, "high_precision": 1}, + "rules": ["py/injection"], + } + ], + }, + ) + + status, warnings = import_risk(signals_path, risk_path) + assert status is None + assert len(warnings) == 0 + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + 
files = risk["files"] + assert len(files) == 2 + new_entry = [f for f in files if f["path"] == "src/new.py"][0] + assert new_entry["score"] == 2 + assert new_entry["external_signals"]["codeql"]["alerts"] == 2 + assert new_entry["external_signals"]["codeql"]["rules"] == ["py/injection"] + + +def test_import_risk_updates_existing_entry(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/upload.py", + "score": 3, + "reasons": ["manual review"], + } + ], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + { + "path": "src/upload.py", + "codeql_score_boost": 2, + "alerts": {"total": 3, "path_problems": 2, "high_precision": 1}, + "rules": ["py/path-injection", "py/xss"], + } + ], + }, + ) + + status, _ = import_risk(signals_path, risk_path) + assert status is None + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + files = risk["files"] + assert len(files) == 1 + entry = files[0] + assert entry["score"] == 5 # capped at 5 + assert "manual review" in entry["reasons"] + assert entry["external_signals"]["codeql"]["alerts"] == 3 + assert entry["external_signals"]["codeql"]["rules"] == ["py/path-injection", "py/xss"] + + +def test_import_risk_caps_score(tmp_path: Path) -> None: + risk_path = tmp_path / "risk.yml" + _write_yaml( + risk_path, + { + "schema_version": 1, + "files": [{"path": "src/x.py", "score": 4, "reasons": []}], + }, + ) + + signals_path = tmp_path / "signals.yml" + _write_yaml( + signals_path, + { + "schema_version": 1, + "files": [ + {"path": "src/x.py", "codeql_score_boost": 5, "alerts": {}} + ], + }, + ) + + status, _ = import_risk(signals_path, risk_path) + + import yaml + risk = yaml.safe_load(risk_path.read_text()) + assert risk["files"][0]["score"] == 5 diff --git a/tests/test_codeql_normalize.py b/tests/test_codeql_normalize.py new file mode 100644 index 
00000000..99e4a4bd --- /dev/null +++ b/tests/test_codeql_normalize.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.normalize import ( + _build_file_signals, + _extract_flow, + _extract_location, + _map_category, + _normalize_severity, + normalize_all, +) + + +def _minimal_sarif(results: list[dict]) -> dict: + return { + "version": "2.1.0", + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": "CodeQL", + "rules": [ + { + "id": "py/path-injection", + "name": "Uncontrolled data used in path expression", + "properties": { + "precision": "high", + "security-severity": "7.5", + "problem.severity": "error", + }, + } + ], + } + }, + "results": results, + } + ], + } + + +def _simple_result( + rule_id: str, + uri: str, + line: int = 42, + kind: str | None = "path-problem", + severity: str = "warning", + fingerprint: str = "abc123", +) -> dict: + return { + "ruleId": rule_id, + "ruleIndex": 0, + "kind": kind, + "level": severity, + "message": {"text": "Test message"}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": uri}, + "region": {"startLine": line, "endLine": line}, + } + } + ], + "partialFingerprints": {"primaryLocationLineHash": fingerprint}, + } + + +def test_normalize_all_empty_sarif_dir(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + out_dir = tmp_path / "normalized" + + resolved = {"languages": []} + alerts_path, signals_path = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + assert alerts_path.is_file() + assert signals_path.is_file() + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert alerts["alerts"] == [] + + +def test_normalize_one_sarif(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + sarif_file = 
sarif_dir / "python.official.sarif" + sarif_file.write_text( + json.dumps( + _minimal_sarif( + [ + _simple_result("py/path-injection", "src/upload.py", 88), + ] + ) + ), + encoding="utf-8", + ) + out_dir = tmp_path / "normalized" + + resolved = {"languages": [{"id": "python", "profiles": ["official"]}]} + alerts_path, signals_path = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert len(alerts["alerts"]) == 1 + a = alerts["alerts"][0] + assert a["id"] == "CQ-0001" + assert a["language"] == "python" + assert a["pack_profile"] == "official" + assert a["rule_id"] == "py/path-injection" + assert a["primary_location"]["path"] == "src/upload.py" + assert a["primary_location"]["start_line"] == 88 + assert a["mapped"]["category"] == "Path traversal" + + signals = yaml.safe_load(signals_path.read_text()) + assert len(signals["files"]) == 1 + assert signals["files"][0]["path"] == "src/upload.py" + assert signals["files"][0]["rules"] == ["py/path-injection"] + + +def test_normalize_ignores_non_matching_filenames(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + (sarif_dir / "not-a-match.json").write_text("{}") + (sarif_dir / "single.sarif").write_text(json.dumps(_minimal_sarif([]))) + out_dir = tmp_path / "normalized" + + resolved = {"languages": []} + alerts_path, _ = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) + assert alerts["alerts"] == [] + + +def test_normalize_handles_invalid_json(tmp_path: Path) -> None: + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + (sarif_dir / "python.bad.sarif").write_text("not json", encoding="utf-8") + out_dir = tmp_path / "normalized" + + resolved = {"languages": []} + alerts_path, _ = normalize_all( + sarif_dir, out_dir, resolved, "2.21.0", tmp_path, + ) + + import yaml + alerts = yaml.safe_load(alerts_path.read_text()) 
def test_extract_location() -> None:
    """The first location's URI and region lines become the primary location."""
    physical = {
        "artifactLocation": {"uri": "src/x.py"},
        "region": {"startLine": 42, "endLine": 44},
    }

    loc = _extract_location({"locations": [{"physicalLocation": physical}]})

    assert loc is not None
    assert (loc["path"], loc["start_line"], loc["end_line"]) == ("src/x.py", 42, 44)
def test_normalize_severity() -> None:
    """Known SARIF levels map to their normalized form; anything else becomes ``warning``."""
    expected = {
        "error": "error",
        "warning": "warning",
        "note": "note",
        "none": "info",
        "unknown": "warning",
    }
    for level, normalized in expected.items():
        assert _normalize_severity(level) == normalized
def test_resolve_profile_packs_rejects_unknown_profile() -> None:
    """A profile absent from the language's catalog entry raises PackResolverError."""
    python_profiles = {"official": ["codeql/python-queries"]}
    catalog = {"schema_version": 1, "packs": {"python": python_profiles}}

    error = None
    try:
        _resolve_profile_packs("python", ["trailofbits"], catalog)
    except PackResolverError as exc:
        error = exc

    if error is None:
        raise AssertionError("expected PackResolverError")
    assert "Unknown CodeQL pack profile" in str(error)
def test_manifest_completed() -> None:
    """A completed manifest carries version, languages and empty failure/warning lists."""
    manifest = _manifest(
        "completed",
        "2025-01-01T00:00:00Z",
        CodeQLConfig(enabled=True, fail_policy="soft"),
        ["2.20.0"],
        [],
        languages=["python"],
    )

    expected_fields = {
        "schema_version": 1,
        "status": "completed",
        "codeql_version": "2.20.0",
        "languages": ["python"],
        "failures": [],
        "warnings": [],
    }
    for key, value in expected_fields.items():
        assert manifest[key] == value
    for timestamp_key in ("finished_at", "started_at"):
        assert timestamp_key in manifest
def test_lookup_build_match() -> None:
    """The plan entry whose id matches the language supplies build mode and command."""
    python_entry = {"id": "python", "build_mode": "none", "build_command": None}
    cpp_entry = {"id": "c-cpp", "build_mode": "manual", "build_command": "make -C src"}

    resolved = _lookup_build({"id": "c-cpp"}, [python_entry, cpp_entry])

    assert tuple(resolved) == ("manual", "make -C src")
def _cmd_run() -> int:
    """Run the CodeQL pipeline and report the outcome on stdout.

    Resolves the configuration, runs the analysis, writes the run manifest,
    normalizes any SARIF output of a completed run, and writes the Markdown
    summary.

    Returns:
        0 when CodeQL is disabled or the run did not fail; 1 when the CodeQL
        binary is missing or the manifest status is ``"failed"``.
    """
    config = resolve_config()
    if not config.enabled:
        print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1). Skipping run.")
        return 0

    codeql_binary = config.abs_install_path
    if not codeql_binary.is_file():
        print(f"FAIL: CodeQL binary not found at {codeql_binary}")
        print("Run 'tools/codeql.py install' to install the managed CodeQL CLI.")
        return 1

    from codeql.runner import run_codeql, write_manifest
    from codeql.normalize import normalize_all
    from codeql.packs import _load_yaml_mapping

    def _shown(path: Path) -> Path:
        # Prefer a workspace-relative path in messages when one exists.
        return path.relative_to(ROOT) if path.is_relative_to(ROOT) else path

    manifest = run_codeql(config)
    out_root = config.abs_output_dir
    out_root.mkdir(parents=True, exist_ok=True)
    write_manifest(manifest, out_root)

    status = manifest["status"]
    print(f"CodeQL run: {status}")
    for warning in manifest.get("warnings") or []:
        print(f" WARN: {warning}")
    for failure in manifest.get("failures") or []:
        print(f" FAIL: {failure}")

    normalized_root = out_root / "normalized"
    packs_file = out_root / "selected-query-packs.yml"
    sarif_root = out_root / "sarif"

    # Normalization only makes sense for a completed run that left SARIF behind.
    ready = status == "completed" and packs_file.is_file()
    if ready and list(sarif_root.glob("*.sarif")):
        try:
            resolved = _load_yaml_mapping(packs_file, what="resolved packs")
            alerts_file, signals_file = normalize_all(
                sarif_root, normalized_root, resolved,
                manifest.get("codeql_version", "unknown"), ROOT,
            )
            print(f"Normalized alerts: {_shown(alerts_file)}")
            print(f"File signals: {_shown(signals_file)}")
        except Exception as exc:
            # Normalization is best-effort: report it and still write the summary.
            print(f"WARN: SARIF normalization failed: {exc}")

    summary_file = _write_summary(manifest, normalized_root, out_root)
    print(f"Summary: {_shown(summary_file)}")

    return 1 if status == "failed" else 0
import.") + return 0 + + from codeql.import_risk import import_risk + + signals_path = config.abs_output_dir / "normalized" / "file-signals.yml" + risk_path = ROOT / "itemdb/notes/file-risk-index.yml" + + status, warnings = import_risk(signals_path, risk_path) + for w in warnings: + print(f"WARN: {w}") + if status == "skipped": + print("Risk import skipped — no risk index to enrich.") + return 0 + + print(f"File risk index enriched from {signals_path.relative_to(ROOT) if signals_path.is_relative_to(ROOT) else signals_path}") + return 0 + + +def _write_summary(manifest: dict, normalized_dir: Path, output_dir: Path) -> Path: + """Write codeql-summary.md.""" + status = manifest.get("status", "unknown") + version = manifest.get("codeql_version", "unknown") + languages = manifest.get("languages", []) + warnings = manifest.get("warnings", []) + failures = manifest.get("failures", []) + fail_policy = manifest.get("fail_policy", "soft") + + lines = [ + "# CodeQL Analysis Summary", + "", + f"- **Status**: {status}", + f"- **CodeQL version**: {version}", + f"- **Fail policy**: {fail_policy}", + f"- **Started**: {manifest.get('started_at', '')}", + f"- **Finished**: {manifest.get('finished_at', '')}", + "", + ] + + if languages: + lines.append(f"- **Languages**: {', '.join(languages)}") + lines.append("") + + alerts_path = normalized_dir / "alerts.yml" + signals_path = normalized_dir / "file-signals.yml" + + if alerts_path.is_file(): + from codeql.packs import _load_yaml_mapping + try: + data = _load_yaml_mapping(alerts_path, what="alerts") + total_alerts = len(data.get("alerts", [])) + lines.append(f"- **Total alerts**: {total_alerts}") + lines.append("") + except Exception: + pass + + if warnings: + lines.append("## Warnings") + lines.append("") + for w in warnings: + lines.append(f"- {w}") + lines.append("") + + if failures: + lines.append("## Failures") + lines.append("") + for f in failures: + lines.append(f"- {f}") + lines.append("") + + path = output_dir / 
"codeql-summary.md" + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="CodeQL CLI wrapper for CodeCome.", @@ -122,6 +258,8 @@ def build_parser() -> argparse.ArgumentParser: sub.add_parser("install", help="Install the managed CodeQL CLI.") sub.add_parser("check", help="Verify the CodeQL CLI is installed and working.") + sub.add_parser("run", help="Run CodeQL analysis (create DBs, analyze, normalize SARIF).") + sub.add_parser("import-risk", help="Import CodeQL file signals into file-risk-index.yml.") resolve = sub.add_parser("resolve-packs", help="Resolve plan pack profiles to concrete pack references.") resolve.add_argument("--plan", default="itemdb/notes/codeql-plan.yml", help="Path to codeql-plan.yml") resolve.add_argument( @@ -144,6 +282,10 @@ def main() -> int: return _cmd_check() elif args.command == "resolve-packs": return _cmd_resolve_packs(args) + elif args.command == "run": + return _cmd_run() + elif args.command == "import-risk": + return _cmd_import_risk() return 1 diff --git a/tools/codeql/import_risk.py b/tools/codeql/import_risk.py new file mode 100644 index 00000000..4a191c96 --- /dev/null +++ b/tools/codeql/import_risk.py @@ -0,0 +1,130 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""Enrich file-risk-index.yml from CodeQL file-signals.yml.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from codeql.packs import PackResolverError, _load_yaml_mapping, dump_yaml + + +def import_risk(file_signals_path: Path, risk_index_path: Path) -> tuple[str | None, list[str]]: + """Enrich the file-risk-index with CodeQL signals. + + Returns (status or None, warnings). + + - Preserves existing entries and model-authored reasons. + - Does not duplicate file entries. + - Caps scores at 5. 
+ - Adds ``codeql_score_boost`` and ``external_signals.codeql`` block. + + If the risk index does not exist, no-op with a warning. + """ + warnings: list[str] = [] + + if not file_signals_path.is_file(): + return None, [f"file-signals.yml not found at {file_signals_path}"] + + if not risk_index_path.is_file(): + return "skipped", [f"file-risk-index.yml not found at {risk_index_path}"] + + try: + signals = _load_yaml_mapping(file_signals_path, what="CodeQL file signals") + except PackResolverError as exc: + return None, [str(exc)] + + try: + risk_index = _load_yaml_mapping(risk_index_path, what="file risk index") + except PackResolverError as exc: + return None, [str(exc)] + + risks = risk_index.get("files") + if not isinstance(risks, list): + return None, ["file-risk-index.yml missing 'files' list"] + + signal_files = signals.get("files", []) + if not isinstance(signal_files, list): + return "skipped", ["file-signals.yml has no files"] + + existing_paths = {entry.get("path", "") for entry in risks if isinstance(entry, dict)} + modified = False + + for signal in signal_files: + if not isinstance(signal, dict): + continue + file_path = signal.get("path", "") + if not file_path: + continue + if file_path in existing_paths: + _update_existing_entry(risks, file_path, signal) + modified = True + else: + _add_new_entry(risks, file_path, signal) + existing_paths.add(file_path) + modified = True + + if modified: + risk_index["files"] = risks + risk_index_path.write_text(dump_yaml(risk_index), encoding="utf-8") + + return None, warnings + + +def _update_existing_entry(entries: list[dict[str, Any]], file_path: str, signal: dict[str, Any]) -> None: + """Enrich an existing file-risk-index entry with CodeQL signals.""" + for entry in entries: + if entry.get("path") != file_path: + continue + + boost = signal.get("codeql_score_boost", 0) + if isinstance(boost, (int, float)): + current = entry.get("score", 1) + current = int(current) if isinstance(current, (int, float)) else 1 + 
entry["score"] = min(5, current + int(boost)) + + codeql_alerts = signal.get("alerts", {}) + rules = signal.get("rules", []) + if isinstance(codeql_alerts, dict): + entry.setdefault("external_signals", {}) + entry["external_signals"]["codeql"] = { + "alerts": codeql_alerts.get("total", 0), + "path_problems": codeql_alerts.get("path_problems", 0), + "highest_precision": "high" if codeql_alerts.get("high_precision", 0) > 0 else "medium", + "rules": rules if isinstance(rules, list) else [], + } + + return + + +def _add_new_entry(entries: list[dict[str, Any]], file_path: str, signal: dict[str, Any]) -> None: + """Append a new file-risk-index entry from CodeQL signals.""" + boost = signal.get("codeql_score_boost", 1) + codeql_alerts = signal.get("alerts", {}) + rules = signal.get("rules", []) + + entry: dict[str, Any] = { + "path": file_path, + "score": min(5, int(boost) if isinstance(boost, (int, float)) else 1), + "confidence": "MEDIUM", + "target_area": "", + "reasons": ["CodeQL static analysis signal."], + "sources": [], + "sinks": [], + "trust_boundaries": [], + "suggested_vulnerability_classes": [], + "suggested_skills": [], + "suggested_validation_methods": [], + "external_signals": { + "codeql": { + "alerts": codeql_alerts.get("total", 0) if isinstance(codeql_alerts, dict) else 0, + "path_problems": codeql_alerts.get("path_problems", 0) if isinstance(codeql_alerts, dict) else 0, + "highest_precision": "high" if (isinstance(codeql_alerts, dict) and codeql_alerts.get("high_precision", 0) > 0) else "medium", + "rules": rules if isinstance(rules, list) else [], + } + }, + } + + entries.append(entry) diff --git a/tools/codeql/normalize.py b/tools/codeql/normalize.py new file mode 100644 index 00000000..13b38af9 --- /dev/null +++ b/tools/codeql/normalize.py @@ -0,0 +1,371 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""SARIF normalization: parse CodeQL SARIF into alerts.yml and 
def normalize_all(
    sarif_dir: Path,
    output_dir: Path,
    resolved_plan: dict[str, Any],
    codeql_version: str,
    source_root: Path,
) -> tuple[Path, Path]:
    """Normalize all SARIF files, write alerts.yml and file-signals.yml.

    Every ``*.sarif`` file in *sarif_dir* whose stem has the form
    ``<language>.<profile>`` is parsed, in sorted filename order; other files
    are ignored. Alert ids are assigned sequentially over the combined result
    so they are unique within ``alerts.yml``.

    Args:
        sarif_dir: Directory holding the SARIF files to read.
        output_dir: Directory the two YAML files are written to (created if missing).
        resolved_plan: Resolved pack plan; accepted for interface compatibility
            and not read here.
        codeql_version: Version string recorded in both output files.
        source_root: Passed through to the SARIF parser.

    Returns:
        ``(alerts_path, file_signals_path)``.
    """
    from codeql.packs import dump_yaml

    alerts: list[dict[str, Any]] = []
    for sarif_file in sorted(sarif_dir.glob("*.sarif")):
        # Stem looks like "python.official"; anything without a dot is skipped.
        language_id, separator, profile = sarif_file.stem.partition(".")
        if not separator:
            continue
        alerts.extend(_parse_sarif(sarif_file, language_id, profile, len(alerts), source_root))

    # The parser numbers alerts by result position, so skipped results or a
    # second run in one file can produce ids that collide with later ones.
    # Renumbering the combined list guarantees unique, gap-free ids.
    for position, alert in enumerate(alerts, start=1):
        alert["id"] = f"CQ-{position:04d}"

    file_signals = _build_file_signals(alerts)

    output_dir.mkdir(parents=True, exist_ok=True)
    alerts_path = output_dir / "alerts.yml"
    file_signals_path = output_dir / "file-signals.yml"

    alerts_path.write_text(
        dump_yaml(
            {
                "schema_version": 1,
                "generated_by": "codeql-normalize",
                "codeql_version": codeql_version,
                "target": "codecome-target",
                "alerts": alerts,
            }
        ),
        encoding="utf-8",
    )

    file_signals_path.write_text(
        dump_yaml(
            {
                "schema_version": 1,
                "generated_by": "codeql-normalize",
                "codeql_version": codeql_version,
                "files": file_signals,
            }
        ),
        encoding="utf-8",
    )

    return alerts_path, file_signals_path
def _build_rules_lookup(run: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Build ``{rule id: metadata}`` from a SARIF run's ``tool.driver.rules``.

    Each metadata mapping holds ``name``, ``precision``, ``security_severity``
    and ``severity``; values the rule does not provide are ``None``. Keys are
    always strings. Malformed structure (non-mapping ``tool``/``driver``,
    non-list ``rules``, non-mapping rule or ``properties``) is tolerated and
    yields fewer or emptier entries rather than an exception.
    """
    tool = run.get("tool", {})
    driver = tool.get("driver", {}) if isinstance(tool, dict) else {}
    if not isinstance(driver, dict):
        return {}
    rules = driver.get("rules", [])
    if not isinstance(rules, list):
        return {}

    lookup: dict[str, dict[str, Any]] = {}
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        rid = rule.get("id")
        if not rid:
            continue
        props = rule.get("properties", {})
        if not isinstance(props, dict):
            props = {}
        # Key by str so the entry is always hashable and matches str-based lookups.
        lookup[str(rid)] = {
            "name": rule.get("name", rid),
            "precision": _coerce_str(props.get("precision")) or _coerce_str(rule.get("precision")),
            "security_severity": _coerce_str(props.get("security-severity")),
            "severity": _coerce_str(props.get("problem.severity")),
        }
    return lookup
_extract_flow(result, source_root) + + return { + "id": f"CQ-{index:04d}", + "fingerprint": fingerprint, + "language": language_id, + "pack_profile": pack_profile, + "pack": _first_pack(result, rules_lookup), + "rule_id": str(rule_id), + "rule_name": rule_meta.get("name", str(rule_id)), + "severity": _normalize_severity(severity), + "security_severity": rule_meta.get("security_severity"), + "precision": rule_meta.get("precision"), + "kind": result.get("kind"), + "primary_location": primary_location, + "flow": flow, + "mapped": { + "category": _map_category(str(rule_id), result), + "suggested_validation_methods": _suggested_validation_methods(str(rule_id)), + }, + } + + +def _extract_location(result: dict[str, Any]) -> dict[str, Any] | None: + """Extract the primary_location from the first result location.""" + locations = result.get("locations", []) + if not isinstance(locations, list) or not locations: + return None + first = locations[0] + if not isinstance(first, dict): + return None + pl = first.get("physicalLocation", {}) + if not isinstance(pl, dict): + return None + artifact = pl.get("artifactLocation", {}) + if not isinstance(artifact, dict): + return None + uri = artifact.get("uri", "") + if not uri: + return None + region = pl.get("region", {}) + if not isinstance(region, dict): + return {"path": uri, "start_line": 1, "end_line": 1} + start_line = region.get("startLine", 1) + return { + "path": uri, + "start_line": start_line, + "end_line": region.get("endLine", start_line), + } + + +def _extract_flow(result: dict[str, Any], source_root: Path) -> dict[str, Any] | None: + """Extract source/sink/steps from codeFlows.""" + code_flows = result.get("codeFlows", []) + if not isinstance(code_flows, list) or not code_flows: + return None + + first_flow = code_flows[0] + if not isinstance(first_flow, dict): + return None + + thread_flows = first_flow.get("threadFlows", []) + if not isinstance(thread_flows, list) or not thread_flows: + return None + + locations = 
thread_flows[0].get("locations", []) + if not isinstance(locations, list) or not locations: + return None + + def _loc_to_entry(loc: dict[str, Any]) -> dict[str, Any] | None: + loc_obj = loc.get("location", {}) + if not isinstance(loc_obj, dict): + return None + pl = loc_obj.get("physicalLocation", {}) + if not isinstance(pl, dict): + return None + artifact = pl.get("artifactLocation", {}) + if not isinstance(artifact, dict): + return None + uri = artifact.get("uri", "") + region = pl.get("region", {}) + start_line = region.get("startLine", 1) if isinstance(region, dict) else 1 + message = loc.get("message", {}) + text = message.get("text", "") if isinstance(message, dict) else "" + return {"path": uri, "line": start_line, "message": text} + + entries = [] + for loc in locations: + if isinstance(loc, dict): + entry = _loc_to_entry(loc) + if entry: + entries.append(entry) + + if len(entries) < 2: + return None + + source = {"path": entries[0]["path"], "line": entries[0]["line"], "label": entries[0]["message"]} + sink = {"path": entries[-1]["path"], "line": entries[-1]["line"], "label": entries[-1]["message"]} + steps = [] + for entry in entries[1:-1]: + steps.append({"path": entry["path"], "line": entry["line"], "message": entry["message"]}) + + return {"source": source, "sink": sink, "steps": steps} + + +def _build_file_signals(alerts: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Aggregate alerts into per-file signals.""" + groups: dict[str, dict[str, Any]] = {} + + for alert in alerts: + path = alert.get("primary_location", {}).get("path", "") + if not path: + continue + if path not in groups: + groups[path] = { + "path": path, + "codeql_score_boost": 0, + "suggested_sweep": False, + "alerts": {"total": 0, "path_problems": 0, "high_precision": 0}, + "rules": [], + } + grp = groups[path] + grp["alerts"]["total"] += 1 + if alert.get("kind") == "path-problem": + grp["alerts"]["path_problems"] += 1 + if alert.get("precision") == "high": + 
grp["alerts"]["high_precision"] += 1 + rule_id = alert.get("rule_id", "") + if rule_id and rule_id not in grp["rules"]: + grp["rules"].append(rule_id) + + for grp in groups.values(): + total = grp["alerts"]["total"] + path_problems = grp["alerts"]["path_problems"] + high_prec = grp["alerts"]["high_precision"] + boost = min(5, max(1, total + path_problems)) + if high_prec >= 2: + boost = min(5, boost + 1) + grp["codeql_score_boost"] = boost + grp["suggested_sweep"] = total >= 2 + + return sorted(groups.values(), key=lambda g: g["path"]) + + +def _map_category(rule_id: str, result: dict[str, Any]) -> str: + """Map a CodeQL rule ID to a vulnerability category.""" + mapping = { + "path-injection": "Path traversal", + "command-line-injection": "Command injection", + "code-injection": "Code injection", + "sql-injection": "SQL injection", + "nosql-injection": "NoSQL injection", + "xss": "Cross-site scripting", + "hardcoded-credentials": "Hardcoded credentials", + "incomplete-url-substring-sanitization": "URL redirection", + "uncontrolled-deserialization": "Insecure deserialization", + "open-redirect": "Open redirect", + "information-exposure": "Information exposure", + "cleartext-transmission": "Cleartext transmission", + "codeql": "", # catch-all + } + for suffix, category in mapping.items(): + if rule_id.endswith(suffix): + return category + # For CWE-prefixed rules or other unknown forms + if "/" in rule_id: + last = rule_id.rsplit("/", 1)[-1].replace("-", " ").title() + return last + return rule_id + + +def _suggested_validation_methods(rule_id: str) -> list[str]: + """Suggest validation methods based on rule type.""" + if "sql" in rule_id or "nosql" in rule_id: + return ["static_proof", "database_evidence"] + if "injection" in rule_id: + return ["static_proof", "runtime_reproduction"] + if "xss" in rule_id or "cross-site" in rule_id.lower(): + return ["http_exploit"] + return ["static_proof"] + + +def _normalize_severity(level: str) -> str: + """Normalize SARIF 
severity levels.""" + mapping = {"error": "error", "warning": "warning", "note": "note", "none": "info"} + return mapping.get(level, "warning") + + +def _first_pack(result: dict[str, Any], rules_lookup: dict[str, dict[str, Any]]) -> str: + """Guess a pack reference from the result, fall back to rule metadata.""" + for loc in result.get("relatedLocations", []) or []: + if isinstance(loc, dict): + try: + pr = loc.get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "") + if pr and "codeql/" in pr: + return pr + except Exception: + pass + return "" + + +def _coerce_str(value: Any) -> str | None: + if value is None: + return None + return str(value) + + +def _rel(path_str: str, source_root: Path) -> str: + """Make a path workspace-relative when possible.""" + return str(path_str) diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py index d7fa271e..df451237 100644 --- a/tools/codeql/packs.py +++ b/tools/codeql/packs.py @@ -137,6 +137,24 @@ def allow_precreate(profile_name: str, catalog: dict[str, Any]) -> bool: return value if isinstance(value, bool) else True +def _resolve_profile_packs(language_id: str, profiles: list[str], catalog: dict[str, Any]) -> dict[str, list[str]]: + """Resolve each profile to its own pack list (no dedup across profiles).""" + packs = catalog["packs"] + language_profiles = packs.get(language_id) + if not isinstance(language_profiles, dict): + raise PackResolverError(f"Unsupported CodeQL language id: {language_id!r}.") + + result: dict[str, list[str]] = {} + for profile_name in profiles: + refs = language_profiles.get(profile_name) + if not isinstance(refs, list): + raise PackResolverError( + f"Unknown CodeQL pack profile {profile_name!r} for language {language_id!r}." 
+ ) + result[profile_name] = list(refs) + return result + + def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]: """Resolve all language entries in a CodeQL plan to concrete pack references.""" languages_out: list[dict[str, Any]] = [] @@ -149,6 +167,7 @@ def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[st "id": language_id, "profiles": profiles, "packs": resolve_pack_profiles(language_id, profiles, catalog), + "profile_packs": _resolve_profile_packs(language_id, profiles, catalog), "candidate_policy": { profile: {"allow_precreate": allow_precreate(profile, catalog)} for profile in profiles diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py new file mode 100644 index 00000000..44b01c50 --- /dev/null +++ b/tools/codeql/runner.py @@ -0,0 +1,235 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL runner: database create, analyze, and run manifest.""" + +from __future__ import annotations + +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from codeql.config import ROOT, CodeQLConfig +from codeql.packs import PackResolverError, dump_yaml, load_codeql_plan, load_pack_catalog, resolve_plan_packs + + +def run_codeql(config: CodeQLConfig) -> dict[str, Any]: + """Run CodeQL analysis for every language in the plan. + + Returns the run manifest as a dict. 
+ """ + now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + binary_path = config.abs_install_path + if not binary_path.is_file(): + return _manifest("failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) + + version = _get_codeql_version(binary_path) + + plan_path = ROOT / "itemdb/notes/codeql-plan.yml" + if not plan_path.is_file(): + return _manifest("skipped", now_utc, config, [version], [], failures=["codeql-plan.yml not found"]) + + catalog_path = config.abs_pack_catalog + if not catalog_path.is_file(): + return _manifest("skipped", now_utc, config, [version], [], failures=[f"Pack catalog not found at {catalog_path}"]) + + try: + catalog = load_pack_catalog(catalog_path) + plan = load_codeql_plan(plan_path) + resolved = resolve_plan_packs(plan, catalog) + except PackResolverError as exc: + return _manifest("failed", now_utc, config, [version], [], failures=[str(exc)]) + + resolved_path = config.abs_output_dir / "selected-query-packs.yml" + resolved_path.parent.mkdir(parents=True, exist_ok=True) + resolved_path.write_text(dump_yaml(resolved), encoding="utf-8") + + source_path = plan.get("source_path", "./src") + exclude_patterns = plan.get("exclude", []) + + warnings: list[str] = [] + failures: list[str] = [] + language_ids: list[str] = [] + + for lang_entry in resolved["languages"]: + language_id = lang_entry["id"] + profiles = lang_entry.get("profiles", []) + profile_packs = lang_entry.get("profile_packs", {}) + language_ids.append(language_id) + + build_mode, build_command = _lookup_build(lang_entry, plan.get("languages", [])) + + db_dir = config.abs_database_dir / language_id + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True, exist_ok=True) + + ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) + if not ok: + failures.append(msg) + if config.fail_policy == "hard": + return _manifest("failed", now_utc, config, 
[version], warnings, failures, language_ids) + continue + + for profile in profiles: + packs = profile_packs.get(profile, []) + if not packs: + continue + sarif_path = sarif_dir / f"{language_id}.{profile}.sarif" + ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) + if not ok: + if config.fail_policy == "hard": + failures.append(msg) + return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) + warnings.append(msg) + + if failures: + return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) + + return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids) + + +def _lookup_build(lang_entry: dict, plan_languages: list[dict]) -> tuple[str, str | None]: + """Return (build_mode, build_command) for a language entry.""" + language_id = lang_entry["id"] + for pl in plan_languages: + if pl.get("id") == language_id: + mode = pl.get("build_mode", "none") + cmd = pl.get("build_command") + return mode if isinstance(mode, str) and mode else "none", cmd if isinstance(cmd, str) and cmd else None + return "none", None + + +def _get_codeql_version(binary: Path) -> str: + try: + result = subprocess.run( + [str(binary), "--version"], + capture_output=True, text=True, timeout=30, + ) + line = result.stdout.strip().split("\n")[0] + return line.removeprefix("CodeQL version ") + except Exception: + return "unknown" + + +def _create_database( + binary: Path, + language_id: str, + source_path: str, + db_dir: Path, + build_mode: str, + build_command: str | None, + exclude_patterns: list[str], +) -> tuple[bool, str]: + """Create a CodeQL database. 
Returns (success, message).""" + cmd = [ + str(binary), "database", "create", + str(db_dir), + "-l", language_id, + "-s", str(ROOT / source_path), + "--overwrite", + "--no-run-unnecessary-builds", + ] + + if build_mode == "manual" and build_command: + cmd += ["-c", build_command] + elif build_mode == "autobuild": + pass # let CodeQL auto-detect + + for pattern in exclude_patterns: + cmd += ["--no-source-unpack", "--additional-build-options", f"--exclude={pattern}"] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) + except subprocess.TimeoutExpired: + return False, f"Database create timed out for {language_id}" + except Exception as exc: + return False, f"Database create failed for {language_id}: {exc}" + + if result.returncode != 0: + return False, f"Database create failed for {language_id}:\n{result.stderr[:2000]}" + + return True, "" + + +def _run_analyze( + binary: Path, + db_dir: Path, + packs: list[str], + sarif_path: Path, +) -> tuple[bool, str]: + """Run codeql database analyze. 
Returns (success, message).""" + cmd = [ + str(binary), "database", "analyze", + str(db_dir), + "--format=sarif-latest", + f"--output={sarif_path}", + "--no-sarif-add-query-help", + ] + packs + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) + except subprocess.TimeoutExpired: + return False, f"Analyze timed out for {db_dir.name} with packs {packs}" + except Exception as exc: + return False, f"Analyze failed for {db_dir.name} with packs {packs}: {exc}" + + if result.returncode != 0: + return False, f"Analyze failed for {db_dir.name} with packs {packs}:\n{result.stderr[:2000]}" + + return True, "" + + +def _manifest( + status: str, + started_at: str, + config: CodeQLConfig, + versions: list[str], + warnings: list[str], + failures: list[str] | None = None, + languages: list[str] | None = None, +) -> dict[str, Any]: + if failures is None: + failures = [] + if languages is None: + languages = [] + + codeql_version = versions[0] if versions else "unknown" + now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + return { + "schema_version": 1, + "phase": "phase-1", + "status": status, + "codeql_enabled": config.enabled, + "codeql_version": codeql_version, + "started_at": started_at, + "finished_at": now_utc, + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": str(_rel(config.abs_pack_catalog)), + "fail_policy": config.fail_policy, + "languages": languages, + "warnings": warnings, + "failures": failures if failures else [], + } + + +def write_manifest(manifest: dict[str, Any], output_dir: Path) -> Path: + """Write the run manifest to *output_dir*/run-manifest.yml.""" + import json + + path = output_dir / "run-manifest.yml" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(dump_yaml(manifest), encoding="utf-8") + return path + + +def _rel(path: Path) -> str: + """Return a workspace-relative path when under ROOT, else the absolute path.""" + try: + rel = path.relative_to(ROOT) + return 
str(rel) + except ValueError: + return str(path) From 5d92fc6b2373db9cf47620b00eea5d3e314700b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 22:20:48 +0200 Subject: [PATCH 20/47] PR 6: Phase 1 CodeQL integration + address review comments - Replace _run_codeql_placeholder with real CodeQL pipeline integration - Add tools/codeql/pipeline.py: run_full_pipeline() shared by phase_1.py and CLI - Add tools/codeql/artifacts.py: check_artifacts() gate logic - Move write_summary to runner.py; refactor codeql.py _cmd_run to use pipeline - Add 'check-artifacts' subcommand to tools/codeql.py - Rename _load_yaml_mapping -> load_yaml_mapping (public API in packs.py) - Fix stale itemdb/evidence/codeql/ paths in phase-1b-codeql-recon.md - Add tests: test_codeql_artifacts.py (7), test_codeql_pipeline.py (3) Review comment fixes: - Fix duplicate test name in test_codeql_packs.py (shadowed definition) - Fix nosql-injection matched as sql-injection in normalize.py (reorder mapping) - Fix security_severity duplicate operand in normalize.py (rule fallback) - Add missing assert in test_codeql_import_risk.py cap test - Remove unreachable status=='running' branch in tool_use.py - Strip leading 'v' from version in install.py to avoid double-v in URLs - Thread FindingsContext through checks_entry.py run_frontmatter_validation() 480 tests passing, frontmatter clean. 
--- prompts/phase-1b-codeql-recon.md | 10 +- tests/test_codeql_artifacts.py | 85 ++++++++++++++ tests/test_codeql_import_risk.py | 1 + tests/test_codeql_normalize.py | 1 + tests/test_codeql_packs.py | 2 +- tests/test_codeql_pipeline.py | 128 +++++++++++++++++++++ tools/codecome/phase_1.py | 174 ++++++++++++++++++++++++++--- tools/codeql.py | 105 +++++------------ tools/codeql/artifacts.py | 56 ++++++++++ tools/codeql/import_risk.py | 6 +- tools/codeql/install.py | 3 + tools/codeql/normalize.py | 4 +- tools/codeql/packs.py | 6 +- tools/codeql/pipeline.py | 73 ++++++++++++ tools/codeql/runner.py | 55 +++++++++ tools/findings/checks_entry.py | 38 +++++-- tools/rendering/events/tool_use.py | 4 +- 17 files changed, 629 insertions(+), 122 deletions(-) create mode 100644 tests/test_codeql_artifacts.py create mode 100644 tests/test_codeql_pipeline.py create mode 100644 tools/codeql/artifacts.py create mode 100644 tools/codeql/pipeline.py diff --git a/prompts/phase-1b-codeql-recon.md b/prompts/phase-1b-codeql-recon.md index ca884a38..7301d391 100644 --- a/prompts/phase-1b-codeql-recon.md +++ b/prompts/phase-1b-codeql-recon.md @@ -25,10 +25,10 @@ Also read the Phase 1a outputs: If CodeQL analysis was performed, the following artifacts may exist. Treat them as reconnaissance evidence, not proof of vulnerability: -- `itemdb/evidence/codeql/run-manifest.yml` — CodeQL run outcome and metadata. -- `itemdb/evidence/codeql/normalized/alerts.yml` — Normalized CodeQL alerts with source/sink/flow. -- `itemdb/evidence/codeql/normalized/file-signals.yml` — Per-file CodeQL signal scores. -- `itemdb/evidence/codeql/codeql-summary.md` — Human-readable CodeQL summary. +- `itemdb/codeql/run-manifest.yml` — CodeQL run outcome and metadata. +- `itemdb/codeql/normalized/alerts.yml` — Normalized CodeQL alerts with source/sink/flow. +- `itemdb/codeql/normalized/file-signals.yml` — Per-file CodeQL signal scores. +- `itemdb/codeql/codeql-summary.md` — Human-readable CodeQL summary. 
If these files exist: @@ -145,7 +145,7 @@ Prioritize files that contain or strongly influence: For each high-risk file, include concrete reasons, likely entry points, sources, sinks, trust boundaries, suggested vulnerability classes, suggested skills, and suggested validation methods when inferable. -If CodeQL file signals exist (`itemdb/evidence/codeql/normalized/file-signals.yml`), incorporate them: +If CodeQL file signals exist (`itemdb/codeql/normalized/file-signals.yml`), incorporate them: - Add `external_signals.codeql` blocks to file entries with CodeQL alerts. - Boost scores where CodeQL reports high-precision alerts, but cap at 5. - Explain every CodeQL-driven score boost in the `reasons` field. diff --git a/tests/test_codeql_artifacts.py b/tests/test_codeql_artifacts.py new file mode 100644 index 00000000..7170050e --- /dev/null +++ b/tests/test_codeql_artifacts.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +import yaml + +from codeql.artifacts import check_artifacts + + +def _write_manifest(output_dir: Path, manifest: dict) -> None: + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump(manifest, sort_keys=False), encoding="utf-8" + ) + + +def test_missing_manifest(tmp_path: Path) -> None: + status, warnings = check_artifacts(tmp_path / "nonexistent") + assert status == "missing" + assert len(warnings) == 1 + assert "not found" in warnings[0] + + +def test_completed_all_present(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "completed", "failures": []}) + normalized = out / "normalized" + normalized.mkdir() + (normalized / "alerts.yml").write_text("alerts: []\n") + (normalized / "file-signals.yml").write_text("files: []\n") + + status, warnings = check_artifacts(out) + assert status == "completed" + assert warnings == [] 
+ + +def test_completed_missing_normalized(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "completed", "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "completed" + assert len(warnings) == 2 + assert any("alerts.yml" in w for w in warnings) + assert any("file-signals.yml" in w for w in warnings) + + +def test_skipped(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "skipped", "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "skipped" + assert warnings == [] + + +def test_soft_failed_with_failures(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "soft-failed", "failures": ["db create timed out"]}) + + status, warnings = check_artifacts(out) + assert status == "soft-failed" + assert "db create timed out" in warnings + + +def test_failed(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "failed", "failures": ["binary not found"]}) + + status, warnings = check_artifacts(out) + assert status == "failed" + assert "binary not found" in warnings + + +def test_invalid_status(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "bogus", "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "unknown" + assert any("bogus" in w for w in warnings) diff --git a/tests/test_codeql_import_risk.py b/tests/test_codeql_import_risk.py index 3b19bc2f..4e42b48b 100644 --- a/tests/test_codeql_import_risk.py +++ b/tests/test_codeql_import_risk.py @@ -138,6 +138,7 @@ def test_import_risk_caps_score(tmp_path: Path) -> None: ) status, _ = import_risk(signals_path, risk_path) + assert status is None import yaml risk = yaml.safe_load(risk_path.read_text()) diff --git a/tests/test_codeql_normalize.py b/tests/test_codeql_normalize.py index 99e4a4bd..d103d924 100644 --- a/tests/test_codeql_normalize.py +++ 
b/tests/test_codeql_normalize.py @@ -311,6 +311,7 @@ def test_build_file_signals() -> None: def test_map_category() -> None: assert _map_category("py/path-injection", {}) == "Path traversal" assert _map_category("java/sql-injection", {}) == "SQL injection" + assert _map_category("js/nosql-injection", {}) == "NoSQL injection" assert _map_category("js/xss", {}) == "Cross-site scripting" assert _map_category("unknown-rule", {}) == "unknown-rule" diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py index dfc3dd6d..715526cd 100644 --- a/tests/test_codeql_packs.py +++ b/tests/test_codeql_packs.py @@ -155,7 +155,7 @@ def test_resolve_profile_packs_rejects_unknown_language() -> None: raise AssertionError("expected PackResolverError") -def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None: +def test_resolve_plan_packs_candidate_policy(tmp_path: Path) -> None: catalog_path = tmp_path / "catalog.yml" plan_path = tmp_path / "plan.yml" _write_catalog(catalog_path) diff --git a/tests/test_codeql_pipeline.py b/tests/test_codeql_pipeline.py new file mode 100644 index 00000000..76091589 --- /dev/null +++ b/tests/test_codeql_pipeline.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +import yaml + +from codeql.config import CodeQLConfig + + +def _make_config(tmp_path: Path) -> CodeQLConfig: + """Create a minimal CodeQLConfig pointing at tmp_path.""" + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True, exist_ok=True) + return CodeQLConfig( + enabled=True, + phase_1_enabled=True, + install_path=".tools/codeql/current/codeql", + pack_catalog="codeql-pack-catalog.yml", + output_dir="itemdb/codeql", + database_dir="itemdb/codeql/databases", + fail_policy="soft", + abs_output_dir=output_dir, + abs_install_path=tmp_path / ".tools" / "codeql" / 
"current" / "codeql", + abs_pack_catalog=tmp_path / "codeql-pack-catalog.yml", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + abs_cache_dir=tmp_path / ".cache" / "codeql", + ) + + +def test_pipeline_skipped_no_plan(tmp_path: Path) -> None: + """When run_codeql returns skipped, pipeline returns manifest without calling normalize.""" + config = _make_config(tmp_path) + + skipped_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "skipped", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:01Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": [], + "warnings": [], + "failures": ["codeql-plan.yml not found"], + } + + with patch("codeql.runner.run_codeql", return_value=skipped_manifest) as mock_run, \ + patch("codeql.normalize.normalize_all") as mock_normalize, \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "skipped" + mock_run.assert_called_once_with(config) + mock_normalize.assert_not_called() + + +def test_pipeline_completed_writes_manifest(tmp_path: Path) -> None: + """When run_codeql returns completed, manifest file is written.""" + config = _make_config(tmp_path) + + completed_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": ["python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=completed_manifest), \ + patch("codeql.normalize.normalize_all") as mock_normalize, \ + patch("codeql.pipeline.ROOT", 
tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "completed" + manifest_path = config.abs_output_dir / "run-manifest.yml" + assert manifest_path.is_file() + data = yaml.safe_load(manifest_path.read_text()) + assert data["status"] == "completed" + + +def test_pipeline_soft_failed_continues(tmp_path: Path) -> None: + """When run_codeql returns soft-failed, pipeline returns without raising.""" + config = _make_config(tmp_path) + + soft_failed_manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "soft-failed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:30Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": ["python"], + "warnings": ["analyze timed out"], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=soft_failed_manifest), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "soft-failed" + # Should not raise diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 83c5a3ed..5ba15012 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -4,8 +4,8 @@ """ Phase 1 subphase orchestration. -Runs Phase 1 as three subphases (1a / 1b / 1c) with gates and a CodeQL -placeholder between 1a and 1b. The opencode server is started once and +Runs Phase 1 as three subphases (1a / 1b / 1c) with gates and CodeQL +analysis between 1a and 1b. The opencode server is started once and reused across all three subphase sessions. 
""" @@ -39,28 +39,161 @@ build_frontmatter_resume_prompt, ) # --------------------------------------------------------------------------- -# CodeQL placeholder (no-op until PR 5) +# CodeQL analysis (between 1a gate and 1b) # --------------------------------------------------------------------------- -def _run_codeql_placeholder(console: Any) -> None: - """Log that CodeQL is not yet implemented.""" +def _run_codeql(console: Any) -> int: + """Run full CodeQL pipeline and report results.""" + from codeql.config import resolve_config as _resolve_codeql_config + + config = _resolve_codeql_config() + if HAVE_RICH: from rich.rule import Rule from rich.text import Text - console.print(Rule(title="CodeQL", style="yellow")) - console.print(Text( - "CodeQL analysis not yet implemented — coming in a future PR. " - "Proceeding to Phase 1b without CodeQL artifacts.", - style="yellow", - )) + console.print(Rule(title="CodeQL", style="cyan")) else: import _colors as C print(C.header("CodeQL")) - print(C.warn( - "CodeQL analysis not yet implemented — coming in a future PR. " - "Proceeding to Phase 1b without CodeQL artifacts." - )) - print() + + if not config.enabled: + msg = "CodeQL disabled — skipping." + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + return 0 + + if not config.phase_1_enabled: + msg = "CodeQL phase 1 disabled — skipping." 
+ if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + return 0 + + if HAVE_RICH: + from rich.text import Text + console.print(Text("Running CodeQL analysis…", style="dim")) + else: + print("Running CodeQL analysis…") + + from codeql.pipeline import run_full_pipeline + + try: + manifest = run_full_pipeline(config) + except Exception as exc: + msg = f"CodeQL: FAILED — {exc}" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + if config.fail_policy == "hard": + return 1 + return 0 + + status = manifest["status"] + warnings = manifest.get("warnings", []) + failures = manifest.get("failures", []) + + if status == "completed": + msg = f"CodeQL: analysis completed ({len(manifest.get('languages', []))} language(s))" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="green")) + else: + import _colors as C + print(C.ok(msg)) + elif status == "skipped": + reason = failures[0] if failures else "no plan" + msg = f"CodeQL: skipped — {reason}" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + elif status == "soft-failed": + msg = "CodeQL: soft-failed — continuing" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="yellow")) + else: + import _colors as C + print(C.warn(msg)) + for w in warnings + failures: + if HAVE_RICH: + console.print(Text(f" {w}", style="yellow")) + else: + print(C.warn(f" {w}")) + elif status == "failed": + msg = "CodeQL: FAILED" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + for f in failures: + if HAVE_RICH: + console.print(Text(f" {f}", style="red")) + else: + print(C.fail(f" {f}")) + if config.fail_policy == "hard": + return 1 + + return 0 + + 
+def _check_codeql_artifacts(console: Any) -> int: + """Validate CodeQL artifacts; block 1b only on hard fail policy.""" + from codeql.config import resolve_config as _resolve_codeql_config + from codeql.artifacts import check_artifacts + + config = _resolve_codeql_config() + + if not config.enabled or not config.phase_1_enabled: + return 0 + + status, warnings = check_artifacts(config.abs_output_dir) + + for w in warnings: + if HAVE_RICH: + from rich.text import Text + console.print(Text(f" WARN: {w}", style="yellow")) + else: + import _colors as C + print(C.warn(f" WARN: {w}")) + + if config.fail_policy == "hard" and status == "failed": + msg = "CodeQL artifact gate: FAILED — blocking Phase 1b" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + return 1 + + label = f"CodeQL artifact gate: {status}" + if HAVE_RICH: + from rich.text import Text + style = "green" if status == "completed" else "yellow" + console.print(Text(label, style=style)) + else: + import _colors as C + if status == "completed": + print(C.ok(label)) + else: + print(C.info(label)) + + return 0 # --------------------------------------------------------------------------- @@ -378,8 +511,13 @@ def run_phase_1( if gate_rc != 0: return gate_rc - # ---- CodeQL placeholder ---- - _run_codeql_placeholder(console) + # ---- CodeQL analysis ---- + rc = _run_codeql(console) + if rc != 0: + return rc + rc = _check_codeql_artifacts(console) + if rc != 0: + return rc # Snapshot findings immediately before 1b so the warning scope matches 1b. 
findings_snapshot = count_findings_snapshot() diff --git a/tools/codeql.py b/tools/codeql.py index 30de64aa..c1951c88 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -11,6 +11,7 @@ tools/codeql.py resolve-packs tools/codeql.py run tools/codeql.py import-risk + tools/codeql.py check-artifacts """ from __future__ import annotations @@ -127,14 +128,9 @@ def _cmd_run() -> int: print("Run 'tools/codeql.py install' to install the managed CodeQL CLI.") return 1 - from codeql.runner import run_codeql, write_manifest - from codeql.normalize import normalize_all - from codeql.packs import _load_yaml_mapping + from codeql.pipeline import run_full_pipeline - manifest = run_codeql(config) - output_dir = config.abs_output_dir - output_dir.mkdir(parents=True, exist_ok=True) - write_manifest(manifest, output_dir) + manifest = run_full_pipeline(config) status = manifest["status"] print(f"CodeQL run: {status}") @@ -146,25 +142,19 @@ def _cmd_run() -> int: for f in manifest["failures"]: print(f" FAIL: {f}") + output_dir = config.abs_output_dir normalized_dir = output_dir / "normalized" - resolved_path = output_dir / "selected-query-packs.yml" - - if status == "completed" and resolved_path.is_file(): - sarif_dir = output_dir / "sarif" - if list(sarif_dir.glob("*.sarif")): - try: - resolved = _load_yaml_mapping(resolved_path, what="resolved packs") - alerts_path, file_signals_path = normalize_all( - sarif_dir, normalized_dir, resolved, - manifest.get("codeql_version", "unknown"), ROOT, - ) - print(f"Normalized alerts: {alerts_path.relative_to(ROOT) if alerts_path.is_relative_to(ROOT) else alerts_path}") - print(f"File signals: {file_signals_path.relative_to(ROOT) if file_signals_path.is_relative_to(ROOT) else file_signals_path}") - except Exception as exc: - print(f"WARN: SARIF normalization failed: {exc}") - - summary_path = _write_summary(manifest, normalized_dir, output_dir) - print(f"Summary: {summary_path.relative_to(ROOT) if summary_path.is_relative_to(ROOT) else 
summary_path}") + alerts_path = normalized_dir / "alerts.yml" + signals_path = normalized_dir / "file-signals.yml" + + if alerts_path.is_file(): + print(f"Normalized alerts: {alerts_path.relative_to(ROOT) if alerts_path.is_relative_to(ROOT) else alerts_path}") + if signals_path.is_file(): + print(f"File signals: {signals_path.relative_to(ROOT) if signals_path.is_relative_to(ROOT) else signals_path}") + + summary_path = output_dir / "codeql-summary.md" + if summary_path.is_file(): + print(f"Summary: {summary_path.relative_to(ROOT) if summary_path.is_relative_to(ROOT) else summary_path}") if status == "failed": return 1 @@ -194,60 +184,16 @@ def _cmd_import_risk() -> int: return 0 -def _write_summary(manifest: dict, normalized_dir: Path, output_dir: Path) -> Path: - """Write codeql-summary.md.""" - status = manifest.get("status", "unknown") - version = manifest.get("codeql_version", "unknown") - languages = manifest.get("languages", []) - warnings = manifest.get("warnings", []) - failures = manifest.get("failures", []) - fail_policy = manifest.get("fail_policy", "soft") - - lines = [ - "# CodeQL Analysis Summary", - "", - f"- **Status**: {status}", - f"- **CodeQL version**: {version}", - f"- **Fail policy**: {fail_policy}", - f"- **Started**: {manifest.get('started_at', '')}", - f"- **Finished**: {manifest.get('finished_at', '')}", - "", - ] - - if languages: - lines.append(f"- **Languages**: {', '.join(languages)}") - lines.append("") - - alerts_path = normalized_dir / "alerts.yml" - signals_path = normalized_dir / "file-signals.yml" +def _cmd_check_artifacts() -> int: + """Check CodeQL artifact state after a run.""" + config = resolve_config() + from codeql.artifacts import check_artifacts - if alerts_path.is_file(): - from codeql.packs import _load_yaml_mapping - try: - data = _load_yaml_mapping(alerts_path, what="alerts") - total_alerts = len(data.get("alerts", [])) - lines.append(f"- **Total alerts**: {total_alerts}") - lines.append("") - except Exception: - 
pass - - if warnings: - lines.append("## Warnings") - lines.append("") - for w in warnings: - lines.append(f"- {w}") - lines.append("") - - if failures: - lines.append("## Failures") - lines.append("") - for f in failures: - lines.append(f"- {f}") - lines.append("") - - path = output_dir / "codeql-summary.md" - path.write_text("\n".join(lines) + "\n", encoding="utf-8") - return path + status, warnings = check_artifacts(config.abs_output_dir) + for w in warnings: + print(f" WARN: {w}") + print(f"CodeQL artifacts: {status}") + return 0 if status in ("completed", "skipped", "soft-failed") else 1 def build_parser() -> argparse.ArgumentParser: @@ -260,6 +206,7 @@ def build_parser() -> argparse.ArgumentParser: sub.add_parser("check", help="Verify the CodeQL CLI is installed and working.") sub.add_parser("run", help="Run CodeQL analysis (create DBs, analyze, normalize SARIF).") sub.add_parser("import-risk", help="Import CodeQL file signals into file-risk-index.yml.") + sub.add_parser("check-artifacts", help="Check CodeQL artifact state after a run.") resolve = sub.add_parser("resolve-packs", help="Resolve plan pack profiles to concrete pack references.") resolve.add_argument("--plan", default="itemdb/notes/codeql-plan.yml", help="Path to codeql-plan.yml") resolve.add_argument( @@ -286,6 +233,8 @@ def main() -> int: return _cmd_run() elif args.command == "import-risk": return _cmd_import_risk() + elif args.command == "check-artifacts": + return _cmd_check_artifacts() return 1 diff --git a/tools/codeql/artifacts.py b/tools/codeql/artifacts.py new file mode 100644 index 00000000..7edac524 --- /dev/null +++ b/tools/codeql/artifacts.py @@ -0,0 +1,56 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL artifact gate: validate post-run artifacts exist and are consistent.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +VALID_STATUSES = frozenset({"completed", 
"skipped", "soft-failed", "failed"}) + + +def check_artifacts(output_dir: Path) -> tuple[str, list[str]]: + """Check CodeQL artifact state after a run. + + Returns (status_string, warnings). + + status_string values: + "missing" — run-manifest.yml does not exist + "completed" — analysis ran; normalized outputs expected + "skipped" — CodeQL was disabled or no plan existed + "soft-failed" — analysis failed but phase may continue + "failed" — hard failure + "unknown" — unrecognized status value in manifest + """ + manifest_path = output_dir / "run-manifest.yml" + if not manifest_path.is_file(): + return ("missing", [f"run-manifest.yml not found at {manifest_path}"]) + + try: + from codeql.packs import load_yaml_mapping + + manifest = load_yaml_mapping(manifest_path, what="run manifest") + except Exception as exc: + return ("unknown", [f"run-manifest.yml is not valid YAML: {exc}"]) + + status = manifest.get("status", "") + if status not in VALID_STATUSES: + return ("unknown", [f"unrecognized status {status!r} in run-manifest.yml"]) + + warnings: list[str] = [] + + # Propagate recorded failures as warnings for the gate consumer. + failures = manifest.get("failures", []) + if isinstance(failures, list): + warnings.extend(failures) + + # For completed runs, verify normalized outputs exist. 
+ if status == "completed": + normalized_dir = output_dir / "normalized" + for expected in ("alerts.yml", "file-signals.yml"): + if not (normalized_dir / expected).is_file(): + warnings.append(f"expected normalized output missing: {expected}") + + return (status, warnings) diff --git a/tools/codeql/import_risk.py b/tools/codeql/import_risk.py index 4a191c96..87f12574 100644 --- a/tools/codeql/import_risk.py +++ b/tools/codeql/import_risk.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any -from codeql.packs import PackResolverError, _load_yaml_mapping, dump_yaml +from codeql.packs import PackResolverError, load_yaml_mapping, dump_yaml def import_risk(file_signals_path: Path, risk_index_path: Path) -> tuple[str | None, list[str]]: @@ -32,12 +32,12 @@ def import_risk(file_signals_path: Path, risk_index_path: Path) -> tuple[str | N return "skipped", [f"file-risk-index.yml not found at {risk_index_path}"] try: - signals = _load_yaml_mapping(file_signals_path, what="CodeQL file signals") + signals = load_yaml_mapping(file_signals_path, what="CodeQL file signals") except PackResolverError as exc: return None, [str(exc)] try: - risk_index = _load_yaml_mapping(risk_index_path, what="file risk index") + risk_index = load_yaml_mapping(risk_index_path, what="file risk index") except PackResolverError as exc: return None, [str(exc)] diff --git a/tools/codeql/install.py b/tools/codeql/install.py index 2f268bfe..94605077 100644 --- a/tools/codeql/install.py +++ b/tools/codeql/install.py @@ -238,6 +238,9 @@ def install(config: Optional[CodeQLConfig] = None) -> int: print(f"ERROR: invalid version '{version}' — must be semver-like (e.g. 2.25.5)", file=sys.stderr) return 1 + # Normalize: strip optional leading 'v' to avoid double-v in URLs/paths. 
+ version = version.lstrip("v") + # --- Determine target directories --- tools_dir = ROOT / ".tools" / "codeql" version_dir = tools_dir / version diff --git a/tools/codeql/normalize.py b/tools/codeql/normalize.py index 13b38af9..5e4b7d53 100644 --- a/tools/codeql/normalize.py +++ b/tools/codeql/normalize.py @@ -129,7 +129,7 @@ def _build_rules_lookup(run: dict[str, Any]) -> dict[str, dict[str, Any]]: lookup[rid] = { "name": rule.get("name", rid), "precision": _coerce_str(props.get("precision")) or _coerce_str(rule.get("precision")), - "security_severity": _coerce_str(props.get("security-severity")) or _coerce_str(props.get("security-severity")), + "security_severity": _coerce_str(props.get("security-severity")) or _coerce_str(rule.get("security-severity")), "severity": _coerce_str(props.get("problem.severity")), } return lookup @@ -309,8 +309,8 @@ def _map_category(rule_id: str, result: dict[str, Any]) -> str: "path-injection": "Path traversal", "command-line-injection": "Command injection", "code-injection": "Code injection", - "sql-injection": "SQL injection", "nosql-injection": "NoSQL injection", + "sql-injection": "SQL injection", "xss": "Cross-site scripting", "hardcoded-credentials": "Hardcoded credentials", "incomplete-url-substring-sanitization": "URL redirection", diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py index df451237..3dbe0e7b 100644 --- a/tools/codeql/packs.py +++ b/tools/codeql/packs.py @@ -23,7 +23,7 @@ def _require_yaml() -> None: raise PackResolverError("PyYAML is required to load CodeQL pack catalogs and plans.") -def _load_yaml_mapping(path: Path, *, what: str) -> dict[str, Any]: +def load_yaml_mapping(path: Path, *, what: str) -> dict[str, Any]: _require_yaml() try: data = yaml.safe_load(path.read_text(encoding="utf-8")) @@ -36,7 +36,7 @@ def _load_yaml_mapping(path: Path, *, what: str) -> dict[str, Any]: def load_pack_catalog(path: Path) -> dict[str, Any]: """Load and validate the CodeQL pack catalog.""" - data = 
_load_yaml_mapping(path, what="CodeQL pack catalog") + data = load_yaml_mapping(path, what="CodeQL pack catalog") if data.get("schema_version") != 1: raise PackResolverError(f"CodeQL pack catalog at {path} must have schema_version: 1.") @@ -82,7 +82,7 @@ def load_pack_catalog(path: Path) -> dict[str, Any]: def load_codeql_plan(path: Path) -> dict[str, Any]: """Load and validate a CodeQL plan file.""" - data = _load_yaml_mapping(path, what="CodeQL plan") + data = load_yaml_mapping(path, what="CodeQL plan") languages = data.get("languages") if not isinstance(languages, list): diff --git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py new file mode 100644 index 00000000..da88c938 --- /dev/null +++ b/tools/codeql/pipeline.py @@ -0,0 +1,73 @@ +# Copyright (C) 2025-2026 Pablo Ruiz García +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL full pipeline: run analysis, normalize SARIF, import risk, write summary.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from codeql.config import ROOT, CodeQLConfig + + +def run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: + """Run the complete CodeQL analysis pipeline. + + Steps (all internal, no printing): + 1. run_codeql(config) -> manifest + 2. write_manifest(manifest, output_dir) + 3. normalize_all(sarif_dir, ...) -> alerts.yml, file-signals.yml (if SARIF exist) + 4. import_risk(signals_path, risk_path) + 5. write_summary(manifest, normalized_dir, output_dir) + + Returns the manifest dict (with extra keys for artifact paths). 
+ """ + from codeql.runner import run_codeql, write_manifest, write_summary + from codeql.normalize import normalize_all + from codeql.import_risk import import_risk + from codeql.packs import load_yaml_mapping + + output_dir = config.abs_output_dir + output_dir.mkdir(parents=True, exist_ok=True) + + # Step 1: run analysis + manifest = run_codeql(config) + + # Step 2: write manifest + write_manifest(manifest, output_dir) + + status = manifest["status"] + normalized_dir = output_dir / "normalized" + resolved_path = output_dir / "selected-query-packs.yml" + + # Step 3: normalize SARIF (only if completed and SARIF files exist) + if status == "completed" and resolved_path.is_file(): + sarif_dir = output_dir / "sarif" + if list(sarif_dir.glob("*.sarif")): + try: + resolved = load_yaml_mapping(resolved_path, what="resolved packs") + normalize_all( + sarif_dir, normalized_dir, resolved, + manifest.get("codeql_version", "unknown"), ROOT, + ) + except Exception as exc: + manifest.setdefault("warnings", []).append( + f"SARIF normalization failed: {exc}" + ) + + # Step 4: import risk + signals_path = normalized_dir / "file-signals.yml" + risk_path = ROOT / "itemdb/notes/file-risk-index.yml" + if signals_path.is_file(): + try: + import_risk(signals_path, risk_path) + except Exception as exc: + manifest.setdefault("warnings", []).append( + f"Risk import failed: {exc}" + ) + + # Step 5: write summary + write_summary(manifest, normalized_dir, output_dir) + + return manifest diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 44b01c50..f25d873b 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -226,6 +226,61 @@ def write_manifest(manifest: dict[str, Any], output_dir: Path) -> Path: return path +def write_summary(manifest: dict[str, Any], normalized_dir: Path, output_dir: Path) -> Path: + """Write codeql-summary.md.""" + status = manifest.get("status", "unknown") + version = manifest.get("codeql_version", "unknown") + languages = 
manifest.get("languages", []) + warnings = manifest.get("warnings", []) + failures = manifest.get("failures", []) + fail_policy = manifest.get("fail_policy", "soft") + + lines = [ + "# CodeQL Analysis Summary", + "", + f"- **Status**: {status}", + f"- **CodeQL version**: {version}", + f"- **Fail policy**: {fail_policy}", + f"- **Started**: {manifest.get('started_at', '')}", + f"- **Finished**: {manifest.get('finished_at', '')}", + "", + ] + + if languages: + lines.append(f"- **Languages**: {', '.join(languages)}") + lines.append("") + + alerts_path = normalized_dir / "alerts.yml" + + if alerts_path.is_file(): + from codeql.packs import load_yaml_mapping + try: + data = load_yaml_mapping(alerts_path, what="alerts") + total_alerts = len(data.get("alerts", [])) + lines.append(f"- **Total alerts**: {total_alerts}") + lines.append("") + except Exception: + pass + + if warnings: + lines.append("## Warnings") + lines.append("") + for w in warnings: + lines.append(f"- {w}") + lines.append("") + + if failures: + lines.append("## Failures") + lines.append("") + for f in failures: + lines.append(f"- {f}") + lines.append("") + + path = output_dir / "codeql-summary.md" + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path + + def _rel(path: Path) -> str: """Return a workspace-relative path when under ROOT, else the absolute path.""" try: diff --git a/tools/findings/checks_entry.py b/tools/findings/checks_entry.py index 67b81ab7..afea2744 100644 --- a/tools/findings/checks_entry.py +++ b/tools/findings/checks_entry.py @@ -5,10 +5,16 @@ import sys from pathlib import Path +from typing import Optional import _colors as C -from findings.constants import FILE_RISK_INDEX_PATH, FILE_RISK_INDEX_REL, ROOT +from findings.constants import ( + FILE_RISK_INDEX_PATH, + FILE_RISK_INDEX_REL, + FindingsContext, + ROOT, +) from findings.checks import validate_finding, validate_file_risk_index, iter_all_finding_files @@ -20,15 +26,29 @@ def build_parser(): return parser -def 
run_frontmatter_validation() -> tuple[int, str]: +def run_frontmatter_validation( + ctx: Optional[FindingsContext] = None, +) -> tuple[int, str]: """Run frontmatter validation in-process and return (exit_code, output_text). This is the reusable entrypoint for code paths that need to validate frontmatter without shelling out to a subprocess (phase retry loops, gate checks, etc.). + + Parameters + ---------- + ctx : FindingsContext, optional + Injectable context for testing. When None, uses default global paths. """ import io + if ctx is None: + ctx = FindingsContext.default() + + _root = ctx.root + _risk_index_path = _root / "itemdb" / "notes" / "file-risk-index.yml" + _risk_index_rel = Path("itemdb/notes/file-risk-index.yml") + out = io.StringIO() paths = iter_all_finding_files() @@ -37,27 +57,27 @@ def run_frontmatter_validation() -> tuple[int, str]: index_errors = validate_file_risk_index() if index_errors: total_errors += len(index_errors) - out.write(C.fail(str(FILE_RISK_INDEX_REL)) + "\n") + out.write(C.fail(str(_risk_index_rel)) + "\n") for error in index_errors: out.write(f" {C.SYM_BULLET} {error}\n") else: - if FILE_RISK_INDEX_PATH.exists(): - out.write(C.ok(str(FILE_RISK_INDEX_REL)) + "\n") + if _risk_index_path.exists(): + out.write(C.ok(str(_risk_index_rel)) + "\n") - if not paths and not FILE_RISK_INDEX_PATH.exists(): + if not paths and not _risk_index_path.exists(): out.write(C.info("No findings or index to validate.") + "\n") for path in paths: errors = validate_finding(path) if not errors: - out.write(C.ok(str(path.relative_to(ROOT))) + "\n") + out.write(C.ok(str(path.relative_to(_root))) + "\n") continue total_errors += len(errors) - out.write(C.fail(str(path.relative_to(ROOT))) + "\n") + out.write(C.fail(str(path.relative_to(_root))) + "\n") for error in errors: out.write(f" {C.SYM_BULLET} {error}\n") - if paths or FILE_RISK_INDEX_PATH.exists(): + if paths or _risk_index_path.exists(): if total_errors: out.write(f"\n{C.fail(f'Found {total_errors} 
frontmatter error(s).')}\n") else: diff --git a/tools/rendering/events/tool_use.py b/tools/rendering/events/tool_use.py index 80db603d..831cad6e 100644 --- a/tools/rendering/events/tool_use.py +++ b/tools/rendering/events/tool_use.py @@ -38,9 +38,7 @@ def render(self, event: dict[str, Any]) -> bool: if _is_write_like(inp): file_path = _normalize_path(str(inp["filePath"])) - if status == "running": - self.context.inflight_write_files.add(file_path) - elif status in ("completed", "error"): + if status in ("completed", "error"): self.context.inflight_write_files.discard(file_path) _clear_hidden_reasoning_state(self.context) From c6cd680d0a85d9b0c7735f818f686b4ede0b0f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 22:48:52 +0200 Subject: [PATCH 21/47] Fix bugs and deviations from integration plan - Update default candidate_mode to precreate in plan document - Add findings snapshot check to Phase 1a gate - Reject template placeholder in Phase 1b gate - Add reason for score boosts to existing entries in import_risk.py - Remove unused _rel function from normalize.py - Remove unsupported exclude flags from codeql database create - Make CodeQL execution failures halting regardless of soft policy These changes address the remaining review feedback and integration plan mismatches. 
--- .project/codeql-integration-plan.md | 10 +++++----- tools/codecome/phase_1.py | 13 ++++++++++++- tools/codeql/import_risk.py | 7 ++++++- tools/codeql/normalize.py | 4 +--- tools/codeql/runner.py | 18 +++++++----------- tools/phases/phase_1_gates.py | 18 +++++++++++++++++- 6 files changed, 48 insertions(+), 22 deletions(-) diff --git a/.project/codeql-integration-plan.md b/.project/codeql-integration-plan.md index 377e5269..f8f98b08 100644 --- a/.project/codeql-integration-plan.md +++ b/.project/codeql-integration-plan.md @@ -550,7 +550,7 @@ Default policy: ```text CodeQL enabled: yes Failure policy: soft -Candidate mode: briefing +Candidate mode: precreate Community packs: enabled ``` @@ -579,7 +579,7 @@ static_analysis: phase_2: enabled: true - candidate_mode: "briefing" + candidate_mode: "precreate" max_candidates: 10 sweep: @@ -773,11 +773,11 @@ Candidate modes: ```text off -> do nothing -briefing -> write candidate briefing only +precreate -> write candidate finding files precreate -> create filtered PENDING findings before model runs ``` -Default: `briefing`. +Default: `precreate`. Precreate only when: @@ -1100,7 +1100,7 @@ Required cases: - Confirm `tools/codeql.py` vs `tools/codecome.py codeql` decision. - Confirm exact CodeQL install source/version policy. - Verify package names in `templates/codeql-packs.yml`. -- Confirm default `CODEQL_CANDIDATES` mode: `briefing` vs `precreate`. +- Confirm default `CODEQL_CANDIDATES` mode: `precreate`. - Confirm whether finding frontmatter schema should accept `origin` / `static_analysis`. - Confirm whether `coding-standards` should ever precreate findings by default. - Confirm whether Phase 1c sandbox prompt should be copied from current `phase-1-recon.md` or rewritten tighter. 
diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 5ba15012..715aa6fa 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -181,6 +181,16 @@ def _check_codeql_artifacts(console: Any) -> int: print(C.fail(msg)) return 1 + if status == "failed": + msg = "CodeQL artifact gate: FAILED — execution crashed, blocking Phase 1b" + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + return 1 + label = f"CodeQL artifact gate: {status}" if HAVE_RICH: from rich.text import Text @@ -493,6 +503,7 @@ def run_phase_1( ) -> int: """Orchestrate Phase 1 subphases 1a → 1b → 1c with gates.""" # ---- Phase 1a: Target Profile ---- + findings_snapshot_1a = count_findings_snapshot() rc = _run_subphase( args=args, console=console, @@ -507,7 +518,7 @@ def run_phase_1( if rc != 0: return rc - gate_rc = check_phase_1a(console) + gate_rc = check_phase_1a(console, findings_snapshot=findings_snapshot_1a) if gate_rc != 0: return gate_rc diff --git a/tools/codeql/import_risk.py b/tools/codeql/import_risk.py index 87f12574..a6f3e131 100644 --- a/tools/codeql/import_risk.py +++ b/tools/codeql/import_risk.py @@ -83,7 +83,12 @@ def _update_existing_entry(entries: list[dict[str, Any]], file_path: str, signal if isinstance(boost, (int, float)): current = entry.get("score", 1) current = int(current) if isinstance(current, (int, float)) else 1 - entry["score"] = min(5, current + int(boost)) + new_score = min(5, current + int(boost)) + if new_score > current: + entry["score"] = new_score + reasons = entry.setdefault("reasons", []) + if isinstance(reasons, list): + reasons.append(f"CodeQL static analysis signal increased score by +{new_score - current}.") codeql_alerts = signal.get("alerts", {}) rules = signal.get("rules", []) diff --git a/tools/codeql/normalize.py b/tools/codeql/normalize.py index 5e4b7d53..0438b732 100644 --- a/tools/codeql/normalize.py +++ 
b/tools/codeql/normalize.py @@ -366,6 +366,4 @@ def _coerce_str(value: Any) -> str | None: return str(value) -def _rel(path_str: str, source_root: Path) -> str: - """Make a path workspace-relative when possible.""" - return str(path_str) + diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index f25d873b..722d12b6 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -24,7 +24,10 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: binary_path = config.abs_install_path if not binary_path.is_file(): - return _manifest("failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) + if config.fail_policy == "hard": + return _manifest("failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) + else: + return _manifest("soft-failed", now_utc, config, [], [f"CodeQL binary not found at {binary_path}"]) version = _get_codeql_version(binary_path) @@ -69,9 +72,7 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) if not ok: failures.append(msg) - if config.fail_policy == "hard": - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) - continue + return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) for profile in profiles: packs = profile_packs.get(profile, []) @@ -80,10 +81,8 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: sarif_path = sarif_dir / f"{language_id}.{profile}.sarif" ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) if not ok: - if config.fail_policy == "hard": - failures.append(msg) - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) - warnings.append(msg) + failures.append(msg) + return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) if failures: return _manifest("failed", 
now_utc, config, [version], warnings, failures, language_ids) @@ -138,9 +137,6 @@ def _create_database( elif build_mode == "autobuild": pass # let CodeQL auto-detect - for pattern in exclude_patterns: - cmd += ["--no-source-unpack", "--additional-build-options", f"--exclude={pattern}"] - try: result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) except subprocess.TimeoutExpired: diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index e6f3c427..5971b12e 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -100,7 +100,7 @@ def count_findings_snapshot(snapshot: dict[str, int] | None = None) -> dict[str, return {status: max(0, current[status] - snapshot.get(status, 0)) for status in FINDING_STATUS_DIRS} -def check_phase_1a(console=None) -> int: +def check_phase_1a(console=None, findings_snapshot: dict[str, int] | None = None) -> int: """Gate 1a: target-profile/build-model/codeql-plan outputs must exist.""" _emit(console, "header", "Gate 1a: Target Profile") _emit_separator(console, "cyan") @@ -119,6 +119,19 @@ def check_phase_1a(console=None) -> int: _emit(console, "ok", "itemdb/notes/build-model.md exists") _emit(console, "ok", "itemdb/notes/codeql-plan.yml exists") + if findings_snapshot is not None: + delta = count_findings_snapshot(findings_snapshot) + new_findings = sum(delta.values()) + if new_findings > 0: + _emit( + console, + "warn", + f"{new_findings} new finding(s) were created during Phase 1a. 
Findings should not be created during reconnaissance.", + ) + for status, count in delta.items(): + if count > 0: + _emit(console, "info", f" {status}: +{count}") + plan_path = notes_dir / "codeql-plan.yml" if yaml is None: _emit(console, "warn", "Cannot validate codeql-plan.yml: PyYAML not available") @@ -216,6 +229,9 @@ def check_phase_1b(console=None, findings_snapshot: dict[str, int] | None = None if not isinstance(entry, dict): continue path_val = entry.get("path", "") + if path_val == "src/example/path/to/file.ext": + _emit(console, "fail", "file-risk-index.yml: contains template placeholder entry ('src/example/path/to/file.ext')") + return 1 if "../" in str(path_val) or str(path_val).startswith("/"): _emit(console, "warn", f"file-risk-index.yml: path '{path_val}' is not workspace-relative") score = entry.get("score") From 3538558a9d551a21d530bd66f7fd9eb0c10bd71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sat, 30 May 2026 23:08:06 +0200 Subject: [PATCH 22/47] fix: create CodeQL database directory --- tests/test_codeql_runner.py | 70 ++++++++++++++++++++++++++++++++++++- tools/codeql/runner.py | 10 ++++-- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 632deb1f..6d8c0e5d 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -2,12 +2,13 @@ import sys from pathlib import Path +from unittest.mock import MagicMock, patch ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(ROOT / "tools")) from codeql.config import CodeQLConfig -from codeql.runner import _lookup_build, _manifest, write_manifest +from codeql.runner import _create_database, _lookup_build, _manifest, run_codeql, write_manifest def test_manifest_completed() -> None: @@ -118,3 +119,70 @@ def test_lookup_build_no_match_within_plan() -> None: mode, cmd = _lookup_build({"id": "python"}, plan) assert mode == "none" assert cmd is None + + +def 
test_create_database_creates_parent_dir(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "c-cpp" + completed = MagicMock(returncode=0, stderr="") + + with patch("codeql.runner.subprocess.run", return_value=completed) as mock_run: + ok, msg = _create_database( + tmp_path / "codeql", + "c-cpp", + "./src", + db_dir, + "none", + None, + [], + ) + + assert ok is True + assert msg == "" + assert db_dir.parent.is_dir() + assert mock_run.call_args.args[0][3] == str(db_dir) + + +def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + resolved = { + "languages": [ + { + "id": "c-cpp", + "profiles": ["official"], + "profile_packs": {"official": ["codeql/cpp-queries"]}, + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"source_path": "./src", "languages": []}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(False, "db create failed")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert 
manifest["fail_policy"] == "soft" + assert manifest["failures"] == ["db create failed"] + assert manifest["languages"] == ["c-cpp"] diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 722d12b6..072338af 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -72,7 +72,7 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) if not ok: failures.append(msg) - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids) for profile in profiles: packs = profile_packs.get(profile, []) @@ -82,7 +82,7 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) if not ok: failures.append(msg) - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids) if failures: return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) @@ -90,6 +90,10 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids) +def _tool_failure_status(config: CodeQLConfig) -> str: + return "failed" if config.fail_policy == "hard" else "soft-failed" + + def _lookup_build(lang_entry: dict, plan_languages: list[dict]) -> tuple[str, str | None]: """Return (build_mode, build_command) for a language entry.""" language_id = lang_entry["id"] @@ -123,6 +127,8 @@ def _create_database( exclude_patterns: list[str], ) -> tuple[bool, str]: """Create a CodeQL database. 
Returns (success, message).""" + db_dir.parent.mkdir(parents=True, exist_ok=True) + cmd = [ str(binary), "database", "create", str(db_dir), From 9ea504e9e2b9c95ae25b18e7fea158f53cd1764e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 00:31:10 +0200 Subject: [PATCH 23/47] feat: harden CodeQL phase integration --- Makefile | 8 +- prompts/phase-1a-profile.md | 13 +++- templates/codeql-plan.yml | 54 ++++++++----- tests/test_codecome_check_codeql.py | 90 +++++++++++++++++++++ tests/test_codeql_normalize.py | 13 ++-- tests/test_codeql_packs.py | 37 +++++---- tests/test_codeql_pipeline.py | 51 +++++++++++- tests/test_codeql_runner.py | 55 ++++++++++--- tests/test_phase_1_gates.py | 17 ++++ tools/codecome.py | 108 +++++++++++++++++++++++++- tools/codecome/phase_1.py | 13 +++- tools/codeql.py | 17 ++-- tools/codeql/capabilities.py | 28 +++++++ tools/codeql/config.py | 6 +- tools/codeql/normalize.py | 16 ++-- tools/codeql/packs.py | 83 +++++++++++++------- tools/codeql/pipeline.py | 37 ++++++++- tools/codeql/runner.py | 116 +++++++++++++++++++--------- tools/phases/phase_1_gates.py | 108 +++++++++++++++++++------- 19 files changed, 696 insertions(+), 174 deletions(-) create mode 100644 tests/test_codecome_check_codeql.py create mode 100644 tests/test_phase_1_gates.py create mode 100644 tools/codeql/capabilities.py diff --git a/Makefile b/Makefile index a6f46512..afc08e6c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -.PHONY: help init venv venv-check check status next-id frontmatter tests test-parity itemdb-reset index report +.PHONY: help init venv venv-check check status next-id frontmatter tests test-parity itemdb-reset codeql-clean index report .PHONY: findings findings-create findings-move findings-evidence findings-package .PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all 
opencode-raw .PHONY: sandbox-setup sandbox-check sandbox-up sandbox-down sandbox-shell sandbox-logs sandbox-clean sandbox-reset sandbox-build sandbox-test @@ -80,6 +80,7 @@ help: @printf " $(BOLD)make frontmatter$(RESET) Validate finding frontmatter\n" @printf " $(BOLD)make tests$(RESET) Run dev test suite + frontmatter gate\n" @printf " $(BOLD)make itemdb-reset$(RESET) Remove local audit artifacts and recreate .gitkeep files\n" + @printf " $(BOLD)make codeql-clean$(RESET) Remove generated CodeQL artifacts and cache\n" @printf " $(BOLD)make index$(RESET) Regenerate itemdb/index.md\n" @printf " $(BOLD)make report$(RESET) Regenerate itemdb/reports/report.md (local, no AI)\n" @printf "\n" @@ -280,6 +281,11 @@ itemdb-reset: venv-check touch tmp/.gitkeep $(PYTHON) tools/render-index.py +codeql-clean: + rm -rf itemdb/codeql + rm -rf .cache/codeql + rm -rf src/_codeql_detected_source_root + index: venv-check $(PYTHON) tools/render-index.py diff --git a/prompts/phase-1a-profile.md b/prompts/phase-1a-profile.md index 412369bc..54a576b0 100644 --- a/prompts/phase-1a-profile.md +++ b/prompts/phase-1a-profile.md @@ -59,14 +59,23 @@ Create `itemdb/notes/codeql-plan.yml` by filling in the template from `templates Rules: +- Discover analysis units under `./src`. An analysis unit is a coherent project/component with one source root and one or more languages/stacks, such as an API service, frontend app, native library, CLI, package, firmware tree, or benchmark corpus. +- Use stable, lowercase `analysis_units[].id` values such as `api`, `frontend`, `native-lib`, or `root`. These IDs are discovered here; users do not define them in `codecome.yml`. +- Set `analysis_units[].path` to the real source path under `./src` for that unit. Do not use CodeQL-generated helper paths such as `_codeql_detected_source_root`. +- Use one `analysis_units` entry for a single-project repository and multiple entries for monorepos or mixed stacks. 
- Only include languages you have detected with **HIGH** or **MEDIUM** confidence. -- For each language, select the appropriate pack profiles: +- For each language in each analysis unit, select the appropriate pack profiles: - `official` — always include for languages with CodeQL support. - `github-security-lab` — include for security-focused audits. - `trailofbits` — include for C/C++ and Go targets. - `coding-standards` — include for C/C++ targets where coding standards queries apply. - `local` — include if custom queries exist under `queries/codeql/<language>/`. -- Set `build_mode` to `none` for interpreted languages, `manual` for compiled languages with a known build command, or `autobuild` if CodeQL autobuild should be attempted. +- Set `build_mode` according to CodeQL language support: + - `none`: python, javascript-typescript, ruby, csharp, java-kotlin. + - `manual` or `autobuild`: c-cpp, go, csharp, java-kotlin, swift. +- Do not set `build_mode: none` for C/C++, Go, or Swift. +- Use `manual` only when you identified a concrete build command for that analysis unit. +- Use `autobuild` only as an explicit choice when build files exist but the exact command is uncertain. - Fill in `build_command` when `build_mode` is `manual`. - Set `recommended: false` if you cannot confidently profile any language. - Add relevant `notes` explaining your language choices and any uncertainties. 
diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml index a2a82f6f..8ed1cd8c 100644 --- a/templates/codeql-plan.yml +++ b/templates/codeql-plan.yml @@ -8,31 +8,43 @@ generated_by: "phase-1a-profile" source_path: "./src" recommended: true -languages: [] -# Example language entries: +analysis_units: [] +# Example analysis units discovered under ./src: # -# languages: -# - id: "python" -# confidence: "HIGH" -# build_mode: "none" -# build_command: null -# packs: -# - "official" -# - "github-security-lab" +# analysis_units: +# - id: "api" +# path: "./src/api" +# kind: "service" +# primary: true +# languages: +# - id: "python" +# confidence: "HIGH" +# build_mode: "none" +# build_command: null +# packs: +# - "official" +# - "github-security-lab" # -# - id: "c-cpp" -# confidence: "HIGH" -# build_mode: "manual" -# build_command: "make -C src" -# packs: -# - "official" -# - "github-security-lab" -# - "trailofbits" -# - "coding-standards" +# - id: "native-lib" +# path: "./src/native" +# kind: "library" +# primary: false +# languages: +# - id: "c-cpp" +# confidence: "HIGH" +# build_mode: "manual" +# build_command: "make -C src/native" +# packs: +# - "official" +# - "github-security-lab" +# - "trailofbits" +# - "coding-standards" # -# Allowed language IDs: python, javascript-typescript, c-cpp, go, csharp, java-kotlin +# Allowed language IDs: python, javascript-typescript, ruby, c-cpp, go, csharp, java-kotlin, swift # Allowed confidence values: HIGH, MEDIUM, LOW -# Allowed build_mode values: none, manual, autobuild +# Allowed build_mode values by language: +# none: python, javascript-typescript, ruby, csharp, java-kotlin +# manual/autobuild: c-cpp, go, csharp, java-kotlin, swift # Allowed pack profile names: official, github-security-lab, trailofbits, coding-standards, local exclude: diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py new file mode 100644 index 00000000..49b44c65 --- /dev/null +++ 
b/tests/test_codecome_check_codeql.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from unittest.mock import patch + +import yaml + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from codeql.config import CodeQLConfig + + +def _load_codecome_cli(): + spec = importlib.util.spec_from_file_location("codecome_cli_script", ROOT / "tools" / "codecome.py") + assert spec is not None + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + +def _config(tmp_path: Path, *, enabled: bool = True, fail_policy: str = "soft") -> CodeQLConfig: + return CodeQLConfig( + enabled=enabled, + fail_policy=fail_policy, + abs_install_path=tmp_path / ".tools" / "codeql" / "current" / "codeql", + abs_pack_catalog=tmp_path / "templates" / "codeql-packs.yml", + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + abs_cache_dir=tmp_path / ".cache" / "codeql", + ) + + +def test_codeql_check_accepts_recorded_disabled_run(tmp_path: Path, capsys) -> None: + module = _load_codecome_cli() + config = _config(tmp_path, enabled=True) + manifest_dir = config.abs_output_dir + manifest_dir.mkdir(parents=True) + (manifest_dir / "run-manifest.yml").write_text( + yaml.safe_dump( + { + "status": "skipped", + "codeql_enabled": False, + "skip_reason": "CodeQL disabled for Phase 1", + "fail_policy": "soft", + "failures": ["CodeQL disabled for Phase 1"], + } + ), + encoding="utf-8", + ) + + with patch.object(module, "ROOT", tmp_path), patch("codeql.config.resolve_config", return_value=config): + rc = module.check_codeql_status() + + out = capsys.readouterr().out + assert rc == 0 + assert "last phase-1 CodeQL state: skipped" in out + + +def test_codeql_check_fails_failed_artifacts(tmp_path: Path, capsys) -> None: + module = _load_codecome_cli() + config = 
_config(tmp_path, enabled=True) + config.abs_install_path.parent.mkdir(parents=True) + config.abs_install_path.write_text("", encoding="utf-8") + config.abs_pack_catalog.parent.mkdir(parents=True) + config.abs_pack_catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\nanalysis_units:\n - id: root\n path: ./src\n languages:\n - id: python\n packs:\n - official\n", + encoding="utf-8", + ) + manifest_dir = config.abs_output_dir + manifest_dir.mkdir(parents=True) + (manifest_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "failed", "codeql_enabled": True, "fail_policy": "hard", "failures": ["boom"]}), + encoding="utf-8", + ) + + with patch.object(module, "ROOT", tmp_path), patch("codeql.config.resolve_config", return_value=config): + rc = module.check_codeql_status() + + out = capsys.readouterr().out + assert rc == 1 + assert "artifacts: failed" in out + assert "boom" in out diff --git a/tests/test_codeql_normalize.py b/tests/test_codeql_normalize.py index d103d924..6f49b4c4 100644 --- a/tests/test_codeql_normalize.py +++ b/tests/test_codeql_normalize.py @@ -76,7 +76,7 @@ def test_normalize_all_empty_sarif_dir(tmp_path: Path) -> None: sarif_dir.mkdir() out_dir = tmp_path / "normalized" - resolved = {"languages": []} + resolved = {"analysis_units": []} alerts_path, signals_path = normalize_all( sarif_dir, out_dir, resolved, "2.21.0", tmp_path, ) @@ -91,7 +91,7 @@ def test_normalize_all_empty_sarif_dir(tmp_path: Path) -> None: def test_normalize_one_sarif(tmp_path: Path) -> None: sarif_dir = tmp_path / "sarif" sarif_dir.mkdir() - sarif_file = sarif_dir / "python.official.sarif" + sarif_file = sarif_dir / "api.python.official.sarif" sarif_file.write_text( json.dumps( _minimal_sarif( @@ -104,7 +104,7 @@ def test_normalize_one_sarif(tmp_path: Path) -> None: ) 
out_dir = tmp_path / "normalized" - resolved = {"languages": [{"id": "python", "profiles": ["official"]}]} + resolved = {"analysis_units": [{"id": "api", "languages": [{"id": "python", "profiles": ["official"]}]}]} alerts_path, signals_path = normalize_all( sarif_dir, out_dir, resolved, "2.21.0", tmp_path, ) @@ -114,6 +114,7 @@ def test_normalize_one_sarif(tmp_path: Path) -> None: assert len(alerts["alerts"]) == 1 a = alerts["alerts"][0] assert a["id"] == "CQ-0001" + assert a["analysis_unit_id"] == "api" assert a["language"] == "python" assert a["pack_profile"] == "official" assert a["rule_id"] == "py/path-injection" @@ -134,7 +135,7 @@ def test_normalize_ignores_non_matching_filenames(tmp_path: Path) -> None: (sarif_dir / "single.sarif").write_text(json.dumps(_minimal_sarif([]))) out_dir = tmp_path / "normalized" - resolved = {"languages": []} + resolved = {"analysis_units": []} alerts_path, _ = normalize_all( sarif_dir, out_dir, resolved, "2.21.0", tmp_path, ) @@ -147,10 +148,10 @@ def test_normalize_ignores_non_matching_filenames(tmp_path: Path) -> None: def test_normalize_handles_invalid_json(tmp_path: Path) -> None: sarif_dir = tmp_path / "sarif" sarif_dir.mkdir() - (sarif_dir / "python.bad.sarif").write_text("not json", encoding="utf-8") + (sarif_dir / "api.python.bad.sarif").write_text("not json", encoding="utf-8") out_dir = tmp_path / "normalized" - resolved = {"languages": []} + resolved = {"analysis_units": []} alerts_path, _ = normalize_all( sarif_dir, out_dir, resolved, "2.21.0", tmp_path, ) diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py index 715526cd..a1fc4e49 100644 --- a/tests/test_codeql_packs.py +++ b/tests/test_codeql_packs.py @@ -44,15 +44,18 @@ def _write_plan(path: Path) -> None: path.write_text( ( "schema_version: 1\n" - "languages:\n" - " - id: python\n" - " packs:\n" - " - official\n" - " - github-security-lab\n" - " - id: c-cpp\n" - " packs:\n" - " - official\n" - " - coding-standards\n" + "analysis_units:\n" + " - 
id: root\n" + " path: ./src\n" + " languages:\n" + " - id: python\n" + " packs:\n" + " - official\n" + " - github-security-lab\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + " - coding-standards\n" ), encoding="utf-8", ) @@ -113,16 +116,17 @@ def test_resolve_plan_packs_includes_profile_packs(tmp_path: Path) -> None: plan = load_codeql_plan(plan_path) resolved = resolve_plan_packs(plan, catalog) - assert resolved["languages"][0]["packs"] == [ + languages = resolved["analysis_units"][0]["languages"] + assert languages[0]["packs"] == [ "codeql/python-queries", "githubsecuritylab/codeql-python-queries", ] # profile_packs maps each profile to its individual packs (no dedup across profiles) - assert resolved["languages"][0]["profile_packs"] == { + assert languages[0]["profile_packs"] == { "official": ["codeql/python-queries"], "github-security-lab": ["githubsecuritylab/codeql-python-queries"], } - assert resolved["languages"][1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False + assert languages[1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False def test_resolve_profile_packs_rejects_unknown_profile() -> None: @@ -165,20 +169,21 @@ def test_resolve_plan_packs_candidate_policy(tmp_path: Path) -> None: plan = load_codeql_plan(plan_path) resolved = resolve_plan_packs(plan, catalog) - assert resolved["languages"][0]["packs"] == [ + languages = resolved["analysis_units"][0]["languages"] + assert languages[0]["packs"] == [ "codeql/python-queries", "githubsecuritylab/codeql-python-queries", ] - assert resolved["languages"][1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False + assert languages[1]["candidate_policy"]["coding-standards"]["allow_precreate"] is False def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None: plan_path = tmp_path / "bad-plan.yml" - plan_path.write_text("languages:\n - nope\n", encoding="utf-8") + plan_path.write_text("analysis_units:\n - nope\n", 
encoding="utf-8") try: load_codeql_plan(plan_path) except PackResolverError as exc: - assert "non-mapping language entry" in str(exc) + assert "non-mapping analysis unit" in str(exc) else: raise AssertionError("expected PackResolverError") diff --git a/tests/test_codeql_pipeline.py b/tests/test_codeql_pipeline.py index 76091589..0965405c 100644 --- a/tests/test_codeql_pipeline.py +++ b/tests/test_codeql_pipeline.py @@ -10,6 +10,7 @@ import yaml from codeql.config import CodeQLConfig +from codeql.pipeline import record_skipped_run def _make_config(tmp_path: Path) -> CodeQLConfig: @@ -60,10 +61,45 @@ def test_pipeline_skipped_no_plan(tmp_path: Path) -> None: result = run_full_pipeline(config) assert result["status"] == "skipped" - mock_run.assert_called_once_with(config) + mock_run.assert_called_once_with(config, progress=None) mock_normalize.assert_not_called() +def test_pipeline_emits_progress(tmp_path: Path) -> None: + config = _make_config(tmp_path) + messages: list[str] = [] + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "skipped", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:00:01Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "analysis_units": [], + "languages": [], + "warnings": [], + "failures": ["codeql-plan.yml not found"], + } + + with patch("codeql.runner.run_codeql", return_value=manifest) as mock_run, \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config, progress=messages.append) + + assert result["status"] == "skipped" + mock_run.assert_called_once() + assert mock_run.call_args.args == (config,) + assert mock_run.call_args.kwargs["progress"] is not None + assert "CodeQL: manifest written" in messages + assert "CodeQL: summary written" in messages + + def 
test_pipeline_completed_writes_manifest(tmp_path: Path) -> None: """When run_codeql returns completed, manifest file is written.""" config = _make_config(tmp_path) @@ -126,3 +162,16 @@ def test_pipeline_soft_failed_continues(tmp_path: Path) -> None: assert result["status"] == "soft-failed" # Should not raise + + +def test_record_skipped_run_writes_manifest_and_summary(tmp_path: Path) -> None: + config = _make_config(tmp_path) + config.enabled = False + + manifest = record_skipped_run(config, "CodeQL disabled for Phase 1") + + assert manifest["status"] == "skipped" + assert manifest["codeql_enabled"] is False + assert manifest["skip_reason"] == "CodeQL disabled for Phase 1" + assert (config.abs_output_dir / "run-manifest.yml").is_file() + assert (config.abs_output_dir / "codeql-summary.md").is_file() diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 6d8c0e5d..fee1c714 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -98,25 +98,25 @@ def test_write_manifest(tmp_path: Path) -> None: def test_lookup_build_match() -> None: - plan = [ + languages = [ {"id": "python", "build_mode": "none", "build_command": None}, {"id": "c-cpp", "build_mode": "manual", "build_command": "make -C src"}, ] - mode, cmd = _lookup_build({"id": "c-cpp"}, plan) + mode, cmd = _lookup_build("c-cpp", languages) assert mode == "manual" assert cmd == "make -C src" def test_lookup_build_fallback() -> None: - plan: list = [] - mode, cmd = _lookup_build({"id": "python"}, plan) + languages: list = [] + mode, cmd = _lookup_build("python", languages) assert mode == "none" assert cmd is None def test_lookup_build_no_match_within_plan() -> None: - plan = [{"id": "go", "build_mode": "autobuild"}] - mode, cmd = _lookup_build({"id": "python"}, plan) + languages = [{"id": "go", "build_mode": "autobuild"}] + mode, cmd = _lookup_build("python", languages) assert mode == "none" assert cmd is None @@ -140,6 +140,30 @@ def 
test_create_database_creates_parent_dir(tmp_path: Path) -> None: assert msg == "" assert db_dir.parent.is_dir() assert mock_run.call_args.args[0][3] == str(db_dir) + assert "--build-mode=none" in mock_run.call_args.args[0] + + +def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "c-cpp" + completed = MagicMock(returncode=0, stderr="") + + with patch("codeql.runner.subprocess.run", return_value=completed) as mock_run: + ok, msg = _create_database( + tmp_path / "codeql", + "c-cpp", + "./src/native", + db_dir, + "manual", + "make -C src/native", + [], + ) + + assert ok is True + assert msg == "" + cmd = mock_run.call_args.args[0] + assert "--build-mode=manual" in cmd + assert "-c" in cmd + assert "make -C src/native" in cmd def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: @@ -165,11 +189,17 @@ def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: ) resolved = { - "languages": [ + "analysis_units": [ { - "id": "c-cpp", - "profiles": ["official"], - "profile_packs": {"official": ["codeql/cpp-queries"]}, + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official"], + "profile_packs": {"official": ["codeql/cpp-queries"]}, + } + ], } ] } @@ -177,7 +207,7 @@ def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: with patch("codeql.runner.ROOT", tmp_path), \ patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ patch("codeql.runner.load_pack_catalog", return_value={}), \ - patch("codeql.runner.load_codeql_plan", return_value={"source_path": "./src", "languages": []}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "build_command": None}]}]}), \ patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ 
patch("codeql.runner._create_database", return_value=(False, "db create failed")): manifest = run_codeql(config) @@ -185,4 +215,5 @@ def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: assert manifest["status"] == "soft-failed" assert manifest["fail_policy"] == "soft" assert manifest["failures"] == ["db create failed"] - assert manifest["languages"] == ["c-cpp"] + assert manifest["analysis_units"] == ["root"] + assert manifest["languages"] == ["root:c-cpp"] diff --git a/tests/test_phase_1_gates.py b/tests/test_phase_1_gates.py new file mode 100644 index 00000000..6d2886b5 --- /dev/null +++ b/tests/test_phase_1_gates.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from phases.phase_1_gates import _emit + + +def test_emit_plain_fallback_prints_formatted_text(capsys) -> None: + _emit(None, "ok", "plain gate output") + + out = capsys.readouterr().out + assert "plain gate output" in out diff --git a/tools/codecome.py b/tools/codecome.py index f58ef089..45c5baa8 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -345,6 +345,110 @@ def count_findings() -> Dict[str, int]: return counts +def _phase_1_notes_exist() -> bool: + notes_dir = ROOT / "itemdb" / "notes" + return (notes_dir / "target-profile.md").is_file() and (notes_dir / "build-model.md").is_file() + + +def check_codeql_status() -> int: + """Check CodeQL configuration and last recorded artifact state.""" + print() + print(C.header("CodeQL:")) + + try: + from codeql.config import resolve_config + from codeql.artifacts import check_artifacts + from codeql.packs import load_codeql_plan + except Exception as exc: + print(C.warn(f"CodeQL checks unavailable: {exc}")) + return 0 + + config = resolve_config() + manifest_path = config.abs_output_dir / "run-manifest.yml" + manifest = None + + if manifest_path.is_file() and yaml is not None: + try: + 
loaded = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + manifest = loaded if isinstance(loaded, dict) else None + except (OSError, yaml.YAMLError, UnicodeDecodeError): + manifest = None + + current_state = "enabled" if config.enabled else "disabled" + print(C.ok(f"current config: CodeQL {current_state}")) + + if manifest and manifest.get("status") == "skipped" and manifest.get("codeql_enabled") is False: + reason = manifest.get("skip_reason") or "CodeQL disabled during recorded run" + print(C.ok(f"last phase-1 CodeQL state: skipped ({reason})")) + print(C.info("No CodeQL artifacts are required for that recorded run.")) + return 0 + + if not config.enabled: + print(C.ok("CodeQL disabled for current invocation; artifact checks skipped.")) + return 0 + + exit_code = 0 + if config.phase_1_enabled: + print(C.ok("phase-1 integration: enabled")) + else: + print(C.ok("phase-1 integration: disabled; artifact checks skipped.")) + return 0 + + if config.abs_install_path.is_file(): + print(C.ok(f"binary: {config.abs_install_path.relative_to(ROOT) if config.abs_install_path.is_relative_to(ROOT) else config.abs_install_path}")) + else: + print(C.fail(f"binary missing: {config.abs_install_path}")) + exit_code = 1 + + if config.abs_pack_catalog.is_file(): + print(C.ok(f"pack catalog: {config.abs_pack_catalog.relative_to(ROOT) if config.abs_pack_catalog.is_relative_to(ROOT) else config.abs_pack_catalog}")) + else: + print(C.fail(f"pack catalog missing: {config.abs_pack_catalog}")) + exit_code = 1 + + plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml" + if plan_path.is_file(): + try: + load_codeql_plan(plan_path) + print(C.ok("plan: itemdb/notes/codeql-plan.yml")) + except Exception as exc: + print(C.fail(f"plan invalid: {exc}")) + exit_code = 1 + elif _phase_1_notes_exist(): + print(C.warn("plan missing after Phase 1 notes exist: itemdb/notes/codeql-plan.yml")) + else: + print(C.info("Phase 1 has not produced a CodeQL plan yet; no artifacts expected.")) + return 
exit_code + + artifact_status, warnings = check_artifacts(config.abs_output_dir) + if artifact_status == "missing": + if _phase_1_notes_exist(): + print(C.warn("artifacts: missing run-manifest.yml; run make phase-1 to refresh CodeQL state.")) + else: + print(C.info("artifacts: not present yet; Phase 1 has not run.")) + elif artifact_status == "completed" and not warnings: + print(C.ok("artifacts: completed")) + elif artifact_status == "soft-failed": + print(C.warn("artifacts: soft-failed")) + for warning in warnings: + print(C.warn(f" {warning}")) + if (manifest or {}).get("fail_policy", config.fail_policy) == "hard": + exit_code = 1 + elif artifact_status == "skipped": + print(C.ok("artifacts: skipped")) + for warning in warnings: + print(C.info(f" {warning}")) + else: + formatter = C.fail if artifact_status in {"failed", "unknown"} else C.warn + print(formatter(f"artifacts: {artifact_status}")) + for warning in warnings: + print(formatter(f" {warning}")) + if artifact_status in {"completed", "failed", "unknown"}: + exit_code = 1 + + return exit_code + + def command_check(_: argparse.Namespace) -> int: missing = [] @@ -375,6 +479,8 @@ def command_check(_: argparse.Namespace) -> int: if not has_source: print(C.warn("src/ is empty — place your target source code there before running phase-1.")) + check_exit = check_codeql_status() + # Warn (do not fail) about missing optional recording tools used by Phase 5. recording_warnings = check_recording_tools() if recording_warnings: @@ -408,7 +514,7 @@ def command_check(_: argparse.Namespace) -> int: "and PTY-acquisition guidance." ) - return 0 + return check_exit def command_status(_: argparse.Namespace) -> int: diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 715aa6fa..59a2b8b8 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -58,6 +58,8 @@ def _run_codeql(console: Any) -> int: if not config.enabled: msg = "CodeQL disabled — skipping." 
+ from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL disabled for Phase 1") if HAVE_RICH: from rich.text import Text console.print(Text(msg, style="yellow")) @@ -68,6 +70,8 @@ def _run_codeql(console: Any) -> int: if not config.phase_1_enabled: msg = "CodeQL phase 1 disabled — skipping." + from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL phase 1 disabled") if HAVE_RICH: from rich.text import Text console.print(Text(msg, style="yellow")) @@ -84,8 +88,15 @@ def _run_codeql(console: Any) -> int: from codeql.pipeline import run_full_pipeline + def progress(message: str) -> None: + if HAVE_RICH: + from rich.text import Text + console.print(Text(message, style="dim")) + else: + print(message, flush=True) + try: - manifest = run_full_pipeline(config) + manifest = run_full_pipeline(config, progress=progress) except Exception as exc: msg = f"CodeQL: FAILED — {exc}" if HAVE_RICH: diff --git a/tools/codeql.py b/tools/codeql.py index c1951c88..ac74b35c 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -107,10 +107,12 @@ def _cmd_resolve_packs(args: argparse.Namespace) -> int: print(json.dumps(resolved, indent=2)) else: print(f"Resolved CodeQL packs written to {output_path.relative_to(ROOT) if output_path.is_relative_to(ROOT) else output_path}") - for language in resolved["languages"]: - print(f"- {language['id']}: {', '.join(language['profiles'])}") - for pack in language["packs"]: - print(f" {pack}") + for unit in resolved["analysis_units"]: + print(f"- {unit['id']} ({unit['path']})") + for language in unit["languages"]: + print(f" {language['id']}: {', '.join(language['profiles'])}") + for pack in language["packs"]: + print(f" {pack}") return 0 @@ -120,6 +122,8 @@ def _cmd_run() -> int: if not config.enabled: print("CodeQL is disabled (CODEQL=0 or CODEQL_SKIP=1). 
Skipping run.") + from codeql.pipeline import record_skipped_run + record_skipped_run(config, "CodeQL disabled for this run") return 0 binary_path = config.abs_install_path @@ -130,7 +134,10 @@ def _cmd_run() -> int: from codeql.pipeline import run_full_pipeline - manifest = run_full_pipeline(config) + def progress(message: str) -> None: + print(message, flush=True) + + manifest = run_full_pipeline(config, progress=progress) status = manifest["status"] print(f"CodeQL run: {status}") diff --git a/tools/codeql/capabilities.py b/tools/codeql/capabilities.py new file mode 100644 index 00000000..65b29ff7 --- /dev/null +++ b/tools/codeql/capabilities.py @@ -0,0 +1,28 @@ +# Copyright (C) 2025-2026 Pablo Ruiz Garcia +# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later + +"""CodeQL language capability metadata.""" + +from __future__ import annotations + + +BUILD_MODES_BY_LANGUAGE: dict[str, set[str]] = { + "python": {"none"}, + "javascript-typescript": {"none"}, + "ruby": {"none"}, + "c-cpp": {"manual", "autobuild"}, + "go": {"manual", "autobuild"}, + "csharp": {"none", "manual", "autobuild"}, + "java-kotlin": {"none", "manual", "autobuild"}, + "swift": {"manual", "autobuild"}, +} + + +def supported_build_modes(language_id: str) -> set[str]: + """Return supported CodeQL build modes for *language_id*.""" + return BUILD_MODES_BY_LANGUAGE.get(language_id, set()) + + +def is_supported_language(language_id: str) -> bool: + """Return whether *language_id* is known to this CodeQL integration.""" + return language_id in BUILD_MODES_BY_LANGUAGE diff --git a/tools/codeql/config.py b/tools/codeql/config.py index 64641184..d3be9045 100644 --- a/tools/codeql/config.py +++ b/tools/codeql/config.py @@ -180,14 +180,10 @@ def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> else: enabled = _get("enabled", DEFAULTS["enabled"], coerce=bool) - # If CODEQL=0 overrides everything - if not enabled: - return CodeQLConfig(enabled=False) - # Also check 
CODEQL_SKIP skip = _bool_env("CODEQL_SKIP") if skip is True: - return CodeQLConfig(enabled=False) + enabled = False fail_policy = _str_env("CODEQL_FAIL_POLICY") or _get("fail_policy", DEFAULTS["fail_policy"]) diff --git a/tools/codeql/normalize.py b/tools/codeql/normalize.py index 0438b732..bbfca19b 100644 --- a/tools/codeql/normalize.py +++ b/tools/codeql/normalize.py @@ -25,13 +25,13 @@ def normalize_all( alert_counter = 0 for sarif_file in sorted(sarif_dir.glob("*.sarif")): - stem = sarif_file.stem # e.g. "python.official" - parts = stem.split(".", 1) - if len(parts) != 2: + stem = sarif_file.stem # e.g. "api.python.official" + parts = stem.split(".", 2) + if len(parts) != 3: continue - language_id, profile = parts + analysis_unit_id, language_id, profile = parts - new_alerts = _parse_sarif(sarif_file, language_id, profile, alert_counter, source_root) + new_alerts = _parse_sarif(sarif_file, analysis_unit_id, language_id, profile, alert_counter, source_root) alert_counter += len(new_alerts) alerts.extend(new_alerts) @@ -74,6 +74,7 @@ def normalize_all( def _parse_sarif( path: Path, + analysis_unit_id: str, language_id: str, pack_profile: str, start_index: int, @@ -102,7 +103,7 @@ def _parse_sarif( if not isinstance(result, dict): continue alert = _normalize_one_result( - result, rules_lookup, language_id, pack_profile, + result, rules_lookup, analysis_unit_id, language_id, pack_profile, start_index + ri + 1, source_root, ) if alert: @@ -138,6 +139,7 @@ def _build_rules_lookup(run: dict[str, Any]) -> dict[str, dict[str, Any]]: def _normalize_one_result( result: dict[str, Any], rules_lookup: dict[str, dict[str, Any]], + analysis_unit_id: str, language_id: str, pack_profile: str, index: int, @@ -164,6 +166,7 @@ def _normalize_one_result( return { "id": f"CQ-{index:04d}", "fingerprint": fingerprint, + "analysis_unit_id": analysis_unit_id, "language": language_id, "pack_profile": pack_profile, "pack": _first_pack(result, rules_lookup), @@ -366,4 +369,3 @@ def 
_coerce_str(value: Any) -> str | None: return str(value) - diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py index 3dbe0e7b..b2af3aa1 100644 --- a/tools/codeql/packs.py +++ b/tools/codeql/packs.py @@ -84,21 +84,33 @@ def load_codeql_plan(path: Path) -> dict[str, Any]: """Load and validate a CodeQL plan file.""" data = load_yaml_mapping(path, what="CodeQL plan") - languages = data.get("languages") - if not isinstance(languages, list): - raise PackResolverError(f"CodeQL plan at {path} must define 'languages' as a list.") - - for i, entry in enumerate(languages): - if not isinstance(entry, dict): - raise PackResolverError(f"CodeQL plan at {path} has non-mapping language entry at index {i}.") - language_id = entry.get("id") - if not isinstance(language_id, str) or not language_id: - raise PackResolverError(f"CodeQL plan at {path} has language entry {i} without a valid 'id'.") - profiles = entry.get("packs") - if not isinstance(profiles, list) or not all(isinstance(p, str) and p for p in profiles): - raise PackResolverError( - f"CodeQL plan at {path} must define language {language_id!r} packs as a list of profile names." 
- ) + units = data.get("analysis_units") + if not isinstance(units, list): + raise PackResolverError(f"CodeQL plan at {path} must define 'analysis_units' as a list.") + + for i, unit in enumerate(units): + if not isinstance(unit, dict): + raise PackResolverError(f"CodeQL plan at {path} has non-mapping analysis unit at index {i}.") + unit_id = unit.get("id") + if not isinstance(unit_id, str) or not unit_id: + raise PackResolverError(f"CodeQL plan at {path} has analysis unit {i} without a valid 'id'.") + unit_path = unit.get("path") + if not isinstance(unit_path, str) or not unit_path: + raise PackResolverError(f"CodeQL plan at {path} has analysis unit {unit_id!r} without a valid 'path'.") + languages = unit.get("languages") + if not isinstance(languages, list) or not languages: + raise PackResolverError(f"CodeQL plan at {path} must define analysis unit {unit_id!r} languages as a non-empty list.") + for j, entry in enumerate(languages): + if not isinstance(entry, dict): + raise PackResolverError(f"CodeQL plan at {path} has non-mapping language entry {j} in analysis unit {unit_id!r}.") + language_id = entry.get("id") + if not isinstance(language_id, str) or not language_id: + raise PackResolverError(f"CodeQL plan at {path} has language entry {j} in analysis unit {unit_id!r} without a valid 'id'.") + profiles = entry.get("packs") + if not isinstance(profiles, list) or not all(isinstance(p, str) and p for p in profiles): + raise PackResolverError( + f"CodeQL plan at {path} must define language {language_id!r} packs as a list of profile names." 
+ ) return data @@ -157,28 +169,39 @@ def _resolve_profile_packs(language_id: str, profiles: list[str], catalog: dict[ def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]: """Resolve all language entries in a CodeQL plan to concrete pack references.""" - languages_out: list[dict[str, Any]] = [] - - for entry in plan.get("languages", []): - language_id = entry["id"] - profiles = list(entry.get("packs", [])) - languages_out.append( + units_out: list[dict[str, Any]] = [] + + for unit in plan.get("analysis_units", []): + languages_out: list[dict[str, Any]] = [] + for entry in unit.get("languages", []): + language_id = entry["id"] + profiles = list(entry.get("packs", [])) + languages_out.append( + { + "id": language_id, + "profiles": profiles, + "packs": resolve_pack_profiles(language_id, profiles, catalog), + "profile_packs": _resolve_profile_packs(language_id, profiles, catalog), + "candidate_policy": { + profile: {"allow_precreate": allow_precreate(profile, catalog)} + for profile in profiles + }, + } + ) + units_out.append( { - "id": language_id, - "profiles": profiles, - "packs": resolve_pack_profiles(language_id, profiles, catalog), - "profile_packs": _resolve_profile_packs(language_id, profiles, catalog), - "candidate_policy": { - profile: {"allow_precreate": allow_precreate(profile, catalog)} - for profile in profiles - }, + "id": unit["id"], + "path": unit["path"], + "kind": unit.get("kind"), + "primary": unit.get("primary", False), + "languages": languages_out, } ) return { "schema_version": 1, "generated_by": "codeql-pack-resolver", - "languages": languages_out, + "analysis_units": units_out, } diff --git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py index da88c938..15d52e6c 100644 --- a/tools/codeql/pipeline.py +++ b/tools/codeql/pipeline.py @@ -6,12 +6,34 @@ from __future__ import annotations from pathlib import Path -from typing import Any +from typing import Any, Callable +from datetime import datetime, timezone 
from codeql.config import ROOT, CodeQLConfig -def run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: +def record_skipped_run(config: CodeQLConfig, reason: str) -> dict[str, Any]: + """Write a skipped CodeQL manifest and summary for a deliberate skip.""" + from codeql.runner import _manifest, write_manifest, write_summary + + started_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + manifest = _manifest( + "skipped", + started_at, + config, + [], + [], + failures=[reason], + skip_reason=reason, + ) + output_dir = config.abs_output_dir + normalized_dir = output_dir / "normalized" + write_manifest(manifest, output_dir) + write_summary(manifest, normalized_dir, output_dir) + return manifest + + +def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | None = None) -> dict[str, Any]: """Run the complete CodeQL analysis pipeline. Steps (all internal, no printing): @@ -32,10 +54,11 @@ def run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: output_dir.mkdir(parents=True, exist_ok=True) # Step 1: run analysis - manifest = run_codeql(config) + manifest = run_codeql(config, progress=progress) # Step 2: write manifest write_manifest(manifest, output_dir) + _progress(progress, "CodeQL: manifest written") status = manifest["status"] normalized_dir = output_dir / "normalized" @@ -51,6 +74,7 @@ def run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: sarif_dir, normalized_dir, resolved, manifest.get("codeql_version", "unknown"), ROOT, ) + _progress(progress, "CodeQL: normalized SARIF artifacts") except Exception as exc: manifest.setdefault("warnings", []).append( f"SARIF normalization failed: {exc}" @@ -62,6 +86,7 @@ def run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: if signals_path.is_file(): try: import_risk(signals_path, risk_path) + _progress(progress, "CodeQL: imported file risk signals") except Exception as exc: manifest.setdefault("warnings", []).append( f"Risk import failed: {exc}" @@ -69,5 +94,11 @@ def 
run_full_pipeline(config: CodeQLConfig) -> dict[str, Any]: # Step 5: write summary write_summary(manifest, normalized_dir, output_dir) + _progress(progress, "CodeQL: summary written") return manifest + + +def _progress(progress: Callable[[str], None] | None, message: str) -> None: + if progress is not None: + progress(message) diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 072338af..c57e3c6e 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -9,13 +9,14 @@ import sys from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, Callable +from codeql.capabilities import supported_build_modes from codeql.config import ROOT, CodeQLConfig from codeql.packs import PackResolverError, dump_yaml, load_codeql_plan, load_pack_catalog, resolve_plan_packs -def run_codeql(config: CodeQLConfig) -> dict[str, Any]: +def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = None) -> dict[str, Any]: """Run CodeQL analysis for every language in the plan. Returns the run manifest as a dict. 
@@ -30,6 +31,7 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: return _manifest("soft-failed", now_utc, config, [], [f"CodeQL binary not found at {binary_path}"]) version = _get_codeql_version(binary_path) + _progress(progress, f"CodeQL: using {version}") plan_path = ROOT / "itemdb/notes/codeql-plan.yml" if not plan_path.is_file(): @@ -40,6 +42,7 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: return _manifest("skipped", now_utc, config, [version], [], failures=[f"Pack catalog not found at {catalog_path}"]) try: + _progress(progress, f"CodeQL: loading plan {_rel(plan_path)}") catalog = load_pack_catalog(catalog_path) plan = load_codeql_plan(plan_path) resolved = resolve_plan_packs(plan, catalog) @@ -49,54 +52,84 @@ def run_codeql(config: CodeQLConfig) -> dict[str, Any]: resolved_path = config.abs_output_dir / "selected-query-packs.yml" resolved_path.parent.mkdir(parents=True, exist_ok=True) resolved_path.write_text(dump_yaml(resolved), encoding="utf-8") + _progress(progress, f"CodeQL: resolved packs for {len(resolved['analysis_units'])} analysis unit(s)") - source_path = plan.get("source_path", "./src") exclude_patterns = plan.get("exclude", []) warnings: list[str] = [] failures: list[str] = [] language_ids: list[str] = [] - - for lang_entry in resolved["languages"]: - language_id = lang_entry["id"] - profiles = lang_entry.get("profiles", []) - profile_packs = lang_entry.get("profile_packs", {}) - language_ids.append(language_id) - - build_mode, build_command = _lookup_build(lang_entry, plan.get("languages", [])) - - db_dir = config.abs_database_dir / language_id - sarif_dir = config.abs_output_dir / "sarif" - sarif_dir.mkdir(parents=True, exist_ok=True) - - ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) - if not ok: - failures.append(msg) - return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids) - - for profile in 
profiles: - packs = profile_packs.get(profile, []) - if not packs: - continue - sarif_path = sarif_dir / f"{language_id}.{profile}.sarif" - ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) + analysis_units: list[str] = [] + + for unit_entry in resolved["analysis_units"]: + unit_id = unit_entry["id"] + source_path = unit_entry["path"] + analysis_units.append(unit_id) + plan_unit = _lookup_unit(unit_id, plan.get("analysis_units", [])) + + for lang_entry in unit_entry["languages"]: + language_id = lang_entry["id"] + profiles = lang_entry.get("profiles", []) + profile_packs = lang_entry.get("profile_packs", {}) + language_ids.append(f"{unit_id}:{language_id}") + + build_mode, build_command = _lookup_build(language_id, plan_unit.get("languages", [])) + supported_modes = supported_build_modes(language_id) + if build_mode not in supported_modes: + failures.append( + f"Unsupported build_mode '{build_mode}' for {language_id} in analysis unit {unit_id}. " + f"Allowed: {', '.join(sorted(supported_modes))}" + ) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + + db_dir = config.abs_database_dir / unit_id / language_id + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True, exist_ok=True) + + _progress(progress, f"CodeQL: creating database {unit_id}:{language_id} ({build_mode})") + ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) if not ok: failures.append(msg) - return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + _progress(progress, f"CodeQL: database ready {unit_id}:{language_id}") + + for profile in profiles: + packs = profile_packs.get(profile, []) + if not packs: + continue + sarif_path = sarif_dir / 
f"{unit_id}.{language_id}.{profile}.sarif" + _progress(progress, f"CodeQL: analyzing {unit_id}:{language_id} profile {profile}") + ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) + if not ok: + failures.append(msg) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + _progress(progress, f"CodeQL: SARIF written {_rel(sarif_path)}") if failures: - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids) + return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) - return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids) + return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) def _tool_failure_status(config: CodeQLConfig) -> str: return "failed" if config.fail_policy == "hard" else "soft-failed" -def _lookup_build(lang_entry: dict, plan_languages: list[dict]) -> tuple[str, str | None]: +def _progress(progress: Callable[[str], None] | None, message: str) -> None: + if progress is not None: + progress(message) + + +def _lookup_unit(unit_id: str, plan_units: list[dict]) -> dict: + """Return the plan analysis unit with *unit_id*.""" + for unit in plan_units: + if unit.get("id") == unit_id: + return unit + return {} + + +def _lookup_build(language_id: str, plan_languages: list[dict]) -> tuple[str, str | None]: """Return (build_mode, build_command) for a language entry.""" - language_id = lang_entry["id"] for pl in plan_languages: if pl.get("id") == language_id: mode = pl.get("build_mode", "none") @@ -138,10 +171,12 @@ def _create_database( "--no-run-unnecessary-builds", ] - if build_mode == "manual" and build_command: - cmd += ["-c", build_command] + if build_mode == "none": + cmd += ["--build-mode=none"] + elif build_mode == "manual" and build_command: + cmd += ["--build-mode=manual", "-c", build_command] elif 
build_mode == "autobuild": - pass # let CodeQL auto-detect + cmd += ["--build-mode=autobuild"] try: result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) @@ -192,16 +227,20 @@ def _manifest( warnings: list[str], failures: list[str] | None = None, languages: list[str] | None = None, + analysis_units: list[str] | None = None, + skip_reason: str | None = None, ) -> dict[str, Any]: if failures is None: failures = [] if languages is None: languages = [] + if analysis_units is None: + analysis_units = [] codeql_version = versions[0] if versions else "unknown" now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - return { + manifest = { "schema_version": 1, "phase": "phase-1", "status": status, @@ -212,10 +251,14 @@ def _manifest( "plan_file": "itemdb/notes/codeql-plan.yml", "pack_catalog": str(_rel(config.abs_pack_catalog)), "fail_policy": config.fail_policy, + "analysis_units": analysis_units, "languages": languages, "warnings": warnings, "failures": failures if failures else [], } + if skip_reason: + manifest["skip_reason"] = skip_reason + return manifest def write_manifest(manifest: dict[str, Any], output_dir: Path) -> Path: @@ -279,6 +322,7 @@ def write_summary(manifest: dict[str, Any], normalized_dir: Path, output_dir: Pa lines.append("") path = output_dir / "codeql-summary.md" + path.parent.mkdir(parents=True, exist_ok=True) path.write_text("\n".join(lines) + "\n", encoding="utf-8") return path diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index 5971b12e..933ad011 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -17,6 +17,7 @@ import _colors as C from codecome.config import ROOT +from codeql.capabilities import is_supported_language, supported_build_modes REQUIRED_NOTES_1B = [ @@ -69,7 +70,8 @@ def _emit(console, level: str, text: str) -> None: "warn": C.warn, "info": C.info, } - fn_map.get(level, print)(text) + formatter = fn_map.get(level) + print(formatter(text) if 
formatter else text) def _emit_separator(console, style: str = "green") -> None: @@ -147,40 +149,92 @@ def check_phase_1a(console=None, findings_snapshot: dict[str, int] | None = None return 1 if plan.get("recommended") is True: - languages = plan.get("languages", []) - if not isinstance(languages, list) or len(languages) == 0: - _emit(console, "fail", "codeql-plan.yml: recommended=true but no language entries") + units = plan.get("analysis_units", []) + if not isinstance(units, list) or len(units) == 0: + _emit(console, "fail", "codeql-plan.yml: recommended=true but no analysis_units entries") return 1 - valid_build_modes = {"none", "manual", "autobuild"} valid_confidences = {"HIGH", "MEDIUM", "LOW"} - for i, lang in enumerate(languages): - if not isinstance(lang, dict): - _emit(console, "fail", f"codeql-plan.yml: language entry {i} is not a mapping") + seen_unit_ids: set[str] = set() + seen_databases: set[tuple[str, str]] = set() + for i, unit in enumerate(units): + if not isinstance(unit, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit {i} is not a mapping") return 1 - if "id" not in lang: - _emit(console, "fail", f"codeql-plan.yml: language entry {i} missing 'id'") + unit_id = unit.get("id") + if not isinstance(unit_id, str) or not unit_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit {i} missing valid 'id'") return 1 - if lang.get("confidence") not in valid_confidences: - _emit( - console, - "warn", - f"codeql-plan.yml: language '{lang.get('id', '?')}' has unexpected confidence '{lang.get('confidence')}'", - ) - if lang.get("build_mode") not in valid_build_modes: - _emit( - console, - "warn", - f"codeql-plan.yml: language '{lang.get('id', '?')}' has unexpected build_mode '{lang.get('build_mode')}'", - ) - if "packs" not in lang: - _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' missing 'packs'") + if unit_id in seen_unit_ids: + _emit(console, "fail", f"codeql-plan.yml: duplicate analysis unit id '{unit_id}'") 
return 1 - if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: - _emit(console, "fail", f"codeql-plan.yml: language '{lang['id']}' has empty packs list") + seen_unit_ids.add(unit_id) + + unit_path = unit.get("path") + if not isinstance(unit_path, str) or not unit_path: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' missing valid 'path'") + return 1 + resolved_path = (ROOT / unit_path).resolve() + src_root = (ROOT / "src").resolve() + try: + under_src = resolved_path == src_root or resolved_path.is_relative_to(src_root) + except ValueError: + under_src = False + if not under_src: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path must be under src/: {unit_path}") + return 1 + if "_codeql_detected_source_root" in resolved_path.parts: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path uses CodeQL-generated helper path") + return 1 + if not resolved_path.exists(): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path does not exist: {unit_path}") + return 1 + + languages = unit.get("languages") + if not isinstance(languages, list) or len(languages) == 0: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' has no languages") return 1 - _emit(console, "ok", f"codeql-plan.yml: {len(languages)} language(s) configured") + for j, lang in enumerate(languages): + if not isinstance(lang, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {j} is not a mapping") + return 1 + language_id = lang.get("id") + if not isinstance(language_id, str) or not language_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {j} missing valid 'id'") + return 1 + if not is_supported_language(language_id): + _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") + return 1 + db_key = (unit_id, language_id) + if db_key in seen_databases: + 
_emit(console, "fail", f"codeql-plan.yml: duplicate language '{language_id}' in analysis unit '{unit_id}'") + return 1 + seen_databases.add(db_key) + if lang.get("confidence") not in valid_confidences: + _emit( + console, + "warn", + f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unexpected confidence '{lang.get('confidence')}'", + ) + build_mode = lang.get("build_mode") + supported_modes = supported_build_modes(language_id) + if build_mode not in supported_modes: + allowed = ", ".join(sorted(supported_modes)) + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unsupported build_mode '{build_mode}' (allowed: {allowed})") + return 1 + build_command = lang.get("build_command") + if build_mode == "manual" and not (isinstance(build_command, str) and build_command.strip()): + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' uses manual build without build_command") + return 1 + if "packs" not in lang: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' missing 'packs'") + return 1 + if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has empty packs list") + return 1 + + _emit(console, "ok", f"codeql-plan.yml: {len(units)} analysis unit(s) configured") _emit_separator(console, "green") _emit(console, "ok", "Ready to run Phase 1b (CodeQL-assisted Reconnaissance).") From 523e8c26e55096802077a4e44d633a9f051291a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 00:33:02 +0200 Subject: [PATCH 24/47] fix: mark unused runner test session id --- tests/test_codecome_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index 850821d0..5c0f6f1b 100644 --- 
a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -124,7 +124,7 @@ def fake_consume(*a, **kw): def fake_fatal(console, title, msg): fatal_errors.append(msg) - code, session_id, _res, _path = runner._run_single_attempt( + code, _session_id, _res, _path = runner._run_single_attempt( mock_args, mock_console, "do work", "model", "var", "http://base", "auth", "dir", lambda *a: None, emit_fatal_error_fn=fake_fatal From 11e9674b85a2e7c31a119bf8f0d29f9d5df1699b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 03:20:23 +0200 Subject: [PATCH 25/47] feat: stream CodeQL stderr live, add configurable timeouts with plan estimates --- prompts/phase-1a-profile.md | 6 +++ templates/codeql-plan.yml | 7 +++ tests/test_codeql_runner.py | 56 ++++++++++++++++++++---- tools/codeql/config.py | 19 ++++++++ tools/codeql/runner.py | 87 +++++++++++++++++++++++++++++-------- 5 files changed, 148 insertions(+), 27 deletions(-) diff --git a/prompts/phase-1a-profile.md b/prompts/phase-1a-profile.md index 54a576b0..222f5ba6 100644 --- a/prompts/phase-1a-profile.md +++ b/prompts/phase-1a-profile.md @@ -77,6 +77,12 @@ Rules: - Use `manual` only when you identified a concrete build command for that analysis unit. - Use `autobuild` only as an explicit choice when build files exist but the exact command is uncertain. - Fill in `build_command` when `build_mode` is `manual`. +- Estimate `db_create_timeout` (seconds) for each language when `build_mode` is `manual` or `autobuild`: + - For `none` mode leave it unset; harness default is 600s. + - Estimate based on approximate source file count, build complexity, and whether compilation is involved. + - Rule of thumb: ~300s for small projects, ~600s for medium, ~1200-1800s for large C/C++ corpora. + - Round up to be safe; CodeQL extraction adds significant overhead per compiled file. +- Estimate `analyze_timeout` (seconds) per profile if query packs are known to be heavy (e.g. 
security suites on large codebases); otherwise omit to use harness default. - Set `recommended: false` if you cannot confidently profile any language. - Add relevant `notes` explaining your language choices and any uncertainties. - Update `exclude` patterns to match the target's test, fixture, vendor, and generated code directories if different from the defaults. diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml index 8ed1cd8c..20b0b948 100644 --- a/templates/codeql-plan.yml +++ b/templates/codeql-plan.yml @@ -34,6 +34,8 @@ analysis_units: [] # confidence: "HIGH" # build_mode: "manual" # build_command: "make -C src/native" +# db_create_timeout: 1800 # optional: seconds, model-estimated from source size +# analyze_timeout: 900 # optional: seconds, per query-profile run # packs: # - "official" # - "github-security-lab" @@ -46,6 +48,11 @@ analysis_units: [] # none: python, javascript-typescript, ruby, csharp, java-kotlin # manual/autobuild: c-cpp, go, csharp, java-kotlin, swift # Allowed pack profile names: official, github-security-lab, trailofbits, coding-standards, local +# +# Optional per-language timeout overrides (seconds): +# - db_create_timeout: max seconds for 'codeql database create' (default 600) +# - analyze_timeout: max seconds for 'codeql database analyze' per profile (default 600) +# Use CODEQL_DB_CREATE_TIMEOUT / CODEQL_ANALYZE_TIMEOUT env vars to override at runtime. 
exclude: - "src/**/tests/**" diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index fee1c714..4ca1ed72 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -8,7 +8,7 @@ sys.path.insert(0, str(ROOT / "tools")) from codeql.config import CodeQLConfig -from codeql.runner import _create_database, _lookup_build, _manifest, run_codeql, write_manifest +from codeql.runner import _create_database, _lookup_build, _lookup_timeout, _manifest, run_codeql, write_manifest def test_manifest_completed() -> None: @@ -123,9 +123,12 @@ def test_lookup_build_no_match_within_plan() -> None: def test_create_database_creates_parent_dir(tmp_path: Path) -> None: db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "c-cpp" - completed = MagicMock(returncode=0, stderr="") + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] - with patch("codeql.runner.subprocess.run", return_value=completed) as mock_run: + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: ok, msg = _create_database( tmp_path / "codeql", "c-cpp", @@ -139,15 +142,18 @@ def test_create_database_creates_parent_dir(tmp_path: Path) -> None: assert ok is True assert msg == "" assert db_dir.parent.is_dir() - assert mock_run.call_args.args[0][3] == str(db_dir) - assert "--build-mode=none" in mock_run.call_args.args[0] + assert mock_popen.call_args.args[0][3] == str(db_dir) + assert "--build-mode=none" in mock_popen.call_args.args[0] def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "c-cpp" - completed = MagicMock(returncode=0, stderr="") + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] - with patch("codeql.runner.subprocess.run", return_value=completed) as mock_run: + with patch("codeql.runner.subprocess.Popen", 
return_value=mock_process) as mock_popen: ok, msg = _create_database( tmp_path / "codeql", "c-cpp", @@ -160,7 +166,7 @@ def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: assert ok is True assert msg == "" - cmd = mock_run.call_args.args[0] + cmd = mock_popen.call_args.args[0] assert "--build-mode=manual" in cmd assert "-c" in cmd assert "make -C src/native" in cmd @@ -217,3 +223,37 @@ def test_run_codeql_database_failure_honors_soft_policy(tmp_path: Path) -> None: assert manifest["failures"] == ["db create failed"] assert manifest["analysis_units"] == ["root"] assert manifest["languages"] == ["root:c-cpp"] + + +def test_lookup_timeout_plan_takes_priority() -> None: + languages = [ + {"id": "c-cpp", "db_create_timeout": 1800, "analyze_timeout": 900}, + ] + assert _lookup_timeout("db_create_timeout", "c-cpp", languages, 600) == 1800 + assert _lookup_timeout("analyze_timeout", "c-cpp", languages, 600) == 900 + + +def test_lookup_timeout_falls_back_to_default() -> None: + languages = [{"id": "c-cpp"}] + assert _lookup_timeout("db_create_timeout", "c-cpp", languages, 600) == 600 + assert _lookup_timeout("analyze_timeout", "c-cpp", [], 600) == 600 + + +def test_create_database_streams_stderr_to_progress(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "n" / "c-cpp" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = ["extracting file\n", "compiling done\n"] + + messages: list[str] = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process): + ok, msg = _create_database( + tmp_path / "codeql", "c-cpp", "./src", db_dir, + "none", None, [], progress=messages.append, + ) + + assert ok is True + assert "CodeQL: extracting file" in messages + assert "CodeQL: compiling done" in messages diff --git a/tools/codeql/config.py b/tools/codeql/config.py index d3be9045..f9ab8960 100644 --- a/tools/codeql/config.py +++ 
b/tools/codeql/config.py @@ -46,6 +46,8 @@ "max_candidates": 10, "sweep_enabled": True, "sweep_inject_context": True, + "db_create_timeout": 600, + "analyze_timeout": 600, } @@ -125,6 +127,9 @@ class CodeQLConfig: sweep_enabled: bool = True sweep_inject_context: bool = True + db_create_timeout: int = 600 + analyze_timeout: int = 600 + # Absolute paths (resolved from ROOT) abs_pack_catalog: Path = field(default_factory=Path) abs_install_path: Path = field(default_factory=Path) @@ -217,6 +222,18 @@ def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> sweep_inject_context = _get("sweep_inject_context", DEFAULTS["sweep_inject_context"], coerce=bool) + # Timeout settings + db_create_timeout = _safe_int( + _str_env("CODEQL_DB_CREATE_TIMEOUT") + or _get("db_create_timeout", DEFAULTS["db_create_timeout"]), + DEFAULTS["db_create_timeout"], + ) + analyze_timeout = _safe_int( + _str_env("CODEQL_ANALYZE_TIMEOUT") + or _get("analyze_timeout", DEFAULTS["analyze_timeout"]), + DEFAULTS["analyze_timeout"], + ) + return CodeQLConfig( enabled=enabled, fail_policy=fail_policy, @@ -233,6 +250,8 @@ def _get(key: str, default: Any, env: str | None = None, coerce: Any = None) -> max_candidates=max_candidates, sweep_enabled=sweep_enabled, sweep_inject_context=sweep_inject_context, + db_create_timeout=db_create_timeout, + analyze_timeout=analyze_timeout, abs_pack_catalog=(ROOT / pack_catalog).resolve(), abs_install_path=(ROOT / install_path).resolve(), abs_output_dir=(ROOT / output_dir).resolve(), diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index c57e3c6e..5a0a4515 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -7,6 +7,7 @@ import subprocess import sys +import threading from datetime import datetime, timezone from pathlib import Path from typing import Any, Callable @@ -74,6 +75,10 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No language_ids.append(f"{unit_id}:{language_id}") build_mode, 
build_command = _lookup_build(language_id, plan_unit.get("languages", [])) + plan_languages = plan_unit.get("languages", []) + db_timeout = _lookup_timeout("db_create_timeout", language_id, plan_languages, config.db_create_timeout) + analyze_timeout = _lookup_timeout("analyze_timeout", language_id, plan_languages, config.analyze_timeout) + supported_modes = supported_build_modes(language_id) if build_mode not in supported_modes: failures.append( @@ -87,7 +92,7 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No sarif_dir.mkdir(parents=True, exist_ok=True) _progress(progress, f"CodeQL: creating database {unit_id}:{language_id} ({build_mode})") - ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns) + ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns, timeout=db_timeout, progress=progress) if not ok: failures.append(msg) return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) @@ -99,7 +104,7 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No continue sarif_path = sarif_dir / f"{unit_id}.{language_id}.{profile}.sarif" _progress(progress, f"CodeQL: analyzing {unit_id}:{language_id} profile {profile}") - ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path) + ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path, timeout=analyze_timeout, progress=progress) if not ok: failures.append(msg) return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) @@ -138,6 +143,16 @@ def _lookup_build(language_id: str, plan_languages: list[dict]) -> tuple[str, st return "none", None +def _lookup_timeout(field: str, language_id: str, plan_languages: list[dict], default: int) -> int: + """Return a per-language timeout, falling back to 
*default*.""" + for pl in plan_languages: + if pl.get("id") == language_id: + value = pl.get(field) + if isinstance(value, (int, float)) and value > 0: + return int(value) + return default + + def _get_codeql_version(binary: Path) -> str: try: result = subprocess.run( @@ -158,6 +173,8 @@ def _create_database( build_mode: str, build_command: str | None, exclude_patterns: list[str], + timeout: int = 600, + progress: Callable[[str], None] | None = None, ) -> tuple[bool, str]: """Create a CodeQL database. Returns (success, message).""" db_dir.parent.mkdir(parents=True, exist_ok=True) @@ -178,17 +195,8 @@ def _create_database( elif build_mode == "autobuild": cmd += ["--build-mode=autobuild"] - try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) - except subprocess.TimeoutExpired: - return False, f"Database create timed out for {language_id}" - except Exception as exc: - return False, f"Database create failed for {language_id}: {exc}" - - if result.returncode != 0: - return False, f"Database create failed for {language_id}:\n{result.stderr[:2000]}" - - return True, "" + return _run_with_progress(cmd, f"Database create timed out for {language_id} after {timeout}s", + f"Database create failed for {language_id}", timeout, progress) def _run_analyze( @@ -196,6 +204,8 @@ def _run_analyze( db_dir: Path, packs: list[str], sarif_path: Path, + timeout: int = 600, + progress: Callable[[str], None] | None = None, ) -> tuple[bool, str]: """Run codeql database analyze. 
Returns (success, message).""" cmd = [ @@ -206,15 +216,54 @@ def _run_analyze( "--no-sarif-add-query-help", ] + packs + return _run_with_progress(cmd, f"Analyze timed out for {db_dir.name} after {timeout}s", + f"Analyze failed for {db_dir.name}", timeout, progress) + + +def _run_with_progress( + cmd: list[str], + timeout_msg_prefix: str, + failure_msg_prefix: str, + timeout: int, + progress: Callable[[str], None] | None, +) -> tuple[bool, str]: + """Run a subprocess, streaming stderr line-by-line to *progress*.""" try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) - except subprocess.TimeoutExpired: - return False, f"Analyze timed out for {db_dir.name} with packs {packs}" + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) except Exception as exc: - return False, f"Analyze failed for {db_dir.name} with packs {packs}: {exc}" + return False, f"{failure_msg_prefix}: {exc}" + + stderr_lines: list[str] = [] + + def _read_stderr() -> None: + for line in process.stderr: + stripped = line.rstrip() + if stripped: + stderr_lines.append(stripped) + _progress(progress, f"CodeQL: {stripped}") + + reader = threading.Thread(target=_read_stderr, daemon=True) + reader.start() + + try: + returncode = process.wait(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + reader.join(timeout=5) + detail = "\n".join(stderr_lines[-40:]) + return False, f"{timeout_msg_prefix}\n{detail}" if detail else timeout_msg_prefix + + reader.join(timeout=5) - if result.returncode != 0: - return False, f"Analyze failed for {db_dir.name} with packs {packs}:\n{result.stderr[:2000]}" + if returncode != 0: + detail = "\n".join(stderr_lines[-40:]) + return False, f"{failure_msg_prefix}:\n{detail}" if detail else failure_msg_prefix return True, "" From 34e66155c0686aa5031d242fe074e78e796f49b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 03:33:25 
+0200 Subject: [PATCH 26/47] fix: return skipped when no CodeQL languages resolve, skip artifact check for empty languages --- tests/test_codeql_artifacts.py | 11 ++++++++++- tests/test_codeql_runner.py | 31 +++++++++++++++++++++++++++++++ tools/codeql/artifacts.py | 5 +++-- tools/codeql/runner.py | 5 +++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/tests/test_codeql_artifacts.py b/tests/test_codeql_artifacts.py index 7170050e..2a35b6d7 100644 --- a/tests/test_codeql_artifacts.py +++ b/tests/test_codeql_artifacts.py @@ -40,7 +40,7 @@ def test_completed_all_present(tmp_path: Path) -> None: def test_completed_missing_normalized(tmp_path: Path) -> None: out = tmp_path / "codeql" - _write_manifest(out, {"status": "completed", "failures": []}) + _write_manifest(out, {"status": "completed", "languages": ["python"], "failures": []}) status, warnings = check_artifacts(out) assert status == "completed" @@ -83,3 +83,12 @@ def test_invalid_status(tmp_path: Path) -> None: status, warnings = check_artifacts(out) assert status == "unknown" assert any("bogus" in w for w in warnings) + + +def test_completed_empty_languages_skips_normalized_check(tmp_path: Path) -> None: + out = tmp_path / "codeql" + _write_manifest(out, {"status": "completed", "languages": [], "failures": []}) + + status, warnings = check_artifacts(out) + assert status == "completed" + assert warnings == [] diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 4ca1ed72..ef790854 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -257,3 +257,34 @@ def test_create_database_streams_stderr_to_progress(tmp_path: Path) -> None: assert ok is True assert "CodeQL: extracting file" in messages assert "CodeQL: compiling done" in messages + + +def test_run_codeql_empty_languages_returns_skipped(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", 
encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"): + manifest = run_codeql(config) + + assert manifest["status"] == "skipped" + assert manifest["languages"] == [] + assert any("No languages resolved" in f for f in manifest["failures"]) diff --git a/tools/codeql/artifacts.py b/tools/codeql/artifacts.py index 7edac524..e5b1ec92 100644 --- a/tools/codeql/artifacts.py +++ b/tools/codeql/artifacts.py @@ -46,8 +46,9 @@ def check_artifacts(output_dir: Path) -> tuple[str, list[str]]: if isinstance(failures, list): warnings.extend(failures) - # For completed runs, verify normalized outputs exist. - if status == "completed": + # For completed runs, verify normalized outputs exist (only if languages were analyzed). 
+ languages = manifest.get("languages") or manifest.get("language_ids", []) + if status == "completed" and languages: normalized_dir = output_dir / "normalized" for expected in ("alerts.yml", "file-signals.yml"): if not (normalized_dir / expected).is_file(): diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 5a0a4515..990a6595 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -113,6 +113,11 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No if failures: return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) + if not language_ids: + return _manifest("skipped", now_utc, config, [version], warnings, + failures=["No languages resolved from analysis plan."], + languages=language_ids, analysis_units=analysis_units) + return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) From 549de46847106982a20c667490dbab8301e96731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 03:40:38 +0200 Subject: [PATCH 27/47] fix: add Recording tools header separator in make check output --- tools/codecome.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/codecome.py b/tools/codecome.py index 45c5baa8..ec313cac 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -481,15 +481,17 @@ def command_check(_: argparse.Namespace) -> int: check_exit = check_codeql_status() + print() + # Warn (do not fail) about missing optional recording tools used by Phase 5. 
recording_warnings = check_recording_tools() if recording_warnings: - print() - print(C.header("Optional recording tools (used by phase-5 exploit demonstrations):")) + print(C.header("Recording tools:")) for message in recording_warnings: print(C.warn(message)) else: - print(C.ok("Optional recording tools available (asciinema, agg, ffmpeg, Xvfb).")) + print(C.header("Recording tools:")) + print(C.ok("all tools available (asciinema, agg, ffmpeg, Xvfb).")) # Probe only the current helper invocation context; phase-5 may later run # from a different shell, container, or PTY wrapper. From 6564ab52dec0ca7697321ddfab663fbfa4a75e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 03:48:17 +0200 Subject: [PATCH 28/47] feat: add Phase progress summary to make check output --- tools/codecome.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tools/codecome.py b/tools/codecome.py index ec313cac..0e23c650 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -350,6 +350,81 @@ def _phase_1_notes_exist() -> bool: return (notes_dir / "target-profile.md").is_file() and (notes_dir / "build-model.md").is_file() +def check_phase_progress() -> None: + """Print a summary of which phases have been run based on durable artifacts.""" + from phases.phase_1_gates import REQUIRED_NOTES_1B + + notes_dir = ROOT / "itemdb" / "notes" + evidence_root = ROOT / "itemdb" / "evidence" + counts = count_findings() + rows: list[tuple[str, str, str]] = [] + + # Phase 1a + has_1a = all( + (notes_dir / name).is_file() + for name in ("target-profile.md", "build-model.md", "codeql-plan.yml") + ) + rows.append(("Phase 1a", "ok" if has_1a else "info", "completed" if has_1a else "not run")) + + # CodeQL + manifest_path = ROOT / "itemdb" / "codeql" / "run-manifest.yml" + if manifest_path.is_file(): + try: + manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + status = manifest.get("status", 
"unknown") if isinstance(manifest, dict) else "unknown" + except Exception: + status = "unknown" + level = "ok" if status == "completed" else "warn" if status == "soft-failed" else "info" + rows.append(("CodeQL", level, status)) + else: + rows.append(("CodeQL", "info", "not run")) + + # Phase 1b + missing_1b = [n for n in REQUIRED_NOTES_1B if not (notes_dir / n).is_file()] + if not missing_1b: + rows.append(("Phase 1b", "ok", "completed")) + elif len(missing_1b) < len(REQUIRED_NOTES_1B): + rows.append(("Phase 1b", "warn", f"{len(missing_1b)} of {len(REQUIRED_NOTES_1B)} notes missing")) + else: + rows.append(("Phase 1b", "info", "not run")) + + # Phase 1c + has_1c = (notes_dir / "sandbox-plan.md").is_file() + rows.append(("Phase 1c", "ok" if has_1c else "info", "completed" if has_1c else "not run")) + + # Phase 2 + pending = counts["PENDING"] + rows.append(("Phase 2", "ok" if pending else "info", f"{pending} PENDING findings" if pending else "not run")) + + # Phase 3 + reviewed = counts["CONFIRMED"] + counts["EXPLOITED"] + counts["REJECTED"] + counts["DUPLICATE"] + rows.append(("Phase 3", "ok" if reviewed else "info", f"{reviewed} reviewed" if reviewed else "not run")) + + # Phase 4 + confirmed = counts["CONFIRMED"] + counts["EXPLOITED"] + rows.append(("Phase 4", "ok" if confirmed else "info", f"{confirmed} confirmed" if confirmed else "not run")) + + # Phase 5 + exploited = counts["EXPLOITED"] + rows.append(("Phase 5", "ok" if exploited else "info", f"{exploited} exploited" if exploited else "not run")) + + # Phase 6 + has_report = (ROOT / "itemdb" / "reports" / "report.md").is_file() + rows.append(("Phase 6", "ok" if has_report else "info", "completed" if has_report else "not run")) + + print() + print(C.header("Phase progress:")) + label_width = max(len(label) for label, _, _ in rows) + for label, level, detail in rows: + prefix = " " + label.ljust(label_width) + if level == "ok": + print(C.ok(f"{prefix} {detail}")) + elif level == "warn": + 
print(C.warn(f"{prefix} {detail}")) + else: + print(C.info(f"{prefix} {detail}")) + + def check_codeql_status() -> int: """Check CodeQL configuration and last recorded artifact state.""" print() @@ -479,6 +554,7 @@ def command_check(_: argparse.Namespace) -> int: if not has_source: print(C.warn("src/ is empty — place your target source code there before running phase-1.")) + check_phase_progress() check_exit = check_codeql_status() print() From bc49575fa3f838b6f69062c1e8718ac802c2d27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 04:27:08 +0200 Subject: [PATCH 29/47] fix: address unresolved PR #29 review comments - runner.py: wire exclude_patterns into codeql database create via --codescanning-config (silent data loss bug) - pipeline.py: normalize SARIF for soft-failed runs too - normalize.py: use file-level running counter for monotonic alert IDs across multi-run SARIF files - pipeline.py: re-write manifest after post-processing warnings so on-disk manifest stays current - capabilities.py: return set() copy to prevent mutation of global BUILD_MODES_BY_LANGUAGE - docs: update stale scope headers in codeql-integration-plan.md and codeql-plan.yml --- .project/codeql-integration-plan.md | 4 ++-- templates/codeql-plan.yml | 2 +- tools/codeql/capabilities.py | 2 +- tools/codeql/normalize.py | 4 ++-- tools/codeql/pipeline.py | 7 +++++-- tools/codeql/runner.py | 19 +++++++++++++++++-- 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.project/codeql-integration-plan.md b/.project/codeql-integration-plan.md index f8f98b08..f0f18bfd 100644 --- a/.project/codeql-integration-plan.md +++ b/.project/codeql-integration-plan.md @@ -1,8 +1,8 @@ # CodeQL Integration Plan -Status: WIP planning document +Status: Implementation complete Branch: `wip/codeql-integration-plan` -Scope: planning + phased implementation (install, config, pack resolver, runner, SARIF normalization). 
+Scope: install, config, pack resolver, runner, SARIF normalization, risk import, pipeline orchestration. ## Goals diff --git a/templates/codeql-plan.yml b/templates/codeql-plan.yml index 20b0b948..7be3fc0d 100644 --- a/templates/codeql-plan.yml +++ b/templates/codeql-plan.yml @@ -1,6 +1,6 @@ # CodeQL analysis plan generated by Phase 1a target profiling. # The model fills in language entries based on source tree analysis. -# Reserved for future CodeQL run orchestration. +# Consumed by CodeQL run orchestration (tools/codeql/runner.py). schema_version: 1 generated_by: "phase-1a-profile" diff --git a/tools/codeql/capabilities.py b/tools/codeql/capabilities.py index 65b29ff7..f0604459 100644 --- a/tools/codeql/capabilities.py +++ b/tools/codeql/capabilities.py @@ -20,7 +20,7 @@ def supported_build_modes(language_id: str) -> set[str]: """Return supported CodeQL build modes for *language_id*.""" - return BUILD_MODES_BY_LANGUAGE.get(language_id, set()) + return set(BUILD_MODES_BY_LANGUAGE.get(language_id, set())) def is_supported_language(language_id: str) -> bool: diff --git a/tools/codeql/normalize.py b/tools/codeql/normalize.py index bbfca19b..af634655 100644 --- a/tools/codeql/normalize.py +++ b/tools/codeql/normalize.py @@ -99,12 +99,12 @@ def _parse_sarif( continue rules_lookup = _build_rules_lookup(run) - for ri, result in enumerate(results): + for result in results: if not isinstance(result, dict): continue alert = _normalize_one_result( result, rules_lookup, analysis_unit_id, language_id, pack_profile, - start_index + ri + 1, source_root, + start_index + len(alerts) + 1, source_root, ) if alert: alerts.append(alert) diff --git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py index 15d52e6c..40e12269 100644 --- a/tools/codeql/pipeline.py +++ b/tools/codeql/pipeline.py @@ -64,8 +64,8 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No normalized_dir = output_dir / "normalized" resolved_path = output_dir / 
"selected-query-packs.yml" - # Step 3: normalize SARIF (only if completed and SARIF files exist) - if status == "completed" and resolved_path.is_file(): + # Step 3: normalize SARIF (completed or soft-failed, with SARIF files present) + if status in ("completed", "soft-failed") and resolved_path.is_file(): sarif_dir = output_dir / "sarif" if list(sarif_dir.glob("*.sarif")): try: @@ -92,6 +92,9 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No f"Risk import failed: {exc}" ) + # Re-write manifest so any warnings appended above are on disk. + write_manifest(manifest, output_dir) + # Step 5: write summary write_summary(manifest, normalized_dir, output_dir) _progress(progress, "CodeQL: summary written") diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 990a6595..5b645371 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -7,6 +7,7 @@ import subprocess import sys +import tempfile import threading from datetime import datetime, timezone from pathlib import Path @@ -200,8 +201,22 @@ def _create_database( elif build_mode == "autobuild": cmd += ["--build-mode=autobuild"] - return _run_with_progress(cmd, f"Database create timed out for {language_id} after {timeout}s", - f"Database create failed for {language_id}", timeout, progress) + temp_config: Path | None = None + if exclude_patterns: + import yaml as _yaml + temp_config = Path(tempfile.mkdtemp(prefix="codeql-codescanning-")) / "codescanning-config.yml" + temp_config.parent.mkdir(parents=True, exist_ok=True) + config_content = {"paths-ignore": exclude_patterns} + temp_config.write_text(_yaml.dump(config_content, default_flow_style=False), encoding="utf-8") + cmd += ["--codescanning-config=" + str(temp_config)] + + try: + return _run_with_progress(cmd, f"Database create timed out for {language_id} after {timeout}s", + f"Database create failed for {language_id}", timeout, progress) + finally: + if temp_config is not None and temp_config.parent.exists(): + 
import shutil as _shutil + _shutil.rmtree(temp_config.parent, ignore_errors=True) def _run_analyze( From 89443370593f01919f2263d74e101a2b041e794b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 04:46:34 +0200 Subject: [PATCH 30/47] fix: respect fail_policy for unsupported CodeQL languages Phase 1a gate now checks fail_policy: unsupported languages emit a warning with soft policy (continue), fail with hard policy. Plan resolver (resolve_plan_packs) accepts skip_unsupported flag; run_codeql passes it when fail_policy is soft, so supported languages still run even when some are unsupported. PackResolverError catch now uses _tool_failure_status instead of hardcoded 'failed', producing 'soft-failed' with soft policy. _check_codeql_artifacts second status==failed block now guarded by fail_policy==hard; soft policy prints 'continuing'. 8 regression tests cover all four branches. --- tests/test_codecome_check_codeql.py | 79 +++++++++++++++++++++++++++++ tests/test_codeql_packs.py | 55 ++++++++++++++++++++ tests/test_codeql_runner.py | 72 ++++++++++++++++++++++++++ tests/test_phase_1_gates.py | 66 ++++++++++++++++++++++++ tools/codecome/phase_1.py | 11 +++- tools/codeql/packs.py | 23 +++++++-- tools/codeql/runner.py | 7 +-- tools/phases/phase_1_gates.py | 19 ++++++- 8 files changed, 323 insertions(+), 9 deletions(-) diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py index 49b44c65..abccaa8d 100644 --- a/tests/test_codecome_check_codeql.py +++ b/tests/test_codecome_check_codeql.py @@ -14,6 +14,19 @@ from codeql.config import CodeQLConfig +def _ensure_codecome_package(): + """Ensure 'codecome' is imported as the package (dir), not the module (.py). + + Some tests (e.g. test_codecome.py) import ``codecome.py`` as a module, + which blocks accessing ``codecome.phase_1`` as a submodule. Remove the + module from sys.modules so the package can be imported instead. 
+ """ + if "codecome" in sys.modules and not getattr( + sys.modules["codecome"], "__path__", None + ): + del sys.modules["codecome"] + + def _load_codecome_cli(): spec = importlib.util.spec_from_file_location("codecome_cli_script", ROOT / "tools" / "codecome.py") assert spec is not None @@ -88,3 +101,69 @@ def test_codeql_check_fails_failed_artifacts(tmp_path: Path, capsys) -> None: assert rc == 1 assert "artifacts: failed" in out assert "boom" in out + + +def test_check_codeql_artifacts_failed_soft_policy_returns_0(tmp_path: Path, capsys) -> None: + """_check_codeql_artifacts with status=failed and soft fail_policy should return 0.""" + config = _config(tmp_path, enabled=True, fail_policy="soft") + manifest_dir = config.abs_output_dir + manifest_dir.mkdir(parents=True) + (manifest_dir / "run-manifest.yml").write_text( + yaml.safe_dump( + { + "status": "failed", + "codeql_enabled": True, + "fail_policy": "soft", + "failures": ["boom"], + } + ), + encoding="utf-8", + ) + + _ensure_codecome_package() + from codecome.phase_1 import _check_codeql_artifacts as _check + import codecome.phase_1 as p1 + + saved = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch("codeql.config.resolve_config", return_value=config): + rc = _check(None) + finally: + p1.HAVE_RICH = saved + + out = capsys.readouterr().out + assert rc == 0 + assert "fail_policy is soft" in out + + +def test_check_codeql_artifacts_failed_hard_policy_returns_1(tmp_path: Path, capsys) -> None: + """_check_codeql_artifacts with status=failed and hard fail_policy should return 1.""" + config = _config(tmp_path, enabled=True, fail_policy="hard") + manifest_dir = config.abs_output_dir + manifest_dir.mkdir(parents=True) + (manifest_dir / "run-manifest.yml").write_text( + yaml.safe_dump( + { + "status": "failed", + "codeql_enabled": True, + "fail_policy": "hard", + "failures": ["boom"], + } + ), + encoding="utf-8", + ) + + _ensure_codecome_package() + from codecome.phase_1 import _check_codeql_artifacts as _check 
+ import codecome.phase_1 as p1 + + saved = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch("codeql.config.resolve_config", return_value=config): + rc = _check(None) + finally: + p1.HAVE_RICH = saved + + assert rc == 1 diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py index a1fc4e49..fc13786f 100644 --- a/tests/test_codeql_packs.py +++ b/tests/test_codeql_packs.py @@ -187,3 +187,58 @@ def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None assert "non-mapping analysis unit" in str(exc) else: raise AssertionError("expected PackResolverError") + + +def test_resolve_plan_packs_skip_unsupported(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "gilroy", + "path": "./src", + "languages": [ + {"id": "elixir", "packs": ["official"]}, + {"id": "python", "packs": ["official"]}, + ], + }, + ], + } + + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=True) + + languages = resolved["analysis_units"][0]["languages"] + assert len(languages) == 1 + assert languages[0]["id"] == "python" + warnings = resolved.get("warnings", []) + assert len(warnings) == 1 + assert "elixir" in warnings[0] + + +def test_resolve_plan_packs_skip_unsupported_raises_by_default(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "gilroy", + "path": "./src", + "languages": [ + {"id": "elixir", "packs": ["official"]}, + ], + }, + ], + } + + try: + resolve_plan_packs(plan, catalog) + except PackResolverError as exc: + assert "Unsupported CodeQL language id" in str(exc) + else: + raise AssertionError("expected PackResolverError with skip_unsupported=False") diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py 
index ef790854..339cde0d 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -288,3 +288,75 @@ def test_run_codeql_empty_languages_returns_skipped(tmp_path: Path) -> None: assert manifest["status"] == "skipped" assert manifest["languages"] == [] assert any("No languages resolved" in f for f in manifest["failures"]) + + +def test_run_codeql_pack_resolver_error_soft_policy(tmp_path: Path) -> None: + from codeql.packs import PackResolverError + + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", side_effect=PackResolverError("boom")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert manifest["fail_policy"] == "soft" + + +def test_run_codeql_skips_unsupported_languages_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + 
plan_path.write_text("schema_version: 1\n", encoding="utf-8") + + catalog = tmp_path / "templates" / "codeql-packs.yml" + catalog.parent.mkdir(parents=True) + catalog.write_text("schema_version: 1\npacks:\n python:\n official:\n - codeql/python-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + + resolved = { + "warnings": ["Skipping unsupported CodeQL language 'elixir' in analysis unit 'gilroy'"], + "analysis_units": [], + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={"packs": {"python": {"official": ["codeql/python-queries"]}}}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "gilroy", "path": "./src", "languages": [{"id": "elixir", "packs": ["official"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved): + manifest = run_codeql(config) + + assert manifest["status"] == "skipped" + assert "elixir" in manifest["warnings"][0] diff --git a/tests/test_phase_1_gates.py b/tests/test_phase_1_gates.py index 6d2886b5..505cb066 100644 --- a/tests/test_phase_1_gates.py +++ b/tests/test_phase_1_gates.py @@ -2,6 +2,7 @@ import sys from pathlib import Path +from unittest.mock import patch ROOT = Path(__file__).resolve().parents[1] @@ -15,3 +16,68 @@ def test_emit_plain_fallback_prints_formatted_text(capsys) -> None: out = capsys.readouterr().out assert "plain gate output" in out + + +def test_unsupported_language_soft_policy_warns_not_fails(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", 
encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: gilroy\n" + " path: ./src\n" + " languages:\n" + " - id: elixir\n" + " packs:\n" + " - official\n", + encoding="utf-8", + ) + + (tmp_path / "src").mkdir() + + mock_config = type("cfg", (), {"fail_policy": "soft", "enabled": True})() + + from phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 0 + assert "will be skipped" in out + + +def test_unsupported_language_hard_policy_fails(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: gilroy\n" + " path: ./src\n" + " languages:\n" + " - id: elixir\n" + " packs:\n" + " - official\n", + encoding="utf-8", + ) + + (tmp_path / "src").mkdir() + + mock_config = type("cfg", (), {"fail_policy": "hard", "enabled": True})() + + from phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 1 diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 59a2b8b8..6f655011 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -192,7 +192,7 @@ def _check_codeql_artifacts(console: Any) -> int: print(C.fail(msg)) return 1 - if status == "failed": + if status == "failed" and config.fail_policy == "hard": msg = "CodeQL artifact gate: FAILED — execution crashed, blocking 
Phase 1b" if HAVE_RICH: from rich.text import Text @@ -202,6 +202,15 @@ def _check_codeql_artifacts(console: Any) -> int: print(C.fail(msg)) return 1 + if status == "failed": + # fail_policy is soft, so treat as a non-blocking warning + if HAVE_RICH: + from rich.text import Text + console.print(Text("CodeQL artifact gate: execution crashed but fail_policy is soft — continuing", style="yellow")) + else: + import _colors as C + print(C.warn("CodeQL artifact gate: execution crashed but fail_policy is soft — continuing")) + label = f"CodeQL artifact gate: {status}" if HAVE_RICH: from rich.text import Text diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py index b2af3aa1..fbc510f8 100644 --- a/tools/codeql/packs.py +++ b/tools/codeql/packs.py @@ -167,15 +167,29 @@ def _resolve_profile_packs(language_id: str, profiles: list[str], catalog: dict[ return result -def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[str, Any]: - """Resolve all language entries in a CodeQL plan to concrete pack references.""" +def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any], skip_unsupported: bool = False) -> dict[str, Any]: + """Resolve all language entries in a CodeQL plan to concrete pack references. + + If *skip_unsupported* is True, language IDs not found in the catalog are + skipped with a warning instead of raising PackResolverError. 
+ """ units_out: list[dict[str, Any]] = [] + plan_warnings: list[str] = [] for unit in plan.get("analysis_units", []): languages_out: list[dict[str, Any]] = [] for entry in unit.get("languages", []): language_id = entry["id"] profiles = list(entry.get("packs", [])) + + if language_id not in catalog.get("packs", {}): + if skip_unsupported: + plan_warnings.append( + f"Skipping unsupported CodeQL language '{language_id}' in analysis unit '{unit['id']}'" + ) + continue + raise PackResolverError(f"Unsupported CodeQL language id: {language_id!r}.") + languages_out.append( { "id": language_id, @@ -198,11 +212,14 @@ def resolve_plan_packs(plan: dict[str, Any], catalog: dict[str, Any]) -> dict[st } ) - return { + result: dict[str, Any] = { "schema_version": 1, "generated_by": "codeql-pack-resolver", "analysis_units": units_out, } + if plan_warnings: + result["warnings"] = plan_warnings + return result def dump_yaml(data: dict[str, Any]) -> str: diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 5b645371..ad418c11 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -47,9 +47,10 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No _progress(progress, f"CodeQL: loading plan {_rel(plan_path)}") catalog = load_pack_catalog(catalog_path) plan = load_codeql_plan(plan_path) - resolved = resolve_plan_packs(plan, catalog) + skip_unsupported = config.fail_policy == "soft" + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=skip_unsupported) except PackResolverError as exc: - return _manifest("failed", now_utc, config, [version], [], failures=[str(exc)]) + return _manifest(_tool_failure_status(config), now_utc, config, [version], [], failures=[str(exc)]) resolved_path = config.abs_output_dir / "selected-query-packs.yml" resolved_path.parent.mkdir(parents=True, exist_ok=True) @@ -58,7 +59,7 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No exclude_patterns = plan.get("exclude", 
[]) - warnings: list[str] = [] + warnings: list[str] = list(resolved.get("warnings", [])) failures: list[str] = [] language_ids: list[str] = [] analysis_units: list[str] = [] diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index 933ad011..1a2a708b 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -19,6 +19,11 @@ from codecome.config import ROOT from codeql.capabilities import is_supported_language, supported_build_modes +try: + from codeql.config import resolve_config as _resolve_codeql_config +except ImportError: + _resolve_codeql_config = None # type: ignore[assignment] + REQUIRED_NOTES_1B = [ "attack-surface.md", @@ -204,8 +209,18 @@ def check_phase_1a(console=None, findings_snapshot: dict[str, int] | None = None _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {j} missing valid 'id'") return 1 if not is_supported_language(language_id): - _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") - return 1 + fail_policy = "soft" + if _resolve_codeql_config is not None: + try: + cfg = _resolve_codeql_config() + fail_policy = cfg.fail_policy + except Exception: + pass + if fail_policy == "hard": + _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") + return 1 + _emit(console, "warn", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}' — will be skipped (fail_policy=soft)") + continue db_key = (unit_id, language_id) if db_key in seen_databases: _emit(console, "fail", f"codeql-plan.yml: duplicate language '{language_id}' in analysis unit '{unit_id}'") From a254751f9635897d6be3e989f9c15d21a0bf46db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 05:11:43 +0200 Subject: [PATCH 31/47] fix: add CodeQL build repair retry --- prompts/phase-1-codeql-repair.md | 67 
+++++++++++++++++++ tests/test_codecome_check_codeql.py | 100 ++++++++++++++++++++++++++++ tests/test_codeql_pipeline.py | 77 +++++++++++++++++++++ tests/test_codeql_runner.py | 2 + tools/codecome/phase_1.py | 94 ++++++++++++++++++++++++++ tools/codeql/pipeline.py | 1 + tools/codeql/runner.py | 2 +- 7 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 prompts/phase-1-codeql-repair.md diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md new file mode 100644 index 00000000..dea0aae6 --- /dev/null +++ b/prompts/phase-1-codeql-repair.md @@ -0,0 +1,67 @@ +# CodeCome Phase 1: CodeQL Build Repair + +You are performing a narrow repair step after Phase 1a generated a CodeQL plan and the CodeQL database creation step failed. + +Your task is to make the smallest durable change needed so CodeQL can create a database on the next run. + +## Required Reading + +Read these files if they exist: + +- `AGENTS.md` +- `itemdb/notes/target-profile.md` +- `itemdb/notes/build-model.md` +- `itemdb/notes/codeql-plan.yml` +- `itemdb/codeql/run-manifest.yml` +- `itemdb/codeql/codeql-summary.md` + +Also inspect relevant CodeQL database logs under: + +- `itemdb/codeql/databases/**/log/*.log` + +Focus on the last useful `[build-stderr]`, `[build-stdout]`, `ERROR`, and `Exception caught` lines. + +## Goal + +Repair `itemdb/notes/codeql-plan.yml` so the next CodeQL run can create databases. + +For C/C++, Go, and Swift, do not use `build_mode: none`. Use only `manual` or `autobuild` as supported by the CodeQL integration. + +If autobuild failed because no supported root build system was detected, prefer `build_mode: manual` with a concrete `build_command`. + +## Allowed Writes + +You may write only: + +- `itemdb/notes/codeql-plan.yml` +- helper scripts under `tmp/` +- helper scripts under `sandbox/` +- a short run summary under `runs/` if useful + +Do not write helper scripts under `tools/`. + +Do not write helper scripts under `itemdb/`. 
+ +Do not modify files under `src/`. + +Do not modify project orchestration or configuration files. + +If the manual command is simple enough, put it directly in `build_command` instead of creating a helper script. + +## Build Command Rules + +- CodeQL runs the manual `build_command` from the analysis unit source path. +- Prefer commands that are deterministic and non-interactive. +- Prefer commands that avoid modifying `src/` when possible. +- If existing target build files naturally write object files or binaries into `src/`, document that limitation in the `notes` field. +- Use workspace-relative helper script paths that work from the CodeQL source path. +- Keep the plan schema and existing pack selections intact unless a minimal change requires otherwise. + +## Output Requirements + +Make the repair directly in files. At the end, summarize: + +- why the previous CodeQL build failed, +- what changed in `itemdb/notes/codeql-plan.yml`, +- any helper script created, +- the exact manual build command CodeQL will run next. 
diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py index abccaa8d..aabc175f 100644 --- a/tests/test_codecome_check_codeql.py +++ b/tests/test_codecome_check_codeql.py @@ -167,3 +167,103 @@ def test_check_codeql_artifacts_failed_hard_policy_returns_1(tmp_path: Path, cap p1.HAVE_RICH = saved assert rc == 1 + + +def test_codeql_repair_needed_for_autobuild_database_failure(tmp_path: Path) -> None: + _ensure_codecome_package() + from codecome.phase_1 import _codeql_repair_needed + + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True) + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump( + { + "status": "soft-failed", + "failures": ["Database create failed for c-cpp:\nNo supported build system detected."], + } + ), + encoding="utf-8", + ) + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text( + yaml.safe_dump( + { + "schema_version": 1, + "analysis_units": [ + { + "id": "native", + "path": "./src/native", + "languages": [ + {"id": "c-cpp", "build_mode": "autobuild", "packs": ["official"]} + ], + } + ], + } + ), + encoding="utf-8", + ) + + assert _codeql_repair_needed(output_dir, plan_path) is True + + +def test_codeql_repair_not_needed_after_manual_database_failure(tmp_path: Path) -> None: + _ensure_codecome_package() + from codecome.phase_1 import _codeql_repair_needed + + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True) + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump( + { + "status": "soft-failed", + "failures": ["Database create failed for c-cpp:\nmanual build failed."], + } + ), + encoding="utf-8", + ) + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text( + yaml.safe_dump( + { + "schema_version": 1, + "analysis_units": [ + { + "id": "native", + "path": "./src/native", + "languages": [ + {"id": "c-cpp", 
"build_mode": "manual", "build_command": "make", "packs": ["official"]} + ], + } + ], + } + ), + encoding="utf-8", + ) + + assert _codeql_repair_needed(output_dir, plan_path) is False + + +def test_phase_1_reruns_codeql_after_repair() -> None: + _ensure_codecome_package() + import codecome.phase_1 as p1 + + saved = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "count_findings_snapshot", return_value={}), \ + patch.object(p1, "_run_subphase", return_value=0) as subphase, \ + patch.object(p1, "check_phase_1a", return_value=0), \ + patch.object(p1, "check_phase_1b", return_value=0), \ + patch.object(p1, "check_phase_1c", return_value=0), \ + patch.object(p1, "_run_codeql", return_value=0) as run_codeql, \ + patch.object(p1, "_run_codeql_repair_if_needed", return_value=True), \ + patch.object(p1, "_check_codeql_artifacts", return_value=0): + rc = p1.run_phase_1(object(), None, None, object(), "http://127.0.0.1") + finally: + p1.HAVE_RICH = saved + + assert rc == 0 + assert run_codeql.call_count == 2 + assert subphase.call_count == 3 diff --git a/tests/test_codeql_pipeline.py b/tests/test_codeql_pipeline.py index 0965405c..a736b8d2 100644 --- a/tests/test_codeql_pipeline.py +++ b/tests/test_codeql_pipeline.py @@ -164,6 +164,83 @@ def test_pipeline_soft_failed_continues(tmp_path: Path) -> None: # Should not raise +def test_pipeline_normalize_failure_marks_failed_for_hard_policy(tmp_path: Path) -> None: + config = _make_config(tmp_path) + config.fail_policy = "hard" + (config.abs_output_dir / "selected-query-packs.yml").write_text( + "schema_version: 1\nanalysis_units: []\n", + encoding="utf-8", + ) + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True) + (sarif_dir / "root.python.official.sarif").write_text("{}", encoding="utf-8") + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": 
"2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "hard", + "languages": ["root:python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=manifest), \ + patch("codeql.normalize.normalize_all", side_effect=RuntimeError("bad sarif")), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "failed" + assert "SARIF normalization failed: bad sarif" in result["warnings"] + data = yaml.safe_load((config.abs_output_dir / "run-manifest.yml").read_text()) + assert data["status"] == "failed" + + +def test_pipeline_normalize_failure_marks_soft_failed_for_soft_policy(tmp_path: Path) -> None: + config = _make_config(tmp_path) + (config.abs_output_dir / "selected-query-packs.yml").write_text( + "schema_version: 1\nanalysis_units: []\n", + encoding="utf-8", + ) + sarif_dir = config.abs_output_dir / "sarif" + sarif_dir.mkdir(parents=True) + (sarif_dir / "root.python.official.sarif").write_text("{}", encoding="utf-8") + + manifest = { + "schema_version": 1, + "phase": "phase-1", + "status": "completed", + "codeql_enabled": True, + "codeql_version": "2.18.0", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:00Z", + "plan_file": "itemdb/notes/codeql-plan.yml", + "pack_catalog": "codeql-pack-catalog.yml", + "fail_policy": "soft", + "languages": ["root:python"], + "warnings": [], + "failures": [], + } + + with patch("codeql.runner.run_codeql", return_value=manifest), \ + patch("codeql.normalize.normalize_all", side_effect=RuntimeError("bad sarif")), \ + patch("codeql.pipeline.ROOT", tmp_path): + from codeql.pipeline import run_full_pipeline + + result = run_full_pipeline(config) + + assert result["status"] == "soft-failed" + assert "SARIF normalization failed: bad sarif" in result["warnings"] + + def 
test_record_skipped_run_writes_manifest_and_summary(tmp_path: Path) -> None: config = _make_config(tmp_path) config.enabled = False diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 339cde0d..ee9190af 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -1,5 +1,6 @@ from __future__ import annotations +import subprocess import sys from pathlib import Path from unittest.mock import MagicMock, patch @@ -144,6 +145,7 @@ def test_create_database_creates_parent_dir(tmp_path: Path) -> None: assert db_dir.parent.is_dir() assert mock_popen.call_args.args[0][3] == str(db_dir) assert "--build-mode=none" in mock_popen.call_args.args[0] + assert mock_popen.call_args.kwargs["stdout"] == subprocess.DEVNULL def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 6f655011..a562dc60 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -11,6 +11,7 @@ from __future__ import annotations +import os import time from pathlib import Path from typing import Any @@ -226,6 +227,89 @@ def _check_codeql_artifacts(console: Any) -> int: return 0 +def _load_codeql_yaml(path: Path) -> dict[str, Any]: + """Load a CodeQL YAML artifact as a mapping, returning {} on absence/errors.""" + if not path.is_file(): + return {} + try: + from codeql.packs import load_yaml_mapping + + return load_yaml_mapping(path, what=path.name) + except Exception: + return {} + + +def _codeql_repair_needed(output_dir: Path, plan_path: Path) -> bool: + """Return whether a failed CodeQL run should get one model repair attempt.""" + manifest = _load_codeql_yaml(output_dir / "run-manifest.yml") + status = manifest.get("status") + if status not in {"soft-failed", "failed"}: + return False + + failures = manifest.get("failures", []) + if not isinstance(failures, list): + return False + if not any("Database create failed" in str(failure) for failure in failures): + 
return False + + plan = _load_codeql_yaml(plan_path) + for unit in plan.get("analysis_units", []) if isinstance(plan.get("analysis_units"), list) else []: + languages = unit.get("languages", []) if isinstance(unit, dict) else [] + if not isinstance(languages, list): + continue + for language in languages: + if isinstance(language, dict) and language.get("build_mode") == "autobuild": + return True + return False + + +def _run_codeql_repair_if_needed( + *, + args: Any, + console: Any, + rendering_ctx: Any, + runner: ServerRunner, + base_url: str, +) -> bool: + """Ask the model to repair CodeQL build instructions after autobuild failure.""" + from codeql.config import resolve_config as _resolve_codeql_config + + max_retries = int(os.environ.get("CODEQL_REPAIR_RETRIES", "1")) + if max_retries <= 0: + return False + + config = _resolve_codeql_config() + plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml" + if not _codeql_repair_needed(config.abs_output_dir, plan_path): + return False + + msg = "CodeQL autobuild failed; asking the model to repair manual build instructions." 
+ if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + + for attempt in range(1, max_retries + 1): + rc = _run_subphase( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + phase_id="1-codeql-repair", + label=f"CodeQL Build Repair ({attempt}/{max_retries})", + agent="recon", + prompt_file="prompts/phase-1-codeql-repair.md", + ) + if rc != 0: + continue + return True + + return False + + # --------------------------------------------------------------------------- # Subphase runner # --------------------------------------------------------------------------- @@ -546,6 +630,16 @@ def run_phase_1( rc = _run_codeql(console) if rc != 0: return rc + if _run_codeql_repair_if_needed( + args=args, + console=console, + rendering_ctx=rendering_ctx, + runner=runner, + base_url=base_url, + ): + rc = _run_codeql(console) + if rc != 0: + return rc rc = _check_codeql_artifacts(console) if rc != 0: return rc diff --git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py index 40e12269..234f9ad0 100644 --- a/tools/codeql/pipeline.py +++ b/tools/codeql/pipeline.py @@ -79,6 +79,7 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No manifest.setdefault("warnings", []).append( f"SARIF normalization failed: {exc}" ) + manifest["status"] = "failed" if config.fail_policy == "hard" else "soft-failed" # Step 4: import risk signals_path = normalized_dir / "file-signals.yml" diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index ad418c11..2a48215b 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -252,7 +252,7 @@ def _run_with_progress( try: process = subprocess.Popen( cmd, - stdout=subprocess.PIPE, + stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, ) From 4267c57ad9cda4efdd5d93251faa7d8e8de18c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 
May 2026 05:44:37 +0200 Subject: [PATCH 32/47] fix: address CodeQL review findings --- tools/codecome/phase_1.py | 21 +++- tools/codeql/runner.py | 6 +- tools/phases/phase_1_gates.py | 220 +++++++++++++++++++++------------- 3 files changed, 159 insertions(+), 88 deletions(-) diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index a562dc60..1db7692f 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -11,6 +11,7 @@ from __future__ import annotations +import hashlib import os import time from pathlib import Path @@ -263,6 +264,14 @@ def _codeql_repair_needed(output_dir: Path, plan_path: Path) -> bool: return False +def _file_digest(path: Path) -> str | None: + """Return a stable digest for a file, or None when it cannot be read.""" + try: + return hashlib.sha256(path.read_bytes()).hexdigest() + except OSError: + return None + + def _run_codeql_repair_if_needed( *, args: Any, @@ -291,6 +300,7 @@ def _run_codeql_repair_if_needed( import _colors as C print(C.warn(msg)) + plan_digest = _file_digest(plan_path) for attempt in range(1, max_retries + 1): rc = _run_subphase( args=args, @@ -305,7 +315,16 @@ def _run_codeql_repair_if_needed( ) if rc != 0: continue - return True + next_plan_digest = _file_digest(plan_path) + if next_plan_digest != plan_digest: + return True + unchanged_msg = "CodeQL repair completed but did not change itemdb/notes/codeql-plan.yml." 
+ if HAVE_RICH: + from rich.text import Text + console.print(Text(unchanged_msg, style="yellow")) + else: + import _colors as C + print(C.warn(unchanged_msg)) return False diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 2a48215b..a8dd4fc0 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -30,7 +30,7 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No if config.fail_policy == "hard": return _manifest("failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) else: - return _manifest("soft-failed", now_utc, config, [], [f"CodeQL binary not found at {binary_path}"]) + return _manifest("soft-failed", now_utc, config, [], [], failures=[f"CodeQL binary not found at {binary_path}"]) version = _get_codeql_version(binary_path) _progress(progress, f"CodeQL: using {version}") @@ -205,7 +205,9 @@ def _create_database( temp_config: Path | None = None if exclude_patterns: import yaml as _yaml - temp_config = Path(tempfile.mkdtemp(prefix="codeql-codescanning-")) / "codescanning-config.yml" + workspace_tmp = ROOT / "tmp" + workspace_tmp.mkdir(parents=True, exist_ok=True) + temp_config = Path(tempfile.mkdtemp(prefix="codeql-codescanning-", dir=str(workspace_tmp))) / "codescanning-config.yml" temp_config.parent.mkdir(parents=True, exist_ok=True) config_content = {"paths-ignore": exclude_patterns} temp_config.write_text(_yaml.dump(config_content, default_flow_style=False), encoding="utf-8") diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index 1a2a708b..219f7598 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -95,6 +95,131 @@ def _notes_exist(*names: str) -> list[str]: return [name for name in names if not (notes_dir / name).exists()] +def _codeql_fail_policy() -> str: + """Return configured CodeQL fail policy, defaulting to soft on errors.""" + if _resolve_codeql_config is None: + return "soft" + try: + return 
_resolve_codeql_config().fail_policy + except Exception: + return "soft" + + +def _validate_codeql_language_entry( + *, + console, + unit_id: str, + lang: object, + index: int, + seen_databases: set[tuple[str, str]], + valid_confidences: set[str], +) -> int | None: + """Validate one language entry from codeql-plan.yml.""" + if not isinstance(lang, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {index} is not a mapping") + return 1 + language_id = lang.get("id") + if not isinstance(language_id, str) or not language_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {index} missing valid 'id'") + return 1 + if not is_supported_language(language_id): + fail_policy = _codeql_fail_policy() + if fail_policy == "hard": + _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") + return 1 + _emit(console, "warn", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}' — will be skipped (fail_policy=soft)") + return None + db_key = (unit_id, language_id) + if db_key in seen_databases: + _emit(console, "fail", f"codeql-plan.yml: duplicate language '{language_id}' in analysis unit '{unit_id}'") + return 1 + seen_databases.add(db_key) + if lang.get("confidence") not in valid_confidences: + _emit( + console, + "warn", + f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unexpected confidence '{lang.get('confidence')}'", + ) + build_mode = lang.get("build_mode") + supported_modes = supported_build_modes(language_id) + if build_mode not in supported_modes: + allowed = ", ".join(sorted(supported_modes)) + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unsupported build_mode '{build_mode}' (allowed: {allowed})") + return 1 + build_command = lang.get("build_command") + if build_mode == "manual" and not (isinstance(build_command, 
str) and build_command.strip()): + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' uses manual build without build_command") + return 1 + if "packs" not in lang: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' missing 'packs'") + return 1 + if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: + _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has empty packs list") + return 1 + return None + + +def _validate_codeql_analysis_unit( + *, + console, + unit: object, + index: int, + seen_unit_ids: set[str], + seen_databases: set[tuple[str, str]], + valid_confidences: set[str], +) -> int | None: + """Validate one analysis unit from codeql-plan.yml.""" + if not isinstance(unit, dict): + _emit(console, "fail", f"codeql-plan.yml: analysis unit {index} is not a mapping") + return 1 + unit_id = unit.get("id") + if not isinstance(unit_id, str) or not unit_id: + _emit(console, "fail", f"codeql-plan.yml: analysis unit {index} missing valid 'id'") + return 1 + if unit_id in seen_unit_ids: + _emit(console, "fail", f"codeql-plan.yml: duplicate analysis unit id '{unit_id}'") + return 1 + seen_unit_ids.add(unit_id) + + unit_path = unit.get("path") + if not isinstance(unit_path, str) or not unit_path: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' missing valid 'path'") + return 1 + resolved_path = (ROOT / unit_path).resolve() + src_root = (ROOT / "src").resolve() + try: + under_src = resolved_path == src_root or resolved_path.is_relative_to(src_root) + except ValueError: + under_src = False + if not under_src: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path must be under src/: {unit_path}") + return 1 + if "_codeql_detected_source_root" in resolved_path.parts: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path uses CodeQL-generated helper path") + return 1 
+ if not resolved_path.exists(): + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path does not exist: {unit_path}") + return 1 + + languages = unit.get("languages") + if not isinstance(languages, list) or len(languages) == 0: + _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' has no languages") + return 1 + + for j, lang in enumerate(languages): + result = _validate_codeql_language_entry( + console=console, + unit_id=unit_id, + lang=lang, + index=j, + seen_databases=seen_databases, + valid_confidences=valid_confidences, + ) + if result is not None: + return result + return None + + def count_findings_snapshot(snapshot: dict[str, int] | None = None) -> dict[str, int]: """Return finding counts, or deltas from a previous snapshot.""" findings_root = ROOT / "itemdb" / "findings" @@ -163,91 +288,16 @@ def check_phase_1a(console=None, findings_snapshot: dict[str, int] | None = None seen_unit_ids: set[str] = set() seen_databases: set[tuple[str, str]] = set() for i, unit in enumerate(units): - if not isinstance(unit, dict): - _emit(console, "fail", f"codeql-plan.yml: analysis unit {i} is not a mapping") - return 1 - unit_id = unit.get("id") - if not isinstance(unit_id, str) or not unit_id: - _emit(console, "fail", f"codeql-plan.yml: analysis unit {i} missing valid 'id'") - return 1 - if unit_id in seen_unit_ids: - _emit(console, "fail", f"codeql-plan.yml: duplicate analysis unit id '{unit_id}'") - return 1 - seen_unit_ids.add(unit_id) - - unit_path = unit.get("path") - if not isinstance(unit_path, str) or not unit_path: - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' missing valid 'path'") - return 1 - resolved_path = (ROOT / unit_path).resolve() - src_root = (ROOT / "src").resolve() - try: - under_src = resolved_path == src_root or resolved_path.is_relative_to(src_root) - except ValueError: - under_src = False - if not under_src: - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path must 
be under src/: {unit_path}") - return 1 - if "_codeql_detected_source_root" in resolved_path.parts: - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path uses CodeQL-generated helper path") - return 1 - if not resolved_path.exists(): - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' path does not exist: {unit_path}") - return 1 - - languages = unit.get("languages") - if not isinstance(languages, list) or len(languages) == 0: - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' has no languages") - return 1 - - for j, lang in enumerate(languages): - if not isinstance(lang, dict): - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {j} is not a mapping") - return 1 - language_id = lang.get("id") - if not isinstance(language_id, str) or not language_id: - _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' language entry {j} missing valid 'id'") - return 1 - if not is_supported_language(language_id): - fail_policy = "soft" - if _resolve_codeql_config is not None: - try: - cfg = _resolve_codeql_config() - fail_policy = cfg.fail_policy - except Exception: - pass - if fail_policy == "hard": - _emit(console, "fail", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}'") - return 1 - _emit(console, "warn", f"codeql-plan.yml: unsupported CodeQL language '{language_id}' in analysis unit '{unit_id}' — will be skipped (fail_policy=soft)") - continue - db_key = (unit_id, language_id) - if db_key in seen_databases: - _emit(console, "fail", f"codeql-plan.yml: duplicate language '{language_id}' in analysis unit '{unit_id}'") - return 1 - seen_databases.add(db_key) - if lang.get("confidence") not in valid_confidences: - _emit( - console, - "warn", - f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unexpected confidence '{lang.get('confidence')}'", - ) - build_mode = lang.get("build_mode") - supported_modes = 
supported_build_modes(language_id) - if build_mode not in supported_modes: - allowed = ", ".join(sorted(supported_modes)) - _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has unsupported build_mode '{build_mode}' (allowed: {allowed})") - return 1 - build_command = lang.get("build_command") - if build_mode == "manual" and not (isinstance(build_command, str) and build_command.strip()): - _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' uses manual build without build_command") - return 1 - if "packs" not in lang: - _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' missing 'packs'") - return 1 - if not isinstance(lang["packs"], list) or len(lang["packs"]) == 0: - _emit(console, "fail", f"codeql-plan.yml: language '{language_id}' in analysis unit '{unit_id}' has empty packs list") - return 1 + result = _validate_codeql_analysis_unit( + console=console, + unit=unit, + index=i, + seen_unit_ids=seen_unit_ids, + seen_databases=seen_databases, + valid_confidences=valid_confidences, + ) + if result is not None: + return result _emit(console, "ok", f"codeql-plan.yml: {len(units)} analysis unit(s) configured") From 58ab5a6ec1323a911fb0d3799ce007c151ca4308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 18:20:56 +0200 Subject: [PATCH 33/47] fix: skip non-CodeQL analysis units --- tests/test_codeql_packs.py | 56 +++++++++++++++++++++++++++++++++++ tests/test_phase_1_gates.py | 40 +++++++++++++++++++++++++ tools/codeql/packs.py | 6 ++++ tools/phases/phase_1_gates.py | 3 ++ 4 files changed, 105 insertions(+) diff --git a/tests/test_codeql_packs.py b/tests/test_codeql_packs.py index fc13786f..c2b0a78a 100644 --- a/tests/test_codeql_packs.py +++ b/tests/test_codeql_packs.py @@ -189,6 +189,30 @@ def test_load_codeql_plan_rejects_invalid_language_entry(tmp_path: Path) -> None raise 
AssertionError("expected PackResolverError") +def test_load_codeql_plan_allows_non_recommended_unit_without_languages(tmp_path: Path) -> None: + plan_path = tmp_path / "plan.yml" + plan_path.write_text( + ( + "schema_version: 1\n" + "analysis_units:\n" + " - id: api\n" + " path: ./src/api\n" + " languages:\n" + " - id: python\n" + " packs:\n" + " - official\n" + " - id: gilroy\n" + " path: ./src/gilroy\n" + " recommended: false\n" + ), + encoding="utf-8", + ) + + plan = load_codeql_plan(plan_path) + + assert plan["analysis_units"][1]["id"] == "gilroy" + + def test_resolve_plan_packs_skip_unsupported(tmp_path: Path) -> None: catalog_path = tmp_path / "catalog.yml" _write_catalog(catalog_path) @@ -242,3 +266,35 @@ def test_resolve_plan_packs_skip_unsupported_raises_by_default(tmp_path: Path) - assert "Unsupported CodeQL language id" in str(exc) else: raise AssertionError("expected PackResolverError with skip_unsupported=False") + + +def test_resolve_plan_packs_skips_non_recommended_units(tmp_path: Path) -> None: + catalog_path = tmp_path / "catalog.yml" + _write_catalog(catalog_path) + catalog = load_pack_catalog(catalog_path) + + plan = { + "schema_version": 1, + "analysis_units": [ + { + "id": "api", + "path": "./src/api", + "languages": [ + {"id": "python", "packs": ["official"]}, + ], + }, + { + "id": "gilroy", + "path": "./src/gilroy", + "recommended": False, + }, + ], + } + + resolved = resolve_plan_packs(plan, catalog, skip_unsupported=True) + + assert [unit["id"] for unit in resolved["analysis_units"]] == ["api"] + warnings = resolved.get("warnings", []) + assert len(warnings) == 1 + assert "gilroy" in warnings[0] + assert "recommended=false" in warnings[0] diff --git a/tests/test_phase_1_gates.py b/tests/test_phase_1_gates.py index 505cb066..7273d63b 100644 --- a/tests/test_phase_1_gates.py +++ b/tests/test_phase_1_gates.py @@ -81,3 +81,43 @@ def test_unsupported_language_hard_policy_fails(tmp_path: Path, capsys) -> None: out = capsys.readouterr().out assert 
rc == 1 + assert "unsupported CodeQL language 'elixir'" in out + + +def test_non_recommended_unit_without_languages_is_skipped(tmp_path: Path, capsys) -> None: + notes = tmp_path / "itemdb" / "notes" + notes.mkdir(parents=True) + (notes / "target-profile.md").write_text("profile", encoding="utf-8") + (notes / "build-model.md").write_text("model", encoding="utf-8") + (notes / "codeql-plan.yml").write_text( + "schema_version: 1\n" + "recommended: true\n" + "analysis_units:\n" + " - id: api\n" + " path: ./src/api\n" + " languages:\n" + " - id: python\n" + " confidence: HIGH\n" + " build_mode: none\n" + " packs:\n" + " - official\n" + " - id: gilroy\n" + " path: ./src/gilroy\n" + " recommended: false\n", + encoding="utf-8", + ) + + (tmp_path / "src" / "api").mkdir(parents=True) + (tmp_path / "src" / "gilroy").mkdir(parents=True) + + mock_config = type("cfg", (), {"fail_policy": "hard", "enabled": True})() + + from phases.phase_1_gates import check_phase_1a + + with patch("phases.phase_1_gates.ROOT", tmp_path), \ + patch("phases.phase_1_gates._resolve_codeql_config", return_value=mock_config): + rc = check_phase_1a() + + out = capsys.readouterr().out + assert rc == 0 + assert "not recommended for CodeQL" in out diff --git a/tools/codeql/packs.py b/tools/codeql/packs.py index fbc510f8..02c0419f 100644 --- a/tools/codeql/packs.py +++ b/tools/codeql/packs.py @@ -98,6 +98,8 @@ def load_codeql_plan(path: Path) -> dict[str, Any]: if not isinstance(unit_path, str) or not unit_path: raise PackResolverError(f"CodeQL plan at {path} has analysis unit {unit_id!r} without a valid 'path'.") languages = unit.get("languages") + if unit.get("recommended") is False and (languages is None or languages == []): + continue if not isinstance(languages, list) or not languages: raise PackResolverError(f"CodeQL plan at {path} must define analysis unit {unit_id!r} languages as a non-empty list.") for j, entry in enumerate(languages): @@ -177,6 +179,10 @@ def resolve_plan_packs(plan: dict[str, 
Any], catalog: dict[str, Any], skip_unsup plan_warnings: list[str] = [] for unit in plan.get("analysis_units", []): + if unit.get("recommended") is False: + plan_warnings.append(f"Skipping analysis unit '{unit['id']}' because recommended=false") + continue + languages_out: list[dict[str, Any]] = [] for entry in unit.get("languages", []): language_id = entry["id"] diff --git a/tools/phases/phase_1_gates.py b/tools/phases/phase_1_gates.py index 219f7598..65d2d4c6 100644 --- a/tools/phases/phase_1_gates.py +++ b/tools/phases/phase_1_gates.py @@ -202,6 +202,9 @@ def _validate_codeql_analysis_unit( return 1 languages = unit.get("languages") + if unit.get("recommended") is False and (languages is None or languages == []): + _emit(console, "info", f"codeql-plan.yml: analysis unit '{unit_id}' is not recommended for CodeQL; skipping language validation") + return None if not isinstance(languages, list) or len(languages) == 0: _emit(console, "fail", f"codeql-plan.yml: analysis unit '{unit_id}' has no languages") return 1 From 861e113034cb895c98f846d3b73711ab62de8a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 19:30:29 +0200 Subject: [PATCH 34/47] fix: auto-repair invalid CodeQL plans --- prompts/phase-1-codeql-repair.md | 2 + tests/test_phase_1_codeql_plan_repair.py | 147 +++++++++++++++++++++++ tools/codecome/phase_1.py | 67 +++++++++++ tools/phases/completion.py | 13 ++ 4 files changed, 229 insertions(+) create mode 100644 tests/test_phase_1_codeql_plan_repair.py diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md index dea0aae6..7b3d3030 100644 --- a/prompts/phase-1-codeql-repair.md +++ b/prompts/phase-1-codeql-repair.md @@ -65,3 +65,5 @@ Make the repair directly in files. At the end, summarize: - what changed in `itemdb/notes/codeql-plan.yml`, - any helper script created, - the exact manual build command CodeQL will run next. 
+ +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid YAML and still follows the CodeQL plan schema. If validation fails, repair only the reported YAML/schema issue before summarizing. diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py new file mode 100644 index 00000000..285b896e --- /dev/null +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from events.phase_loop import RunResult + + +def _write_invalid_plan(root: Path) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + "schema_version: 1\n" + "analysis_units:\n" + " - id: native\n" + " path: ./src/native\n" + " languages:\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + "- outdented-note\n", + encoding="utf-8", + ) + + +def _write_valid_plan(root: Path) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + "schema_version: 1\n" + "analysis_units:\n" + " - id: native\n" + " path: ./src/native\n" + " languages:\n" + " - id: c-cpp\n" + " packs:\n" + " - official\n" + "notes:\n" + " - repaired\n", + encoding="utf-8", + ) + + +def _runtime_config() -> SimpleNamespace: + return SimpleNamespace( + model="test-model", + variant=None, + thinking_on=False, + model_source="test", + variant_source="test", + thinking_source="test", + ) + + +def _runner() -> SimpleNamespace: + return SimpleNamespace(info=SimpleNamespace(password="")) + + +def _ok_result() -> RunResult: + return RunResult(any_step_finish_seen=True, step_finish_count=1, last_finish_reason="stop") + + +def test_subphase_resumes_same_session_to_repair_invalid_codeql_plan(tmp_path: Path) 
-> None: + import codecome.phase_1 as p1 + + transcript = tmp_path / "transcript.jsonl" + calls: list[tuple[str, str | None]] = [] + + def fake_run_single_attempt(_args, _console, prompt, *_rest, existing_session_id=None, **_kwargs): + calls.append((prompt, existing_session_id)) + if len(calls) == 1: + _write_invalid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + assert existing_session_id == "sess-1" + assert "itemdb/notes/codeql-plan.yml" in prompt + assert "Validation errors:" in prompt + _write_valid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch.object(p1, "load_prompt", return_value="initial prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, "configure_rendering"), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt), \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=object(), + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + phase_id="1a", + label="Target Profile", + agent="recon", + prompt_file="prompts/phase-1a-profile.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 + assert len(calls) == 2 + assert calls[1][1] == "sess-1" + + +def test_subphase_fails_after_codeql_plan_auto_repair_retries_exhausted(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + transcript = tmp_path / "transcript.jsonl" + + def fake_run_single_attempt(*_args, **_kwargs): + _write_invalid_plan(tmp_path) + return 0, "sess-1", _ok_result(), transcript + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch.object(p1, "load_prompt", return_value="initial prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, 
"configure_rendering"), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt) as run_attempt, \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=object(), + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + phase_id="1-codeql-repair", + label="CodeQL Build Repair", + agent="recon", + prompt_file="prompts/phase-1-codeql-repair.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 2 + assert run_attempt.call_count == 3 diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 1db7692f..ca7b84da 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -39,6 +39,7 @@ check_phase_graceful_completion, build_phase_resume_prompt, build_frontmatter_resume_prompt, + build_codeql_plan_resume_prompt, ) # --------------------------------------------------------------------------- # CodeQL analysis (between 1a gate and 1b) @@ -240,6 +241,27 @@ def _load_codeql_yaml(path: Path) -> dict[str, Any]: return {} +def _validate_codeql_plan_for_repair() -> tuple[int, str]: + """Validate the generated CodeQL plan, returning CLI-style (rc, output).""" + plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml" + if not plan_path.exists(): + return 0, "" + + try: + from codeql.packs import load_codeql_plan + + load_codeql_plan(plan_path) + except Exception as exc: + return 1, f"itemdb/notes/codeql-plan.yml is invalid: {exc}" + + return 0, "" + + +def _subphase_should_validate_codeql_plan(phase_id: str) -> bool: + """Return whether a subphase is responsible for producing/editing codeql-plan.yml.""" + return phase_id in {"1a", "1-codeql-repair"} + + def _codeql_repair_needed(output_dir: Path, plan_path: Path) -> bool: """Return whether a failed CodeQL run should get one model repair attempt.""" manifest = _load_codeql_yaml(output_dir / "run-manifest.yml") @@ -390,6 +412,7 @@ def _run_subphase( iteration_retry_count = 0 
frontmatter_retry_count = 0 + codeql_plan_retry_count = 0 attempt_number = 0 last_session_id: str = "" last_finish_reason: str | None = None @@ -485,6 +508,50 @@ def _run_subphase( returncode = 2 if returncode == 0: + if _subphase_should_validate_codeql_plan(phase_id): + validation_rc, validation_output = _validate_codeql_plan_for_repair() + if validation_rc != 0: + max_codeql_plan_retries = 2 + if codeql_plan_retry_count < max_codeql_plan_retries: + codeql_plan_retry_count += 1 + msg = ( + "\n[Auto-Correction] The model completed a turn, but itemdb/notes/codeql-plan.yml " + "failed local CodeQL plan validation. CodeCome will resume the same session and ask " + f"for a minimal YAML/plan repair (retry {codeql_plan_retry_count}/{max_codeql_plan_retries})." + ) + if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold yellow")) + else: + import _colors as C + print(C.warn(msg)) + if last_session_id and last_session_id != "id": + prompt = build_codeql_plan_resume_prompt(validation_output) + continue + else: + returncode = 2 + finish_warning = ( + "The model output failed CodeQL plan validation, and CodeCome could not determine " + "a session ID to resume for repair. Treating the subphase as incomplete so the " + "validator output can be reported back with the saved transcript." + ) + else: + returncode = 2 + finish_warning = ( + f"itemdb/notes/codeql-plan.yml still fails validation after {max_codeql_plan_retries} " + "auto-repair attempts. Treating the subphase as incomplete so the validation errors " + "can be reported back." + ) + msg = f"\n[Warning] CodeQL plan validation errors persist after {max_codeql_plan_retries} auto-retries." 
+ if HAVE_RICH: + from rich.text import Text + console.print(Text(msg, style="bold red")) + else: + import _colors as C + print(C.fail(msg)) + print(validation_output) + break + from findings.checks_entry import run_frontmatter_validation validation_rc, validation_output = run_frontmatter_validation() diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 05338221..155dc684 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -232,6 +232,19 @@ def build_frontmatter_resume_prompt(phase: str, finding: str | None, validation_ ) +def build_codeql_plan_resume_prompt(validation_output: str) -> str: + return ( + "Your previous run created or edited `itemdb/notes/codeql-plan.yml`, but the file failed local " + "CodeQL plan validation.\n\n" + "Validation errors:\n" + f"{validation_output}\n\n" + "Repair only `itemdb/notes/codeql-plan.yml` with the smallest change needed. Do not redo unrelated " + "reconnaissance or modify target source code. Preserve the existing analysis units, pack selections, " + "manual build commands, and notes unless a reported validation error requires changing them.\n\n" + "Before ending, verify that `itemdb/notes/codeql-plan.yml` is valid YAML and passes CodeQL plan validation." 
+ ) + + def build_resume_command(initial_command: list[str], session_id: str, prompt: str) -> list[str]: """Preserve connection/runtime flags needed to reach the original session.""" resume = ["opencode", "run"] From 815e6da724b66ade056dce713f61d157e2bea8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 20:15:13 +0200 Subject: [PATCH 35/47] fix: retry CodeQL manual build repairs --- prompts/phase-1-codeql-repair.md | 6 +- tests/test_codecome_check_codeql.py | 8 +- tests/test_phase_1_codeql_plan_repair.py | 127 +++++++++++++++ tools/codecome/phase_1.py | 193 ++++++++++++++++++++--- tools/phases/completion.py | 19 +++ 5 files changed, 324 insertions(+), 29 deletions(-) diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md index 7b3d3030..2984b84c 100644 --- a/prompts/phase-1-codeql-repair.md +++ b/prompts/phase-1-codeql-repair.md @@ -51,10 +51,14 @@ If the manual command is simple enough, put it directly in `build_command` inste ## Build Command Rules - CodeQL runs the manual `build_command` from the analysis unit source path. +- CodeQL does not run `build_command` from the workspace root or from the helper script directory. - Prefer commands that are deterministic and non-interactive. - Prefer commands that avoid modifying `src/` when possible. - If existing target build files naturally write object files or binaries into `src/`, document that limitation in the `notes` field. - Use workspace-relative helper script paths that work from the CodeQL source path. +- Never use absolute `/tmp/` paths. Use workspace-relative `tmp/` paths for scratch/build output. +- Do not embed this workspace's absolute path in `build_command`; prefer paths relative to the analysis unit source path. +- If a helper script changes directory, it must change to the analysis unit source path or to a path explicitly derived from that execution model, not blindly to the helper script directory. 
- Keep the plan schema and existing pack selections intact unless a minimal change requires otherwise. ## Output Requirements @@ -66,4 +70,4 @@ Make the repair directly in files. At the end, summarize: - any helper script created, - the exact manual build command CodeQL will run next. -Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid YAML and still follows the CodeQL plan schema. If validation fails, repair only the reported YAML/schema issue before summarizing. +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid YAML and still follows the CodeQL plan schema. Also verify that any referenced helper shell script exists and passes syntax-only validation. If validation fails, repair only the reported YAML/schema/helper issue before summarizing. diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py index aabc175f..39abe6af 100644 --- a/tests/test_codecome_check_codeql.py +++ b/tests/test_codecome_check_codeql.py @@ -207,7 +207,7 @@ def test_codeql_repair_needed_for_autobuild_database_failure(tmp_path: Path) -> assert _codeql_repair_needed(output_dir, plan_path) is True -def test_codeql_repair_not_needed_after_manual_database_failure(tmp_path: Path) -> None: +def test_codeql_repair_needed_after_manual_database_failure(tmp_path: Path) -> None: _ensure_codecome_package() from codecome.phase_1 import _codeql_repair_needed @@ -242,7 +242,7 @@ def test_codeql_repair_not_needed_after_manual_database_failure(tmp_path: Path) encoding="utf-8", ) - assert _codeql_repair_needed(output_dir, plan_path) is False + assert _codeql_repair_needed(output_dir, plan_path) is True def test_phase_1_reruns_codeql_after_repair() -> None: @@ -258,12 +258,12 @@ def test_phase_1_reruns_codeql_after_repair() -> None: patch.object(p1, "check_phase_1b", return_value=0), \ patch.object(p1, "check_phase_1c", return_value=0), \ patch.object(p1, "_run_codeql", return_value=0) as run_codeql, \ - patch.object(p1, 
"_run_codeql_repair_if_needed", return_value=True), \ + patch.object(p1, "_run_codeql_repair_if_needed", return_value=0), \ patch.object(p1, "_check_codeql_artifacts", return_value=0): rc = p1.run_phase_1(object(), None, None, object(), "http://127.0.0.1") finally: p1.HAVE_RICH = saved assert rc == 0 - assert run_codeql.call_count == 2 + assert run_codeql.call_count == 1 assert subphase.call_count == 3 diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py index 285b896e..efabdb27 100644 --- a/tests/test_phase_1_codeql_plan_repair.py +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -5,6 +5,8 @@ from types import SimpleNamespace from unittest.mock import patch +import yaml + ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(ROOT / "tools")) @@ -66,6 +68,34 @@ def _ok_result() -> RunResult: return RunResult(any_step_finish_seen=True, step_finish_count=1, last_finish_reason="stop") +def _write_manual_plan(root: Path, build_command: str) -> None: + plan = root / "itemdb" / "notes" / "codeql-plan.yml" + plan.parent.mkdir(parents=True, exist_ok=True) + plan.write_text( + yaml.safe_dump( + { + "schema_version": 1, + "analysis_units": [ + { + "id": "native", + "path": "./src/native", + "languages": [ + { + "id": "c-cpp", + "build_mode": "manual", + "build_command": build_command, + "packs": ["official"], + } + ], + } + ], + }, + sort_keys=False, + ), + encoding="utf-8", + ) + + def test_subphase_resumes_same_session_to_repair_invalid_codeql_plan(tmp_path: Path) -> None: import codecome.phase_1 as p1 @@ -145,3 +175,100 @@ def fake_run_single_attempt(*_args, **_kwargs): assert rc == 2 assert run_attempt.call_count == 3 + + +def test_codeql_plan_validation_rejects_absolute_tmp_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "bash -c 'mkdir -p /tmp/codeql-build'") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() 
+ + assert rc == 1 + assert "absolute /tmp/" in output + + +def test_codeql_plan_validation_checks_helper_from_analysis_root(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + analysis_root = tmp_path / "src" / "native" + helper = tmp_path / "tmp" / "codeql-build.sh" + analysis_root.mkdir(parents=True) + helper.parent.mkdir(parents=True) + helper.write_text("#!/usr/bin/env bash\necho ok\n", encoding="utf-8") + _write_manual_plan(tmp_path, "bash ../../tmp/codeql-build.sh") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 0, output + + +def test_codeql_plan_validation_rejects_missing_helper_from_analysis_root(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + (tmp_path / "src" / "native").mkdir(parents=True) + _write_manual_plan(tmp_path, "bash tmp/codeql-build.sh") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "referenced helper script does not exist from analysis root" in output + + +def test_codeql_repair_loop_resumes_same_session_after_failed_rerun(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True) + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "soft-failed", "failures": ["Database create failed for c-cpp:\nautobuild failed"]}), + encoding="utf-8", + ) + _write_manual_plan(tmp_path, "make") + config = SimpleNamespace(abs_output_dir=output_dir) + calls: list[tuple[str | None, str | None]] = [] + + def fake_subphase(**kwargs): + calls.append((kwargs.get("existing_session_id"), kwargs.get("initial_prompt"))) + if len(calls) == 1: + return p1._SubphaseOutcome(0, "repair-session", tmp_path / "one.jsonl") + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "completed", "failures": []}), + encoding="utf-8", + ) + return p1._SubphaseOutcome(0, "repair-session", tmp_path 
/ "two.jsonl") + + def fake_run_codeql(_console): + if len(calls) == 1: + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "soft-failed", "failures": ["Database create failed for c-cpp:\nmanual failed"]}), + encoding="utf-8", + ) + return 0 + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch("codeql.config.resolve_config", return_value=config), \ + patch.object(p1, "_run_subphase", side_effect=fake_subphase), \ + patch.object(p1, "_run_codeql", side_effect=fake_run_codeql): + rc = p1._run_codeql_repair_if_needed( + args=object(), + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 + assert len(calls) == 2 + assert calls[0] == (None, None) + assert calls[1][0] == "repair-session" + assert calls[1][1] is not None + assert "Latest CodeQL failure details" in calls[1][1] diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index ca7b84da..d3340b12 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -13,7 +13,11 @@ import hashlib import os +import re +import shlex +import subprocess import time +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -40,7 +44,15 @@ build_phase_resume_prompt, build_frontmatter_resume_prompt, build_codeql_plan_resume_prompt, + build_codeql_build_failure_resume_prompt, ) + + +@dataclass(frozen=True) +class _SubphaseOutcome: + returncode: int + session_id: str + transcript_path: Path # --------------------------------------------------------------------------- # CodeQL analysis (between 1a gate and 1b) # --------------------------------------------------------------------------- @@ -250,13 +262,83 @@ def _validate_codeql_plan_for_repair() -> tuple[int, str]: try: from codeql.packs import load_codeql_plan - load_codeql_plan(plan_path) + plan = load_codeql_plan(plan_path) except Exception as exc: 
return 1, f"itemdb/notes/codeql-plan.yml is invalid: {exc}" + errors: list[str] = [] + for unit in plan.get("analysis_units", []): + if not isinstance(unit, dict): + continue + unit_id = str(unit.get("id", "")) + unit_path = unit.get("path") + analysis_root = ROOT / unit_path if isinstance(unit_path, str) else ROOT + languages = unit.get("languages", []) + if not isinstance(languages, list): + continue + for language in languages: + if not isinstance(language, dict): + continue + language_id = str(language.get("id", "")) + build_command = language.get("build_command") + if not isinstance(build_command, str) or not build_command.strip(): + continue + context = f"analysis unit {unit_id!r} language {language_id!r}" + errors.extend(_validate_codeql_build_command(build_command, analysis_root, context)) + + if errors: + return 1, "itemdb/notes/codeql-plan.yml failed CodeQL build-command validation:\n" + "\n".join( + f"- {error}" for error in errors + ) + return 0, "" +def _validate_codeql_build_command(build_command: str, analysis_root: Path, context: str) -> list[str]: + """Return generic portability/safety validation errors for a manual build command.""" + errors: list[str] = [] + if _contains_absolute_tmp(build_command): + errors.append(f"{context}: build_command uses absolute /tmp/; use workspace-relative tmp/ instead") + if str(ROOT) in build_command: + errors.append(f"{context}: build_command embeds the absolute workspace path {ROOT}") + + try: + tokens = shlex.split(build_command) + except ValueError as exc: + return errors + [f"{context}: build_command is not shell-parseable: {exc}"] + + for token in tokens: + if not token.endswith(".sh"): + continue + script_path = Path(token) + if not script_path.is_absolute(): + script_path = analysis_root / script_path + if not script_path.is_file(): + errors.append(f"{context}: referenced helper script does not exist from analysis root: {token}") + continue + try: + content = script_path.read_text(encoding="utf-8") + except 
OSError as exc: + errors.append(f"{context}: referenced helper script cannot be read: {token}: {exc}") + continue + if _contains_absolute_tmp(content): + errors.append(f"{context}: referenced helper script {token} uses absolute /tmp/; use workspace-relative tmp/") + if str(ROOT) in content: + errors.append(f"{context}: referenced helper script {token} embeds the absolute workspace path {ROOT}") + result = subprocess.run(["bash", "-n", str(script_path)], capture_output=True, text=True, timeout=30) + if result.returncode != 0: + detail = (result.stderr or result.stdout).strip() + suffix = f": {detail}" if detail else "" + errors.append(f"{context}: referenced helper script {token} failed bash -n{suffix}") + + return errors + + +def _contains_absolute_tmp(text: str) -> bool: + """Return whether text contains an absolute /tmp path, not a relative tmp/ component.""" + return re.search(r"(^|[\s\"'=])/(tmp)(/|$)", text) is not None + + def _subphase_should_validate_codeql_plan(phase_id: str) -> bool: """Return whether a subphase is responsible for producing/editing codeql-plan.yml.""" return phase_id in {"1a", "1-codeql-repair"} @@ -281,11 +363,43 @@ def _codeql_repair_needed(output_dir: Path, plan_path: Path) -> bool: if not isinstance(languages, list): continue for language in languages: - if isinstance(language, dict) and language.get("build_mode") == "autobuild": + if isinstance(language, dict) and language.get("build_mode") in {"autobuild", "manual"}: return True return False +def _latest_codeql_database_log(output_dir: Path) -> Path | None: + logs = [p for p in output_dir.glob("databases/**/log/database-create-*.log") if p.is_file()] + if not logs: + return None + return max(logs, key=lambda p: p.stat().st_mtime) + + +def _codeql_repair_failure_context(output_dir: Path) -> str: + """Return target-agnostic failure context for the repair model.""" + lines: list[str] = [] + manifest = _load_codeql_yaml(output_dir / "run-manifest.yml") + failures = 
manifest.get("failures", []) + if isinstance(failures, list) and failures: + lines.append("Manifest failures:") + lines.extend(str(failure) for failure in failures[-3:]) + + latest_log = _latest_codeql_database_log(output_dir) + if latest_log is not None: + interesting: list[str] = [] + try: + for line in latest_log.read_text(encoding="utf-8", errors="replace").splitlines(): + if any(marker in line for marker in ("[build-stderr]", "[build-stdout]", "[ERROR]", "Exception caught", "A fatal error")): + interesting.append(line) + except OSError as exc: + interesting.append(f"Failed to read latest database log {latest_log}: {exc}") + if interesting: + lines.append(f"Latest database-create log: {latest_log.relative_to(ROOT) if latest_log.is_relative_to(ROOT) else latest_log}") + lines.extend(interesting[-40:]) + + return "\n".join(lines) if lines else "CodeQL database creation failed; no additional log details were available." + + def _file_digest(path: Path) -> str | None: """Return a stable digest for a file, or None when it cannot be read.""" try: @@ -301,20 +415,20 @@ def _run_codeql_repair_if_needed( rendering_ctx: Any, runner: ServerRunner, base_url: str, -) -> bool: - """Ask the model to repair CodeQL build instructions after autobuild failure.""" +) -> int: + """Ask the model to repair CodeQL build instructions and rerun CodeQL until stable.""" from codeql.config import resolve_config as _resolve_codeql_config - max_retries = int(os.environ.get("CODEQL_REPAIR_RETRIES", "1")) + max_retries = int(os.environ.get("CODEQL_REPAIR_RETRIES", "2")) if max_retries <= 0: - return False + return 0 config = _resolve_codeql_config() plan_path = ROOT / "itemdb" / "notes" / "codeql-plan.yml" if not _codeql_repair_needed(config.abs_output_dir, plan_path): - return False + return 0 - msg = "CodeQL autobuild failed; asking the model to repair manual build instructions." + msg = "CodeQL database creation failed; asking the model to repair build instructions." 
if HAVE_RICH: from rich.text import Text console.print(Text(msg, style="bold yellow")) @@ -323,8 +437,10 @@ def _run_codeql_repair_if_needed( print(C.warn(msg)) plan_digest = _file_digest(plan_path) + repair_session_id: str | None = None + repair_prompt: str | None = None for attempt in range(1, max_retries + 1): - rc = _run_subphase( + outcome = _run_subphase( args=args, console=console, rendering_ctx=rendering_ctx, @@ -334,21 +450,46 @@ def _run_codeql_repair_if_needed( label=f"CodeQL Build Repair ({attempt}/{max_retries})", agent="recon", prompt_file="prompts/phase-1-codeql-repair.md", + existing_session_id=repair_session_id, + initial_prompt=repair_prompt, + return_outcome=True, ) - if rc != 0: + assert isinstance(outcome, _SubphaseOutcome) + repair_session_id = outcome.session_id or repair_session_id + if outcome.returncode != 0: continue next_plan_digest = _file_digest(plan_path) - if next_plan_digest != plan_digest: - return True - unchanged_msg = "CodeQL repair completed but did not change itemdb/notes/codeql-plan.yml." + if next_plan_digest == plan_digest: + unchanged_msg = "CodeQL repair completed but did not change itemdb/notes/codeql-plan.yml." + if HAVE_RICH: + from rich.text import Text + console.print(Text(unchanged_msg, style="yellow")) + else: + import _colors as C + print(C.warn(unchanged_msg)) + plan_digest = next_plan_digest + + rc = _run_codeql(console) + if rc != 0: + return rc + if not _codeql_repair_needed(config.abs_output_dir, plan_path): + return 0 + + repair_prompt = build_codeql_build_failure_resume_prompt( + _codeql_repair_failure_context(config.abs_output_dir) + ) + + if _codeql_repair_needed(config.abs_output_dir, plan_path): + msg = f"CodeQL database creation still fails after {max_retries} repair attempt(s); blocking Phase 1b." 
if HAVE_RICH: from rich.text import Text - console.print(Text(unchanged_msg, style="yellow")) + console.print(Text(msg, style="bold red")) else: import _colors as C - print(C.warn(unchanged_msg)) + print(C.fail(msg)) + return 1 - return False + return 0 # --------------------------------------------------------------------------- @@ -367,10 +508,13 @@ def _run_subphase( agent: str, prompt_file: str, finding: str | None = None, -) -> int: + existing_session_id: str | None = None, + initial_prompt: str | None = None, + return_outcome: bool = False, +) -> int | _SubphaseOutcome: """Run a single subphase agent session with retry/resume.""" prompt_path = ROOT / prompt_file - prompt = load_prompt(prompt_path, finding, phase=phase_id) + prompt = initial_prompt if initial_prompt is not None else load_prompt(prompt_path, finding, phase=phase_id) rc = resolve_runtime_config(agent) model = rc.model variant = rc.variant @@ -414,7 +558,7 @@ def _run_subphase( frontmatter_retry_count = 0 codeql_plan_retry_count = 0 attempt_number = 0 - last_session_id: str = "" + last_session_id: str = existing_session_id or "" last_finish_reason: str | None = None last_finish_tokens: dict[str, Any] = {} last_permission_error: str | None = None @@ -677,6 +821,8 @@ def _run_subphase( print(C.fail(f" reason: {finish_warning}")) print(f" finish reason: {last_finish_reason!r} transcript: {transcript_path.relative_to(ROOT) if transcript_path.name else 'N/A'}") + if return_outcome: + return _SubphaseOutcome(returncode=returncode, session_id=last_session_id, transcript_path=transcript_path) return returncode @@ -716,16 +862,15 @@ def run_phase_1( rc = _run_codeql(console) if rc != 0: return rc - if _run_codeql_repair_if_needed( + rc = _run_codeql_repair_if_needed( args=args, console=console, rendering_ctx=rendering_ctx, runner=runner, base_url=base_url, - ): - rc = _run_codeql(console) - if rc != 0: - return rc + ) + if rc != 0: + return rc rc = _check_codeql_artifacts(console) if rc != 0: return rc 
diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 155dc684..3d2d51f7 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -245,6 +245,25 @@ def build_codeql_plan_resume_prompt(validation_output: str) -> str: ) +def build_codeql_build_failure_resume_prompt(validation_output: str) -> str: + return ( + "The repaired `itemdb/notes/codeql-plan.yml` was valid, but the next CodeQL database creation run still " + "failed. Continue the same narrow CodeQL build repair task.\n\n" + "Latest CodeQL failure details:\n" + f"{validation_output}\n\n" + "Repair only `itemdb/notes/codeql-plan.yml` and any helper scripts under workspace-relative `tmp/` or " + "`sandbox/`. Do not modify target source code.\n\n" + "Important execution model: CodeQL runs the manual `build_command` with the current working directory set " + "to the analysis unit source path (`analysis_units[].path`). It is not run from the workspace root, and it " + "is not run from the helper script directory. If a helper script changes directory, it must do so based on " + "the analysis source root or explicit paths that work from that source root.\n\n" + "Do not use absolute `/tmp/` paths. Use workspace-relative `tmp/` paths. Do not embed this workspace's " + "absolute path in `build_command`; prefer paths relative to the analysis unit source path.\n\n" + "Before ending, verify that the plan is valid YAML, that referenced helper scripts exist, and that shell " + "helpers pass syntax-only validation." 
+ ) + + def build_resume_command(initial_command: list[str], session_id: str, prompt: str) -> list[str]: """Preserve connection/runtime flags needed to reach the original session.""" resume = ["opencode", "run"] From 68ffd9e8dc9a064b132e1472f16ca071d817a489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 20:38:29 +0200 Subject: [PATCH 36/47] fix: remove duplicate CodeQL hard-fail branch --- tools/codecome/phase_1.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index d3340b12..938ff1ac 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -207,16 +207,6 @@ def _check_codeql_artifacts(console: Any) -> int: print(C.fail(msg)) return 1 - if status == "failed" and config.fail_policy == "hard": - msg = "CodeQL artifact gate: FAILED — execution crashed, blocking Phase 1b" - if HAVE_RICH: - from rich.text import Text - console.print(Text(msg, style="bold red")) - else: - import _colors as C - print(C.fail(msg)) - return 1 - if status == "failed": # fail_policy is soft, so treat as a non-blocking warning if HAVE_RICH: From 67c0834af0f34ffcca48c967f4a1f0e19f6bf4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 21:02:47 +0200 Subject: [PATCH 37/47] fix(phase-1): correctly handle codeql repair loop and hard-fail gate --- tools/codecome/phase_1.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 938ff1ac..d9e4d003 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -120,8 +120,6 @@ def progress(message: str) -> None: else: import _colors as C print(C.fail(msg)) - if config.fail_policy == "hard": - return 1 return 0 status = manifest["status"] @@ -171,8 +169,6 @@ def progress(message: str) -> None: console.print(Text(f" {f}", style="red")) else: print(C.fail(f" {f}")) - if 
config.fail_policy == "hard": - return 1 return 0 @@ -406,7 +402,21 @@ def _run_codeql_repair_if_needed( runner: ServerRunner, base_url: str, ) -> int: - """Ask the model to repair CodeQL build instructions and rerun CodeQL until stable.""" + """ + Ask the model to repair CodeQL build instructions and rerun CodeQL until stable. + + Architecture / Retries Logic: + 1. CodeCome generates a `codeql-plan.yml` in Phase 1a. + 2. We attempt to run CodeQL using that plan. + 3. If CodeQL database creation fails (e.g., due to build errors), this function is + triggered. It allocates a retry budget (`CODEQL_REPAIR_RETRIES`) to use the model + to debug the failure and output a repaired `codeql-plan.yml`. + 4. If the agent itself fails to produce a valid plan (e.g. gets stuck validating its + YAML repeatedly) or the user hits Ctrl+C, we break out of the repair loop. + 5. We NEVER halt the entire pipeline in this function. We simply exhaust the allocated + budget. Only after all repair attempts finish does `_check_codeql_artifacts` finally + enforce the `fail_policy: hard` gate and halt the pipeline if the database is still missing. + """ from codeql.config import resolve_config as _resolve_codeql_config max_retries = int(os.environ.get("CODEQL_REPAIR_RETRIES", "2")) @@ -446,8 +456,16 @@ def _run_codeql_repair_if_needed( ) assert isinstance(outcome, _SubphaseOutcome) repair_session_id = outcome.session_id or repair_session_id + + if outcome.returncode == 130: + return 130 # Honor user interrupt immediately + if outcome.returncode != 0: - continue + # The agent exhausted its internal validation retries or failed fatally. + # Continuing here would just loop the same broken state, so we break + # out of the repair loop to let the phase proceed (and potentially halt). + break + next_plan_digest = _file_digest(plan_path) if next_plan_digest == plan_digest: unchanged_msg = "CodeQL repair completed but did not change itemdb/notes/codeql-plan.yml." 
From e24502bc5b922bf7541ba660fe7dd114915949f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 21:21:41 +0200 Subject: [PATCH 38/47] refactor: make _run_codeql return None, remove dead rc checks _run_codeql always returned 0 after fail_policy enforcement was moved to _check_codeql_artifacts. Change return type to None and remove the two unreachable 'if rc != 0: return rc' guards that depended on it. --- tests/test_codecome_check_codeql.py | 2 +- tests/test_phase_1_codeql_plan_repair.py | 2 +- tools/codecome/phase_1.py | 24 +++++++++++------------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py index 39abe6af..21ddbdce 100644 --- a/tests/test_codecome_check_codeql.py +++ b/tests/test_codecome_check_codeql.py @@ -257,7 +257,7 @@ def test_phase_1_reruns_codeql_after_repair() -> None: patch.object(p1, "check_phase_1a", return_value=0), \ patch.object(p1, "check_phase_1b", return_value=0), \ patch.object(p1, "check_phase_1c", return_value=0), \ - patch.object(p1, "_run_codeql", return_value=0) as run_codeql, \ + patch.object(p1, "_run_codeql", return_value=None) as run_codeql, \ patch.object(p1, "_run_codeql_repair_if_needed", return_value=0), \ patch.object(p1, "_check_codeql_artifacts", return_value=0): rc = p1.run_phase_1(object(), None, None, object(), "http://127.0.0.1") diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py index efabdb27..d808d99c 100644 --- a/tests/test_phase_1_codeql_plan_repair.py +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -247,7 +247,7 @@ def fake_run_codeql(_console): yaml.safe_dump({"status": "soft-failed", "failures": ["Database create failed for c-cpp:\nmanual failed"]}), encoding="utf-8", ) - return 0 + return None saved_rich = p1.HAVE_RICH p1.HAVE_RICH = False diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index d9e4d003..25ea77fb 
100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -57,8 +57,12 @@ class _SubphaseOutcome: # CodeQL analysis (between 1a gate and 1b) # --------------------------------------------------------------------------- -def _run_codeql(console: Any) -> int: - """Run full CodeQL pipeline and report results.""" +def _run_codeql(console: Any) -> None: + """Run full CodeQL pipeline and report results. + + This function always succeeds (returns None). Pass/fail enforcement + is handled separately by ``_check_codeql_artifacts``. + """ from codeql.config import resolve_config as _resolve_codeql_config config = _resolve_codeql_config() @@ -81,7 +85,7 @@ def _run_codeql(console: Any) -> int: else: import _colors as C print(C.warn(msg)) - return 0 + return if not config.phase_1_enabled: msg = "CodeQL phase 1 disabled — skipping." @@ -93,7 +97,7 @@ def _run_codeql(console: Any) -> int: else: import _colors as C print(C.warn(msg)) - return 0 + return if HAVE_RICH: from rich.text import Text @@ -120,7 +124,7 @@ def progress(message: str) -> None: else: import _colors as C print(C.fail(msg)) - return 0 + return status = manifest["status"] warnings = manifest.get("warnings", []) @@ -170,8 +174,6 @@ def progress(message: str) -> None: else: print(C.fail(f" {f}")) - return 0 - def _check_codeql_artifacts(console: Any) -> int: """Validate CodeQL artifacts; block 1b only on hard fail policy.""" @@ -477,9 +479,7 @@ def _run_codeql_repair_if_needed( print(C.warn(unchanged_msg)) plan_digest = next_plan_digest - rc = _run_codeql(console) - if rc != 0: - return rc + _run_codeql(console) if not _codeql_repair_needed(config.abs_output_dir, plan_path): return 0 @@ -867,9 +867,7 @@ def run_phase_1( return gate_rc # ---- CodeQL analysis ---- - rc = _run_codeql(console) - if rc != 0: - return rc + _run_codeql(console) rc = _run_codeql_repair_if_needed( args=args, console=console, From 13d29ce4c05464a5a6eb4cac1bd31cf579fcf1e8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 21:33:36 +0200 Subject: [PATCH 39/47] fix: let soft CodeQL repair failures continue --- tests/test_phase_1_codeql_plan_repair.py | 43 ++++++++++++++++++++++++ tools/codecome/phase_1.py | 7 ++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py index d808d99c..6982948b 100644 --- a/tests/test_phase_1_codeql_plan_repair.py +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -272,3 +272,46 @@ def fake_run_codeql(_console): assert calls[1][0] == "repair-session" assert calls[1][1] is not None assert "Latest CodeQL failure details" in calls[1][1] + + +def test_codeql_repair_loop_does_not_block_after_retries_exhausted(tmp_path: Path, monkeypatch) -> None: + import codecome.phase_1 as p1 + + output_dir = tmp_path / "itemdb" / "codeql" + output_dir.mkdir(parents=True) + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "soft-failed", "failures": ["Database create failed for c-cpp:\nautobuild failed"]}), + encoding="utf-8", + ) + _write_manual_plan(tmp_path, "make") + config = SimpleNamespace(abs_output_dir=output_dir) + + def fake_subphase(**_kwargs): + return p1._SubphaseOutcome(0, "repair-session", tmp_path / "repair.jsonl") + + def fake_run_codeql(_console): + (output_dir / "run-manifest.yml").write_text( + yaml.safe_dump({"status": "soft-failed", "failures": ["Database create failed for c-cpp:\nmanual failed"]}), + encoding="utf-8", + ) + return None + + monkeypatch.setenv("CODEQL_REPAIR_RETRIES", "1") + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch("codeql.config.resolve_config", return_value=config), \ + patch.object(p1, "_run_subphase", side_effect=fake_subphase), \ + patch.object(p1, "_run_codeql", side_effect=fake_run_codeql): + rc = p1._run_codeql_repair_if_needed( + args=object(), + console=None, + rendering_ctx=None, + 
runner=_runner(), + base_url="http://127.0.0.1", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 25ea77fb..310faf91 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -488,14 +488,13 @@ def _run_codeql_repair_if_needed( ) if _codeql_repair_needed(config.abs_output_dir, plan_path): - msg = f"CodeQL database creation still fails after {max_retries} repair attempt(s); blocking Phase 1b." + msg = f"CodeQL database creation still fails after {max_retries} repair attempt(s); continuing to artifact gate." if HAVE_RICH: from rich.text import Text - console.print(Text(msg, style="bold red")) + console.print(Text(msg, style="bold yellow")) else: import _colors as C - print(C.fail(msg)) - return 1 + print(C.warn(msg)) return 0 From fe95ab551761a8c8ddde1a9ad6fbaefe3065dd70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 22:32:57 +0200 Subject: [PATCH 40/47] feat: rename venv-check to env-check, add sandbox status to make check, strengthen CodeQL integration - Rename venv-check -> env-check; add CodeQL binary presence gate with .tools/codeql/.disabled marker managed by make init. - Add sandbox status section to 'make check' (state, gate, capabilities) using importlib to access sandbox-bootstrap module. - Add build_command shape validation rejecting shell operators, multiline, comments, and bash -c in CodeQL build commands. - Add _ensure_query_packs_available() for pack preflight/download with policy-aware failure handling (official=fail, optional=skip under soft). - Guard against false 'completed' when all profiles are skipped. - Update --no-sarif-add-query-help to --sarif-include-query-help=never. - Add AGENTS.md rule 12: no subprocess for internal module communication. 
--- Makefile | 82 +++++---- prompts/phase-1-codeql-repair.md | 4 + tests/test_codeql_runner.py | 210 +++++++++++++++++++++++ tests/test_phase_1_codeql_plan_repair.py | 38 ++++ tools/AGENTS.md | 16 ++ tools/codecome.py | 76 ++++++++ tools/codecome/phase_1.py | 33 ++++ tools/codeql/runner.py | 74 +++++++- tools/phases/completion.py | 4 + 9 files changed, 500 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index afc08e6c..e0c65e0a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Pablo Ruiz García # SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later -.PHONY: help init venv venv-check check status next-id frontmatter tests test-parity itemdb-reset codeql-clean index report +.PHONY: help init venv env-check check status next-id frontmatter tests test-parity itemdb-reset codeql-clean index report .PHONY: findings findings-create findings-move findings-evidence findings-package .PHONY: phase-1 phase-2 phase-3 phase-4 phase-5 phase-6 validate-all exploit-all opencode-raw .PHONY: sandbox-setup sandbox-check sandbox-up sandbox-down sandbox-shell sandbox-logs sandbox-clean sandbox-reset sandbox-build sandbox-test @@ -138,29 +138,39 @@ init: @$(PYTHON) -m pip install --no-input -r requirements.txt || { printf "$(BOLD)$(RED)[FAIL]$(RESET) requirements install failed\n"; exit 1; } @printf "$(BOLD)$(GREEN)[OK]$(RESET) Python dependencies installed\n\n" @printf "$(BOLD)$(CYAN)==> [4/4] Installing managed CodeQL CLI$(RESET)\n" + @rm -f .tools/codeql/.disabled @if [ "$$CODEQL" != "0" ] && [ "$$CODEQL_SKIP_INSTALL" != "1" ]; then \ - $(PYTHON) tools/codeql.py install || { printf "$(BOLD)$(RED)[FAIL]$(RESET) managed CodeQL install failed\n"; exit 1; }; \ - printf "$(BOLD)$(GREEN)[OK]$(RESET) Managed CodeQL CLI ready\n"; \ + if $(PYTHON) -c "import yaml,sys; cfg=yaml.safe_load(open('codecome.yml')); sys.exit(0 if cfg.get('codeql',{}).get('enabled',True) else 1)" 2>/dev/null; then \ + $(PYTHON) tools/codeql.py install || { printf 
"$(BOLD)$(RED)[FAIL]$(RESET) managed CodeQL install failed\n"; exit 1; }; \ + printf "$(BOLD)$(GREEN)[OK]$(RESET) Managed CodeQL CLI ready\n"; \ + else \ + mkdir -p .tools/codeql && touch .tools/codeql/.disabled; \ + printf "$(BOLD)$(YELLOW)[SKIP]$(RESET) CodeQL disabled in codecome.yml\n"; \ + fi; \ else \ + mkdir -p .tools/codeql && touch .tools/codeql/.disabled; \ printf "$(BOLD)$(YELLOW)[SKIP]$(RESET) Managed CodeQL install skipped (CODEQL=0 or CODEQL_SKIP_INSTALL=1)\n"; \ fi @printf "\n$(BOLD)$(GREEN)Setup complete.$(RESET)\n" venv: init -venv-check: +env-check: @test -x "$(PYTHON)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing repo virtualenv at .venv\n\nRun:\n\n make init\n\n" && exit 1) @$(PYTHON) -c "import yaml, rich" >/dev/null 2>&1 || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) .venv is missing required Python packages\n\nRun:\n\n make init\n\nIf you updated requirements, rerun the same command to resync .venv.\n\n" && exit 1) + @if [ ! -f .tools/codeql/.disabled ]; then \ + test -x .tools/codeql/current/codeql || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) CodeQL is enabled but the managed binary is missing.\n\nRun:\n\n make init\n\nOr to explicitly disable CodeQL:\n\n CODEQL=0 make init\n\n" && exit 1); \ + fi # --------------------------------------------------------------------------- # Workflow phases # --------------------------------------------------------------------------- -phase-1: venv-check +phase-1: env-check @$(PYTHON) tools/gate-check.py 1 @$(PYTHON) tools/run-agent.py --phase 1 --label "Phase 1: Reconnaissance" --agent recon -phase-2: venv-check +phase-2: env-check @$(PYTHON) tools/gate-check.py 2 @$(PYTHON) tools/sandbox-bootstrap.py status --gate || ( \ printf "\n$(BOLD)$(YELLOW)[BLOCK]$(RESET) Phase 2 sandbox gate failed.\n" ; \ @@ -169,31 +179,31 @@ phase-2: venv-check exit 1 ) @$(PYTHON) tools/run-agent.py --phase 2 --label "Hypothesis Generation" --agent auditor --prompt-file prompts/phase-2-audit.md -phase-3: venv-check +phase-3: 
env-check @$(PYTHON) tools/gate-check.py 3 @$(PYTHON) tools/run-agent.py --phase 3 --label "Counter-analysis" --agent reviewer --prompt-file prompts/phase-3-review.md -phase-4: venv-check +phase-4: env-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 4 (Validation).\n\nSpecify which finding you want to validate:\n\n $(BOLD)make phase-4 FINDING=CC-0001$(RESET)\n\nTo list available pending findings: $(BOLD)make findings STATUS=PENDING$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 4 $(FINDING) @$(PYTHON) tools/run-agent.py --phase 4 --label "Validation" --agent validator --prompt-file prompts/phase-4-validate.md --finding "$(FINDING)" -phase-5: venv-check +phase-5: env-check @test -n "$(FINDING)" || (printf "\n$(BOLD)$(RED)[FAIL]$(RESET) Missing required FINDING argument for Phase 5 (Exploitation).\n\nSpecify which finding you want to exploit:\n\n $(BOLD)make phase-5 FINDING=CC-0001$(RESET)\n\nTo list available confirmed findings: $(BOLD)make findings STATUS=CONFIRMED$(RESET)\n\n" && exit 1) @$(PYTHON) tools/gate-check.py 5 $(FINDING) @$(PYTHON) tools/run-agent.py --phase 5 --label "Exploit Development" --agent exploiter --prompt-file prompts/phase-5-exploit.md --finding "$(FINDING)" -phase-6: venv-check +phase-6: env-check @$(PYTHON) tools/gate-check.py 6 @$(PYTHON) tools/run-agent.py --phase 6 --label "Reporting" --agent reporter --prompt-file prompts/phase-6-report.md -chat: venv-check +chat: env-check @$(PYTHON) tools/run-agent.py --chat --label "Interactive Chat" --agent $(or $(AGENT),chat) --prompt-file prompts/chat-initial.md $(if $(DEBUG),--debug,) -list-risk-files: venv-check +list-risk-files: env-check @$(PYTHON) tools/list-risk-files.py -sweep: venv-check +sweep: env-check @if [ -n "$(FILE)" ]; then \ $(PYTHON) tools/run-sweep.py --file "$(FILE)"; \ else \ @@ -209,7 +219,7 @@ opencode-raw: @test -r "$(PROMPT_FILE)" || (echo "PROMPT_FILE must be a readable file. 
Usage: make opencode-raw AGENT=auditor PROMPT_FILE=prompts/foo.md" && exit 1) @opencode run --agent "$(AGENT)" $(OPENCODE_THINKING_FLAG) "$$(cat "$(PROMPT_FILE)")" -validate-all: venv-check +validate-all: env-check @ids=$$($(PYTHON) tools/list-findings.py --status PENDING --format ids 2>/dev/null); \ if [ -z "$$ids" ]; then \ echo "No PENDING findings to validate."; \ @@ -222,7 +232,7 @@ validate-all: venv-check $(MAKE) phase-4 FINDING=$$f; \ done -exploit-all: venv-check +exploit-all: env-check @ids=$$($(PYTHON) tools/list-findings.py --status CONFIRMED --eligible-for-exploit --format ids 2>/dev/null); \ if [ -z "$$ids" ]; then \ echo "No eligible CONFIRMED findings to exploit."; \ @@ -239,26 +249,26 @@ exploit-all: venv-check # Workspace tools # --------------------------------------------------------------------------- -check: venv-check +check: env-check $(PYTHON) tools/codecome.py check -status: venv-check +status: env-check $(PYTHON) tools/codecome.py status -next-id: venv-check +next-id: env-check $(PYTHON) tools/codecome.py next-id -frontmatter: venv-check +frontmatter: env-check $(PYTHON) tools/check-frontmatter.py -tests: venv-check +tests: env-check $(PYTHON) -m pytest -q tests $(PYTHON) tools/check-frontmatter.py -test-parity: venv-check +test-parity: env-check $(PYTHON) -m pytest tests/test_mock_llm_parity.py -v -itemdb-reset: venv-check +itemdb-reset: env-check rm -f itemdb/notes/*.md rm -rf itemdb/evidence/CC-* rm -f itemdb/reports/*.md @@ -286,27 +296,27 @@ codeql-clean: rm -rf .cache/codeql rm -rf src/_codeql_detected_source_root -index: venv-check +index: env-check $(PYTHON) tools/render-index.py -report: venv-check +report: env-check $(PYTHON) tools/render-report.py -findings: venv-check +findings: env-check ifdef STATUS $(PYTHON) tools/list-findings.py --status $(STATUS) else $(PYTHON) tools/list-findings.py endif -findings-create: venv-check +findings-create: env-check @test -n "$(strip $(TITLE))" || (printf "TITLE is required. 
Usage: make findings-create TITLE=\"Short descriptive title\" [ARGS='...']\n" && exit 2) $(PYTHON) tools/create-finding.py "$(TITLE)" $(ARGS) -findings-move: venv-check +findings-move: env-check $(PYTHON) tools/move-finding.py $(FINDING) $(STATUS) -findings-evidence: venv-check +findings-evidence: env-check $(PYTHON) tools/create-evidence.py $(FINDING) findings-package: @@ -369,32 +379,32 @@ sandbox-test: # Sandbox bootstrap (Phase 1c) # --------------------------------------------------------------------------- -sandbox-list: venv-check +sandbox-list: env-check @$(PYTHON) tools/sandbox-bootstrap.py list -sandbox-inspect: venv-check +sandbox-inspect: env-check @test -n "$(ID)" || (echo "Usage: make sandbox-inspect ID=" && exit 1) @$(PYTHON) tools/sandbox-bootstrap.py inspect $(ID) -sandbox-detect: venv-check +sandbox-detect: env-check @$(PYTHON) tools/sandbox-bootstrap.py detect -sandbox-bootstrap: venv-check +sandbox-bootstrap: env-check @test -n "$(ID)" || (echo "Usage: make sandbox-bootstrap ID=" && exit 1) @$(PYTHON) tools/sandbox-bootstrap.py apply $(ID) $(BOOTSTRAP_ARGS) -sandbox-validate: venv-check +sandbox-validate: env-check @$(PYTHON) tools/sandbox-bootstrap.py validate $(BOOTSTRAP_ARGS) -sandbox-regenerate: venv-check +sandbox-regenerate: env-check @$(PYTHON) tools/sandbox-bootstrap.py regenerate $(BOOTSTRAP_ARGS) -sandbox-status: venv-check +sandbox-status: env-check @$(PYTHON) tools/sandbox-bootstrap.py status # Print the model that would be picked for a given AGENT (default: recon). 
# Usage: # make show-model # make show-model AGENT=auditor -show-model: venv-check +show-model: env-check @$(PYTHON) tools/run-agent.py --show-model --agent $(or $(AGENT),recon) diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md index 2984b84c..c3a6014d 100644 --- a/prompts/phase-1-codeql-repair.md +++ b/prompts/phase-1-codeql-repair.md @@ -52,6 +52,10 @@ If the manual command is simple enough, put it directly in `build_command` inste - CodeQL runs the manual `build_command` from the analysis unit source path. - CodeQL does not run `build_command` from the workspace root or from the helper script directory. +- CodeQL tokenizes `build_command` as argv; it does not execute it as a shell script. +- Do not put shell control syntax in `build_command`: no `&&`, `||`, `;`, pipes, comments, multi-line commands, or `bash -c` / `sh -c` snippets. +- Good direct commands: `make`, `make -C challenge`, `gcc main.c -o app`. +- If more than one command is needed, create a helper script under workspace-relative `tmp/` and set `build_command` to invoke it from the analysis unit source path, for example `bash ../../tmp/codeql-build.sh`. - Prefer commands that are deterministic and non-interactive. - Prefer commands that avoid modifying `src/` when possible. - If existing target build files naturally write object files or binaries into `src/`, document that limitation in the `notes` field. 
diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index ee9190af..43164502 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -362,3 +362,213 @@ def test_run_codeql_skips_unsupported_languages_soft_policy(tmp_path: Path) -> N assert manifest["status"] == "skipped" assert "elixir" in manifest["warnings"][0] + + +def test_run_codeql_downloads_and_skips_unavailable_optional_profile_under_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n official:\n - codeql/cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official", "github-security-lab"], + "profile_packs": { + "official": ["codeql/cpp-queries"], + "github-security-lab": ["githubsecuritylab/codeql-cpp-queries"], + }, + } + ], + } + ] + } + + def fake_run_quiet(cmd, timeout): + joined = " ".join(cmd) + if "githubsecuritylab/codeql-cpp-queries" in joined: + return False, "pack missing" + return True, "" + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", 
return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["official", "github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze", return_value=(True, "")) as analyze, \ + patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + manifest = run_codeql(config) + + assert manifest["status"] == "completed" + assert any("githubsecuritylab/codeql-cpp-queries" in warning for warning in manifest["warnings"]) + assert analyze.call_count == 1 + assert analyze.call_args.args[2] == ["codeql/cpp-queries"] + + +def test_run_codeql_fails_unavailable_official_profile_under_soft_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n official:\n - codeql/cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["official"], + "profile_packs": {"official": ["codeql/cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + 
patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["official"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert "required official profile" in manifest["failures"][0] + analyze.assert_not_called() + + +def test_run_codeql_fails_unavailable_optional_profile_under_hard_policy(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n github-security-lab:\n - githubsecuritylab/codeql-cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="hard", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["github-security-lab"], + "profile_packs": {"github-security-lab": ["githubsecuritylab/codeql-cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + patch("codeql.runner._get_codeql_version", 
return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "failed" + assert "optional profile 'github-security-lab'" in manifest["failures"][0] + analyze.assert_not_called() + + +def test_run_codeql_soft_fails_when_all_profiles_are_skipped(tmp_path: Path) -> None: + binary = tmp_path / ".tools" / "codeql" / "current" / "codeql" + binary.parent.mkdir(parents=True) + binary.write_text("", encoding="utf-8") + plan_path = tmp_path / "itemdb" / "notes" / "codeql-plan.yml" + plan_path.parent.mkdir(parents=True) + plan_path.write_text("schema_version: 1\nanalysis_units: []\n", encoding="utf-8") + catalog_path = tmp_path / "templates" / "codeql-packs.yml" + catalog_path.parent.mkdir(parents=True) + catalog_path.write_text("schema_version: 1\npacks:\n c-cpp:\n github-security-lab:\n - githubsecuritylab/codeql-cpp-queries\n", encoding="utf-8") + + config = CodeQLConfig( + enabled=True, + fail_policy="soft", + abs_install_path=binary, + abs_pack_catalog=catalog_path, + abs_output_dir=tmp_path / "itemdb" / "codeql", + abs_database_dir=tmp_path / "itemdb" / "codeql" / "databases", + ) + resolved = { + "analysis_units": [ + { + "id": "root", + "path": "./src", + "languages": [ + { + "id": "c-cpp", + "profiles": ["github-security-lab"], + "profile_packs": {"github-security-lab": ["githubsecuritylab/codeql-cpp-queries"]}, + } + ], + } + ] + } + + with patch("codeql.runner.ROOT", tmp_path), \ + 
patch("codeql.runner._get_codeql_version", return_value="2.25.5"), \ + patch("codeql.runner.load_pack_catalog", return_value={}), \ + patch("codeql.runner.load_codeql_plan", return_value={"analysis_units": [{"id": "root", "path": "./src", "languages": [{"id": "c-cpp", "build_mode": "autobuild", "packs": ["github-security-lab"]}]}]}), \ + patch("codeql.runner.resolve_plan_packs", return_value=resolved), \ + patch("codeql.runner._create_database", return_value=(True, "")), \ + patch("codeql.runner._run_analyze") as analyze, \ + patch("codeql.runner._run_quiet", return_value=(False, "pack missing")): + manifest = run_codeql(config) + + assert manifest["status"] == "soft-failed" + assert "No CodeQL query profiles ran successfully" in manifest["failures"][0] + analyze.assert_not_called() diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py index 6982948b..54250a1a 100644 --- a/tests/test_phase_1_codeql_plan_repair.py +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -189,6 +189,44 @@ def test_codeql_plan_validation_rejects_absolute_tmp_in_build_command(tmp_path: assert "absolute /tmp/" in output +def test_codeql_plan_validation_rejects_shell_operators_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "mkdir -p out && gcc main.c -o out/app") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "shell operator" in output + assert "helper script" in output + + +def test_codeql_plan_validation_rejects_multiline_and_comments_in_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "# build\nmkdir -p out\ngcc main.c -o out/app") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "multi-line" in output + assert "shell comments" in output + + +def 
test_codeql_plan_validation_rejects_bash_c_build_command(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + _write_manual_plan(tmp_path, "bash -c 'mkdir -p out && gcc main.c -o out/app'") + + with patch.object(p1, "ROOT", tmp_path): + rc, output = p1._validate_codeql_plan_for_repair() + + assert rc == 1 + assert "bash -c" in output + + def test_codeql_plan_validation_checks_helper_from_analysis_root(tmp_path: Path) -> None: import codecome.phase_1 as p1 diff --git a/tools/AGENTS.md b/tools/AGENTS.md index 8018f8fb..a5d5f9e7 100644 --- a/tools/AGENTS.md +++ b/tools/AGENTS.md @@ -213,3 +213,19 @@ Avoid circular imports. When two packages need each other, prefer callable injec - Event loops are tested with deterministic event generators — not live OpenCode servers. - CLI and wrapper compatibility is verified with `--help` and `--show-model` smoke tests. - Thin wrappers must remain thin — their only responsibility is delegation. + +### 12. No subprocess for internal module communication + +CodeCome tools must **never** shell out via `subprocess` to invoke other CodeCome Python scripts. Instead, import the target module's functions directly: + +```python +# BAD — subprocess call to another CodeCome script +result = subprocess.run([sys.executable, "tools/sandbox-bootstrap.py", "status", "--format", "json"], ...) + +# GOOD — direct import (use importlib for hyphenated module names) +import importlib +sb = importlib.import_module("sandbox-bootstrap") +provenance = sb.read_provenance() +``` + +Subprocess is acceptable only for invoking **external** tools (codeql, docker, git, asciinema, etc.) that are not part of the CodeCome Python codebase. 
diff --git a/tools/codecome.py b/tools/codecome.py index 0e23c650..e94b1a34 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -524,6 +524,81 @@ def check_codeql_status() -> int: return exit_code +def check_sandbox_status() -> None: + """Print sandbox state, gate result, and capability summary.""" + import importlib + + try: + sb = importlib.import_module("sandbox-bootstrap") + except Exception: + print() + print(C.header("Sandbox:")) + print(C.warn("sandbox-bootstrap module unavailable")) + return + + print() + print(C.header("Sandbox:")) + + provenance = sb.read_provenance() + has_user_content = sb.sandbox_has_user_content() + last_validation = sb._last_validation_outcome() + allow_no_sandbox = bool(os.environ.get("CODECOME_ALLOW_NO_SANDBOX")) + + # Determine state + if provenance is not None: + sandbox_state = "generated" + elif has_user_content: + sandbox_state = "user-managed" + else: + sandbox_state = "missing" + + # Gate logic (mirrors cmd_status) + if allow_no_sandbox: + gate_pass = True + gate_reason = "override (CODECOME_ALLOW_NO_SANDBOX=1)" + elif sandbox_state == "missing": + gate_pass = False + gate_reason = "sandbox is missing" + elif sandbox_state == "generated" and last_validation == "failed": + gate_pass = False + gate_reason = "last validation failed" + elif sandbox_state == "generated" and last_validation == "skipped": + gate_pass = False + gate_reason = "last validation has no real outcomes (all tiers skipped)" + else: + gate_pass = True + if sandbox_state == "user-managed": + gate_reason = "sandbox is user-managed (validation not enforced)" + elif last_validation is None: + gate_reason = "no validation run on record" + elif last_validation == "passed": + gate_reason = "last validation passed" + elif last_validation == "mixed": + gate_reason = "last validation passed (some tiers skipped)" + else: + gate_reason = f"last validation: {last_validation}" + + # Print summary + state_detail = sandbox_state + if sandbox_state == "generated" and 
provenance: + state_detail = "generated (provenance present)" + print(f" {C.DIM}state:{C.RESET} {state_detail}") + print(f" {C.DIM}last validation:{C.RESET} {last_validation or '-'}") + if gate_pass: + print(C.ok(f" Phase 2 gate: pass ({gate_reason})")) + else: + print(C.warn(f" Phase 2 gate: block ({gate_reason})")) + + # Capabilities + capability_status = sb._capability_status() + print(f" {C.DIM}capabilities:{C.RESET}") + for name in ("setup", "start", "check", "build", "test", "stop", "shell", "logs", "clean", "reset"): + status = capability_status[name] + satisfied = status.get("satisfied", False) + state_str = C.ok("ok") if satisfied else C.warn("missing") + print(f" {name:<8} {state_str} {status['path']}") + + def command_check(_: argparse.Namespace) -> int: missing = [] @@ -556,6 +631,7 @@ def command_check(_: argparse.Namespace) -> int: check_phase_progress() check_exit = check_codeql_status() + check_sandbox_status() print() diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 310faf91..9f9d12dc 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -289,6 +289,7 @@ def _validate_codeql_build_command(build_command: str, analysis_root: Path, cont errors.append(f"{context}: build_command uses absolute /tmp/; use workspace-relative tmp/ instead") if str(ROOT) in build_command: errors.append(f"{context}: build_command embeds the absolute workspace path {ROOT}") + errors.extend(_validate_codeql_build_command_shape(build_command, context)) try: tokens = shlex.split(build_command) @@ -322,6 +323,38 @@ def _validate_codeql_build_command(build_command: str, analysis_root: Path, cont return errors +def _validate_codeql_build_command_shape(build_command: str, context: str) -> list[str]: + """Reject shell-script constructs because CodeQL tokenizes build_command as argv.""" + errors: list[str] = [] + if "\n" in build_command: + errors.append( + f"{context}: build_command is multi-line; CodeQL tokenizes build_command instead of 
running it as a shell script. " + "Move multi-step logic into a helper script under tmp/ and invoke it with a single command such as `bash ../../tmp/codeql-build.sh`." + ) + if re.search(r"(^|\s)#", build_command): + errors.append( + f"{context}: build_command contains shell comments; CodeQL passes comments as literal argv tokens. " + "Move comments and multi-step logic into a helper script under tmp/." + ) + for operator in ("&&", ";", "|", "||"): + if operator in build_command: + errors.append( + f"{context}: build_command contains shell operator {operator!r}; CodeQL tokenizes build_command, it is not shell-interpreted. " + "Use a helper script under tmp/ for compound commands." + ) + break + try: + tokens = shlex.split(build_command) + except ValueError: + return errors + if len(tokens) >= 3 and tokens[0] in {"bash", "sh"} and tokens[1] == "-c": + errors.append( + f"{context}: build_command uses `{tokens[0]} -c`; CodeQL command tokenization makes nested shell snippets fragile. " + "Write the snippet to a helper script under tmp/ and invoke that script instead." 
+ ) + return errors + + def _contains_absolute_tmp(text: str) -> bool: """Return whether text contains an absolute /tmp path, not a relative tmp/ component.""" return re.search(r"(^|[\s\"'=])/(tmp)(/|$)", text) is not None diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index a8dd4fc0..ebae69dc 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -63,6 +63,7 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No failures: list[str] = [] language_ids: list[str] = [] analysis_units: list[str] = [] + analyzed_profiles = 0 for unit_entry in resolved["analysis_units"]: unit_id = unit_entry["id"] @@ -104,12 +105,26 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No packs = profile_packs.get(profile, []) if not packs: continue + ok, msg = _ensure_query_packs_available(binary_path, packs, profile, config, progress) + if not ok: + if config.fail_policy == "soft" and profile != "official": + warnings.append(msg) + _progress(progress, f"CodeQL: {msg}") + continue + failures.append(msg) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + sarif_path = sarif_dir / f"{unit_id}.{language_id}.{profile}.sarif" _progress(progress, f"CodeQL: analyzing {unit_id}:{language_id} profile {profile}") ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path, timeout=analyze_timeout, progress=progress) if not ok: + if config.fail_policy == "soft" and profile != "official": + warnings.append(msg) + _progress(progress, f"CodeQL: {msg}") + continue failures.append(msg) return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + analyzed_profiles += 1 _progress(progress, f"CodeQL: SARIF written {_rel(sarif_path)}") if failures: @@ -120,6 +135,10 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No failures=["No languages resolved 
from analysis plan."], languages=language_ids, analysis_units=analysis_units) + if analyzed_profiles == 0: + failures.append("No CodeQL query profiles ran successfully.") + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) + return _manifest("completed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) @@ -236,13 +255,66 @@ def _run_analyze( str(db_dir), "--format=sarif-latest", f"--output={sarif_path}", - "--no-sarif-add-query-help", + "--sarif-include-query-help=never", ] + packs return _run_with_progress(cmd, f"Analyze timed out for {db_dir.name} after {timeout}s", f"Analyze failed for {db_dir.name}", timeout, progress) +def _ensure_query_packs_available( + binary: Path, + packs: list[str], + profile: str, + config: CodeQLConfig, + progress: Callable[[str], None] | None = None, +) -> tuple[bool, str]: + """Resolve query packs, downloading registry packs once when missing.""" + ok, detail = _run_quiet([str(binary), "resolve", "queries", "--format=json", "--", *packs], timeout=120) + if ok: + return True, "" + + downloadable = [pack for pack in packs if _is_registry_pack_ref(pack)] + for pack in downloadable: + _progress(progress, f"CodeQL: downloading query pack {pack}") + download_ok, download_detail = _run_quiet([str(binary), "pack", "download", pack], timeout=300) + if not download_ok: + detail = download_detail or detail + return False, _pack_failure_message(profile, packs, detail, config) + + if downloadable: + ok, detail = _run_quiet([str(binary), "resolve", "queries", "--format=json", "--", *packs], timeout=120) + if ok: + return True, "" + + return False, _pack_failure_message(profile, packs, detail, config) + + +def _is_registry_pack_ref(pack: str) -> bool: + """Return whether a pack reference can be downloaded from a registry.""" + if pack.startswith((".", "/")): + return False + return "/" in pack + + +def _pack_failure_message(profile: str, packs: 
list[str], detail: str, config: CodeQLConfig) -> str: + policy = "required official profile" if profile == "official" else f"optional profile {profile!r}" + action = "failing CodeQL step" if config.fail_policy == "hard" or profile == "official" else "skipping profile" + suffix = f":\n{detail}" if detail else "" + return f"CodeQL query packs unavailable for {policy} ({', '.join(packs)}); {action}{suffix}" + + +def _run_quiet(cmd: list[str], timeout: int) -> tuple[bool, str]: + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout) + except Exception as exc: + return False, str(exc) + if result.returncode == 0: + return True, "" + detail = (result.stderr or result.stdout).strip() + return False, detail + + def _run_with_progress( cmd: list[str], timeout_msg_prefix: str, diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 3d2d51f7..e693983a 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -257,6 +257,10 @@ def build_codeql_build_failure_resume_prompt(validation_output: str) -> str: "to the analysis unit source path (`analysis_units[].path`). It is not run from the workspace root, and it " "is not run from the helper script directory. If a helper script changes directory, it must do so based on " "the analysis source root or explicit paths that work from that source root.\n\n" + "CodeQL tokenizes `build_command` as argv; it does not execute it as a shell script. Do not put shell " + "control syntax in `build_command`: no `&&`, `||`, `;`, pipes, comments, multi-line commands, or " + "`bash -c` / `sh -c` snippets. If more than one command is needed, create a helper script under " + "workspace-relative `tmp/` and set `build_command` to invoke it, for example `bash ../../tmp/codeql-build.sh`.\n\n" "Do not use absolute `/tmp/` paths. Use workspace-relative `tmp/` paths. 
Do not embed this workspace's " "absolute path in `build_command`; prefer paths relative to the analysis unit source path.\n\n" "Before ending, verify that the plan is valid YAML, that referenced helper scripts exist, and that shell " From 4357645ab44d2caf2407f74d1517c98c22b656d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 22:43:40 +0200 Subject: [PATCH 41/47] fix(codeql): prevent early abort on soft policy and rename misleading test --- tests/test_codecome_check_codeql.py | 2 +- tools/codeql/runner.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_codecome_check_codeql.py b/tests/test_codecome_check_codeql.py index 21ddbdce..2148c88d 100644 --- a/tests/test_codecome_check_codeql.py +++ b/tests/test_codecome_check_codeql.py @@ -245,7 +245,7 @@ def test_codeql_repair_needed_after_manual_database_failure(tmp_path: Path) -> N assert _codeql_repair_needed(output_dir, plan_path) is True -def test_phase_1_reruns_codeql_after_repair() -> None: +def test_phase_1_pipeline_structure() -> None: _ensure_codecome_package() import codecome.phase_1 as p1 diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index ebae69dc..4d21c923 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -98,6 +98,9 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns, timeout=db_timeout, progress=progress) if not ok: failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) _progress(progress, f"CodeQL: database ready {unit_id}:{language_id}") @@ -112,6 +115,9 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No _progress(progress, 
f"CodeQL: {msg}") continue failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) sarif_path = sarif_dir / f"{unit_id}.{language_id}.{profile}.sarif" @@ -123,12 +129,15 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No _progress(progress, f"CodeQL: {msg}") continue failures.append(msg) + if config.fail_policy == "soft": + _progress(progress, f"CodeQL: {msg}") + continue return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) analyzed_profiles += 1 _progress(progress, f"CodeQL: SARIF written {_rel(sarif_path)}") if failures: - return _manifest("failed", now_utc, config, [version], warnings, failures, language_ids, analysis_units) + return _manifest(_tool_failure_status(config), now_utc, config, [version], warnings, failures, language_ids, analysis_units) if not language_ids: return _manifest("skipped", now_utc, config, [version], warnings, From 7027bf4f613cf03ee7e05702128eba86589823f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 22:53:26 +0200 Subject: [PATCH 42/47] feat(codeql): expose check-codeql-plan in cli to fix agent validation hallucination --- prompts/phase-1-codeql-repair.md | 6 +++++- prompts/phase-1a-profile.md | 6 ++++++ tools/codecome.py | 11 +++++++++++ tools/phases/completion.py | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/prompts/phase-1-codeql-repair.md b/prompts/phase-1-codeql-repair.md index c3a6014d..a31fc614 100644 --- a/prompts/phase-1-codeql-repair.md +++ b/prompts/phase-1-codeql-repair.md @@ -74,4 +74,8 @@ Make the repair directly in files. At the end, summarize: - any helper script created, - the exact manual build command CodeQL will run next. 
-Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid YAML and still follows the CodeQL plan schema. Also verify that any referenced helper shell script exists and passes syntax-only validation. If validation fails, repair only the reported YAML/schema/helper issue before summarizing. +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid and follows CodeCome rules by running: + + rtk python3 tools/codecome.py check-codeql-plan + +If validation fails, repair only the reported issue before summarizing. diff --git a/prompts/phase-1a-profile.md b/prompts/phase-1a-profile.md index 222f5ba6..00074606 100644 --- a/prompts/phase-1a-profile.md +++ b/prompts/phase-1a-profile.md @@ -108,3 +108,9 @@ At the end, summarize: - Languages selected for CodeQL analysis and their confidence levels - Files created: `target-profile.md`, `build-model.md`, `codeql-plan.yml` - Key uncertainties or blockers + +Before ending, validate that `itemdb/notes/codeql-plan.yml` is valid and follows CodeCome rules by running: + + rtk python3 tools/codecome.py check-codeql-plan + +If validation fails, repair only the reported issue before summarizing. 
diff --git a/tools/codecome.py b/tools/codecome.py index e94b1a34..77a0569d 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -699,6 +699,14 @@ def command_next_id(_: argparse.Namespace) -> int: return 0 +def command_check_codeql_plan(_: argparse.Namespace) -> int: + from codecome.phase_1 import _validate_codeql_plan_for_repair + rc, out = _validate_codeql_plan_for_repair() + if out: + print(out) + return rc + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="codecome", @@ -716,6 +724,9 @@ def build_parser() -> argparse.ArgumentParser: next_id_parser = subparsers.add_parser("next-id", help="Print the next available finding id.") next_id_parser.set_defaults(func=command_next_id) + check_plan_parser = subparsers.add_parser("check-codeql-plan", help="Validate itemdb/notes/codeql-plan.yml") + check_plan_parser.set_defaults(func=command_check_codeql_plan) + return parser diff --git a/tools/phases/completion.py b/tools/phases/completion.py index e693983a..67b895a2 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -241,7 +241,7 @@ def build_codeql_plan_resume_prompt(validation_output: str) -> str: "Repair only `itemdb/notes/codeql-plan.yml` with the smallest change needed. Do not redo unrelated " "reconnaissance or modify target source code. Preserve the existing analysis units, pack selections, " "manual build commands, and notes unless a reported validation error requires changing them.\n\n" - "Before ending, verify that `itemdb/notes/codeql-plan.yml` is valid YAML and passes CodeQL plan validation." + "Before ending, verify that the repaired plan passes local validation by running `rtk python3 tools/codecome.py check-codeql-plan`." 
) From 531bfe686ff4a2c848b687167e1c4167c430f089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 23:12:13 +0200 Subject: [PATCH 43/47] feat(codeql): isolate CodeQL package cache to workspace .cache/codeql All CodeQL subprocess calls (database create, database analyze, resolve queries, pack download, and the check CLI) now pass --common-caches=/.cache/codeql so that CodeQL stores downloaded packs in the workspace-local cache instead of the global ~/.codeql/packages/ cache. This prevents a corrupted global cache entry (e.g. ~/.codeql/packages/codeql/python-queries/1.8.3) from breaking database creation in any workspace that shares the same user profile. Changes: - runner.py: add cache_dir param to _create_database and _run_analyze; all resolve/pack-download calls now go through _codeql_pack_cmd() which injects --common-caches before the '--' separator - tools/codeql.py: _cmd_check resolve qlpacks also uses the workspace cache - test_codeql_runner.py: 4 new tests covering cache propagation in db create, analyze, resolve queries, and lazy pack download --- tests/test_codeql_runner.py | 116 +++++++++++++++++++++++++++++++++++- tools/codeql.py | 2 +- tools/codeql/runner.py | 64 ++++++++++++++++++-- 3 files changed, 174 insertions(+), 8 deletions(-) diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 43164502..6e6de134 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -9,7 +9,16 @@ sys.path.insert(0, str(ROOT / "tools")) from codeql.config import CodeQLConfig -from codeql.runner import _create_database, _lookup_build, _lookup_timeout, _manifest, run_codeql, write_manifest +from codeql.runner import ( + _create_database, + _ensure_query_packs_available, + _lookup_build, + _lookup_timeout, + _manifest, + _run_analyze, + run_codeql, + write_manifest, +) def test_manifest_completed() -> None: @@ -148,6 +157,111 @@ def test_create_database_creates_parent_dir(tmp_path: Path) -> 
None: assert mock_popen.call_args.kwargs["stdout"] == subprocess.DEVNULL +def test_create_database_uses_workspace_common_cache(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "python" + cache_dir = tmp_path / ".cache" / "codeql" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _create_database( + tmp_path / "codeql", + "python", + "./src", + db_dir, + "none", + None, + [], + cache_dir, + ) + + assert ok is True + assert msg == "" + assert f"--common-caches={cache_dir}" in mock_popen.call_args.args[0] + assert cache_dir.is_dir() + + +def test_run_analyze_uses_workspace_common_cache(tmp_path: Path) -> None: + db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "python" + sarif_path = tmp_path / "itemdb" / "codeql" / "sarif" / "root.python.official.sarif" + cache_dir = tmp_path / ".cache" / "codeql" + mock_process = MagicMock() + mock_process.returncode = 0 + mock_process.wait.return_value = 0 + mock_process.stderr = [] + + with patch("codeql.runner.subprocess.Popen", return_value=mock_process) as mock_popen: + ok, msg = _run_analyze( + tmp_path / "codeql", + db_dir, + ["codeql/python-queries"], + sarif_path, + cache_dir, + ) + + assert ok is True + assert msg == "" + cmd = mock_popen.call_args.args[0] + assert f"--common-caches={cache_dir}" in cmd + assert cmd[-1] == "codeql/python-queries" + assert cache_dir.is_dir() + + +def test_query_pack_resolution_uses_workspace_common_cache(tmp_path: Path) -> None: + binary = tmp_path / "codeql" + cache_dir = tmp_path / ".cache" / "codeql" + config = CodeQLConfig(enabled=True, fail_policy="soft", abs_cache_dir=cache_dir) + commands: list[list[str]] = [] + + def fake_run_quiet(cmd, timeout): + commands.append(cmd) + return True, "" + + with patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + 
ok, msg = _ensure_query_packs_available(binary, ["codeql/python-queries"], "official", config) + + assert ok is True + assert msg == "" + assert commands == [[ + str(binary), + "resolve", + "queries", + "--format=json", + f"--common-caches={cache_dir}", + "--", + "codeql/python-queries", + ]] + assert cache_dir.is_dir() + + +def test_query_pack_download_uses_workspace_common_cache(tmp_path: Path) -> None: + binary = tmp_path / "codeql" + cache_dir = tmp_path / ".cache" / "codeql" + config = CodeQLConfig(enabled=True, fail_policy="soft", abs_cache_dir=cache_dir) + commands: list[list[str]] = [] + + def fake_run_quiet(cmd, timeout): + commands.append(cmd) + return (False, "pack missing") if len(commands) == 1 else (True, "") + + with patch("codeql.runner._run_quiet", side_effect=fake_run_quiet): + ok, msg = _ensure_query_packs_available(binary, ["codeql/python-queries"], "official", config) + + assert ok is True + assert msg == "" + assert commands[1] == [ + str(binary), + "pack", + "download", + f"--common-caches={cache_dir}", + "--", + "codeql/python-queries", + ] + assert commands[2] == commands[0] + + def test_create_database_manual_build_mode_and_command(tmp_path: Path) -> None: db_dir = tmp_path / "itemdb" / "codeql" / "databases" / "root" / "c-cpp" mock_process = MagicMock() diff --git a/tools/codeql.py b/tools/codeql.py index ac74b35c..593a7375 100644 --- a/tools/codeql.py +++ b/tools/codeql.py @@ -68,7 +68,7 @@ def _cmd_check() -> int: print("Checking pack resolution …") try: result = subprocess.run( - [str(binary_path), "resolve", "qlpacks"], + [str(binary_path), "resolve", "qlpacks", f"--common-caches={config.abs_cache_dir}"], capture_output=True, text=True, timeout=60, diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 4d21c923..89d9d1f5 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -95,7 +95,18 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No sarif_dir.mkdir(parents=True, 
exist_ok=True) _progress(progress, f"CodeQL: creating database {unit_id}:{language_id} ({build_mode})") - ok, msg = _create_database(binary_path, language_id, source_path, db_dir, build_mode, build_command, exclude_patterns, timeout=db_timeout, progress=progress) + ok, msg = _create_database( + binary_path, + language_id, + source_path, + db_dir, + build_mode, + build_command, + exclude_patterns, + config.abs_cache_dir, + timeout=db_timeout, + progress=progress, + ) if not ok: failures.append(msg) if config.fail_policy == "soft": @@ -122,7 +133,15 @@ def run_codeql(config: CodeQLConfig, progress: Callable[[str], None] | None = No sarif_path = sarif_dir / f"{unit_id}.{language_id}.{profile}.sarif" _progress(progress, f"CodeQL: analyzing {unit_id}:{language_id} profile {profile}") - ok, msg = _run_analyze(binary_path, db_dir, packs, sarif_path, timeout=analyze_timeout, progress=progress) + ok, msg = _run_analyze( + binary_path, + db_dir, + packs, + sarif_path, + config.abs_cache_dir, + timeout=analyze_timeout, + progress=progress, + ) if not ok: if config.fail_policy == "soft" and profile != "official": warnings.append(msg) @@ -208,6 +227,7 @@ def _create_database( build_mode: str, build_command: str | None, exclude_patterns: list[str], + cache_dir: Path | None = None, timeout: int = 600, progress: Callable[[str], None] | None = None, ) -> tuple[bool, str]: @@ -222,6 +242,7 @@ def _create_database( "--overwrite", "--no-run-unnecessary-builds", ] + _add_common_caches(cmd, cache_dir) if build_mode == "none": cmd += ["--build-mode=none"] @@ -255,6 +276,7 @@ def _run_analyze( db_dir: Path, packs: list[str], sarif_path: Path, + cache_dir: Path | None = None, timeout: int = 600, progress: Callable[[str], None] | None = None, ) -> tuple[bool, str]: @@ -265,7 +287,9 @@ def _run_analyze( "--format=sarif-latest", f"--output={sarif_path}", "--sarif-include-query-help=never", - ] + packs + ] + _add_common_caches(cmd, cache_dir) + cmd += packs return _run_with_progress(cmd, 
f"Analyze timed out for {db_dir.name} after {timeout}s", f"Analyze failed for {db_dir.name}", timeout, progress) @@ -279,20 +303,29 @@ def _ensure_query_packs_available( progress: Callable[[str], None] | None = None, ) -> tuple[bool, str]: """Resolve query packs, downloading registry packs once when missing.""" - ok, detail = _run_quiet([str(binary), "resolve", "queries", "--format=json", "--", *packs], timeout=120) + ok, detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "resolve", "queries", "--format=json", "--", *packs), + timeout=120, + ) if ok: return True, "" downloadable = [pack for pack in packs if _is_registry_pack_ref(pack)] for pack in downloadable: _progress(progress, f"CodeQL: downloading query pack {pack}") - download_ok, download_detail = _run_quiet([str(binary), "pack", "download", pack], timeout=300) + download_ok, download_detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "pack", "download", "--", pack), + timeout=300, + ) if not download_ok: detail = download_detail or detail return False, _pack_failure_message(profile, packs, detail, config) if downloadable: - ok, detail = _run_quiet([str(binary), "resolve", "queries", "--format=json", "--", *packs], timeout=120) + ok, detail = _run_quiet( + _codeql_pack_cmd(binary, config.abs_cache_dir, "resolve", "queries", "--format=json", "--", *packs), + timeout=120, + ) if ok: return True, "" @@ -306,6 +339,25 @@ def _is_registry_pack_ref(pack: str) -> bool: return "/" in pack +def _add_common_caches(cmd: list[str], cache_dir: Path | None) -> None: + """Append CodeQL's workspace-local common cache option when configured.""" + if cache_dir is None or str(cache_dir) in {"", "."}: + return + cache_dir.mkdir(parents=True, exist_ok=True) + option = f"--common-caches={cache_dir}" + if "--" in cmd: + cmd.insert(cmd.index("--"), option) + else: + cmd.append(option) + + +def _codeql_pack_cmd(binary: Path, cache_dir: Path | None, *args: str) -> list[str]: + """Build a CodeQL 
command that uses the workspace-local common cache.""" + cmd = [str(binary), *args] + _add_common_caches(cmd, cache_dir) + return cmd + + def _pack_failure_message(profile: str, packs: list[str], detail: str, config: CodeQLConfig) -> str: policy = "required official profile" if profile == "official" else f"optional profile {profile!r}" action = "failing CodeQL step" if config.fail_policy == "hard" or profile == "official" else "skipping profile" From 426f2c1434130478152b764a450b60cab904040d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Sun, 31 May 2026 23:48:21 +0200 Subject: [PATCH 44/47] Address PR #29 review feedback - Narrow except Exception to ImportError in check_codeql_status() - Guard stale risk signal import with normalized_ok flag in pipeline - Fail fast on manual build_mode without build_command in runner - Align conflicting overwrite policy wording in phase-1c-sandbox prompt - Add TODO markers for deferred refactors (#30, #31) - Update README: add CodeQL integration section, make init, phase-1 subphases --- README.md | 16 ++++++++++++++-- prompts/phase-1c-sandbox.md | 2 +- tools/chat/app.py | 1 + tools/codecome.py | 4 ++-- tools/codeql/pipeline.py | 6 ++++-- tools/codeql/runner.py | 4 +++- 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 36081c20..ff8c16ad 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,8 @@ CodeCome runs on top of [OpenCode](https://opencode.ai), an open-source AI codin 3. **Python 3.10+** — needed for workspace tooling (`make venv` creates a local virtualenv). 4. **GNU Make** — drives the workflow. 5. **Docker** — required for the sandboxed validation environment. -6. **Optional: exploit recording tools** — for Phase 5 visual evidence: +6. **Optional: CodeQL CLI** — for static analysis integration. Managed install via `make init`, or set `CODEQL_SKIP=1` to skip. +7. 
**Optional: exploit recording tools** — for Phase 5 visual evidence: - `asciinema` — terminal recordings. - `agg` — renders `.cast` files to GIFs (CodeCome falls back to a Docker container if missing). - `ffmpeg` and `xvfb` (or `xvfb-run`) — for GUI/browser exploits. @@ -121,6 +122,7 @@ A few things to know up front about `src/`: When you're ready: make venv # set up the local Python virtualenv + make init # install CodeQL CLI (optional, skip with CODEQL_SKIP=1) make check # sanity-check the workspace make phase-1 # recon + sandbox bootstrap make phase-2 # generate candidate findings @@ -135,7 +137,7 @@ There are convenience targets too — `make validate-all`, `make exploit-all`, ` Six phases. Each one is a `make` target. Each one writes to disk. -1. **Recon (`make phase-1`)** — agent reads `src/`, infers the target type, languages, build model, attack surface, and writes notes under `itemdb/notes/`. Also bootstraps a Docker sandbox suited to the stack. +1. **Recon (`make phase-1`)** — runs as three subphases: (1a) target profiling and CodeQL plan generation, (1b) CodeQL-assisted reconnaissance using static analysis signals, and (1c) sandbox bootstrap. Writes notes under `itemdb/notes/` including a file-risk-index informed by CodeQL findings. 2. **Hypothesis (`make phase-2`)** — agent writes candidate findings under `itemdb/findings/PENDING/`. Each one points at specific code, sources, sinks, and a trust boundary. 3. **Counter-analysis (`make phase-3`)** — a reviewer pass tries to disprove or deduplicate findings. Weak ones move to `REJECTED/`, repeats to `DUPLICATE/`. 4. **Validation (`make phase-4 FINDING=CC-XXXX`)** — one finding at a time, in the sandbox. Build the target, write a small PoC, capture evidence, decide CONFIRMED or REJECTED. @@ -159,6 +161,16 @@ stateDiagram-v2 Phases 1–3 are batch operations. Phases 4 and 5 are run **per finding** — that's intentional. 
One finding at a time keeps evidence traceable and lets you mix model choices, prompt overrides, and rerun loops without polluting the audit. +## CodeQL integration + +CodeCome integrates GitHub's [CodeQL](https://codeql.github.com/) as an optional first-class static-analysis capability during Phase 1. + +- **Managed install** — `make init` (or `tools/codeql.py install`) downloads and manages the CodeQL CLI bundle under `.tools/codeql/`. +- **Automatic language detection** — Phase 1a generates `itemdb/notes/codeql-plan.yml` with detected languages and build modes. +- **SARIF normalization** — raw CodeQL results are normalized into `file-signals.yml`, which feeds into the `file-risk-index.yml` used by Phase 1b recon. +- **Configuration** — controlled via `codecome.yml` under `audit.static_analysis.codeql` (enable/disable, pack selection, fail policy, timeouts). +- **Opt-out** — set `CODEQL_SKIP=1` or `enabled: false` in config to skip CodeQL entirely. + ## Who is this for? - **Solo security researchers** who want LLM help on source-code audits but refuse to trust an opaque chat session. diff --git a/prompts/phase-1c-sandbox.md b/prompts/phase-1c-sandbox.md index c7215a5a..077f3780 100644 --- a/prompts/phase-1c-sandbox.md +++ b/prompts/phase-1c-sandbox.md @@ -111,7 +111,7 @@ Read the following files (all paths are relative to the project/workspace root): ## Important rules - Do not modify files under `src/`. -- Do not silently overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. If the sandbox already works, move on; if it needs replacement, inform the user, halt with the halt protocol, and let them re-run with `--force` (which moves the prior content to `sandbox/.backup-/`). +- Do not overwrite a `sandbox/` that lacks `CODECOME-GENERATED.md`. If the sandbox already works, move on; if it needs replacement, halt with the halt protocol and inform the user to re-run with `--force` (which moves the prior content to `sandbox/.backup-/`). 
- Do not generate vulnerability findings. ## Final response diff --git a/tools/chat/app.py b/tools/chat/app.py index 29bff3ad..467e9501 100644 --- a/tools/chat/app.py +++ b/tools/chat/app.py @@ -396,6 +396,7 @@ def __init__(self, server_info=None, session_id=None, initial_prompt="", args=No self.thinking_on = thinking_on from codecome.transcript import Transcript self.transcript = transcript if transcript is not None else Transcript.null() + # TODO: refactor event recording/dedup — consider separate sinks (see GH issue) from codecome.recording import EventRecorder self.event_recorder = EventRecorder( self.transcript, diff --git a/tools/codecome.py b/tools/codecome.py index 77a0569d..cbc8f3b7 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -429,12 +429,12 @@ def check_codeql_status() -> int: """Check CodeQL configuration and last recorded artifact state.""" print() print(C.header("CodeQL:")) - + # TODO: move CodeQL check logic to tools/codecome/checks.py (see GH issue) try: from codeql.config import resolve_config from codeql.artifacts import check_artifacts from codeql.packs import load_codeql_plan - except Exception as exc: + except ImportError as exc: print(C.warn(f"CodeQL checks unavailable: {exc}")) return 0 diff --git a/tools/codeql/pipeline.py b/tools/codeql/pipeline.py index 234f9ad0..1ef412c8 100644 --- a/tools/codeql/pipeline.py +++ b/tools/codeql/pipeline.py @@ -65,6 +65,7 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No resolved_path = output_dir / "selected-query-packs.yml" # Step 3: normalize SARIF (completed or soft-failed, with SARIF files present) + normalized_ok = False if status in ("completed", "soft-failed") and resolved_path.is_file(): sarif_dir = output_dir / "sarif" if list(sarif_dir.glob("*.sarif")): @@ -74,6 +75,7 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No sarif_dir, normalized_dir, resolved, manifest.get("codeql_version", "unknown"), ROOT, ) + 
normalized_ok = True _progress(progress, "CodeQL: normalized SARIF artifacts") except Exception as exc: manifest.setdefault("warnings", []).append( @@ -81,10 +83,10 @@ def run_full_pipeline(config: CodeQLConfig, progress: Callable[[str], None] | No ) manifest["status"] = "failed" if config.fail_policy == "hard" else "soft-failed" - # Step 4: import risk + # Step 4: import risk (only if normalization succeeded — avoid importing stale signals) signals_path = normalized_dir / "file-signals.yml" risk_path = ROOT / "itemdb/notes/file-risk-index.yml" - if signals_path.is_file(): + if normalized_ok and signals_path.is_file(): try: import_risk(signals_path, risk_path) _progress(progress, "CodeQL: imported file risk signals") diff --git a/tools/codeql/runner.py b/tools/codeql/runner.py index 89d9d1f5..c4e54f06 100644 --- a/tools/codeql/runner.py +++ b/tools/codeql/runner.py @@ -246,7 +246,9 @@ def _create_database( if build_mode == "none": cmd += ["--build-mode=none"] - elif build_mode == "manual" and build_command: + elif build_mode == "manual": + if not build_command: + return False, f"build_mode is 'manual' for {language_id} but no build_command provided in the plan" cmd += ["--build-mode=manual", "-c", build_command] elif build_mode == "autobuild": cmd += ["--build-mode=autobuild"] From d2caf0d6915aa601fdd4435014a501c272fa6e9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 1 Jun 2026 01:10:44 +0200 Subject: [PATCH 45/47] fix: report fresh sandbox as pending --- tests/test_sandbox_bootstrap.py | 54 +++++++++++++++++++ tools/codecome.py | 16 +++--- .../command/interceptors/sandbox_bootstrap.py | 13 ++++- tools/sandbox-bootstrap.py | 34 ++++++++---- 4 files changed, 96 insertions(+), 21 deletions(-) diff --git a/tests/test_sandbox_bootstrap.py b/tests/test_sandbox_bootstrap.py index 2799941e..7425f006 100644 --- a/tests/test_sandbox_bootstrap.py +++ b/tests/test_sandbox_bootstrap.py @@ -1,6 +1,7 @@ from __future__ import annotations import 
json +from types import SimpleNamespace from conftest import ROOT, load_tool_module @@ -80,6 +81,59 @@ def test_opencode_json_allows_src_and_sandbox_env_reads(): assert read_rules["sandbox/.env"] == "allow" +def test_sandbox_status_is_pending_before_phase_1c(tmp_path, monkeypatch, capsys): + module = load_tool_module("sandbox_bootstrap_pending_status", "tools/sandbox-bootstrap.py") + root = tmp_path + sandbox_root = root / "sandbox" + notes_root = root / "itemdb" / "notes" + sandbox_root.mkdir(parents=True) + notes_root.mkdir(parents=True) + (sandbox_root / ".gitkeep").write_text("", encoding="utf-8") + + monkeypatch.setattr(module, "ROOT", root) + monkeypatch.setattr(module, "SANDBOX_ROOT", sandbox_root) + monkeypatch.setattr(module, "NOTES_ROOT", notes_root) + monkeypatch.setattr(module, "PROVENANCE_FILE", sandbox_root / "CODECOME-GENERATED.md") + + assert module.classify_sandbox_state() == "pending" + + rc = module.cmd_status(SimpleNamespace(format="text", gate=False)) + out = capsys.readouterr().out + + assert rc == 0 + assert "state:" in out + assert "pending" in out + assert "sandbox bootstrap pending; run make phase-1" in out + assert "setup pending" in out + + +def test_sandbox_status_is_missing_after_phase_1c_without_sandbox(tmp_path, monkeypatch, capsys): + module = load_tool_module("sandbox_bootstrap_missing_status", "tools/sandbox-bootstrap.py") + root = tmp_path + sandbox_root = root / "sandbox" + notes_root = root / "itemdb" / "notes" + sandbox_root.mkdir(parents=True) + notes_root.mkdir(parents=True) + (sandbox_root / ".gitkeep").write_text("", encoding="utf-8") + (notes_root / "sandbox-plan.md").write_text("# Sandbox Plan\n", encoding="utf-8") + + monkeypatch.setattr(module, "ROOT", root) + monkeypatch.setattr(module, "SANDBOX_ROOT", sandbox_root) + monkeypatch.setattr(module, "NOTES_ROOT", notes_root) + monkeypatch.setattr(module, "PROVENANCE_FILE", sandbox_root / "CODECOME-GENERATED.md") + + assert module.classify_sandbox_state() == "missing" + + 
rc = module.cmd_status(SimpleNamespace(format="text", gate=False)) + out = capsys.readouterr().out + + assert rc == 0 + assert "state:" in out + assert "missing" in out + assert "sandbox is missing" in out + assert "setup missing" in out + + def test_detect_signals_prefers_erlang_otp_for_rebar_targets(tmp_path, monkeypatch): module = load_tool_module("sandbox_bootstrap_erlang_detect", "tools/sandbox-bootstrap.py") diff --git a/tools/codecome.py b/tools/codecome.py index cbc8f3b7..ec47c8d0 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -540,22 +540,17 @@ def check_sandbox_status() -> None: print(C.header("Sandbox:")) provenance = sb.read_provenance() - has_user_content = sb.sandbox_has_user_content() last_validation = sb._last_validation_outcome() allow_no_sandbox = bool(os.environ.get("CODECOME_ALLOW_NO_SANDBOX")) - - # Determine state - if provenance is not None: - sandbox_state = "generated" - elif has_user_content: - sandbox_state = "user-managed" - else: - sandbox_state = "missing" + sandbox_state = sb.classify_sandbox_state() # Gate logic (mirrors cmd_status) if allow_no_sandbox: gate_pass = True gate_reason = "override (CODECOME_ALLOW_NO_SANDBOX=1)" + elif sandbox_state == "pending": + gate_pass = False + gate_reason = "sandbox bootstrap pending; run make phase-1" elif sandbox_state == "missing": gate_pass = False gate_reason = "sandbox is missing" @@ -595,7 +590,8 @@ def check_sandbox_status() -> None: for name in ("setup", "start", "check", "build", "test", "stop", "shell", "logs", "clean", "reset"): status = capability_status[name] satisfied = status.get("satisfied", False) - state_str = C.ok("ok") if satisfied else C.warn("missing") + missing_label = "pending" if sandbox_state == "pending" else "missing" + state_str = C.ok("ok") if satisfied else C.warn(missing_label) print(f" {name:<8} {state_str} {status['path']}") diff --git a/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py 
b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py index 92614f36..57d9a484 100644 --- a/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py +++ b/tools/rendering/tools/command/interceptors/sandbox_bootstrap.py @@ -203,6 +203,8 @@ def _sandbox_state_style(state_value: str) -> str: return "green" if state_value == "user-managed": return "yellow" + if state_value == "pending": + return "yellow" if state_value == "missing": return "red" return "dim" @@ -379,7 +381,12 @@ def _render_sandbox_status_rich( gate_pass = bool(payload.get("phase2_gate_pass")) gate_reason = str(payload.get("phase2_gate_reason", "")) - state_glyph = {"generated": glyphs["ok"], "user-managed": glyphs["warn"], "missing": glyphs["fail"]}.get(state_value, glyphs["info"]) + state_glyph = { + "generated": glyphs["ok"], + "user-managed": glyphs["warn"], + "pending": glyphs["warn"], + "missing": glyphs["fail"], + }.get(state_value, glyphs["info"]) sections.append(Text.assemble( ("state: ", "bold"), (f"{state_glyph} {state_value}", _sandbox_state_style(state_value)), @@ -425,6 +432,8 @@ def _render_sandbox_status_rich( is_helper = name in _SANDBOX_HELPER_CAPABILITIES if satisfied: badge = Text(f"{glyphs['ok']} ok", style="green") + elif state_value == "pending": + badge = Text(f"{glyphs['warn']} pending", style="yellow") elif is_helper and not present: badge = Text(f"{glyphs['skip']} optional", style="dim") else: @@ -716,6 +725,8 @@ def _render_sandbox_status_plain(payload: dict, glyphs: dict, sink) -> None: is_helper = name in _SANDBOX_HELPER_CAPABILITIES if satisfied: marker = f"{glyphs['ok']} ok" + elif state_value == "pending": + marker = f"{glyphs['warn']} pending" elif is_helper and not present: marker = f"{glyphs['skip']} optional" else: diff --git a/tools/sandbox-bootstrap.py b/tools/sandbox-bootstrap.py index e2d0e1b0..87a53655 100755 --- a/tools/sandbox-bootstrap.py +++ b/tools/sandbox-bootstrap.py @@ -404,6 +404,23 @@ def sandbox_has_user_content() -> bool: return 
False +def phase_1c_bootstrap_recorded() -> bool: + """Return True once Phase 1c has documented a sandbox bootstrap attempt.""" + return (NOTES_ROOT / "sandbox-plan.md").is_file() + + +def classify_sandbox_state() -> str: + """Classify sandbox state using both filesystem and workflow progress.""" + provenance = read_provenance() + if provenance is not None: + return "generated" + if sandbox_has_user_content(): + return "user-managed" + if phase_1c_bootstrap_recorded(): + return "missing" + return "pending" + + # --- Output helpers ----------------------------------------------------------- @@ -594,21 +611,15 @@ def _last_validation_outcome() -> Optional[str]: def cmd_status(args: argparse.Namespace) -> int: provenance = read_provenance() - has_user_content = sandbox_has_user_content() allow_no_sandbox = bool(os.environ.get("CODECOME_ALLOW_NO_SANDBOX")) capability_status = _capability_status() - - if provenance is not None: - sandbox_state = "generated" - elif has_user_content: - sandbox_state = "user-managed" - else: - sandbox_state = "missing" + sandbox_state = classify_sandbox_state() last_validation = _last_validation_outcome() # Gate logic: - # - missing -> block (override wins) + # - pending -> block (override wins), but Phase 1c has not run yet + # - missing -> block (override wins), because Phase 1c should have created it # - generated + failed -> block (override wins) # - generated + passed -> pass # - generated + mixed -> pass with warning (some tiers skipped) @@ -618,6 +629,9 @@ def cmd_status(args: argparse.Namespace) -> int: if allow_no_sandbox: gate_pass = True gate_reason = "override (CODECOME_ALLOW_NO_SANDBOX=1)" + elif sandbox_state == "pending": + gate_pass = False + gate_reason = "sandbox bootstrap pending; run make phase-1" elif sandbox_state == "missing": gate_pass = False gate_reason = "sandbox is missing" @@ -666,7 +680,7 @@ def cmd_status(args: argparse.Namespace) -> int: print(f" {C.DIM}capabilities:{C.RESET}") for name in ("setup", "start", 
"check", "build", "test", "stop", "shell", "logs", "clean", "reset"): status = capability_status[name] - state = "ok" if status.get("satisfied") else "missing" + state = "ok" if status.get("satisfied") else "pending" if sandbox_state == "pending" else "missing" print(f" {name:<6} {state:<7} {status['path']}") if gate_pass: print(C.ok(f"Phase 2 sandbox gate would pass ({gate_reason}).")) From c40491c2870dea6ec45864b9bb46ad2594592833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 1 Jun 2026 01:23:14 +0200 Subject: [PATCH 46/47] fix(phase-1): accept completed subphase artifacts without finish event --- tests/test_phase_1_codeql_plan_repair.py | 42 ++++++++++++++++++++++++ tests/test_phases_completion.py | 2 ++ tools/codecome/phase_1.py | 7 ++-- tools/phases/completion.py | 16 +++++---- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/tests/test_phase_1_codeql_plan_repair.py b/tests/test_phase_1_codeql_plan_repair.py index 54250a1a..e325102c 100644 --- a/tests/test_phase_1_codeql_plan_repair.py +++ b/tests/test_phase_1_codeql_plan_repair.py @@ -353,3 +353,45 @@ def fake_run_codeql(_console): p1.HAVE_RICH = saved_rich assert rc == 0 + + +def test_phase1c_accepts_no_step_finish_when_artifacts_are_fresh(tmp_path: Path) -> None: + import codecome.phase_1 as p1 + + transcript = tmp_path / "tmp" / "last-phase-1c-no-finding-attempt-1.jsonl" + transcript.parent.mkdir(parents=True) + transcript.write_text("", encoding="utf-8") + + args = SimpleNamespace(phase="1", finding=None, label="sandbox", debug=False) + calls = [] + + def fake_run_single_attempt(*_args, **_kwargs): + calls.append(_kwargs) + return 0, "session-1", RunResult(any_step_finish_seen=False), transcript + + saved_rich = p1.HAVE_RICH + p1.HAVE_RICH = False + try: + with patch.object(p1, "ROOT", tmp_path), \ + patch.object(p1, "load_prompt", return_value="prompt"), \ + patch.object(p1, "resolve_runtime_config", return_value=_runtime_config()), \ + patch.object(p1, 
"configure_rendering", return_value=None), \ + patch.object(p1, "_run_single_attempt", side_effect=fake_run_single_attempt), \ + patch.object(p1, "check_phase_graceful_completion", return_value=True), \ + patch("findings.checks_entry.run_frontmatter_validation", return_value=(0, "")): + rc = p1._run_subphase( + args=args, + console=None, + rendering_ctx=None, + runner=_runner(), + base_url="http://127.0.0.1", + phase_id="1c", + label="Sandbox", + agent="recon", + prompt_file="prompts/phase-1c-sandbox.md", + ) + finally: + p1.HAVE_RICH = saved_rich + + assert rc == 0 + assert len(calls) == 1 diff --git a/tests/test_phases_completion.py b/tests/test_phases_completion.py index b70aed95..64f33ba9 100644 --- a/tests/test_phases_completion.py +++ b/tests/test_phases_completion.py @@ -101,6 +101,8 @@ def test_phase1_check_patches_notes_root_and_sandbox_plan(self, tmp_path): try: result = completion_mod.check_phase_graceful_completion("1", None, fake_time) assert result is True, "Phase 1 should succeed when all artifacts exist under patched NOTES_ROOT" + result = completion_mod.check_phase_graceful_completion("1c", None, fake_time) + assert result is True, "Phase 1c should use the same artifact gate as Phase 1" finally: completion_mod.NOTES_ROOT = orig_notes_root completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py index 9f9d12dc..1a0fb47c 100644 --- a/tools/codecome/phase_1.py +++ b/tools/codecome/phase_1.py @@ -672,13 +672,14 @@ def _run_subphase( if finish_warning is not None: if ( - last_finish_reason in _FINISH_MID_TURN + (not any_step_finish_seen or last_finish_reason in _FINISH_MID_TURN) and last_permission_error is None and check_phase_graceful_completion(phase_id, finding, subphase_start_time) ): msg = ( - f"CodeCome observed a mid-turn model/provider cutoff for Phase {phase_id} after {step_finish_count} " - "completed loops, but the required durable artifacts were already written. 
Treating the subphase as complete." + f"CodeCome observed an incomplete model/provider completion signal for Phase {phase_id} after " + f"{step_finish_count} completed loops, but the required durable artifacts were already written. " + "Treating the subphase as complete." ) if HAVE_RICH: from rich.text import Text diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 67b895a2..096621da 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -87,8 +87,12 @@ def _exploitation_status_looks_real(frontmatter: dict[str, Any] | None) -> bool: def check_phase_graceful_completion(phase: str, finding: str | None, run_start_time: float) -> bool: + phase_key = str(phase) + if phase_key in ("1a", "1b", "1c"): + phase_key = "1" + try: - if str(phase) == "1": + if phase_key == "1": required_artifacts = _phase1_required_artifacts() if all(path.exists() for path in required_artifacts): fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) @@ -98,21 +102,21 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t ) return fresh_required and sandbox_state_recorded return False - elif str(phase) in ("2", "sweep"): + elif phase_key in ("2", "sweep"): pending_dir = finding_status_dir("PENDING") if pending_dir.exists(): return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in pending_dir.iterdir()) return False - elif str(phase) == "3": + elif phase_key == "3": findings_dir = FINDINGS_ROOT return any( path.suffix == ".md" and path.name != ".gitkeep" and path.stat().st_mtime >= run_start_time for path in _iter_files(findings_dir) ) - elif str(phase) == "4" and finding: + elif phase_key == "4" and finding: evidence_dir = evidence_dir_for(finding) return any(path.stat().st_mtime >= run_start_time for path in _iter_files(evidence_dir)) - elif str(phase) == "5" and finding: + elif phase_key == "5" and finding: exploited_file = 
finding_status_dir("EXPLOITED") / f"{finding}.md" if ( exploited_file.exists() @@ -147,7 +151,7 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t return True return False - elif str(phase) == "6": + elif phase_key == "6": reports_dir = REPORTS_ROOT if reports_dir.exists(): return any(f.name.endswith(".md") and f.name != ".gitkeep" and f.stat().st_mtime >= run_start_time for f in reports_dir.iterdir()) From 7d22780e4632cc9822deaa25b80ec8b6b429cc28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Mon, 1 Jun 2026 01:57:27 +0200 Subject: [PATCH 47/47] fix(phase-1): preserve retry transcripts and guard resume --- tests/test_codecome_runner.py | 61 +++++++++++++++++++++ tests/test_event_recording.py | 22 +++++++- tests/test_phases_completion.py | 29 ++++++++++ tests/test_session.py | 19 ++++++- tools/codecome/runner.py | 95 +++++++++++++++++++++++++++++++-- tools/codecome/session.py | 37 ++++++++++++- tools/codecome/transcript.py | 20 ++++++- tools/phases/completion.py | 5 +- 8 files changed, 278 insertions(+), 10 deletions(-) diff --git a/tests/test_codecome_runner.py b/tests/test_codecome_runner.py index 5c0f6f1b..de3b9428 100644 --- a/tests/test_codecome_runner.py +++ b/tests/test_codecome_runner.py @@ -154,3 +154,64 @@ def test_run_single_attempt_existing_session(mock_args, mock_console, monkeypatc assert code == 0 assert session_id == "existing_123" assert len(created) == 0 + + +def test_run_single_attempt_records_prompt_timeout(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: "new_session") + monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: RunResult()) + + def fake_send(*_a, **_kw): + raise TimeoutError("timed out") + + monkeypatch.setattr(runner, "send_prompt_to_session", fake_send) + + events = [] + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + fake_transcript.write_event.side_effect = 
events.append + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) + + fatal_errors = [] + code, session_id, _res, _path = runner._run_single_attempt( + mock_args, mock_console, "do work", "model", "var", + "http://base", "auth", "dir", lambda *a: None, + emit_fatal_error_fn=lambda _console, _title, msg: fatal_errors.append(msg), + ) + + assert code == 1 + assert session_id == "" + assert fatal_errors == ["timed out"] + event_types = [event["type"] for event in events] + assert "codecome.prompt.send_started" in event_types + assert "codecome.prompt.send_failed" in event_types + assert "codecome.attempt.failed" in event_types + failed = next(event for event in events if event["type"] == "codecome.prompt.send_failed") + assert failed["properties"]["errorType"] == "TimeoutError" + assert failed["properties"]["message"] == "timed out" + + +def test_existing_session_busy_guard_blocks_resume_prompt(mock_args, mock_console, monkeypatch): + monkeypatch.setattr(runner, "create_session", lambda *a, **kw: pytest.fail("should not create session")) + monkeypatch.setattr(runner, "_consume_events", lambda *a, **kw: pytest.fail("should not consume events")) + monkeypatch.setattr(runner, "send_prompt_to_session", lambda *a, **kw: pytest.fail("should not send prompt")) + monkeypatch.setattr(runner, "get_session_status", lambda *a, **kw: "busy") + monkeypatch.setenv("CODECOME_RESUME_IDLE_TIMEOUT", "0") + + events = [] + fake_transcript = MagicMock(spec=Transcript) + fake_transcript.path = Path("fake.jsonl") + fake_transcript.write_event.side_effect = events.append + monkeypatch.setattr(Transcript, "for_phase", classmethod(lambda cls, p, f: fake_transcript)) + + code, session_id, _res, _path = runner._run_single_attempt( + mock_args, mock_console, "resume", "model", "var", + "http://base", "auth", "dir", lambda *a: None, + existing_session_id="existing_123", + emit_fatal_error_fn=lambda *_a: None, + ) + + assert code == 1 + assert session_id 
== "existing_123" + event_types = [event["type"] for event in events] + assert "codecome.resume.blocked_busy" in event_types + assert "codecome.prompt.send_started" not in event_types diff --git a/tests/test_event_recording.py b/tests/test_event_recording.py index c39502d2..a4f85251 100644 --- a/tests/test_event_recording.py +++ b/tests/test_event_recording.py @@ -8,6 +8,7 @@ from codecome.recording import EventRecorder from codecome.transcript import Transcript +import codecome.transcript as transcript_mod @pytest.fixture @@ -57,4 +58,23 @@ def test_record_always_forwards_all_events(mock_transcript): for event_type in ("reasoning", "text", "message.updated", "tool_use"): mock_transcript.write_event.reset_mock() recorder.record({"type": event_type}) - mock_transcript.write_event.assert_called_once() \ No newline at end of file + mock_transcript.write_event.assert_called_once() + + +def test_phase_transcript_does_not_truncate_existing_file(tmp_path, monkeypatch): + monkeypatch.setattr(transcript_mod, "ROOT", tmp_path) + transcript_mod._ATTEMPT_COUNTER.clear() + + existing = tmp_path / "tmp" / "last-phase-1c-no-finding-attempt-1.jsonl" + existing.parent.mkdir(parents=True) + existing.write_text("keep me\n", encoding="utf-8") + + transcript = Transcript.for_phase("1c", None) + try: + transcript.write_event({"type": "test"}) + finally: + transcript.close() + + assert existing.read_text(encoding="utf-8") == "keep me\n" + assert transcript.path != existing + assert transcript.path.name.startswith("last-phase-1c-no-finding-attempt-1-") diff --git a/tests/test_phases_completion.py b/tests/test_phases_completion.py index 64f33ba9..8c1247ff 100644 --- a/tests/test_phases_completion.py +++ b/tests/test_phases_completion.py @@ -108,6 +108,35 @@ def test_phase1_check_patches_notes_root_and_sandbox_plan(self, tmp_path): completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan completion_mod.ROOT = orig_root + def test_phase1c_accepts_fresh_sandbox_state_with_existing_notes(self, 
tmp_path): + import phases.completion as completion_mod + + orig_notes_root = completion_mod.NOTES_ROOT + orig_sandbox_plan = completion_mod.SANDBOX_PLAN_PATH + orig_root = completion_mod.ROOT + + completion_mod.NOTES_ROOT = tmp_path / "notes" + completion_mod.SANDBOX_PLAN_PATH = completion_mod.NOTES_ROOT / "sandbox-plan.md" + completion_mod.ROOT = tmp_path / "codecome_workspace" + + for name in completion_mod._PHASE1_REQUIRED_ARTIFACT_NAMES: + artifact = completion_mod.NOTES_ROOT / name + artifact.parent.mkdir(parents=True, exist_ok=True) + artifact.write_text("", encoding="utf-8") + + run_start = time.time() + sandbox_generated = completion_mod.ROOT / "sandbox" / "CODECOME-GENERATED.md" + sandbox_generated.parent.mkdir(parents=True) + + try: + assert completion_mod.check_phase_graceful_completion("1", None, run_start) is False + sandbox_generated.write_text("validated", encoding="utf-8") + assert completion_mod.check_phase_graceful_completion("1c", None, run_start) is True + finally: + completion_mod.NOTES_ROOT = orig_notes_root + completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan + completion_mod.ROOT = orig_root + def test_phase2_uses_finding_status_dir_via_ast(self): import ast source = (ROOT / "tools" / "phases" / "completion.py").read_text() diff --git a/tests/test_session.py b/tests/test_session.py index 18ececa5..40d0dc77 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from io import BytesIO from unittest.mock import MagicMock, patch import pytest @@ -167,10 +168,24 @@ def test_send_prompt_http_error_raises(self, mock_urlopen): 500, "Internal Server Error", {}, - None, + BytesIO(b"server says no"), ) - with pytest.raises(RuntimeError, match="Failed to send prompt: HTTP 500"): + with pytest.raises(RuntimeError, match="Failed to send prompt: HTTP 500: server says no"): module.send_prompt_to_session( "http://localhost:8080", "sess-1", "hello", "recon", None, None, None, None ) + 
+ @patch("urllib.request.urlopen") + def test_get_session_status_busy(self, mock_urlopen): + module = _load_session_module() + mock_resp = MagicMock() + mock_resp.__enter__.return_value = mock_resp + mock_resp.read.return_value = json.dumps({"status": {"type": "busy"}}).encode("utf-8") + mock_urlopen.return_value = mock_resp + + status = module.get_session_status("http://localhost:8080", "sess-1", None, None) + + assert status == "busy" + req = mock_urlopen.call_args[0][0] + assert req.full_url == "http://localhost:8080/session/sess-1" diff --git a/tools/codecome/runner.py b/tools/codecome/runner.py index 0d0412c6..821d002c 100644 --- a/tools/codecome/runner.py +++ b/tools/codecome/runner.py @@ -9,6 +9,7 @@ import os import sys import threading +import time from pathlib import Path from typing import Any, Callable @@ -17,7 +18,7 @@ import _colors as C from events.phase_loop import PhaseEventLoop, RunResult from codecome.config import ROOT -from codecome.session import create_session, send_prompt_to_session +from codecome.session import create_session, get_session_status, send_prompt_to_session from codecome.transcript import Transcript from codecome.recording import EventRecorder @@ -33,6 +34,7 @@ def _consume_events( auth_token: str | None, workspace_dir: str | None, render_event_fn: Callable[..., None], + event_loop_box: dict[str, Any] | None = None, ) -> RunResult: event_loop = PhaseEventLoop( base_url=base_url, @@ -43,6 +45,8 @@ def _consume_events( auth_token=auth_token, workspace_dir=workspace_dir, ) + if event_loop_box is not None: + event_loop_box["loop"] = event_loop recorder = EventRecorder(transcript, debug=args.debug) @@ -52,6 +56,38 @@ def _handle_event(console_: Any, phase_: str, label_: str, event: dict[str, Any] return event_loop.run(_handle_event, recorder.record) +def _record_codecome_event(transcript: Transcript, event_type: str, **properties: Any) -> None: + transcript.write_event({ + "type": event_type, + "timestamp": int(time.time() * 1000), + 
"properties": properties, + }) + + +def _wait_for_resume_idle( + base_url: str, + session_id: str, + auth_token: str | None, + workspace_dir: str | None, + transcript: Transcript, +) -> None: + timeout_s = float(os.environ.get("CODECOME_RESUME_IDLE_TIMEOUT", "15")) + poll_s = float(os.environ.get("CODECOME_RESUME_IDLE_POLL", "1")) + deadline = time.monotonic() + max(timeout_s, 0.0) + + while True: + status = get_session_status(base_url, session_id, auth_token, workspace_dir) + if status != "busy": + if status is not None: + _record_codecome_event(transcript, "codecome.resume.status", sessionID=session_id, status=status) + return + + _record_codecome_event(transcript, "codecome.resume.blocked_busy", sessionID=session_id, status=status) + if time.monotonic() >= deadline: + raise RuntimeError(f"session {session_id} is still busy; refusing to send resume prompt") + time.sleep(max(poll_s, 0.1)) + + def _run_single_attempt( args: argparse.Namespace, console: Any, @@ -82,13 +118,29 @@ def _run_single_attempt( print(C.warn(f"warning: could not open transcript {transcript.path}: {exc}")) try: + _record_codecome_event( + transcript, + "codecome.attempt.started", + phase=transcript_phase or str(args.phase), + label=str(args.label), + existingSession=bool(existing_session_id), + ) if existing_session_id: session_id = existing_session_id + _wait_for_resume_idle(base_url, session_id, auth_token, workspace_dir, transcript) else: session_id = create_session(base_url, str(args.phase), args.agent, model, auth_token, workspace_dir) + _record_codecome_event( + transcript, + "codecome.session.ready", + sessionID=session_id, + existingSession=bool(existing_session_id), + ) + run_result_box: dict[str, Any] = {} consume_error_box: dict[str, Exception] = {} + event_loop_box: dict[str, Any] = {} def _consume() -> None: try: @@ -100,6 +152,7 @@ def _consume() -> None: transcript, auth_token, workspace_dir, render_event_fn=render_event_fn, + event_loop_box=event_loop_box, ) except Exception 
as exc: # noqa: BLE001 consume_error_box["error"] = exc @@ -107,15 +160,51 @@ def _consume() -> None: consumer = threading.Thread(target=_consume, name=f"codecome-events-{session_id}", daemon=True) consumer.start() - send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + _record_codecome_event(transcript, "codecome.prompt.send_started", sessionID=session_id) + try: + send_prompt_to_session(base_url, session_id, prompt, args.agent, model, variant, auth_token, workspace_dir) + except Exception as exc: + _record_codecome_event( + transcript, + "codecome.prompt.send_failed", + sessionID=session_id, + errorType=type(exc).__name__, + message=str(exc), + ) + loop = event_loop_box.get("loop") + if loop is not None: + try: + loop.stop() + except Exception: + pass + consumer.join(timeout=5.0) + if consumer.is_alive(): + _record_codecome_event(transcript, "codecome.event_loop.stop_timeout", sessionID=session_id) + raise + _record_codecome_event(transcript, "codecome.prompt.send_completed", sessionID=session_id) consumer.join() if "error" in consume_error_box: - raise consume_error_box["error"] + exc = consume_error_box["error"] + _record_codecome_event( + transcript, + "codecome.event_loop.failed", + sessionID=session_id, + errorType=type(exc).__name__, + message=str(exc), + ) + raise exc run_result = run_result_box.get("result") if not isinstance(run_result, RunResult): raise RuntimeError("Event loop ended without a RunResult") except Exception as exc: + _record_codecome_event( + transcript, + "codecome.attempt.failed", + errorType=type(exc).__name__, + message=str(exc), + existingSession=bool(existing_session_id), + ) if emit_fatal_error_fn: emit_fatal_error_fn(console, "Server Error", str(exc)) else: diff --git a/tools/codecome/session.py b/tools/codecome/session.py index d456abad..ad2ca5b0 100644 --- a/tools/codecome/session.py +++ b/tools/codecome/session.py @@ -63,7 +63,42 @@ def send_prompt_to_session( with 
urllib.request.urlopen(req, timeout=30.0) as resp: pass # 204 expected except urllib.error.HTTPError as exc: - raise RuntimeError(f"Failed to send prompt: HTTP {exc.code}") from exc + body = "" + try: + body = exc.read().decode("utf-8", errors="replace").strip() + except Exception: + body = "" + detail = f"Failed to send prompt: HTTP {exc.code}" + if body: + detail = f"{detail}: {body}" + raise RuntimeError(detail) from exc + + +def get_session_status( + base_url: str, + session_id: str, + auth_token: str | None, + workspace_dir: str | None, +) -> str | None: + """Best-effort lookup of an opencode session status type.""" + req = urllib.request.Request( + f"{base_url}/session/{session_id}", + headers=_get_headers(auth_token, workspace_dir), + method="GET", + ) + try: + with urllib.request.urlopen(req, timeout=5.0) as resp: + data = json.loads(resp.read().decode("utf-8")) + except Exception: + return None + + status = data.get("status") if isinstance(data, dict) else None + if isinstance(status, dict): + status_type = status.get("type") + return status_type if isinstance(status_type, str) else None + if isinstance(status, str): + return status + return None def create_session( diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py index 8c46c778..a3316e58 100644 --- a/tools/codecome/transcript.py +++ b/tools/codecome/transcript.py @@ -30,6 +30,22 @@ def _transcript_dir() -> Path: return d +def _unique_transcript_path(path: Path) -> Path: + """Return a transcript path that will not truncate an existing file.""" + if not path.exists(): + return path + + stamp = time.strftime("%Y%m%d-%H%M%S") + pid = os.getpid() + stem = path.stem + suffix = path.suffix + for n in range(1, 1000): + candidate = path.with_name(f"{stem}-{stamp}-pid{pid}-{n}{suffix}") + if not candidate.exists(): + return candidate + raise OSError(f"could not allocate unique transcript path for {path}") + + class Transcript: """JSONL event transcript — handles open, write, close. 
@@ -54,8 +70,8 @@ def for_phase(cls, phase: str, finding: str | None) -> Transcript: counter = _ATTEMPT_COUNTER.get(key, 1) _ATTEMPT_COUNTER[key] = counter + 1 - path = _transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl" - return cls(path, path.open("w", encoding="utf-8", buffering=1)) + path = _unique_transcript_path(_transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl") + return cls(path, path.open("x", encoding="utf-8", buffering=1)) @classmethod def for_chat(cls) -> Transcript: diff --git a/tools/phases/completion.py b/tools/phases/completion.py index 096621da..3ca941b7 100644 --- a/tools/phases/completion.py +++ b/tools/phases/completion.py @@ -88,6 +88,7 @@ def _exploitation_status_looks_real(frontmatter: dict[str, Any] | None) -> bool: def check_phase_graceful_completion(phase: str, finding: str | None, run_start_time: float) -> bool: phase_key = str(phase) + phase_is_1c = phase_key == "1c" if phase_key in ("1a", "1b", "1c"): phase_key = "1" @@ -95,11 +96,13 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t if phase_key == "1": required_artifacts = _phase1_required_artifacts() if all(path.exists() for path in required_artifacts): - fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) sandbox_generated = ROOT / "sandbox" / "CODECOME-GENERATED.md" sandbox_state_recorded = _path_is_fresh(sandbox_generated, run_start_time) or _path_is_fresh( SANDBOX_PLAN_PATH, run_start_time ) + if phase_is_1c: + return sandbox_state_recorded + fresh_required = any(_path_is_fresh(path, run_start_time) for path in required_artifacts) return fresh_required and sandbox_state_recorded return False elif phase_key in ("2", "sweep"):