diff --git a/.codeqa.yml b/.codeqa.yml
index e6d5d467..26f39beb 100644
--- a/.codeqa.yml
+++ b/.codeqa.yml
@@ -4,4 +4,32 @@
 # Patterns here are merged with any --ignore-paths passed on the command line.
 
 ignore_paths:
-  - priv/combined_metrics/samples/**
+  - priv/**
+  - tools/**
+  - scripts/**
+  - docs/**
+  - plans/**
+  - test/**
+  - devenv*
+  - direnv*
+  - README.md
+  - action.yml
+
+# Impact weights for overall score calculation.
+# Combined metric categories default to 1 (can be overridden here).
+impact:
+  complexity: 5
+  file_structure: 4
+  function_design: 4
+  code_smells: 3
+  naming_conventions: 2
+  error_handling: 2
+  consistency: 2
+  documentation: 1
+  testing: 1
+  # combined categories override example:
+  # variable_naming: 2
+
+combined_top: 5 # worst offender files per combined-metric behavior
+
+cosine_significance_threshold: 0.25 # minimum cosine similarity for a behavior category match to count as significant
diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs
new file mode 100644
index 00000000..9722072f
--- /dev/null
+++ b/.dialyzer_ignore.exs
@@ -0,0 +1,14 @@
+[
+  # Dialyzer specializes analyze/2 for the codebase call-site where include_pairs
+  # is always true, making the false branch appear unreachable. Both branches are
+  # valid and reachable at runtime from the file-level and codebase callers.
+  {"lib/codeqa/metrics/file/near_duplicate_blocks.ex", :pattern_match},
+  # Mix module type information is not available in the PLT; these are valid
+  # Mix.Task callbacks and standard Mix module calls.
+  {"lib/mix/tasks/codeqa/sample_report.ex", :callback_info_missing},
+  {"lib/mix/tasks/codeqa/signal_debug.ex", :callback_info_missing},
+  {"lib/mix/tasks/codeqa/sample_report.ex", :unknown_function},
+  {"lib/mix/tasks/codeqa/signal_debug.ex", :unknown_function},
+  # CodeQA.Engine.Registry.t/0 is defined via a macro; type is available at runtime.
+  {"lib/codeqa/analysis/file_metrics_server.ex", :unknown_type}
+]
diff --git a/.github/workflows/bootstrap-labels.yml b/.github/workflows/bootstrap-labels.yml
index a8653357..52c644a1 100644
--- a/.github/workflows/bootstrap-labels.yml
+++ b/.github/workflows/bootstrap-labels.yml
@@ -10,7 +10,7 @@ jobs:
   bootstrap:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Bootstrap labels
         uses: actions/github-script@v7
diff --git a/.github/workflows/compare.yml b/.github/workflows/compare.yml
index fa13ef0c..5a672ad6 100644
--- a/.github/workflows/compare.yml
+++ b/.github/workflows/compare.yml
@@ -12,10 +12,19 @@ jobs:
   compare:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
 
+      - name: Cache Mix deps and build
+        uses: actions/cache@v4
+        with:
+          path: |
+            deps
+            _build
+          key: ${{ runner.os }}-mix-1.19-27.3-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: ${{ runner.os }}-mix-1.19-27.3-
+
       - name: Get fork point
         id: fork-point
         run: |
diff --git a/.github/workflows/dialyzer.yml b/.github/workflows/dialyzer.yml
index 06743982..dfaca601 100644
--- a/.github/workflows/dialyzer.yml
+++ b/.github/workflows/dialyzer.yml
@@ -25,7 +25,7 @@ jobs:
 
     steps:
       - name: Checkout PR
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up Elixir
         uses: erlef/setup-beam@v1
@@ -45,9 +45,9 @@ jobs:
         uses: actions/cache@v4
         with:
           path: _build
-          key: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
+          key: build-${{ env.MIX_ENV }}-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }}
           restore-keys: |
-            build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-
+            build-${{ env.MIX_ENV }}-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-
 
       - name: Cache PLT
         uses: actions/cache@v4
diff --git a/.github/workflows/health-report.yml b/.github/workflows/health-report.yml
index 55e38f09..2e0b8966 100644
--- a/.github/workflows/health-report.yml
+++ b/.github/workflows/health-report.yml
@@ -12,7 +12,17 @@ jobs:
   health-report:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
+
+      - name: Cache Mix deps and build
+        uses: actions/cache@v4
+        with:
+          path: |
+            deps
+            _build
+          key: ${{ runner.os }}-mix-1.19-27.3-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: ${{ runner.os }}-mix-1.19-27.3-
+
       - uses: ./
         with:
           command: health-report
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d1ebf666..13300bda 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -17,15 +17,33 @@ jobs:
   build:
     runs-on: ubuntu-latest
 
+    env:
+      ELIXIR_VERSION: "1.19"
+      OTP_VERSION: "27.3"
+
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up Elixir
         uses: erlef/setup-beam@v1
         with:
-          otp-version: "27.3"
-          elixir-version: "1.19"
+          otp-version: ${{ env.OTP_VERSION }}
+          elixir-version: ${{ env.ELIXIR_VERSION }}
+
+      - name: Cache deps
+        uses: actions/cache@v4
+        with:
+          path: deps
+          key: deps-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: deps-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-
+
+      - name: Cache build
+        uses: actions/cache@v4
+        with:
+          path: _build
+          key: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-
 
       - name: Build escript
         run: |
diff --git a/.github/workflows/sync-behavior-coverage.yml b/.github/workflows/sync-behavior-coverage.yml
new file mode 100644
index 00000000..dc1cc4f2
--- /dev/null
+++ b/.github/workflows/sync-behavior-coverage.yml
@@ -0,0 +1,65 @@
+name: Sync Behavior Coverage
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: write
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.head.repo.full_name == github.repository
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ github.head_ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Elixir
+        uses: erlef/setup-beam@v1
+        with:
+          otp-version: "27.3"
+          elixir-version: "1.19"
+
+      - name: Cache deps
+        uses: actions/cache@v4
+        with:
+          path: |
+            deps
+            _build
+          key: ${{ runner.os }}-mix-dev-otp27.3-elixir1.19-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: ${{ runner.os }}-mix-dev-otp27.3-elixir1.19-
+
+      - name: Install dependencies
+        run: mix deps.get
+
+      - name: Compile
+        run: mix compile --warnings-as-errors
+
+      - name: Regenerate language coverage
+        run: mix codeqa.sample_report --apply-languages
+
+      - name: Regenerate scalar vectors
+        run: mix codeqa.sample_report --apply-scalars
+
+      - name: Check for YAML drift
+        id: diff
+        run: |
+          # Diff exactly the files the commit step stages; otherwise drift elsewhere
+          # under priv/combined_metrics/ reports "changed" and `git commit` fails empty.
+          changed=true
+          git diff --quiet -- priv/combined_metrics/*.yml && changed=false
+          echo "changed=$changed" >> "$GITHUB_OUTPUT"
+
+      - name: Commit and push updated YAMLs
+        if: steps.diff.outputs.changed == 'true'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add priv/combined_metrics/*.yml
+          git commit -m "chore(combined-metrics): sync language coverage and scalar vectors [skip ci]"
+          git push
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d560a175..04ba9b3c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,24 +10,27 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up Elixir
+        id: beam
         uses: erlef/setup-beam@v1
         with:
           otp-version: "27.3"
           elixir-version: "1.19"
 
       - name: Cache deps
+        id: mix-cache
         uses: actions/cache@v4
         with:
           path: |
             deps
             _build
-          key: ${{ runner.os }}-mix-${{ hashFiles('mix.lock') }}
-          restore-keys: ${{ runner.os }}-mix-
+          key: ${{ runner.os }}-mix-${{ steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}-${{ hashFiles('mix.lock', 'mix.exs') }}
+          restore-keys: ${{ runner.os }}-mix-${{ steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}-
 
       - name: Install dependencies
+        if: steps.mix-cache.outputs.cache-hit != 'true'
         run: mix deps.get
 
       - name: Compile
diff --git a/.github/workflows/validate-issue-links.yml b/.github/workflows/validate-issue-links.yml
index e366437e..5960df43 100644
--- a/.github/workflows/validate-issue-links.yml
+++ b/.github/workflows/validate-issue-links.yml
@@ -12,7 +12,7 @@ jobs:
   validate:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Validate issue template links
         run: |
diff --git a/.gitignore b/.gitignore
index ad2603a6..93f865dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,10 @@ devenv.lock
 
 # Git worktrees
 .worktrees/
+docs/plans/
+docs/superpowers/
+plans/
+scripts/*.exs
+
+# Claude Code
+.claude/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..d008842b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,8 @@
+repos:
+- repo: local
+  hooks:
+  - id: mix-precommit
+    name: Mix precommit
+    entry: devenv shell precommit
+    language: system
+    pass_filenames: false
diff --git a/README.md b/README.md
index 259ee497..1eab2d4b 100644
--- a/README.md
+++ b/README.md
@@ -17,14 +17,14 @@ Works with Python, Ruby, JavaScript, TypeScript, Elixir, C#, Java, C++, Go, Rust
 - [CLI Reference](#cli-reference)
   - [analyze](#analyze)
   - [health-report](#health-report)
+  - [diagnose](#diagnose)
   - [compare](#compare)
   - [history](#history)
   - [correlate](#correlate)
-  - [stopwords](#stopwords)
 - [Metrics Reference](#metrics-reference)
   - [Raw Metrics](#raw-metrics)
   - [Health Report Categories](#health-report-categories)
-  - [Behavior Checks](#behavior-checks)
+  - [Behavior Categories](#behavior-categories)
 - [Output Formats](#output-formats)
 - [Grading](#grading)
 
@@ -76,7 +76,7 @@ jobs:
   health-report:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - uses: num42/codeqa-action@v1
         with:
           command: health-report
@@ -95,7 +95,7 @@ jobs:
   compare:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Get fork point
@@ -112,7 +112,7 @@ jobs:
 
 | Input | Required | Default | Description |
 |-------|----------|---------|-------------|
-| `command` | yes | — | CLI command to run: `health-report`, `compare`, or `analyze` |
+| `command` | yes | — | CLI command to run: `health-report`, `compare`, `analyze`, `history`, `correlate`, or `diagnose` |
 | `path` | no | `.` | Directory to analyze |
 | `comment` | no | `false` | Post results as a sticky PR comment |
 | `fail-grade` | no | — | Fail the action if overall grade is below this (e.g. `C`) |
@@ -153,22 +153,83 @@ ignore_paths:
 
 ```yaml
 categories:
-  - name: Naming
-    weight: 1.5
+  Naming:
+    name: Naming
     metrics:
       - name: vowel_density
-        good: 0.4
-        thresholds: [0.35, 0.3, 0.25]
+        weight: 1.5
+        good: "high"
+        thresholds:
+          a: 0.42
+          b: 0.38
+          c: 0.32
+          d: 0.25
 ```
 
+Category-level keys: `name` (display name), `metrics` (list of metric overrides), `top` (worst-offender count override).
+
+Metric-level keys: `name` (metric key), `weight` (relative weight within the category), `good` (`"high"` or `"low"` — whether higher or lower values are better), `source` (metric path), `thresholds` (map of letter-grade cutoffs: `a`, `b`, `c`, `d`).
+
 ### Grade scale override
 
 ```yaml
 grade_scale:
-  - [90, "A"]
-  - [80, "B"]
-  - [70, "C"]
-  - [0,  "F"]
+  - min: 90
+    grade: "A"
+  - min: 80
+    grade: "B"
+  - min: 70
+    grade: "C"
+  - min: 0
+    grade: "F"
+```
+
+### impact
+
+Impact weights used when computing the overall score. The 9 keys below are the built-in defaults; any category not listed falls back to `1`. These weights apply to both primary and behavior categories.
+
+```yaml
+impact:
+  complexity: 5
+  file_structure: 4
+  function_design: 4
+  code_smells: 3
+  naming_conventions: 2
+  error_handling: 2
+  consistency: 2
+  documentation: 1
+  testing: 1
+  # override any category key:
+  # variable_naming: 2
+```
+
+### combined_top
+
+Controls how many worst-offender files are shown per behavior category in `health-report` (default: `2`).
+
+```yaml
+combined_top: 3
+```
+
+### near_duplicate_blocks
+
+Configures codebase-level near-duplicate block detection (used by `analyze`).
+
+```yaml
+near_duplicate_blocks:
+  max_pairs_per_bucket: 50
+```
+
+| Key | Description |
+|-----|-------------|
+| `max_pairs_per_bucket` | Maximum duplicate pairs reported per similarity bucket (default: unlimited) |
+
+### cosine_significance_threshold
+
+Minimum cosine similarity required for a behavior category match to be considered significant. Matches below this threshold are treated as noise and excluded from scoring. Default: `0.15`.
+
+```yaml
+cosine_significance_threshold: 0.25
 ```
 
 ## CLI Reference
@@ -228,6 +289,31 @@ Produces a graded quality report grouped into behavior categories with worst-off
 ./codeqa health-report --detail full --top 10 --format github ./lib
 ```
 
+### diagnose
+
+Identifies likely code quality issues by scoring behavior profiles using cosine similarity. Useful for understanding *why* a codebase scores poorly without running a full health report.
+
+```sh
+./codeqa diagnose --path <path> [OPTIONS]
+```
+
+`--path` is **required**. Note: unlike `health-report`, the path is passed as a named flag (`--path`), not a positional argument.
+
+| Option | Description |
+|--------|-------------|
+| `--path PATH` | **(Required)** File or directory to analyze |
+| `--mode MODE` | `aggregate` (default) or `per-file` |
+| `--top N` | Number of top issues to show (default: `15`) |
+| `--format FORMAT` | Output format: `plain` or `json` (default: `plain`) |
+| `--combined-top N` | Worst-offender files per behavior in per-file mode (default: `2`) |
+
+**Example:**
+
+```sh
+./codeqa diagnose --path ./lib --mode aggregate --top 10
+./codeqa diagnose --path ./lib --mode per-file --format json
+```
+
 ### compare
 
 Compares code quality metrics between two git refs. Designed for PR workflows.
@@ -246,6 +332,16 @@ Compares code quality metrics between two git refs. Designed for PR workflows.
 | `--output MODE` | Output mode: `auto`, `summary`, or `changes` (default: `auto`) |
 | `--changes-only` | Only analyze files changed between refs |
 | `--all-files` | Analyze all source files at both refs (default) |
+| `--workers N` | Parallel worker count |
+| `--progress` | Show per-file progress |
+| `--cache` | Cache computed metrics to disk |
+| `--cache-dir PATH` | Directory for cached metrics (default: `.codeqa_cache`) |
+| `--timeout MS` | Per-file timeout in milliseconds (default: `5000`) |
+| `--show-ncd` | Include NCD similarity matrix |
+| `--ncd-top N` | Top similar pairs per file |
+| `--ncd-paths PATHS` | Comma-separated paths to compare for NCD |
+| `--show-files` | Include per-file metrics in output |
+| `--show-file-paths PATHS` | Comma-separated list of specific file paths to include |
 | `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude |
 
 **Example:**
@@ -269,6 +365,16 @@ Tracks codebase metrics across multiple commits, writing per-commit JSON snapsho
 | `--output-dir PATH` | **(Required)** Directory to write JSON snapshots |
 | `--commits N` | Number of recent commits to analyze |
 | `--commit-list SHAS` | Comma-separated list of explicit commit SHAs |
+| `--workers N` | Parallel worker count |
+| `--progress` | Show per-file progress |
+| `--cache` | Cache computed metrics to disk |
+| `--cache-dir PATH` | Directory for cached metrics (default: `.codeqa_cache`) |
+| `--timeout MS` | Per-file timeout in milliseconds (default: `5000`) |
+| `--show-ncd` | Include NCD similarity matrix |
+| `--ncd-top N` | Top similar pairs per file |
+| `--ncd-paths PATHS` | Comma-separated paths to compare for NCD |
+| `--show-files` | Include per-file metrics in output |
+| `--show-file-paths PATHS` | Comma-separated list of specific file paths to include |
 | `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude |
 
 ### correlate
@@ -282,27 +388,12 @@ Finds metric correlations across history snapshots produced by `history`. Run `h
 | Option | Description |
 |--------|-------------|
 | `--top N` | Number of top correlations to show (default: `20`) |
-| `--hide-exact` | Hide perfect 1.0 correlations |
-| `--all-groups` | Show all metric groups |
+| `--hide-exact` | Hide perfect 1.0 and -1.0 correlations |
+| `--all-groups` | Include correlations between metrics in the same group |
 | `--min FLOAT` | Minimum correlation threshold |
 | `--max FLOAT` | Maximum correlation threshold |
 | `--combined-only` | Show only combined-metric correlations |
-| `--max-steps N` | Limit history steps used |
-
-### stopwords
-
-Extracts codebase-specific vocabulary stopwords and fingerprints. Use the output to reduce noise from project-specific boilerplate tokens in subsequent metric analysis.
-
-```sh
-./codeqa stopwords [OPTIONS] <path>
-```
-
-| Option | Description |
-|--------|-------------|
-| `--workers N` | Parallel worker count |
-| `--stopwords-threshold FLOAT` | Minimum frequency ratio (default: `0.01`) |
-| `--progress` | Show per-file progress |
-| `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude |
+| `--max-steps N` | Maximum number of correlation pairs to evaluate |
 
 ## Metrics Reference
 
@@ -329,10 +420,17 @@ All metrics are computed per file and aggregated at the codebase level.
 | **Magic number density** | Ratio of numeric literals that appear to be unnamed constants |
 | **Function metrics** | Function count, average and maximum function line count, average and maximum parameter count |
 | **Cross-file similarity** | `cross_file_density`: overall codebase redundancy via combined compression ratio. `ncd_pairs` (opt-in via `--show-ncd`): Normalized Compression Distance between similar file pairs using winnowing fingerprints |
+| **Near-duplicate blocks** | Codebase-level detection of near- and exact-duplicate code blocks using token-based similarity. Reports duplicate pairs grouped by bucket, with source locations. Configurable via `near_duplicate_blocks:` in `.codeqa.yml`. |
+| **Block impact & refactoring potentials** | Per-file node tree enriched with leave-one-out impact scores and refactoring potentials. Added to each file entry as `"nodes"` in `analyze` JSON output. Surfaces the highest-impact blocks to refactor. |
 
 ### Health Report Categories
 
-The `health-report` command grades your codebase against 6 primary categories. Each category aggregates raw metrics using configurable weights and thresholds.
+The `health-report` command evaluates your codebase using two complementary scoring models:
+
+- **6 primary categories** — graded using configurable thresholds against raw metrics (Readability, Complexity, Structure, Duplication, Naming, Magic Numbers)
+- **12 behavior categories** — graded using cosine similarity against behavior profiles (see [Behavior Categories](#behavior-categories))
+
+The overall score is a weighted average of all 18 categories. Primary category weights are set via `weight:` in `.codeqa.yml`; behavior category weights are configured via [`impact:`](#impact).
 
 | Category | What it measures |
 |----------|-----------------|
@@ -343,11 +441,21 @@ The `health-report` command grades your codebase against 6 primary categories. E
 | **Naming** | Casing entropy, identifier length variance, avg sub-words per identifier |
 | **Magic Numbers** | Magic number density |
 
+**Cosine scoring breakpoints** (used for behavior categories):
+
+| Cosine similarity | Score | Approx. grade |
+|-------------------|-------|---------------|
+| ≥ 0.5             | 90–100 | A             |
+| ≥ 0.2             | 70–90  | B–A-          |
+| ≥ 0.0             | 50–70  | C–B-          |
+| ≥ −0.3            | 30–50  | D–C-          |
+| ≥ −1.0            | 0–30   | F–D-          |
+
 > Category definitions and thresholds are configurable via `.codeqa.yml`.
 
-### Behavior Checks
+### Behavior Categories
 
-In addition to the 6 graded categories, `health-report` evaluates additional behavior check categories using a separate multiplicative scoring model. These appear in the report as "Top Issues" diagnostics.
+In addition to the 6 primary categories, `health-report` grades 12 behavior categories using cosine similarity against behavior profiles. These contribute to the overall score alongside the primary categories.
 
 | Category | Checks |
 |----------|--------|
@@ -364,13 +472,15 @@ In addition to the 6 graded categories, `health-report` evaluates additional beh
 | **Dependencies** | Import and dependency patterns |
 | **Error Handling** | Error handling completeness |
 
+> These categories are graded in the `health-report` output using cosine similarity scoring and contribute to the overall score.
+
 ## Output Formats
 
 | Format | Commands | Description |
 |--------|----------|-------------|
-| `json` | `analyze`, `compare` | Full metrics structure, suitable for tooling |
-| `markdown` | `compare`, `health-report` | GitHub-flavored markdown tables |
-| `plain` | `health-report` | Human-readable terminal output (Markdown) |
+| `json` | `analyze`, `compare`, `diagnose` | Full metrics structure, suitable for tooling |
+| `markdown` | `compare` | GitHub-flavored markdown tables |
+| `plain` | `health-report`, `diagnose` | Human-readable terminal output |
 | `github` | `health-report`, `compare` | Markdown optimized for GitHub PR comments |
 
 ## Grading
@@ -397,6 +507,8 @@ In addition to the 6 graded categories, `health-report` evaluates additional beh
 | E-    | ≥ 6         |
 | F     | < 6         |
 
+The overall score is a weighted average across all categories. Primary category weights use the `weight:` field inside each category definition in `.codeqa.yml`. Behavior category weights are configured via `impact:` (defaults range from 1–5; categories not listed fall back to `1`). See [Configuration](#configuration) for examples.
+
 The `fail-grade` action input causes a non-zero exit when the overall grade falls below the specified threshold.
 
 ## Contributing & Issues
diff --git a/action.yml b/action.yml
index 6be60789..ebee062b 100644
--- a/action.yml
+++ b/action.yml
@@ -93,15 +93,10 @@ runs:
         INPUT_VERSION: ${{ inputs.version }}
         INPUT_BUILD: ${{ inputs.build }}
         GITHUB_ACTION_PATH: ${{ github.action_path }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+        GITHUB_TOKEN: ${{ github.token }}
       run: ${{ github.action_path }}/scripts/run.sh
 
-    - name: Post PR comment
-      if: inputs.comment == 'true' && github.event_name == 'pull_request'
-      uses: marocchino/sticky-pull-request-comment@v2
-      with:
-        header: codeqa-${{ inputs.command }}
-        path: ${{ steps.run.outputs.report-file }}
-
     - name: Check grade threshold
       if: inputs.fail-grade != '' && inputs.command == 'health-report'
       shell: bash
diff --git a/devenv.yaml b/devenv.yaml
index 6bf1e6c1..b45546e6 100644
--- a/devenv.yaml
+++ b/devenv.yaml
@@ -1,3 +1,8 @@
 inputs:
   nixpkgs:
     url: github:cachix/devenv-nixpkgs/rolling
+  git-hooks:
+    url: github:cachix/git-hooks.nix
+    inputs:
+      nixpkgs:
+        follows: nixpkgs
diff --git a/lib/codeqa/analysis/behavior_config_server.ex b/lib/codeqa/analysis/behavior_config_server.ex
new file mode 100644
index 00000000..04cc9bab
--- /dev/null
+++ b/lib/codeqa/analysis/behavior_config_server.ex
@@ -0,0 +1,119 @@
+defmodule CodeQA.Analysis.BehaviorConfigServer do
+  @moduledoc """
+  Per-run GenServer that loads all YAML behavior configs once and serves them
+  from an anonymous ETS table.
+
+  Eliminates repeated disk reads in `SampleRunner.diagnose_aggregate/2` by
+  loading `priv/combined_metrics/*.yml` on startup and keeping data in memory
+  for the duration of the analysis run.
+
+  ETS layout: `{category, behavior} => behavior_data`
+  where `behavior_data` is the raw YAML map for that behavior.
+  """
+
+  use GenServer
+
+  @yaml_dir "priv/combined_metrics"
+
+  # --- Public API ---
+
+  @spec start_link(keyword()) :: GenServer.on_start()
+  def start_link(opts \\ []), do: GenServer.start_link(__MODULE__, opts)
+
+  @doc "Returns the ETS table id. Callers may read directly from it."
+  @spec get_tid(pid()) :: :ets.tid()
+  def get_tid(pid), do: GenServer.call(pid, :get_tid)
+
+  @doc """
+  Returns all behaviors grouped by category.
+
+      %{"function_design" => [{"no_boolean_parameter", behavior_data}, ...], ...}
+  """
+  @spec get_all_behaviors(pid()) :: %{String.t() => [{String.t(), map()}]}
+  def get_all_behaviors(pid) do
+    pid
+    |> get_tid()
+    |> :ets.tab2list()
+    |> Enum.reduce(%{}, fn {{cat, beh}, data}, acc ->
+      Map.update(acc, cat, [{beh, data}], &[{beh, data} | &1])
+    end)
+  end
+
+  @doc """
+  Returns the scalar weight map for a given category + behavior.
+
+  Yields `%{}` when the behavior is unknown.
+  """
+  @spec get_scalars(pid(), String.t(), String.t()) :: %{{String.t(), String.t()} => float()}
+  def get_scalars(pid, category, behavior) do
+    case :ets.lookup(get_tid(pid), {category, behavior}) do
+      [{_key, data}] -> scalars_from_behavior_data(data)
+      [] -> %{}
+    end
+  end
+
+  @doc """
+  Returns the `_log_baseline` value for a given category + behavior.
+
+  Falls back to `0.0` when the behavior is unknown or the value is missing,
+  `null`, or otherwise non-numeric.
+  """
+  @spec get_log_baseline(pid(), String.t(), String.t()) :: float()
+  def get_log_baseline(pid, category, behavior) do
+    case :ets.lookup(get_tid(pid), {category, behavior}) do
+      [{_key, %{"_log_baseline" => baseline}}] when is_number(baseline) -> baseline / 1.0
+      _missing_or_non_numeric -> 0.0
+    end
+  end
+
+  # --- GenServer callbacks ---
+
+  @impl true
+  def init(_opts) do
+    tid = :ets.new(:behavior_config, [:set, :public, read_concurrency: true])
+    load_configs(tid)
+    {:ok, %{tid: tid}}
+  end
+
+  @impl true
+  def handle_call(:get_tid, _from, state), do: {:reply, state.tid, state}
+
+  # --- Private helpers ---
+
+  # Missing/unreadable config dir is tolerated: the table simply stays empty.
+  defp load_configs(tid) do
+    case File.ls(@yaml_dir) do
+      {:ok, files} ->
+        files
+        |> Enum.filter(&String.ends_with?(&1, ".yml"))
+        |> Enum.each(&load_yml_file(&1, tid))
+
+      {:error, _reason} ->
+        :ok
+    end
+  end
+
+  defp load_yml_file(yml_file, tid) do
+    # `Path.rootname/2` drops exactly one ".yml"; `String.trim_trailing/2` would
+    # strip repeated suffixes ("a.yml.yml" -> "a").
+    category = Path.rootname(yml_file, ".yml")
+    {:ok, data} = YamlElixir.read_from_file(Path.join(@yaml_dir, yml_file))
+
+    # Only map-valued top-level entries are behaviors.
+    for {behavior, behavior_data} when is_map(behavior_data) <- data do
+      :ets.insert(tid, {{category, behavior}, behavior_data})
+    end
+  end
+
+  @doc false
+  @spec scalars_from_behavior_data(map()) :: %{{String.t(), String.t()} => float()}
+  def scalars_from_behavior_data(behavior_data) do
+    # Flatten `group => %{key => scalar}` into `{group, key} => float`. Non-map
+    # groups and non-numeric scalars are skipped instead of raising on `/ 1.0`.
+    for {group, keys} when is_map(keys) <- behavior_data,
+        {key, scalar} when is_number(scalar) <- keys,
+        into: %{} do
+      {{group, key}, scalar / 1.0}
+    end
+  end
+end
diff --git a/lib/codeqa/analysis/file_context_server.ex b/lib/codeqa/analysis/file_context_server.ex
new file mode 100644
index 00000000..987595f9
--- /dev/null
+++ b/lib/codeqa/analysis/file_context_server.ex
@@ -0,0 +1,87 @@
+defmodule CodeQA.Analysis.FileContextServer do
+  @moduledoc """
+  Per-run GenServer owning an ETS cache of `Pipeline.build_file_context/2`
+  results, keyed by `{MD5(content), language_name}`.
+
+  The resolved language name is part of the key because keyword/operator sets
+  differ per language, so identical content can yield different identifiers.
+
+  ETS layout: `{md5_binary, language_name} => FileContext.t()`
+
+  A cache miss is resolved in the calling process: it builds the context and
+  writes it to the shared table itself, so the computation never runs inside
+  the GenServer.
+  """
+
+  use GenServer
+
+  alias CodeQA.Engine.{FileContext, Pipeline}
+  alias CodeQA.Language
+  alias CodeQA.Languages.Unknown
+
+  # --- Public API ---
+
+  @spec start_link(keyword()) :: GenServer.on_start()
+  def start_link(opts \\ []), do: GenServer.start_link(__MODULE__, opts)
+
+  @doc "Fetches the id of the ETS table backing the cache; it is readable directly."
+  @spec get_tid(pid()) :: :ets.tid()
+  def get_tid(pid) do
+    GenServer.call(pid, :get_tid)
+  end
+
+  @doc """
+  Looks up the `FileContext` for `content`, building and caching it on a miss.
+
+  `opts` determines the language: `:language` wins over `:path`, and `Unknown`
+  is used when neither is given. The same `opts` are passed to the builder.
+  """
+  @spec get(pid(), String.t(), keyword()) :: FileContext.t()
+  def get(pid, content, opts \\ []) do
+    table = get_tid(pid)
+    language_name = resolve_language_name(opts)
+    cache_key = {:crypto.hash(:md5, content), language_name}
+
+    # On a miss the caller, not the server, builds and stores the context.
+    case :ets.lookup(table, cache_key) do
+      [] ->
+        built = Pipeline.build_file_context(content, opts)
+        :ets.insert(table, {cache_key, built})
+        built
+
+      [{_key, cached}] ->
+        cached
+    end
+  end
+
+  # --- GenServer callbacks ---
+
+  @impl true
+  def init(_opts) do
+    # :public because `get/3` inserts from the calling process on a miss.
+    table = :ets.new(:file_context, [:set, :public, read_concurrency: true])
+    {:ok, %{tid: table}}
+  end
+
+  @impl true
+  def handle_call(:get_tid, _from, state), do: {:reply, state.tid, state}
+
+  # --- Private helpers ---
+
+  # Name of the language selected by `opts`; see `language_module/2`.
+  defp resolve_language_name(opts) do
+    language = Keyword.get(opts, :language)
+    path = Keyword.get(opts, :path)
+    language_module(language, path).name()
+  end
+
+  # A truthy `:language` is looked up (falling back to `Unknown` when not found);
+  # otherwise a truthy `:path` is used for detection; otherwise `Unknown`.
+  defp language_module(language, _path) when language not in [nil, false],
+    do: Language.find(language) || Unknown
+
+  defp language_module(_language, path) when path not in [nil, false],
+    do: Language.detect(path)
+
+  defp language_module(_language, _path), do: Unknown
+end
diff --git a/lib/codeqa/analysis/file_metrics_server.ex b/lib/codeqa/analysis/file_metrics_server.ex
new file mode 100644
index 00000000..579a63d9
--- /dev/null
+++ b/lib/codeqa/analysis/file_metrics_server.ex
@@ -0,0 +1,107 @@
+defmodule CodeQA.Analysis.FileMetricsServer do
+  @moduledoc """
+  Per-run GenServer that caches `Registry.run_file_metrics/2` results.
+
+  Pre-populated from `pipeline_result` before block analysis starts so baseline
+  metrics are served directly from ETS without recomputation.
+
+  ETS layout:
+  - `{:path, path}` => metrics map   (baseline for existing files)
+  - `{:hash, md5_binary}` => metrics map  (computed on demand for reconstructed content)
+  """
+
+  use GenServer
+
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Engine.Registry
+
+  # --- Public API ---
+
+  @spec start_link(keyword()) :: GenServer.on_start()
+  def start_link(opts \\ []) do
+    GenServer.start_link(__MODULE__, opts)
+  end
+
+  @doc "Returns the ETS table id. Callers may read directly from it."
+  @spec get_tid(pid()) :: :ets.tid()
+  def get_tid(pid), do: GenServer.call(pid, :get_tid)
+
+  @doc """
+  Bulk-inserts all baseline metrics from `pipeline_result` and cross-indexes by
+  content hash for each path present in `files_map`.
+
+  Call once after starting the supervisor, before beginning block analysis.
+  """
+  @spec populate(pid(), map(), map()) :: :ok
+  def populate(pid, pipeline_result, files_map) do
+    tid = get_tid(pid)
+    files_data = Map.get(pipeline_result, "files", %{})
+
+    Enum.each(files_data, fn {path, file_data} ->
+      metrics = Map.get(file_data, "metrics", %{})
+      :ets.insert(tid, {{:path, path}, metrics})
+    end)
+
+    Enum.each(files_map, fn {path, content} ->
+      hash = md5(content)
+
+      case :ets.lookup(tid, {:path, path}) do
+        [{_, metrics}] -> :ets.insert(tid, {{:hash, hash}, metrics})
+        [] -> :ok
+      end
+    end)
+
+    :ok
+  end
+
+  @doc "Returns pre-populated baseline metrics for `path`, or `nil` if not found."
+  @spec get_by_path(pid(), String.t()) :: map() | nil
+  def get_by_path(pid, path) do
+    tid = get_tid(pid)
+
+    case :ets.lookup(tid, {:path, path}) do
+      [{_, metrics}] -> metrics
+      [] -> nil
+    end
+  end
+
+  @doc """
+  Returns metrics for `content`, using the hash cache.
+
+  On a cache miss, builds the file context and runs metrics in the calling
+  process, then inserts the result into ETS for future lookups.
+  """
+  @spec get_for_content(pid(), Registry.t(), String.t(), keyword()) :: map()
+  def get_for_content(pid, registry, content, opts \\ []) do
+    tid = get_tid(pid)
+    hash = md5(content)
+
+    case :ets.lookup(tid, {:hash, hash}) do
+      [{_, metrics}] ->
+        metrics
+
+      [] ->
+        ctx = Pipeline.build_file_context(content, opts)
+        metrics = Registry.run_file_metrics(registry, ctx)
+        :ets.insert(tid, {{:hash, hash}, metrics})
+        metrics
+    end
+  end
+
+  # --- GenServer callbacks ---
+
+  @impl true
+  def init(_opts) do
+    tid = :ets.new(:file_metrics, [:set, :public, read_concurrency: true])
+    {:ok, %{tid: tid}}
+  end
+
+  @impl true
+  def handle_call(:get_tid, _from, state) do
+    {:reply, state.tid, state}
+  end
+
+  # --- Private helpers ---
+
+  defp md5(content), do: :crypto.hash(:md5, content)
+end
diff --git a/lib/codeqa/analysis/run_context.ex b/lib/codeqa/analysis/run_context.ex
new file mode 100644
index 00000000..e0e9d526
--- /dev/null
+++ b/lib/codeqa/analysis/run_context.ex
@@ -0,0 +1,15 @@
+defmodule CodeQA.Analysis.RunContext do
+  @moduledoc """
+  Holds PIDs for the per-run GenServers started under `RunSupervisor`.
+
+  Passed through the analysis call chain so all callers can access
+  cached state without named process registration.
+  """
+
+  defstruct [:behavior_config_pid, :file_context_pid]
+
+  @type t :: %__MODULE__{
+          behavior_config_pid: pid(),
+          file_context_pid: pid()
+        }
+end
diff --git a/lib/codeqa/analysis/run_supervisor.ex b/lib/codeqa/analysis/run_supervisor.ex
new file mode 100644
index 00000000..ab6bb10f
--- /dev/null
+++ b/lib/codeqa/analysis/run_supervisor.ex
@@ -0,0 +1,52 @@
+defmodule CodeQA.Analysis.RunSupervisor do
+  @moduledoc """
+  One-shot supervisor for the per-analysis-run GenServers.
+
+  Started at the top of `Analyzer.with_run_context/2` and stopped (via
+  `Supervisor.stop/1`) in an `after` block when the run completes.
+
+  Servers are not registered by name, preventing collisions when multiple
+  analysis runs share the same BEAM node (e.g. parallel tests).
+  """
+
+  use Supervisor
+
+  alias CodeQA.Analysis.{BehaviorConfigServer, FileContextServer, RunContext}
+
+  @spec start_link(keyword()) :: Supervisor.on_start()
+  def start_link(opts \\ []) do
+    Supervisor.start_link(__MODULE__, opts)
+  end
+
+  @doc """
+  Queries child PIDs from `sup` and returns a `RunContext` struct.
+
+  Call once after `start_link/1` succeeds, before beginning analysis.
+  """
+  @spec run_context(pid()) :: RunContext.t()
+  def run_context(sup) do
+    children = Supervisor.which_children(sup)
+
+    %RunContext{
+      behavior_config_pid: find_pid(children, BehaviorConfigServer),
+      file_context_pid: find_pid(children, FileContextServer)
+    }
+  end
+
+  @impl true
+  def init(_opts) do
+    children = [
+      {BehaviorConfigServer, []},
+      {FileContextServer, []}
+    ]
+
+    Supervisor.init(children, strategy: :one_for_one)
+  end
+
+  # Returns the pid of the child whose id is `module`; a missing child makes
+  # `List.keyfind/3` return nil, so the tuple match below raises MatchError.
+  defp find_pid(children, module) do
+    {_id, child_pid, _type, _modules} = List.keyfind(children, module, 0)
+    child_pid
+  end
+end
diff --git a/lib/codeqa/analyzer.ex b/lib/codeqa/analyzer.ex
deleted file mode 100644
index ddcb6ab1..00000000
--- a/lib/codeqa/analyzer.ex
+++ /dev/null
@@ -1,130 +0,0 @@
-defmodule CodeQA.Analyzer do
-  @moduledoc "Orchestrates metric computation across files."
-
-  alias CodeQA.Registry
-  alias CodeQA.Metrics
-
-  def build_registry do
-    Registry.new()
-    |> Registry.register_file_metric(Metrics.Entropy)
-    |> Registry.register_file_metric(Metrics.Compression)
-    |> Registry.register_file_metric(Metrics.Zipf)
-    |> Registry.register_file_metric(Metrics.Heaps)
-    |> Registry.register_file_metric(Metrics.Vocabulary)
-    |> Registry.register_file_metric(Metrics.Ngram)
-    |> Registry.register_file_metric(Metrics.Halstead)
-    |> Registry.register_file_metric(Metrics.Readability)
-    |> Registry.register_file_metric(Metrics.CasingEntropy)
-    |> Registry.register_file_metric(Metrics.IdentifierLengthVariance)
-    |> Registry.register_file_metric(Metrics.Indentation)
-    |> Registry.register_file_metric(Metrics.Branching)
-    |> Registry.register_file_metric(Metrics.FunctionMetrics)
-    |> Registry.register_file_metric(Metrics.MagicNumberDensity)
-    |> Registry.register_file_metric(Metrics.SymbolDensity)
-    |> Registry.register_file_metric(Metrics.VowelDensity)
-    |> Registry.register_codebase_metric(Metrics.Similarity)
-  end
-
-  def analyze_codebase(files, opts \\ []) do
-    registry = build_registry()
-
-    opts =
-      if Keyword.get(opts, :experimental_stopwords, false) do
-        has_progress = Keyword.get(opts, :on_progress)
-
-        if has_progress,
-          do: IO.puts(:stderr, "  Analyzing Stopwords (Tokens and Fingerprints)...")
-
-        word_extractor = fn content ->
-          Regex.scan(~r/\b[a-zA-Z_]\w*\b/u, content) |> List.flatten()
-        end
-
-        word_stopwords =
-          CodeQA.Telemetry.time(:stopwords_words, fn ->
-            CodeQA.Stopwords.find_stopwords(files, word_extractor, opts)
-          end)
-
-        fp_extractor = fn content ->
-          CodeQA.Metrics.TokenNormalizer.normalize(content) |> CodeQA.Metrics.Winnowing.kgrams(5)
-        end
-
-        fp_stopwords =
-          CodeQA.Telemetry.time(:stopwords_fingerprints, fn ->
-            CodeQA.Stopwords.find_stopwords(files, fp_extractor, opts)
-          end)
-
-        if has_progress do
-          IO.puts(
-            :stderr,
-            "  Found #{MapSet.size(word_stopwords)} common word stopwords and #{MapSet.size(fp_stopwords)} common fingerprint stopwords."
-          )
-        end
-
-        opts
-        |> Keyword.put(:word_stopwords, word_stopwords)
-        |> Keyword.put(:fp_stopwords, fp_stopwords)
-      else
-        opts
-      end
-
-    file_results = CodeQA.Parallel.analyze_files(files, opts)
-    codebase_metrics = Registry.run_codebase_metrics(registry, files, opts)
-    aggregate = aggregate_file_metrics(file_results)
-
-    %{
-      "files" => file_results,
-      "codebase" => %{
-        "aggregate" => aggregate,
-        "similarity" => Map.get(codebase_metrics, "similarity", %{})
-      }
-    }
-  end
-
-  defp metric_data_to_triples({metric_name, metric_data}) do
-    metric_data
-    |> Enum.filter(fn {_k, v} -> is_number(v) end)
-    |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end)
-  end
-
-  defp aggregate_file_metrics(file_results) do
-    file_results
-    |> Map.values()
-    |> Enum.flat_map(fn file_data ->
-      file_data
-      |> Map.get("metrics", %{})
-      |> Enum.flat_map(&metric_data_to_triples/1)
-    end)
-    |> Enum.group_by(fn {metric, key, _val} -> {metric, key} end, fn {_, _, val} -> val end)
-    |> Enum.reduce(%{}, fn {{metric, key}, values}, acc ->
-      stats = compute_stats(values)
-      metric_agg = Map.get(acc, metric, %{})
-
-      updated =
-        Map.merge(metric_agg, %{
-          "mean_#{key}" => stats.mean,
-          "std_#{key}" => stats.std,
-          "min_#{key}" => stats.min,
-          "max_#{key}" => stats.max
-        })
-
-      Map.put(acc, metric, updated)
-    end)
-  end
-
-  defp compute_stats([]), do: %{mean: 0.0, std: 0.0, min: 0.0, max: 0.0}
-
-  defp compute_stats(values) do
-    n = length(values)
-    mean = Enum.sum(values) / n
-    sum_squares = Enum.reduce(values, 0.0, fn v, acc -> acc + (v - mean) ** 2 end)
-    variance = sum_squares / n
-    std = :math.sqrt(variance)
-
-    %{
-      mean: Float.round(mean * 1.0, 4),
-      std: Float.round(std * 1.0, 4),
-      min: Float.round(Enum.min(values) * 1.0, 4),
-      max: Float.round(Enum.max(values) * 1.0, 4)
-    }
-  end
-end
diff --git a/lib/codeqa/ast/classification/node_classifier.ex b/lib/codeqa/ast/classification/node_classifier.ex
new file mode 100644
index 00000000..7a71e584
--- /dev/null
+++ b/lib/codeqa/ast/classification/node_classifier.ex
@@ -0,0 +1,142 @@
+defmodule CodeQA.AST.Classification.NodeClassifier do
+  @moduledoc """
+  Classifies a Node into a typed struct by running classification signals
+  over its tokens and weighing their votes.
+
+  ## How it works
+
+  Six classification signals scan the node's token stream in parallel via
+  `SignalStream`. Each signal emits weighted votes (e.g. `{:function_vote, 3}`)
+  when it detects a pattern indicating a node type. The classifier sums weights
+  per type and picks the highest total (ties: first max found). No votes yield `:code`.
+
+  ## Signals and votes
+
+  | Signal | Vote key | Patterns detected |
+  |---|---|---|
+  | `DocSignal` | `:doc_vote` | `<DOC>` token anywhere |
+  | `AttributeSignal` | `:attribute_vote` | `@name` at indent 0 |
+  | `FunctionSignal` | `:function_vote` | `def`, `func`, `fn`, etc. at indent 0 |
+  | `ModuleSignal` | `:module_vote` | `defmodule`, `class`, `module`, etc. at indent 0 |
+  | `ImportSignal` | `:import_vote` | `import`, `use`, `alias`, etc. at indent 0 |
+  | `TestSignal` | `:test_vote` | `test`, `describe`, `it`, etc. at indent 0 |
+
+  ## Weights
+
+  Weight 3 = first keyword seen (strong match); weight 1 = keyword later in
+  block (weak match, e.g. after a leading comment). `DocSignal` always emits
+  weight 3 and wins when a `<DOC>` token is present, since triple-quoted strings
+  are unambiguous.
+
+  ## Type-specific fields
+
+  `FunctionNode.name/arity/visibility`, `ModuleNode.name/kind`, etc. all default
+  to `nil`. Population of those fields is left to a future enrichment pass.
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+
+  alias CodeQA.AST.Nodes.{
+    AttributeNode,
+    CodeNode,
+    DocNode,
+    FunctionNode,
+    ImportNode,
+    ModuleNode,
+    TestNode
+  }
+
+  alias CodeQA.AST.Parsing.SignalStream
+
+  alias CodeQA.AST.Signals.Classification.{
+    AttributeSignal,
+    DocSignal,
+    FunctionSignal,
+    ImportSignal,
+    ModuleSignal,
+    TestSignal
+  }
+
+  @classification_signals [
+    %DocSignal{},
+    %AttributeSignal{},
+    %FunctionSignal{},
+    %ModuleSignal{},
+    %ImportSignal{},
+    %TestSignal{}
+  ]
+
+  @type_modules %{
+    doc: DocNode,
+    attribute: AttributeNode,
+    function: FunctionNode,
+    module: ModuleNode,
+    import: ImportNode,
+    test: TestNode,
+    code: CodeNode
+  }
+
+  @doc """
+  Classify a Node into the most specific typed struct.
+
+  Runs classification signals, weighs votes, and delegates to the winning
+  struct's `cast/1` to build the result. Type-specific fields default to nil.
+  """
+  @spec classify(Node.t(), module()) :: term()
+  def classify(%Node{} = node, lang_mod), do: classify(node, lang_mod, nil)
+
+  @doc """
+  Classify a Node, optionally seeded with `parent_context` tokens that come
+  immediately before the node in its parent block.
+
+  Used for sub-blocks the bracket/keyword splitter has carved out of a parent:
+  e.g. the `{Bar, Baz}` of a multi-line `alias Foo.{Bar, Baz}`. Without the
+  context, the sub-block lacks the `alias` keyword and falls back to `:code`.
+  Prepending the parent's last line of tokens gives the existing classification
+  signals enough to vote correctly.
+  """
+  @spec classify(Node.t(), module(), [CodeQA.AST.Lexing.Token.t()] | nil) :: term()
+  def classify(%Node{} = node, lang_mod, parent_context) do
+    tokens = prepend_context(node.tokens, parent_context)
+    type = vote(tokens, lang_mod)
+    @type_modules[type].cast(node)
+  end
+
+  defp prepend_context(tokens, nil), do: tokens
+  defp prepend_context(tokens, []), do: tokens
+  defp prepend_context(tokens, ctx) when is_list(ctx), do: ctx ++ tokens
+
+  defp vote(tokens, lang_mod) do
+    tokens
+    |> run_signals(lang_mod)
+    |> tally()
+    |> winner()
+  end
+
+  defp run_signals(tokens, lang_mod) do
+    SignalStream.run(tokens, @classification_signals, lang_mod)
+    |> List.flatten()
+    |> Enum.filter(fn {_src, group, _name, _val} -> group == :classification end)
+  end
+
+  defp tally(emissions) do
+    Enum.reduce(emissions, %{}, fn {_src, _grp, name, weight}, acc ->
+      Map.update(acc, name, weight, &(&1 + weight))
+    end)
+  end
+
+  defp winner(votes) when map_size(votes) == 0, do: :code
+
+  defp winner(votes) do
+    {vote_name, _weight} = Enum.max_by(votes, fn {_, w} -> w end)
+    vote_to_type(vote_name)
+  end
+
+  defp vote_to_type(:doc_vote), do: :doc
+  defp vote_to_type(:attribute_vote), do: :attribute
+  defp vote_to_type(:function_vote), do: :function
+  defp vote_to_type(:module_vote), do: :module
+  defp vote_to_type(:import_vote), do: :import
+  defp vote_to_type(:test_vote), do: :test
+  defp vote_to_type(_), do: :code
+end
diff --git a/lib/codeqa/ast/classification/node_protocol.ex b/lib/codeqa/ast/classification/node_protocol.ex
new file mode 100644
index 00000000..fa4943df
--- /dev/null
+++ b/lib/codeqa/ast/classification/node_protocol.ex
@@ -0,0 +1,29 @@
+defprotocol CodeQA.AST.Classification.NodeProtocol do
+  @moduledoc """
+  Common interface for all typed AST node structs.
+
+  All node struct types (CodeNode, DocNode, FunctionNode, etc.) implement this
+  protocol, allowing downstream code to work with any node type uniformly.
+  """
+
+  @spec tokens(t()) :: [term()]
+  def tokens(node)
+
+  @spec flat_tokens(t()) :: [term()]
+  def flat_tokens(node)
+
+  @spec line_count(t()) :: non_neg_integer()
+  def line_count(node)
+
+  @spec children(t()) :: [term()]
+  def children(node)
+
+  @spec start_line(t()) :: non_neg_integer() | nil
+  def start_line(node)
+
+  @spec end_line(t()) :: non_neg_integer() | nil
+  def end_line(node)
+
+  @spec label(t()) :: term() | nil
+  def label(node)
+end
diff --git a/lib/codeqa/ast/classification/node_type_detector.ex b/lib/codeqa/ast/classification/node_type_detector.ex
new file mode 100644
index 00000000..50383713
--- /dev/null
+++ b/lib/codeqa/ast/classification/node_type_detector.ex
@@ -0,0 +1,20 @@
+defmodule CodeQA.AST.Classification.NodeTypeDetector do
+  @moduledoc """
+  Classifies a list of raw `Node` structs (from `Parser`) into typed structs.
+
+  Each node is classified by `NodeClassifier`, which runs classification signals
+  over the node's tokens and picks the highest-voted type. See `NodeClassifier`
+  for the full list of signals and their weights.
+  """
+
+  alias CodeQA.AST.Classification.NodeClassifier
+  alias CodeQA.AST.Enrichment.Node
+
+  @doc """
+  Classify each node in the list into the most specific typed struct.
+  """
+  @spec detect_types([Node.t()], module()) :: [term()]
+  def detect_types(blocks, lang_mod) do
+    Enum.map(blocks, &NodeClassifier.classify(&1, lang_mod))
+  end
+end
diff --git a/lib/codeqa/ast/classification/typed_node_kind.ex b/lib/codeqa/ast/classification/typed_node_kind.ex
new file mode 100644
index 00000000..4993ee1d
--- /dev/null
+++ b/lib/codeqa/ast/classification/typed_node_kind.ex
@@ -0,0 +1,24 @@
+defmodule CodeQA.AST.Classification.TypedNodeKind do
+  @moduledoc "Maps a typed node struct from `NodeClassifier` to its kind atom."
+
+  alias CodeQA.AST.Nodes.{
+    AttributeNode,
+    CodeNode,
+    DocNode,
+    FunctionNode,
+    ImportNode,
+    ModuleNode,
+    TestNode
+  }
+
+  @type kind :: :doc | :attribute | :function | :module | :import | :test | :code
+
+  @spec of(struct()) :: kind()
+  def of(%DocNode{}), do: :doc
+  def of(%AttributeNode{}), do: :attribute
+  def of(%FunctionNode{}), do: :function
+  def of(%ModuleNode{}), do: :module
+  def of(%ImportNode{}), do: :import
+  def of(%TestNode{}), do: :test
+  def of(%CodeNode{}), do: :code
+end
diff --git a/lib/codeqa/ast/enrichment/compound_node.ex b/lib/codeqa/ast/enrichment/compound_node.ex
new file mode 100644
index 00000000..88a594c2
--- /dev/null
+++ b/lib/codeqa/ast/enrichment/compound_node.ex
@@ -0,0 +1,41 @@
+defmodule CodeQA.AST.Enrichment.CompoundNode do
+  @moduledoc """
+  Groups semantically related typed nodes together.
+
+  A compound node represents a complete "unit" in source code — combining
+  documentation, type annotations, and implementation:
+
+  - `docs`       — [DocNode.t()] (triple-quoted docstrings)
+  - `typespecs`  — [AttributeNode.t()] (@spec, @type, etc.)
+  - `code`       — [Node.t()] with type :code (implementation clauses)
+
+  Boundaries span all constituent nodes in source order (docs → typespecs →
+  code), with leading/trailing whitespace tokens stripped. Column values are
+  read from the `col` field of the relevant Token structs — Node has no col
+  fields.
+
+  A bare code node with no preceding docs/typespecs is still wrapped in a
+  CompoundNode (with empty `docs` and `typespecs`).
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Nodes.AttributeNode
+
+  defstruct docs: [],
+            typespecs: [],
+            code: [],
+            start_line: nil,
+            start_col: nil,
+            end_line: nil,
+            end_col: nil
+
+  @type t :: %__MODULE__{
+          docs: [Node.t()],
+          typespecs: [AttributeNode.t()],
+          code: [Node.t()],
+          start_line: non_neg_integer() | nil,
+          start_col: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          end_col: non_neg_integer() | nil
+        }
+end
diff --git a/lib/codeqa/ast/enrichment/compound_node_builder.ex b/lib/codeqa/ast/enrichment/compound_node_builder.ex
new file mode 100644
index 00000000..27c61659
--- /dev/null
+++ b/lib/codeqa/ast/enrichment/compound_node_builder.ex
@@ -0,0 +1,157 @@
+defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do
+  @moduledoc """
+  Groups typed nodes into CompoundNode structs.
+
+  A new compound starts when:
+  1. A :doc or :typespec node appears after at least one :code node
+  2. The trailing whitespace of the previous node contains 3+ <NL> tokens
+
+  All consecutive :code nodes with no boundary between them accumulate
+  into the same compound's `code` list.
+
+  Sub-blocks of :code nodes that have type :doc or :typespec are
+  promoted to the compound's `docs`/`typespecs` lists.
+  """
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Enrichment.CompoundNode
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken}
+  alias CodeQA.AST.Nodes.{AttributeNode, DocNode}
+
+  @doc """
+  Groups a list of typed nodes into CompoundNode structs.
+  """
+  @spec build([Node.t()]) :: [CompoundNode.t()]
+  def build([]), do: []
+
+  def build(blocks) do
+    # Accumulator: {current_compound, prev_trailing_ws, finalized_compounds}
+    # prev_trailing_ws carries the trailing <NL>/<WS> tokens stripped from the
+    # PREVIOUS node. Blank-line boundaries are detected on the previous node's
+    # trailing whitespace — BlankLineRule places blank-line <NL> tokens at the
+    # END of the node that precedes the split, not at the start of the new one.
+    {current, _, compounds} =
+      Enum.reduce(blocks, {empty_compound(), [], []}, fn block,
+                                                         {current, prev_trailing_ws, acc} ->
+        {content_tokens, trailing_ws} = split_trailing_whitespace(block.tokens)
+        clean_block = %{block | tokens: content_tokens}
+        # Check the PREVIOUS node's trailing whitespace for blank-line boundary
+        blank_boundary = blank_line_boundary?(prev_trailing_ws)
+
+        cond do
+          # Rule 1: doc/typespec after code → flush and start new compound
+          (is_struct(block, DocNode) or is_struct(block, AttributeNode)) and current.code != [] ->
+            {start_compound(clean_block), trailing_ws, [finalize(current) | acc]}
+
+          # Rule 2: blank-line boundary on previous node → flush and start fresh
+          blank_boundary and not empty_compound?(current) ->
+            {start_compound(clean_block), trailing_ws, [finalize(current) | acc]}
+
+          # No boundary — accumulate into current
+          true ->
+            {add_block(current, clean_block), trailing_ws, acc}
+        end
+      end)
+
+    compounds
+    |> then(fn acc ->
+      if empty_compound?(current), do: acc, else: [finalize(current) | acc]
+    end)
+    |> Enum.reverse()
+  end
+
+  defp empty_compound, do: %CompoundNode{}
+
+  defp empty_compound?(%CompoundNode{docs: [], typespecs: [], code: []}), do: true
+  defp empty_compound?(_), do: false
+
+  defp add_block(%CompoundNode{} = compound, block) when is_struct(block, DocNode) do
+    %CompoundNode{compound | docs: compound.docs ++ [block]}
+  end
+
+  defp add_block(%CompoundNode{} = compound, block) when is_struct(block, AttributeNode) do
+    %CompoundNode{compound | typespecs: compound.typespecs ++ [block]}
+  end
+
+  defp add_block(%CompoundNode{} = compound, block) do
+    {promoted_docs, promoted_specs, clean_children} = promote_sub_blocks(block.children)
+    clean_block = %{block | children: clean_children}
+
+    %CompoundNode{
+      compound
+      | code: compound.code ++ [clean_block],
+        docs: compound.docs ++ promoted_docs,
+        typespecs: compound.typespecs ++ promoted_specs
+    }
+  end
+
+  defp start_compound(new_block) do
+    add_block(empty_compound(), new_block)
+  end
+
+  # Separates children by type — :doc/:typespec go up to the compound level.
+  # Returns {docs, typespecs, remaining_children}, each in original order.
+  # Uses order-preserving filters rather than `acc ++ [x]` per element, which
+  # re-copies the accumulated list on every step.
+  defp promote_sub_blocks(children) do
+    docs = Enum.filter(children, &(&1.type == :doc))
+    specs = Enum.filter(children, &(&1.type == :typespec))
+    rest = Enum.reject(children, &(&1.type in [:doc, :typespec]))
+    {docs, specs, rest}
+  end
+
+  # Strips trailing <WS>/<NL> tokens from a node's token list.
+  # Returns {content_tokens, trailing_ws_tokens}.
+  #
+  # Walks the list from the end: the leading run of whitespace/newline tokens in
+  # the reversed list is the trailing run of the original. Non-map tokens count
+  # as content. When every token is whitespace (or the list is empty) the result
+  # is `{[], tokens}`.
+  defp split_trailing_whitespace(tokens) do
+    whitespace_kinds = [WhitespaceToken.kind(), NewlineToken.kind()]
+
+    {trailing_rev, content_rev} =
+      tokens
+      |> Enum.reverse()
+      |> Enum.split_while(fn token ->
+        is_map(token) and token.kind in whitespace_kinds
+      end)
+
+    # Both halves were collected back-to-front; restore source order.
+    {Enum.reverse(content_rev), Enum.reverse(trailing_rev)}
+  end
+
+  # A blank-line boundary exists when the trailing whitespace contains 3+ <NL> tokens
+  # (i.e. 2+ blank lines). A single blank line (2 NLs: end-of-line + blank line) is
+  # common within a compound (e.g. between function clauses) and does not split.
+  defp blank_line_boundary?(trailing_ws) do
+    Enum.count(trailing_ws, &(&1.kind == NewlineToken.kind())) >= 3
+  end
+
+  # Computes boundaries from all constituent nodes in source order:
+  # docs → typespecs → code. Reads col directly from Token structs.
+  defp finalize(%CompoundNode{} = compound) do
+    all_blocks = compound.docs ++ compound.typespecs ++ compound.code
+    all_tokens = Enum.flat_map(all_blocks, &NodeProtocol.flat_tokens/1)
+
+    first_token =
+      Enum.find(
+        all_tokens,
+        &(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])
+      )
+
+    last_token =
+      all_tokens
+      |> Enum.reverse()
+      |> Enum.find(&(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()]))
+
+    %CompoundNode{
+      compound
+      | start_line: first_token && first_token.line,
+        start_col: first_token && first_token.col,
+        end_line: last_token && last_token.line,
+        end_col: last_token && last_token.col
+    }
+  end
+end
diff --git a/lib/codeqa/ast/enrichment/node.ex b/lib/codeqa/ast/enrichment/node.ex
new file mode 100644
index 00000000..65e4b23d
--- /dev/null
+++ b/lib/codeqa/ast/enrichment/node.ex
@@ -0,0 +1,70 @@
+defmodule CodeQA.AST.Enrichment.Node do
+  @moduledoc """
+  A detected code node with optional nested sub-blocks.
+
+  ## Fields
+
+  - `tokens`      — aggregated code content: for leaf nodes, the original token stream;
+                    for non-leaf nodes, the flat concatenation of all children's `tokens`.
+                    Use this for content comparison and metrics.
+  - `line_count`  — number of source lines spanned by this node: `end_line - start_line + 1`
+                    when both are available, else `1`.
+  - `children`    — nested `Node.t()` structs detected by enclosure rules
+                    (`BracketRule`, `ColonIndentationRule`).
+  - `label`       — arbitrary term attached by the caller. Set to `"path:start_line"`
+                    by `NearDuplicateBlocks.analyze/2` for human-readable pair reporting.
+  - `start_line`  — 1-based line number of the first token in this node, populated by
+                    `Parser` from `List.first(tokens).line`.
+  - `end_line`    — 1-based line number of the last token in this node, populated by
+                    `Parser` from `List.last(tokens).line`.
+
+  `start_line` and `end_line` may be `nil` for synthetic nodes created in tests
+  without line metadata.
+  """
+
+  @enforce_keys [:tokens, :line_count, :children]
+  defstruct [
+    :tokens,
+    :line_count,
+    :children,
+    :label,
+    :start_line,
+    :end_line,
+    type: :code
+  ]
+
+  @type t :: %__MODULE__{
+          tokens: [CodeQA.AST.Lexing.Token.t()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          label: term() | nil,
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          type: :code | :doc | :typespec
+        }
+
+  @spec children_count(t()) :: non_neg_integer()
+  def children_count(%__MODULE__{children: ch}), do: length(ch)
+
+  # Deprecated alias of `children_count/1`; kept while callers migrate.
+  @spec sub_block_count(t()) :: non_neg_integer()
+  def sub_block_count(%__MODULE__{} = node), do: children_count(node)
+
+  @spec token_count(t()) :: non_neg_integer()
+  def token_count(%__MODULE__{tokens: tokens}), do: length(tokens)
+end
+
+defimpl CodeQA.AST.Classification.NodeProtocol, for: CodeQA.AST.Enrichment.Node do
+  def tokens(n), do: n.tokens
+  def line_count(n), do: n.line_count
+  def children(n), do: n.children
+  def start_line(n), do: n.start_line
+  def end_line(n), do: n.end_line
+  def label(n), do: n.label
+
+  # Leaf nodes yield their own tokens; otherwise the children's tokens, depth-first.
+  def flat_tokens(%{children: [], tokens: own_tokens}), do: own_tokens
+
+  def flat_tokens(%{children: kids}),
+    do: Enum.flat_map(kids, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1)
+end
diff --git a/lib/codeqa/ast/enrichment/node_analyzer.ex b/lib/codeqa/ast/enrichment/node_analyzer.ex
new file mode 100644
index 00000000..2f6221cc
--- /dev/null
+++ b/lib/codeqa/ast/enrichment/node_analyzer.ex
@@ -0,0 +1,65 @@
+defmodule CodeQA.AST.Enrichment.NodeAnalyzer do
+  @moduledoc """
+  Extracts locally bound variable names from a token list.
+
+  Used by the domain tagger to subtract local bindings from the domain signal —
+  a variable bound within a node (e.g. `user = Repo.get!(id)`) is not a domain
+  reference and should not appear in the node's domain fingerprint.
+  """
+
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.Token
+
+  @doc """
+  Returns a MapSet of lowercase identifier names that are locally bound
+  within the given token list.
+
+  Detected patterns:
+  - `<ID> "="` — simple assignment (guards against `==`, `=>`, `=~`, `!=`, `<=`, `>=`)
+  - `<ID> "<-"` — with/for binding (all `<ID>` tokens on the LHS of `<-`)
+
+  Function parameters are NOT extracted here (see `param_variables/1`).
+  """
+  @spec bound_variables([Token.t()]) :: MapSet.t(String.t())
+  def bound_variables(tokens) do
+    MapSet.union(
+      assignment_bindings(tokens),
+      arrow_bindings(tokens)
+    )
+  end
+
+  # Collect `<ID>` immediately before `=`
+  defp assignment_bindings(tokens) do
+    tokens
+    |> Enum.chunk_every(2, 1, :discard)
+    |> Enum.flat_map(fn
+      [%Token{kind: "<ID>", content: name}, %Token{kind: "="}] ->
+        [String.downcase(name)]
+
+      _ ->
+        []
+    end)
+    |> MapSet.new()
+  end
+
+  # Collect all `<ID>` tokens on the LHS of `<-` (within the same line).
+  # Resets the accumulator on `<NL>` so RHS tokens from prior lines don't leak.
+  defp arrow_bindings(tokens) do
+    tokens
+    |> Enum.reduce({[], MapSet.new()}, fn
+      %Token{kind: "<-"}, {lhs_ids, acc} ->
+        new_bindings = lhs_ids |> Enum.map(&String.downcase/1) |> MapSet.new()
+        {[], MapSet.union(acc, new_bindings)}
+
+      %NewlineToken{}, {_, acc} ->
+        {[], acc}
+
+      %Token{kind: "<ID>", content: name}, {lhs_ids, acc} ->
+        {[name | lhs_ids], acc}
+
+      _, {lhs_ids, acc} ->
+        {lhs_ids, acc}
+    end)
+    |> elem(1)
+  end
+end
diff --git a/lib/codeqa/ast/lexing/newline_token.ex b/lib/codeqa/ast/lexing/newline_token.ex
new file mode 100644
index 00000000..2ccb7129
--- /dev/null
+++ b/lib/codeqa/ast/lexing/newline_token.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.AST.Lexing.NewlineToken do
+  @moduledoc """
+  A newline token emitted by `TokenNormalizer.normalize_structural/1`.
+
+  Represents a `\\n` line boundary between two source lines.
+
+  ## Fields
+
+  - `kind`    — always `"<NL>"`.
+  - `content` — the original newline character (`"\\n"`).
+  - `line`    — 1-based line number of the line that ends here.
+  - `col`     — 0-based byte offset of the newline within that line.
+  """
+
+  @kind "<NL>"
+
+  defstruct [:content, :line, :col, kind: @kind]
+
+  @doc "Returns the normalized kind string for newline tokens."
+  @spec kind() :: String.t()
+  def kind, do: @kind
+
+  @type t :: %__MODULE__{
+          kind: String.t(),
+          content: String.t(),
+          line: non_neg_integer() | nil,
+          col: non_neg_integer() | nil
+        }
+end
diff --git a/lib/codeqa/ast/lexing/string_token.ex b/lib/codeqa/ast/lexing/string_token.ex
new file mode 100644
index 00000000..04fc2963
--- /dev/null
+++ b/lib/codeqa/ast/lexing/string_token.ex
@@ -0,0 +1,49 @@
+defmodule CodeQA.AST.Lexing.StringToken do
+  @moduledoc """
+  Token struct for string literals produced by `TokenNormalizer`: quoted
+  strings, backtick strings and triple-quote heredoc delimiters.
+
+  ## Fields
+
+  - `kind`           — `"<STR>"` by default; `"<DOC>"` for triple-quote
+                       delimiters.
+  - `content`        — source text of the literal or delimiter. When the
+                       normalizer extracts interpolations, the interpolation
+                       expressions are removed from this text.
+  - `line`, `col`    — source location.
+  - `interpolations` — extracted interpolation expressions (`nil` by default).
+  - `multiline`      — `true` for triple-quote (`\"\"\"` / `'''`) tokens.
+  - `quotes`         — `:double`, `:single`, or `:backtick`.
+  """
+
+  @kind "<STR>"
+  @doc_kind "<DOC>"
+
+  @type quotes :: :double | :single | :backtick
+
+  @type t :: %__MODULE__{
+          content: String.t(),
+          line: non_neg_integer() | nil,
+          col: non_neg_integer() | nil,
+          kind: String.t(),
+          interpolations: [String.t()] | nil,
+          multiline: boolean(),
+          quotes: quotes()
+        }
+
+  defstruct content: nil,
+            line: nil,
+            col: nil,
+            kind: @kind,
+            interpolations: nil,
+            multiline: false,
+            quotes: :double
+
+  @doc "The `kind` string used for ordinary (non-heredoc) string tokens."
+  @spec kind() :: String.t()
+  def kind, do: @kind
+
+  @doc "The `kind` string used for triple-quote doc string tokens."
+  @spec doc_kind() :: String.t()
+  def doc_kind, do: @doc_kind
+end
diff --git a/lib/codeqa/ast/lexing/token.ex b/lib/codeqa/ast/lexing/token.ex
new file mode 100644
index 00000000..f705f7e6
--- /dev/null
+++ b/lib/codeqa/ast/lexing/token.ex
@@ -0,0 +1,45 @@
+defmodule CodeQA.AST.Lexing.Token do
+  @moduledoc """
+  A single token emitted by `TokenNormalizer.normalize_structural/1`.
+
+  ## Fields
+
+  - `kind`    — normalized form used for structural comparison: `<ID>`, `<NUM>`,
+                or the literal character(s) for punctuation and operators.
+                (`<STR>`, `<NL>` and `<WS>` are emitted as dedicated structs.)
+  - `content` — original source text before normalization. Identical to `kind`
+                for punctuation/operator tokens; differs for identifiers and
+                numbers. Enables source reconstruction and is the correct field
+                to check when matching declaration keywords.
+  - `line`    — 1-based line number in the source file.
+  - `col`     — 0-based byte offset from the start of the line.
+
+  String literals are emitted as `StringToken` structs, not `Token`, so that
+  the `interpolations` field does not pollute the common token shape.
+
+  ## Design notes (from tree-sitter, ctags, lizard)
+
+  - **kind vs content split** — mirrors tree-sitter's distinction between a
+    node's `type` (structural kind) and its `text` (original source). `kind`
+    is what pattern matching and comparison use; `content` is the original
+    text used for reporting and reconstruction.
+  - **Normalization lives in kind, not content** — `content` is never modified.
+    This means two tokens with different `content` but the same `kind` (e.g.
+    `foo` and `bar` both normalizing to `<ID>`) are structurally equivalent
+    for duplicate detection but distinguishable for reporting.
+  - **Line + col for precise location** — ctags records line numbers; tree-sitter
+    records byte ranges. We store both line (for human-readable reporting) and
+    col (for IDE navigation and sub-block start/end precision).
+  - **No enforcement on line/col** — synthetic tokens created in tests may omit
+    line/col. Consumers that need location data should guard for nil.
+  """
+
+  defstruct [:kind, :content, :line, :col]
+
+  @type t :: %__MODULE__{
+          kind: String.t(),
+          content: String.t(),
+          line: non_neg_integer() | nil,
+          col: non_neg_integer() | nil
+        }
+end
diff --git a/lib/codeqa/ast/lexing/token_normalizer.ex b/lib/codeqa/ast/lexing/token_normalizer.ex
new file mode 100644
index 00000000..5cabba3a
--- /dev/null
+++ b/lib/codeqa/ast/lexing/token_normalizer.ex
@@ -0,0 +1,263 @@
+defmodule CodeQA.AST.Lexing.TokenNormalizer do
+  @moduledoc """
+  Abstracts raw source code into language-agnostic structural tokens.
+
+  See [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis).
+  """
+
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.StringToken
+  alias CodeQA.AST.Lexing.Token
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @doc """
+  Normalizes source code into language-agnostic structural tokens, preserving
+  newlines as `<NL>` tokens and leading indentation as `<WS>` tokens (one per
+  2 columns of indent; a tab counts as 2).
+
+  Non-ASCII bytes are replaced by spaces before scanning. Each token carries its
+  normalized `kind`, source `content`, 1-based `line` and 0-based `col`.
+  """
+  @spec normalize_structural(String.t()) ::
+          [Token.t() | StringToken.t() | NewlineToken.t() | WhitespaceToken.t()]
+  def normalize_structural(code) do
+    code = String.replace(code, ~r/[^\x00-\x7F]/, " ")
+    lines = String.split(code, "\n")
+    last_idx = length(lines) - 1
+
+    lines
+    |> Enum.with_index()
+    |> Enum.flat_map(fn {line, idx} ->
+      tokens_with_newline(line, idx, last_idx)
+    end)
+  end
+
+  # Tokenizes one source line and, unless it is the last line of the input,
+  # appends the `<NL>` token that terminates it.
+  defp tokens_with_newline(line, idx, last_idx) do
+    line_num = idx + 1
+    tokens = tokenize_line(line, line_num)
+
+    if idx < last_idx do
+      # The newline sits directly after the last byte of the line. `line` is
+      # ASCII-only at this point (normalize_structural/1 blanks every other
+      # byte), so its byte size is the newline's 0-based column. Deriving the
+      # column from the last token instead undercounts whenever trailing
+      # whitespace was skipped, interpolations were stripped from a string's
+      # `content`, or the indent did not map 1:1 onto `<WS>` tokens.
+      newline = %NewlineToken{content: "\n", line: line_num, col: byte_size(line)}
+      tokens ++ [newline]
+    else
+      tokens
+    end
+  end
+
+  # Returns the tokens of a single line: one `<WS>` token per indentation unit
+  # followed by the tokens scanned from the rest of the line.
+  #
+  # Indentation is measured over the leading run of spaces and tabs; a tab
+  # weighs 2, a space 1, and every full 2 units yield one `<WS>` token (an odd
+  # trailing unit is dropped). `<WS>` columns advance in steps of 2, while
+  # content tokens are offset by the actual number of indent characters.
+  defp tokenize_line(line, line_num) do
+    {indent_chars, content_chars} =
+      line
+      |> String.graphemes()
+      |> Enum.split_while(&(&1 in [" ", "\t"]))
+
+    indent_weight =
+      Enum.reduce(indent_chars, 0, fn
+        "\t", acc -> acc + 2
+        " ", acc -> acc + 1
+      end)
+
+    ws_tokens =
+      for unit <- 1..div(indent_weight, 2)//1 do
+        %WhitespaceToken{content: "  ", line: line_num, col: (unit - 1) * 2}
+      end
+
+    # scan_content/3 also reports its last token; only the token list is needed.
+    {content_tokens, _last} =
+      scan_content(Enum.join(content_chars), line_num, length(indent_chars))
+
+    ws_tokens ++ content_tokens
+  end
+
+  # Multi-char operators matched longest-first so that e.g. `===` beats `==`.
+  # Tagged `:literal` so `next_token` uses the matched text as both kind and content
+  # (unlike `<ID>` and `<NUM>`, whose kind is a placeholder for the content).
+  @operator_regex ~r/^(?:===|!==|<=>|==|!=|<=|>=|\|>|<>|<-|->|=>|=~|!~|&&|\|\||\?\?|\?\.|:=|::|\.\.\.|\.\.|--|\+\+|\*\*|\/\/|\+=|-=|\*=|\/=|%=)/
+
+  # Scan rules as {tag, regex}. In the `${…}` rules a lone `$` must be `\$(?!\{)`.
+  @skip_rule {:skip, ~r/^\s+/}
+  @operator_rule {:literal, @operator_regex}
+  @trip_quotes_rule {"<TRIP_QUOTES>", ~r/^"""|^'''/}
+  @str_interp_rule {"<STR_INTERP>", ~r/^"(?=[^"]*#\{)(?:[^"\\#]|\\.|#(?!\{)|#\{[^}]*\})*"/}
+  @str_dollar_interp_rule {"<STR_DOLLAR_INTERP>",
+                           ~r/^"(?=[^"]*\$\{)(?:[^"\\$]|\\.|\$(?!\{)|\$\{[^}]*\})*"/}
+  @str_swift_interp_rule {"<STR_SWIFT_INTERP>", ~r/^"(?=[^"]*\\\()(?:[^"\\]|\\.)*"/}
+  @str_rule {"<STR>", ~r/^"(?:[^"\\]|\\.)*"|^'(?:[^'\\]|\\.)*'/}
+  @backtick_interp_rule {"<BACKTICK_INTERP>",
+                         ~r/^`(?=[^`]*\$\{)(?:[^`\\$]|\\.|\$(?!\{)|\$\{[^}]*\})*`/}
+  @backtick_str_rule {"<BACKTICK_STR>", ~r/^`(?:[^`\\]|\\.)*`/}
+  @num_rule {"<NUM>", ~r/^\d+(?:\.\d+)?/}
+  @id_rule {"<ID>", ~r/^[a-zA-Z_]\w*/}
+
+  # Rule subsets per leading quote character, tried in the listed order: the
+  # triple-quote and interpolating rules come before the plain string rule.
+  @double_quote_rules [
+    @trip_quotes_rule,
+    @str_interp_rule,
+    @str_dollar_interp_rule,
+    @str_swift_interp_rule,
+    @str_rule
+  ]
+  @single_quote_rules [@trip_quotes_rule, @str_rule]
+  @backtick_rules [@backtick_interp_rule, @backtick_str_rule]
+
+  # Selects the candidate scan rules from the first byte of the remaining text,
+  # so identifiers, numbers, whitespace and operators never try unrelated
+  # regexes. An empty list means no rule is tried for that byte; the caller
+  # then emits a single-character token.
+  defp dispatch_rules(byte) do
+    cond do
+      byte == ?" -> @double_quote_rules
+      byte == ?' -> @single_quote_rules
+      byte == ?` -> @backtick_rules
+      byte in ?0..?9 -> [@num_rule]
+      byte in ?a..?z or byte in ?A..?Z or byte == ?_ -> [@id_rule]
+      # Bytes that can start one of the multi-character operators.
+      byte in [?=, ?!, ?<, ?>, ?|, ?&, ??, ?:, ?., ?-, ?+, ?*, ?/, ?%] -> [@operator_rule]
+      # Control characters and the space (code points 0..32).
+      byte <= 32 -> [@skip_rule]
+      true -> []
+    end
+  end
+
+  # Scans `text` into tokens. Returns `{tokens, last_token_or_nil}`; the last
+  # emitted token is threaded through the scan instead of being looked up later.
+  defp scan_content(text, line_num, col_offset) do
+    {stack, final} = do_scan(text, line_num, col_offset, [], nil)
+    {Enum.reverse(stack), final}
+  end
+
+  defp do_scan(<<byte, _::binary>> = text, line, col, stack, final) do
+    {emitted, rest, width} = next_token(byte, text, line, col)
+    # A skipped match advances the column without producing a token.
+    {stack, final} = if emitted == :skip, do: {stack, final}, else: {[emitted | stack], emitted}
+    do_scan(rest, line, col + width, stack, final)
+  end
+
+  defp do_scan("", _line, _col, stack, final), do: {stack, final}
+
+  # next_token/4 produces the next scan step as `{token | :skip, rest, advance}`,
+  # where `advance` is the length of the consumed text.
+  #
+  # Only the rules that dispatch_rules/1 selects for the first byte are tried,
+  # in order, and the first regex that matches wins. When nothing matches, the
+  # first character of `text` is emitted as a literal token.
+  defp next_token(first, text, line, col) do
+    hit =
+      first
+      |> dispatch_rules()
+      |> Enum.find_value(fn {tag, pattern} ->
+        with [lexeme | _captures] <- Regex.run(pattern, text), do: {tag, lexeme}
+      end)
+
+    # Treating "no match" as a one-character `:literal` lets every outcome
+    # share the length / remainder computation below.
+    {tag, lexeme} = hit || {:literal, String.first(text)}
+
+    advance = String.length(lexeme)
+    rest = String.slice(text, advance..-1//1)
+
+    case tag do
+      :skip ->
+        {:skip, rest, advance}
+
+      :literal ->
+        {%Token{kind: lexeme, content: lexeme, line: line, col: col}, rest, advance}
+
+      kind ->
+        raw = %Token{kind: kind, content: lexeme, line: line, col: col}
+        {postprocess(kind, raw), rest, advance}
+    end
+  end
+
+  # postprocess/2 turns the raw `%Token{}` built by next_token/4 into its final
+  # form, selected by the tag of the scan rule that matched:
+  #
+  #   "<STR_INTERP>"         StringToken with `#{…}` extracted, quotes: :double
+  #   "<STR_DOLLAR_INTERP>"  StringToken with `${…}` extracted, quotes: :double
+  #   "<STR_SWIFT_INTERP>"   StringToken with `\(…)` extracted, quotes: :double
+  #   "<BACKTICK_INTERP>"    StringToken with `${…}` extracted, quotes: :backtick
+  #   "<TRIP_QUOTES>"        multiline StringToken of doc kind
+  #   "<BACKTICK_STR>"       StringToken, quotes: :backtick
+  #   "<STR>"                StringToken, quotes taken from the opening character
+  #   any other tag          the token itself, unchanged
+  #
+  # The extraction regexes stop at the first closing delimiter, so an expression
+  # that itself contains `}` (or `)` in the Swift form) is captured only up to
+  # that character; the remainder stays in `content`.
+  defp postprocess(tag, token) do
+    case tag do
+      "<STR_INTERP>" ->
+        extract_interpolations(token, ~r/#\{([^}]*)\}/, ~r/#\{[^}]*\}/, quotes: :double)
+
+      "<STR_DOLLAR_INTERP>" ->
+        extract_interpolations(token, ~r/\$\{([^}]*)\}/, ~r/\$\{[^}]*\}/, quotes: :double)
+
+      "<STR_SWIFT_INTERP>" ->
+        extract_interpolations(token, ~r/\\\(([^)]*)\)/, ~r/\\\([^)]*\)/, quotes: :double)
+
+      "<BACKTICK_INTERP>" ->
+        extract_interpolations(token, ~r/\$\{([^}]*)\}/, ~r/\$\{[^}]*\}/, quotes: :backtick)
+
+      "<TRIP_QUOTES>" ->
+        # The triple-quote rule matches only `"""` or `'''`.
+        quotes = if token.content == ~s("""), do: :double, else: :single
+        string_token(token, kind: StringToken.doc_kind(), multiline: true, quotes: quotes)
+
+      "<BACKTICK_STR>" ->
+        string_token(token, kind: StringToken.kind(), quotes: :backtick)
+
+      "<STR>" ->
+        # The plain string rule accepts both double- and single-quoted literals.
+        quotes = if String.starts_with?(token.content, "\""), do: :double, else: :single
+        string_token(token, kind: StringToken.kind(), quotes: quotes)
+
+      _other ->
+        token
+    end
+  end
+
+  # Builds a StringToken at the raw token's position and with its content, then
+  # applies `fields` on top of the struct defaults.
+  defp string_token(token, fields) do
+    base = %StringToken{
+      content: token.content,
+      line: token.line,
+      col: token.col
+    }
+
+    struct!(base, fields)
+  end
+
+  # Builds a StringToken from an interpolating literal: each `capture_regex`
+  # capture is trimmed and collected; each `strip_regex` match leaves the content.
+  defp extract_interpolations(token, capture_regex, strip_regex, opts) do
+    %{content: literal, line: line, col: col} = token
+
+    exprs =
+      capture_regex
+      |> Regex.scan(literal, capture: :all_but_first)
+      |> Enum.map(fn [expr] -> String.trim(expr) end)
+
+    static = String.replace(literal, strip_regex, "")
+    quotes = Keyword.get(opts, :quotes, :double)
+
+    %StringToken{content: static, line: line, col: col, interpolations: exprs, quotes: quotes}
+  end
+end
diff --git a/lib/codeqa/ast/lexing/token_protocol.ex b/lib/codeqa/ast/lexing/token_protocol.ex
new file mode 100644
index 00000000..e38458d7
--- /dev/null
+++ b/lib/codeqa/ast/lexing/token_protocol.ex
@@ -0,0 +1,59 @@
+defprotocol CodeQA.AST.Lexing.TokenProtocol do
+  @moduledoc """
+  Protocol for token structs emitted by `TokenNormalizer`.
+
+  `Token`, `StringToken`, `NewlineToken` and `WhitespaceToken` all implement it,
+  so code that processes token streams need not branch on the concrete struct.
+
+  ## Functions
+
+  - `kind/1`    — normalized structural kind (`<ID>`, `<STR>`, `<NL>`, …)
+  - `content/1` — source text carried by the token
+  - `line/1`    — 1-based line number in the source file (`nil` for synthetic tokens)
+  - `col/1`     — 0-based byte offset from the start of the line (`nil` for synthetic tokens)
+  """
+
+  @doc "Returns the normalized structural kind of the token."
+  @spec kind(t) :: String.t()
+  def kind(token)
+
+  @doc "Returns the original source text of the token."
+  @spec content(t) :: String.t()
+  def content(token)
+
+  @doc "Returns the 1-based line number of the token, or `nil` for synthetic tokens."
+  @spec line(t) :: non_neg_integer() | nil
+  def line(token)
+
+  @doc "Returns the 0-based column offset of the token, or `nil` for synthetic tokens."
+  @spec col(t) :: non_neg_integer() | nil
+  def col(token)
+end
+
+defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.Token do
+  def kind(token), do: token.kind
+  def content(token), do: token.content
+  def line(token), do: token.line
+  def col(token), do: token.col
+end
+
+defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.StringToken do
+  def kind(token), do: token.kind
+  def content(token), do: token.content
+  def line(token), do: token.line
+  def col(token), do: token.col
+end
+
+defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.NewlineToken do
+  def kind(token), do: token.kind
+  def content(token), do: token.content
+  def line(token), do: token.line
+  def col(token), do: token.col
+end
+
+defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.WhitespaceToken do
+  def kind(token), do: token.kind
+  def content(token), do: token.content
+  def line(token), do: token.line
+  def col(token), do: token.col
+end
diff --git a/lib/codeqa/ast/lexing/whitespace_token.ex b/lib/codeqa/ast/lexing/whitespace_token.ex
new file mode 100644
index 00000000..cb230827
--- /dev/null
+++ b/lib/codeqa/ast/lexing/whitespace_token.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.AST.Lexing.WhitespaceToken do
+  @moduledoc """
+  Indentation token produced by `TokenNormalizer.normalize_structural/1`.
+
+  Each token stands for one indentation unit (2 spaces or 1 tab) at a line start.
+
+  ## Fields
+
+  - `kind`    — always `"<WS>"`.
+  - `content` — text of the unit; the normalizer always stores two spaces.
+  - `line`    — 1-based line number in the source file.
+  - `col`     — 0-based column of the unit; the normalizer counts in steps of 2.
+  """
+
+  @kind "<WS>"
+
+  @type t :: %__MODULE__{
+          kind: String.t(),
+          content: String.t(),
+          line: non_neg_integer() | nil,
+          col: non_neg_integer() | nil
+        }
+
+  defstruct content: nil, line: nil, col: nil, kind: @kind
+
+  @doc "The `kind` string shared by every whitespace token."
+  @spec kind() :: String.t()
+  def kind, do: @kind
+end
diff --git a/lib/codeqa/ast/nodes/attribute_node.ex b/lib/codeqa/ast/nodes/attribute_node.ex
new file mode 100644
index 00000000..7dd106cb
--- /dev/null
+++ b/lib/codeqa/ast/nodes/attribute_node.ex
@@ -0,0 +1,67 @@
+defmodule CodeQA.AST.Nodes.AttributeNode do
+  @moduledoc """
+  AST node for fields, constants, decorators, annotations, and typespecs.
+  Also covers what used to be the separate :typespec node type (kind: :typespec).
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken}
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback])
+
+  defstruct @base_fields ++ [:name, :kind]
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil,
+          name: String.t() | nil,
+          kind: :field | :constant | :decorator | :annotation | :typespec | nil
+        }
+
+  @doc "Creates an AttributeNode from a raw %Node{}; `kind` is derived from the tokens."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, [{:kind, detect_kind(node.tokens)} | copied])
+  end
+
+  # Skips leading whitespace/newline tokens and returns :typespec when the
+  # first two tokens are `@` and an `<ID>` named in @typespec_attrs, else nil.
+  defp detect_kind(tokens) do
+    layout_kinds = [WhitespaceToken.kind(), NewlineToken.kind()]
+
+    case Enum.drop_while(tokens, &(&1.kind in layout_kinds)) do
+      [%{kind: "@"}, %{kind: "<ID>", content: name} | _rest] ->
+        if MapSet.member?(@typespec_attrs, name), do: :typespec
+
+      _other ->
+        nil
+    end
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/code_node.ex b/lib/codeqa/ast/nodes/code_node.ex
new file mode 100644
index 00000000..b7dfd9db
--- /dev/null
+++ b/lib/codeqa/ast/nodes/code_node.ex
@@ -0,0 +1,46 @@
+defmodule CodeQA.AST.Nodes.CodeNode do
+  @moduledoc "Generic AST node for code blocks that have no more specific classification."
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defstruct @base_fields
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil
+        }
+
+  @doc "Creates a CodeNode carrying over every base field of the given raw %Node{}."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/doc_node.ex b/lib/codeqa/ast/nodes/doc_node.ex
new file mode 100644
index 00000000..5e011ca3
--- /dev/null
+++ b/lib/codeqa/ast/nodes/doc_node.ex
@@ -0,0 +1,46 @@
+defmodule CodeQA.AST.Nodes.DocNode do
+  @moduledoc "AST node representing documentation strings and blocks of comments."
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defstruct @base_fields
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil
+        }
+
+  @doc "Creates a DocNode carrying over every base field of the given raw %Node{}."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/function_node.ex b/lib/codeqa/ast/nodes/function_node.ex
new file mode 100644
index 00000000..48c6a5d4
--- /dev/null
+++ b/lib/codeqa/ast/nodes/function_node.ex
@@ -0,0 +1,59 @@
+defmodule CodeQA.AST.Nodes.FunctionNode do
+  @moduledoc """
+  AST node for function, method, or callable definitions.
+
+  `cast/1` fills only the fields shared with the raw node; `name`, `arity` and
+  `visibility` are left as `nil` by it.
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  # Fields specific to function nodes.
+  @own_fields [:name, :arity, :visibility]
+
+  defstruct @base_fields ++ @own_fields
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil,
+          name: String.t() | nil,
+          arity: non_neg_integer() | nil,
+          visibility: :public | :private | nil
+        }
+
+  @doc "Creates a FunctionNode from a raw %Node{}, copying the base fields; the rest stay nil."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    # Plain accessors for the base fields.
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the tokens come from the children,
+    # concatenated in order via NodeProtocol.flat_tokens/1.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/import_node.ex b/lib/codeqa/ast/nodes/import_node.ex
new file mode 100644
index 00000000..3730370a
--- /dev/null
+++ b/lib/codeqa/ast/nodes/import_node.ex
@@ -0,0 +1,47 @@
+defmodule CodeQA.AST.Nodes.ImportNode do
+  @moduledoc "AST node for import, require, use, alias, or include statements."
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defstruct @base_fields ++ [:target]
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil,
+          target: String.t() | nil
+        }
+
+  @doc "Creates an ImportNode from a raw %Node{}, copying the base fields; `target` stays nil."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/module_node.ex b/lib/codeqa/ast/nodes/module_node.ex
new file mode 100644
index 00000000..c8d50723
--- /dev/null
+++ b/lib/codeqa/ast/nodes/module_node.ex
@@ -0,0 +1,48 @@
+defmodule CodeQA.AST.Nodes.ModuleNode do
+  @moduledoc "AST node for module, class, namespace, or struct definitions."
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defstruct @base_fields ++ [:name, :kind]
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil,
+          name: String.t() | nil,
+          kind: :class | :module | :namespace | :struct | nil
+        }
+
+  @doc "Creates a ModuleNode from a raw %Node{}, copying the base fields; `name` and `kind` stay nil."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/nodes/test_node.ex b/lib/codeqa/ast/nodes/test_node.ex
new file mode 100644
index 00000000..b3460cf4
--- /dev/null
+++ b/lib/codeqa/ast/nodes/test_node.ex
@@ -0,0 +1,47 @@
+defmodule CodeQA.AST.Nodes.TestNode do
+  @moduledoc "AST node for test cases, describe blocks, and it blocks."
+
+  alias CodeQA.AST.Enrichment.Node
+
+  # Fields shared with the raw %Node{}; cast/1 copies exactly these.
+  @base_fields [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defstruct @base_fields ++ [:description]
+
+  @type t :: %__MODULE__{
+          tokens: [term()],
+          line_count: non_neg_integer(),
+          children: [term()],
+          start_line: non_neg_integer() | nil,
+          end_line: non_neg_integer() | nil,
+          label: term() | nil,
+          description: String.t() | nil
+        }
+
+  @doc "Creates a TestNode from a raw %Node{}, copying the base fields; `description` stays nil."
+  @spec cast(Node.t()) :: t()
+  def cast(%Node{} = node) do
+    # Map.fetch!/2 raises if the raw node lacks one of the fields.
+    copied = for field <- @base_fields, do: {field, Map.fetch!(node, field)}
+    struct!(__MODULE__, copied)
+  end
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A leaf yields its own tokens; otherwise the children's tokens, in order.
+    def flat_tokens(%{children: children, tokens: own}) do
+      case Enum.empty?(children) do
+        true -> own
+        false -> Enum.flat_map(children, &NodeProtocol.flat_tokens/1)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/parsing/parser.ex b/lib/codeqa/ast/parsing/parser.ex
new file mode 100644
index 00000000..2615bb9a
--- /dev/null
+++ b/lib/codeqa/ast/parsing/parser.ex
@@ -0,0 +1,234 @@
+defmodule CodeQA.AST.Parsing.Parser do
+  @moduledoc """
+  Recursively parses a token stream into a nested node tree.
+
+  Top-level nodes are found by splitting on blank lines and declaration keywords.
+  Each node is then recursively subdivided using enclosure rules (brackets,
+  colon-indentation) until no further subdivision is possible — forming an
+  arbitrarily-deep tree rather than a fixed two-level hierarchy.
+
+  ## Recursive parsing algorithm
+
+  `parse_block/2` is the recursive core:
+
+  1. Immediately create a `Node` spanning the whole token stream.
+  2. Apply enclosure rules to find sub-candidate streams.
+  3. **Idempotency check** — reject any enclosure that spans the entire stream
+     (e.g. `BracketRule` re-emitting its own input). This is the termination
+     condition: the node is a leaf when no strictly-smaller sub-candidates exist.
+  4. Recursively call `parse_block/2` on each sub-candidate to produce children.
+  5. Return the node with its children attached as `children`.
+
+  ## Design notes (from tree-sitter, ctags, lizard)
+
+  - **Recursive hierarchy** — replaces the old two-level (top + one level of sub-blocks)
+    model with an N-level tree. Each call to `parse_block/2` mirrors tree-sitter's
+    recursive descent: emit the node, then recurse into its contents.
+  - **Language detection by extension** — `language_from_path/1` follows ctags'
+    convention of inferring language from file extension.
+  - **Rule extensibility** — enclosure signals are selected per language in
+    `find_sub_candidates/2` (via the language module's `uses_colon_indent?/0`).
+    Rules are composable and order-independent.
+  - **Error recovery** — mismatched brackets and malformed indentation are silently
+    skipped by individual rules. The parser emits partial nodes rather than failing,
+    consistent with tree-sitter's error-recovery philosophy.
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken}
+  alias CodeQA.AST.Parsing.SignalStream
+
+  alias CodeQA.AST.Signals.Structural.{
+    BlankLineSignal,
+    BracketSignal,
+    ColonIndentSignal,
+    KeywordSignal,
+    TripleQuoteSignal
+  }
+
+  alias CodeQA.Language
+
+  @doc """
+  Splits `tokens` into top-level blocks and parses each into a `Node` tree.
+
+  Split points are taken from the triple-quote, blank-line and keyword
+  signals. Blank-line and keyword splits lying strictly between a pair of
+  triple-quote splits are ignored. Empty and whitespace-only slices are
+  dropped, and adjacent slices that share a line are merged before parsing.
+
+  Returns `[]` for an empty token list.
+  """
+  @spec detect_blocks([CodeQA.AST.Lexing.Token.t()], module()) :: [Node.t()]
+  def detect_blocks([], _lang_mod), do: []
+
+  def detect_blocks(tokens, lang_mod) do
+    split_signals = [%TripleQuoteSignal{}, %BlankLineSignal{}, %KeywordSignal{}]
+    emissions = tokens |> SignalStream.run(split_signals, lang_mod) |> List.flatten()
+
+    triple_splits =
+      Enum.flat_map(emissions, fn
+        {_, :split, :triple_split, at} -> [at]
+        _ -> []
+      end)
+
+    protected = compute_protected_ranges(triple_splits)
+
+    # Blank-line / keyword splits only count outside triple-quoted regions.
+    other_splits =
+      Enum.flat_map(emissions, fn
+        {_, :split, name, at} when name in [:blank_split, :keyword_split] ->
+          if inside_protected?(at, protected), do: [], else: [at]
+
+        _ ->
+          []
+      end)
+
+    cut_points = (other_splits ++ triple_splits) |> Enum.uniq() |> Enum.sort()
+
+    merged =
+      tokens
+      |> split_at(cut_points)
+      |> Enum.reject(&(Enum.empty?(&1) or whitespace_only?(&1)))
+      |> merge_same_line_slices()
+
+    for slice <- merged, do: parse_block(slice, lang_mod)
+  end
+
+  @doc "Returns the language module that `CodeQA.Language.detect/1` selects for `path`."
+  @spec language_module_for_path(String.t()) :: module()
+  defdelegate language_module_for_path(path), to: Language, as: :detect
+
+  @doc "Returns the name of the language detected for `path`, converted to an atom."
+  @spec language_from_path(String.t()) :: atom()
+  def language_from_path(path) do
+    lang_mod = Language.detect(path)
+    String.to_atom(lang_mod.name())
+  end
+
+  # Builds the Node covering `tokens` and recursively attaches a child Node for
+  # every sub-candidate stream. When no sub-candidates are found the node is a
+  # leaf (children: []), which is what ends the recursion.
+  defp parse_block(tokens, lang_mod) do
+    first_line = block_start_line(tokens)
+    last_line = block_end_line(tokens)
+
+    # Without both line numbers the span is treated as a single line.
+    span =
+      if first_line && last_line do
+        last_line - first_line + 1
+      else
+        1
+      end
+
+    kids =
+      tokens
+      |> find_sub_candidates(lang_mod)
+      |> Enum.map(&parse_block(&1, lang_mod))
+
+    %Node{
+      tokens: tokens,
+      line_count: span,
+      children: kids,
+      start_line: first_line,
+      end_line: last_line
+    }
+  end
+
+  # Returns the token slices of all enclosure regions found inside `tokens`.
+  #
+  # The stream is first passed through maybe_unwrap_bracket/1 so that a stream
+  # which is itself a bracket pair is searched by its *inner* tokens. Otherwise
+  # the only enclosure found would be the stream itself, which the check below
+  # discards, and recursion would stop at every bracket level.
+  #
+  # Idempotency check: an enclosure covering the entire search window
+  # (0..n-1) is dropped, because recursing into it would never terminate.
+  # Whitespace-only slices are dropped as well.
+  defp find_sub_candidates(tokens, lang_mod) do
+    {window, _unwrapped} = maybe_unwrap_bracket(tokens)
+    last_index = length(window) - 1
+
+    colon_signals = if lang_mod.uses_colon_indent?(), do: [%ColonIndentSignal{}], else: []
+    signals = [%BracketSignal{} | colon_signals]
+
+    spans =
+      window
+      |> SignalStream.run(signals, lang_mod)
+      |> List.flatten()
+      |> Enum.filter(&(elem(&1, 1) == :enclosure))
+      |> Enum.map(fn {_, _, _, {from, to}} -> {from, to} end)
+      |> Enum.uniq()
+      |> Enum.sort()
+
+    for {from, to} <- spans,
+        not (from == 0 and to == last_index),
+        slice = Enum.slice(window, from..to),
+        not whitespace_only?(slice),
+        do: slice
+  end
+
+  @open_brackets MapSet.new(["(", "[", "{"])
+  @close_brackets MapSet.new([")", "]", "}"])
+  @matching_close %{"(" => ")", "[" => "]", "{" => "}"}
+
+  # If the whole stream is a single bracket pair, returns `{inner_tokens, 1}`;
+  # otherwise returns `{tokens, 0}` with the stream unchanged.
+  #
+  # Looking only at the first and last token is not enough: `(a) + (b)` starts
+  # with `(` and ends with `)`, yet stripping both leaves the unbalanced
+  # `a) + (b`. The stream is therefore unwrapped only when the bracket opened by
+  # the first token is closed by the last token.
+  defp maybe_unwrap_bracket([first | rest] = tokens) do
+    if MapSet.member?(@open_brackets, first.kind) and wraps_whole_stream?(tokens) do
+      {Enum.drop(rest, -1), 1}
+    else
+      {tokens, 0}
+    end
+  end
+
+  defp maybe_unwrap_bracket([]), do: {[], 0}
+
+  # True when the bracket opened by the first token is closed by the very last
+  # token and that closer has the matching type. Expects the first token to be
+  # an opening bracket.
+  defp wraps_whole_stream?([first | _] = tokens) do
+    last_index = length(tokens) - 1
+
+    # Walk the stream tracking nesting depth; stop at the closer that brings
+    # the depth opened by the first token back to zero.
+    outcome =
+      tokens
+      |> Enum.with_index()
+      |> Enum.reduce_while(0, fn {token, index}, depth ->
+        cond do
+          MapSet.member?(@open_brackets, token.kind) -> {:cont, depth + 1}
+          not MapSet.member?(@close_brackets, token.kind) -> {:cont, depth}
+          depth == 1 -> {:halt, {:closed_at, index}}
+          true -> {:cont, depth - 1}
+        end
+      end)
+
+    outcome == {:closed_at, last_index} and
+      List.last(tokens).kind == Map.fetch!(@matching_close, first.kind)
+  end
+
+  # Turns triple-quote split indices into protected interior ranges by pairing
+  # them up two at a time: {open + 1, close - 1}. A trailing unpaired index
+  # (odd count, i.e. malformed input) is ignored.
+  defp compute_protected_ranges([open, close | rest]) do
+    [{open + 1, close - 1} | compute_protected_ranges(rest)]
+  end
+
+  defp compute_protected_ranges([_unpaired]), do: []
+  defp compute_protected_ranges([]), do: []
+
+  # True when `idx` lies within any of the inclusive `{lo, hi}` ranges.
+  defp inside_protected?(_idx, []), do: false
+  defp inside_protected?(idx, [{lo, hi} | _]) when lo <= idx and idx <= hi, do: true
+  defp inside_protected?(idx, [{_lo, _hi} | rest]), do: inside_protected?(idx, rest)
+
+  # TripleQuoteSignal can split a line such as `@doc """` in the middle: the
+  # tokens before the triple quote end one slice and the heredoc starts the
+  # next, both on the same line. Adjacent slices are merged whenever the last
+  # non-whitespace token of the first and the first non-whitespace token of the
+  # second share a line, so parse_block sees `@doc """..."""` as one stream.
+  defp merge_same_line_slices([left, right | rest]) do
+    left_line = left |> Enum.reverse() |> first_content_line()
+    right_line = first_content_line(right)
+
+    if left_line && right_line && left_line == right_line do
+      # The merged slice is re-examined against the following slice.
+      merge_same_line_slices([left ++ right | rest])
+    else
+      [left | merge_same_line_slices([right | rest])]
+    end
+  end
+
+  defp merge_same_line_slices([_] = single), do: single
+  defp merge_same_line_slices([]), do: []
+
+  # Line of the first token that is neither whitespace nor a newline, or nil
+  # when there is no such token.
+  defp first_content_line(tokens) do
+    case Enum.find(tokens, &(&1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) do
+      nil -> nil
+      token -> token.line
+    end
+  end
+
+  # Cuts `tokens` into consecutive slices at the given indices. Each split
+  # point becomes the first index of a new slice; no split points yields the
+  # whole stream as a single slice.
+  defp split_at(tokens, []), do: [tokens]
+
+  defp split_at(tokens, split_points) do
+    starts = [0 | split_points]
+    stops = split_points ++ [length(tokens)]
+
+    Enum.zip_with(starts, stops, fn from, to ->
+      Enum.slice(tokens, from..(to - 1)//1)
+    end)
+  end
+
+  # True when every token is whitespace or a newline (also true for []).
+  defp whitespace_only?(tokens) do
+    blank_kinds = [WhitespaceToken.kind(), NewlineToken.kind()]
+    not Enum.any?(tokens, fn token -> token.kind not in blank_kinds end)
+  end
+
+  # Line of the first token in the stream (whatever its kind); nil when empty.
+  defp block_start_line([]), do: nil
+  defp block_start_line([%{line: first_line} | _rest]), do: first_line
+
+  # Line of the last token that is neither whitespace nor a newline. Falls
+  # back to the line of the very last token when the stream holds nothing
+  # else, and to nil for an empty stream.
+  defp block_end_line([]), do: nil
+
+  defp block_end_line(tokens) do
+    blank_kinds = [WhitespaceToken.kind(), NewlineToken.kind()]
+    backwards = Enum.reverse(tokens)
+
+    case Enum.find(backwards, fn token -> token.kind not in blank_kinds end) do
+      nil -> Map.get(hd(backwards), :line)
+      last_content -> last_content.line
+    end
+  end
+end
diff --git a/lib/codeqa/ast/parsing/signal.ex b/lib/codeqa/ast/parsing/signal.ex
new file mode 100644
index 00000000..dc2d19ec
--- /dev/null
+++ b/lib/codeqa/ast/parsing/signal.ex
@@ -0,0 +1,44 @@
+defprotocol CodeQA.AST.Parsing.Signal do
+  @moduledoc """
+  Protocol for token-stream signal emitters.
+
+  A signal is a stateful detector that is stepped through a token stream and
+  emits zero or more named values per step. All signals run independently over
+  the same token stream — each gets its own pass, carrying its own state.
+
+  ## Protocol functions
+
+  - `source/1`  — the implementing module; used for debugging emission traces
+  - `group/1`   — atom grouping this signal's emissions (e.g. `:split`, `:enclosure`)
+  - `init/2`    — returns initial state; called once before the token stream starts
+  - `emit/3`    — called per token; returns `{MapSet.t({name, value}), new_state}`
+
+  ## Input to `emit/3`
+
+  `SignalStream` calls `emit/3` with a `{prev, token, next}` triple rather than
+  a bare token. `prev` is `nil` for the first token and `next` is `nil` for the
+  last one, which lets a signal detect the end of the stream.
+
+  ## State
+
+  State is owned externally: `SignalStream` obtains it from `init/2` and feeds
+  the state returned by each `emit/3` call into the next call. The signal
+  defines the shape; the orchestrator threads it through unchanged.
+
+  Returning `:halt` as the new state makes `SignalStream` stop that signal's
+  pass; emissions returned together with `:halt` are still collected.
+
+  ## No-op emission
+
+  To emit nothing for a token, return `{MapSet.new(), state}`.
+  """
+
+  @doc "The module that implements this signal — for debugging traces."
+  @spec source(t) :: module()
+  def source(signal)
+
+  @doc "Group atom for all emissions from this signal (e.g. :split, :enclosure)."
+  @spec group(t) :: atom()
+  def group(signal)
+
+  @doc "Returns the initial state for this signal, given the language module in use."
+  @spec init(t, module()) :: term()
+  def init(signal, lang_mod)
+
+  @doc """
+  Called once per token. When driven by `SignalStream`, `token` is a
+  `{prev, token, next}` triple. Returns a MapSet of `{name, value}` emission
+  pairs and the updated state (or `:halt` to end this signal's pass).
+  """
+  @spec emit(t, token :: term(), state :: term()) :: {MapSet.t(), term()}
+  def emit(signal, token, state)
+end
diff --git a/lib/codeqa/ast/parsing/signal_registry.ex b/lib/codeqa/ast/parsing/signal_registry.ex
new file mode 100644
index 00000000..0f4a7521
--- /dev/null
+++ b/lib/codeqa/ast/parsing/signal_registry.ex
@@ -0,0 +1,94 @@
+defmodule CodeQA.AST.Parsing.SignalRegistry do
+  @moduledoc """
+  Registry holding two ordered lists of signals: structural and classification.
+
+  `default/0` returns the standard signal set. Custom registries for
+  language-specific or analysis-specific configurations are built by starting
+  from `new/0` (or `default/0`) and appending signals with
+  `register_structural/2` and `register_classification/2`.
+  """
+
+  alias CodeQA.AST.Signals.Structural.{
+    AccessModifierSignal,
+    AssignmentFunctionSignal,
+    BlankLineSignal,
+    BracketSignal,
+    BranchSplitSignal,
+    ColonIndentSignal,
+    CommentDividerSignal,
+    DecoratorSignal,
+    DedentToZeroSignal,
+    DocCommentLeadSignal,
+    KeywordSignal,
+    SQLBlockSignal,
+    TripleQuoteSignal
+  }
+
+  alias CodeQA.AST.Signals.Classification.{
+    AttributeSignal,
+    CommentDensitySignal,
+    ConfigSignal,
+    DataSignal,
+    DocSignal,
+    FunctionSignal,
+    ImportSignal,
+    ModuleSignal,
+    TestSignal,
+    TypeSignal
+  }
+
+  defstruct structural: [], classification: []
+
+  @type t :: %__MODULE__{
+          structural: [term()],
+          classification: [term()]
+        }
+
+  @doc "Returns an empty registry."
+  @spec new() :: t()
+  def new, do: %__MODULE__{}
+
+  @doc "Appends `signal` to the end of the registry's structural signals."
+  @spec register_structural(t(), term()) :: t()
+  def register_structural(%__MODULE__{} = r, signal) do
+    Map.update!(r, :structural, fn existing -> existing ++ [signal] end)
+  end
+
+  @doc "Appends `signal` to the end of the registry's classification signals."
+  @spec register_classification(t(), term()) :: t()
+  def register_classification(%__MODULE__{} = r, signal) do
+    Map.update!(r, :classification, fn existing -> existing ++ [signal] end)
+  end
+
+  @doc "Returns the standard registry of structural and classification signals."
+  @spec default() :: t()
+  def default do
+    %__MODULE__{
+      structural: [
+        %TripleQuoteSignal{},
+        %BlankLineSignal{},
+        %KeywordSignal{},
+        %AccessModifierSignal{},
+        %DecoratorSignal{},
+        %CommentDividerSignal{},
+        %DocCommentLeadSignal{},
+        %AssignmentFunctionSignal{},
+        %DedentToZeroSignal{},
+        %BranchSplitSignal{},
+        %BracketSignal{}
+      ],
+      classification: [
+        %DocSignal{},
+        %TestSignal{},
+        %FunctionSignal{},
+        %ModuleSignal{},
+        %ImportSignal{},
+        %AttributeSignal{},
+        %TypeSignal{},
+        %ConfigSignal{},
+        %DataSignal{},
+        %CommentDensitySignal{}
+      ]
+    }
+  end
+
+  @doc "Returns the default registry with `ColonIndentSignal` appended to the structural signals."
+  @spec python() :: t()
+  def python, do: register_structural(default(), %ColonIndentSignal{})
+
+  @doc "Returns the default registry with `SQLBlockSignal` appended to the structural signals."
+  @spec sql() :: t()
+  def sql, do: register_structural(default(), %SQLBlockSignal{})
+end
diff --git a/lib/codeqa/ast/parsing/signal_stream.ex b/lib/codeqa/ast/parsing/signal_stream.ex
new file mode 100644
index 00000000..8b6f4519
--- /dev/null
+++ b/lib/codeqa/ast/parsing/signal_stream.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.AST.Parsing.SignalStream do
+  @moduledoc """
+  Runs a list of `Signal` implementations over a token stream.
+
+  Each signal gets its own pass over the token stream and accumulates its own
+  state. Signals are independent — no shared state, no cross-signal
+  coordination. A pass ends early when the signal returns `:halt` as its new
+  state.
+
+  Every step hands the signal a `{prev, token, next}` triple, where `prev` is
+  `nil` for the first token and `next` is `nil` for the last.
+
+  ## Return value
+
+  Returns a list of emission lists, one per signal, in the same order as the
+  input signal list. Each emission is a 4-tuple:
+
+      {source, group, name, value}
+
+  ## Usage
+
+      SignalStream.run(tokens, [%BlankLineSignal{}, %KeywordSignal{}], lang_mod)
+      # => [[{BlankLineSignal, :split, :blank_split, 5}, ...], [...]]
+  """
+
+  alias CodeQA.AST.Parsing.Signal
+
+  @doc """
+  Runs every signal in `signals` over `tokens`, passing `lang_mod` to each
+  signal's `init/2`. Returns one emission list per signal, in signal order.
+  """
+  @spec run([term()], [term()], module()) :: [list()]
+  def run(tokens, signals, lang_mod) do
+    before = [nil | tokens]
+    following = Enum.drop(tokens, 1) ++ [nil]
+    windows = Enum.zip([before, tokens, following])
+
+    for signal <- signals, do: run_signal(signal, windows, lang_mod)
+  end
+
+  # One full pass of a single signal; returns its emissions in stream order.
+  defp run_signal(signal, windows, lang_mod) do
+    start_state = Signal.init(signal, lang_mod)
+    source = Signal.source(signal)
+    group = Signal.group(signal)
+
+    {_last_state, reversed} =
+      Enum.reduce_while(windows, {start_state, []}, fn window, {state, collected} ->
+        {emitted, next_state} = Signal.emit(signal, window, state)
+
+        # Emissions are prepended here and reversed once after the pass.
+        collected =
+          Enum.reduce(emitted, collected, fn {name, value}, so_far ->
+            [{source, group, name, value} | so_far]
+          end)
+
+        verdict = if next_state == :halt, do: :halt, else: :cont
+        {verdict, {next_state, collected}}
+      end)
+
+    Enum.reverse(reversed)
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/attribute_signal.ex b/lib/codeqa/ast/signals/classification/attribute_signal.ex
new file mode 100644
index 00000000..aaaa6403
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/attribute_signal.ex
@@ -0,0 +1,68 @@
+defmodule CodeQA.AST.Signals.Classification.AttributeSignal do
+  @moduledoc """
+  Classification signal — votes `:attribute` when an `@` token followed by an
+  identifier is seen on a line with indent 0.
+
+  Weights:
+  - 3 for Elixir typespec attributes (@spec, @type, @typep, @opaque, @callback, @macrocallback)
+  - 2 for all other @name attributes
+
+  @doc and @moduledoc produce no vote — those nodes contain <DOC> tokens and
+  are handled by DocSignal. The first `@identifier` seen settles the outcome:
+  nothing further is emitted afterwards, so there is at most one vote per
+  token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback])
+    @skip_attrs MapSet.new(~w[doc moduledoc])
+
+    def source(_), do: CodeQA.AST.Signals.Classification.AttributeSignal
+    def group(_), do: :classification
+
+    def init(_, _lang_mod) do
+      %{at_line_start: true, indent: 0, saw_at: false, voted: false}
+    end
+
+    # Once settled, every remaining token is a no-op.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, state) do
+      kind = token.kind
+
+      cond do
+        kind == @nl ->
+          {MapSet.new(), %{state | at_line_start: true, indent: 0, saw_at: false}}
+
+        kind == @ws ->
+          # Only leading whitespace counts towards the indent.
+          if state.at_line_start do
+            {MapSet.new(), %{state | indent: state.indent + 1, at_line_start: true}}
+          else
+            {MapSet.new(), state}
+          end
+
+        kind == "@" and state.indent == 0 ->
+          {MapSet.new(), %{state | saw_at: true, at_line_start: false}}
+
+        kind == "<ID>" and state.saw_at ->
+          settled = %{state | saw_at: false, at_line_start: false, voted: true}
+          {votes_for(token.content), settled}
+
+        true ->
+          {MapSet.new(), %{state | saw_at: false, at_line_start: false}}
+      end
+    end
+
+    # Emission set for the attribute called `name`.
+    defp votes_for(name) do
+      cond do
+        # @doc/@moduledoc: let DocSignal handle via <DOC> tokens
+        MapSet.member?(@skip_attrs, name) -> MapSet.new()
+        MapSet.member?(@typespec_attrs, name) -> MapSet.new([{:attribute_vote, 3}])
+        true -> MapSet.new([{:attribute_vote, 2}])
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/comment_density_signal.ex b/lib/codeqa/ast/signals/classification/comment_density_signal.ex
new file mode 100644
index 00000000..ceb4c23a
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/comment_density_signal.ex
@@ -0,0 +1,64 @@
+defmodule CodeQA.AST.Signals.Classification.CommentDensitySignal do
+  @moduledoc """
+  Classification signal — votes `:comment` when more than 60% of non-blank
+  lines begin with a comment prefix.
+
+  The comment prefixes are read from the language module via
+  `lang_mod.comment_prefixes()`. Emits no vote if the language has no
+  prefixes configured or the stream has no non-blank lines.
+
+  Emits at the end of the stream (on the token whose `next` is `nil`).
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    def source(_), do: CodeQA.AST.Signals.Classification.CommentDensitySignal
+    def group(_), do: :classification
+
+    def init(_, lang_mod) do
+      prefixes = MapSet.new(lang_mod.comment_prefixes())
+      %{prefixes: prefixes, at_line_start: true, comment_lines: 0, total_lines: 0}
+    end
+
+    def emit(_, {_prev, token, next}, state) do
+      %{prefixes: prefixes, at_line_start: als} = state
+
+      state =
+        case token.kind do
+          @nl ->
+            %{state | at_line_start: true}
+
+          @ws ->
+            # Leading whitespace does not start a line's content.
+            state
+
+          _ when als ->
+            # First content token of a line: count the line, and count it as a
+            # comment line when that token is one of the comment prefixes.
+            is_comment = MapSet.member?(prefixes, token.content)
+
+            %{
+              state
+              | at_line_start: false,
+                total_lines: state.total_lines + 1,
+                comment_lines: state.comment_lines + if(is_comment, do: 1, else: 0)
+            }
+
+          _ ->
+            %{state | at_line_start: false}
+        end
+
+      maybe_emit_vote(next, state)
+    end
+
+    # Votes only on the last token of the stream (`next == nil`).
+    #
+    # `prefixes` is a MapSet, so its emptiness is checked with MapSet.size/1.
+    # Kernel.map_size/1 would count the fields of the MapSet struct instead of
+    # its elements and could never detect an empty set.
+    defp maybe_emit_vote(nil, %{prefixes: prefixes, total_lines: total} = state) do
+      if MapSet.size(prefixes) > 0 and total > 0 and state.comment_lines / total > 0.6 do
+        {MapSet.new([{:comment_vote, 2}]), :halt}
+      else
+        {MapSet.new(), state}
+      end
+    end
+
+    defp maybe_emit_vote(_next, state), do: {MapSet.new(), state}
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/config_signal.ex b/lib/codeqa/ast/signals/classification/config_signal.ex
new file mode 100644
index 00000000..43b58728
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/config_signal.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.AST.Signals.Classification.ConfigSignal do
+  @moduledoc """
+  Classification signal — votes `:config` when a configuration keyword
+  appears on a line with indent 0 and at bracket depth 0.
+
+  Matches `config` (Elixir Mix.Config), `configure`, `settings`, `options`,
+  `defaults`. The vote weighs 3 when the keyword is the first content token of
+  the stream and 1 otherwise. The pass halts on the first vote, so at most one
+  vote is emitted.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    @openers ["(", "[", "{"]
+    @closers [")", "]", "}"]
+    @config_keywords MapSet.new(["config", "configure", "settings", "options", "defaults"])
+    def source(_), do: CodeQA.AST.Signals.Classification.ConfigSignal
+    def group(_), do: :classification
+
+    def init(_, _lang_mod) do
+      %{at_line_start: true, indent: 0, bracket_depth: 0, is_first: true}
+    end
+
+    def emit(_, {_prev, token, _next}, state) do
+      case token.kind do
+        @nl ->
+          silent(%{state | at_line_start: true, indent: 0})
+
+        @ws ->
+          # Only leading whitespace counts towards the indent.
+          if state.at_line_start do
+            silent(%{state | indent: state.indent + 1, at_line_start: true})
+          else
+            silent(state)
+          end
+
+        kind when kind in @openers ->
+          silent(mid_line(%{state | bracket_depth: state.bracket_depth + 1}))
+
+        kind when kind in @closers ->
+          # Depth never drops below zero, even on stray closers.
+          silent(mid_line(%{state | bracket_depth: max(0, state.bracket_depth - 1)}))
+
+        _ ->
+          content_token(token, state)
+      end
+    end
+
+    # A keyword only counts at indent 0 and outside all brackets.
+    defp content_token(token, %{indent: 0, bracket_depth: 0, is_first: first} = state) do
+      if MapSet.member?(@config_keywords, token.content) do
+        weight = if first, do: 3, else: 1
+        {MapSet.new([{:config_vote, weight}]), :halt}
+      else
+        silent(mid_line(state))
+      end
+    end
+
+    defp content_token(_token, state), do: silent(mid_line(state))
+
+    # Marks that a non-whitespace token has been consumed on the current line.
+    defp mid_line(state), do: %{state | at_line_start: false, is_first: false}
+
+    # Result for a step that emits nothing.
+    defp silent(state), do: {MapSet.new(), state}
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/data_signal.ex b/lib/codeqa/ast/signals/classification/data_signal.ex
new file mode 100644
index 00000000..1d6aa773
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/data_signal.ex
@@ -0,0 +1,67 @@
+defmodule CodeQA.AST.Signals.Classification.DataSignal do
+  @moduledoc """
+  Classification signal — votes `:data` when a token stream consists primarily
+  of literal values (string and `<NUM>` tokens) with no control-flow keywords.
+
+  Emits at the end of the stream (when `next == nil`). Votes only when
+  literals make up more than 60% of the counted tokens (literals plus
+  identifiers) and no control-flow keyword was seen.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @str CodeQA.AST.Lexing.StringToken.kind()
+    @control_flow MapSet.new([
+                    "if",
+                    "else",
+                    "elsif",
+                    "elif",
+                    "unless",
+                    "for",
+                    "while",
+                    "do",
+                    "case",
+                    "when",
+                    "cond",
+                    "switch",
+                    "loop",
+                    "until"
+                  ])
+    def source(_), do: CodeQA.AST.Signals.Classification.DataSignal
+    def group(_), do: :classification
+
+    def init(_, _lang_mod) do
+      %{literal_count: 0, id_count: 0, has_control_flow: false}
+    end
+
+    def emit(_, {_prev, token, next}, state) do
+      counted = tally(token, state)
+
+      cond do
+        next != nil -> {MapSet.new(), counted}
+        data_like?(counted) -> {MapSet.new([{:data_vote, 2}]), :halt}
+        true -> {MapSet.new(), counted}
+      end
+    end
+
+    # Updates the literal / identifier counters for one token.
+    defp tally(%{kind: kind}, state) when kind in [@str, "<NUM>"] do
+      %{state | literal_count: state.literal_count + 1}
+    end
+
+    defp tally(%{kind: "<ID>"} = token, state) do
+      with_id = %{state | id_count: state.id_count + 1}
+
+      if MapSet.member?(@control_flow, token.content) do
+        %{with_id | has_control_flow: true}
+      else
+        with_id
+      end
+    end
+
+    defp tally(_token, state), do: state
+
+    # True when literals dominate and no control-flow keyword was seen.
+    defp data_like?(%{literal_count: literals, id_count: ids, has_control_flow: flow}) do
+      total = literals + ids
+      total > 0 and not flow and literals / total > 0.6
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/doc_signal.ex b/lib/codeqa/ast/signals/classification/doc_signal.ex
new file mode 100644
index 00000000..615cf55c
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/doc_signal.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.AST.Signals.Classification.DocSignal do
+  @moduledoc """
+  Classification signal — votes `:doc` when a `<DOC>` (triple-quoted string) token
+  is found anywhere in the node's token stream.
+
+  Weight: 3 (unambiguous — triple-quoted strings are documentation).
+  Only the first such token votes, so there is at most one vote per token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind()
+    def source(_), do: CodeQA.AST.Signals.Classification.DocSignal
+    def group(_), do: :classification
+
+    def init(_, _lang_mod), do: %{voted: false}
+
+    # After the vote every remaining token is a no-op.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, state) do
+      case token.kind do
+        @doc_kind -> {MapSet.new([{:doc_vote, 3}]), %{state | voted: true}}
+        _other -> {MapSet.new(), state}
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/function_signal.ex b/lib/codeqa/ast/signals/classification/function_signal.ex
new file mode 100644
index 00000000..62d3f487
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/function_signal.ex
@@ -0,0 +1,76 @@
+defmodule CodeQA.AST.Signals.Classification.FunctionSignal do
+  @moduledoc """
+  Classification signal — votes `:function` when a function definition keyword
+  appears on a line with indent 0 and at bracket depth 0.
+
+  Weights:
+  - 3 when it is the first content token of the block (strong match)
+  - 1 when found later in the block (weak match, e.g. after a leading comment)
+
+  The keyword set comes from `CodeQA.Language.function_keywords/1`. It does NOT
+  include module/class/namespace keywords (handled by ModuleSignal) or test
+  macros like `test`/`describe` (handled by TestSignal).
+  Emits at most one vote per token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    def source(_), do: CodeQA.AST.Signals.Classification.FunctionSignal
+    def group(_), do: :classification
+
+    def init(_, lang_mod) do
+      %{
+        at_line_start: true,
+        indent: 0,
+        bracket_depth: 0,
+        is_first: true,
+        voted: false,
+        keywords: CodeQA.Language.function_keywords(lang_mod)
+      }
+    end
+
+    # After the vote every remaining token is a no-op.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, state) do
+      case classify(token.kind) do
+        :newline ->
+          silent(%{state | at_line_start: true, indent: 0})
+
+        :space ->
+          # Only leading whitespace counts towards the indent.
+          if state.at_line_start do
+            silent(%{state | indent: state.indent + 1, at_line_start: true})
+          else
+            silent(state)
+          end
+
+        :open ->
+          silent(mid_line(%{state | bracket_depth: state.bracket_depth + 1}))
+
+        :close ->
+          # Depth never drops below zero, even on stray closers.
+          silent(mid_line(%{state | bracket_depth: max(0, state.bracket_depth - 1)}))
+
+        :content ->
+          vote_on(token, state)
+      end
+    end
+
+    defp classify(@nl), do: :newline
+    defp classify(@ws), do: :space
+    defp classify(kind) when kind in ["(", "[", "{"], do: :open
+    defp classify(kind) when kind in [")", "]", "}"], do: :close
+    defp classify(_kind), do: :content
+
+    # A keyword only counts at indent 0 and outside all brackets.
+    defp vote_on(token, %{indent: indent, bracket_depth: depth, is_first: first} = state) do
+      settled = mid_line(state)
+
+      if indent == 0 and depth == 0 and MapSet.member?(state.keywords, token.content) do
+        weight = if first, do: 3, else: 1
+        {MapSet.new([{:function_vote, weight}]), %{settled | voted: true}}
+      else
+        silent(settled)
+      end
+    end
+
+    # Marks that a non-whitespace token has been consumed on the current line.
+    defp mid_line(state), do: %{state | is_first: false, at_line_start: false}
+
+    # Result for a step that emits nothing.
+    defp silent(state), do: {MapSet.new(), state}
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/import_signal.ex b/lib/codeqa/ast/signals/classification/import_signal.ex
new file mode 100644
index 00000000..e27ed8a8
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/import_signal.ex
@@ -0,0 +1,66 @@
+defmodule CodeQA.AST.Signals.Classification.ImportSignal do
+  @moduledoc """
+  Classification signal — votes `:import` when an import/require/use/alias keyword
+  appears on a line with indent 0.
+
+  Weights:
+  - 3 when it is the first content token of the block (strong match)
+  - 1 when found later in the block
+
+  The keyword set comes from `CodeQA.Language.import_keywords/1`.
+  Covers: Elixir (import, require, use, alias), Python (import, from),
+  JavaScript/Go (import, package), C# (using), Ruby/Lua (require, include).
+  Emits at most one vote per token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    def source(_), do: CodeQA.AST.Signals.Classification.ImportSignal
+    def group(_), do: :classification
+
+    def init(_, lang_mod) do
+      %{
+        at_line_start: true,
+        indent: 0,
+        is_first: true,
+        voted: false,
+        keywords: CodeQA.Language.import_keywords(lang_mod)
+      }
+    end
+
+    # After the vote every remaining token is a no-op.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, state) do
+      cond do
+        token.kind == @nl ->
+          {MapSet.new(), %{state | at_line_start: true, indent: 0}}
+
+        token.kind == @ws ->
+          # Only leading whitespace counts towards the indent.
+          if state.at_line_start do
+            {MapSet.new(), %{state | indent: state.indent + 1, at_line_start: true}}
+          else
+            {MapSet.new(), state}
+          end
+
+        true ->
+          vote_on(token, state)
+      end
+    end
+
+    # A keyword only counts on a line with indent 0.
+    defp vote_on(token, %{indent: indent, is_first: first} = state) do
+      settled = %{state | is_first: false, at_line_start: false}
+
+      if indent == 0 and MapSet.member?(state.keywords, token.content) do
+        weight = if first, do: 3, else: 1
+        {MapSet.new([{:import_vote, weight}]), %{settled | voted: true}}
+      else
+        {MapSet.new(), settled}
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/module_signal.ex b/lib/codeqa/ast/signals/classification/module_signal.ex
new file mode 100644
index 00000000..4e9ca98e
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/module_signal.ex
@@ -0,0 +1,75 @@
+defmodule CodeQA.AST.Signals.Classification.ModuleSignal do
+  @moduledoc """
+  Classification signal — votes `:module` when a module/class/namespace definition
+  keyword appears at indent 0 and bracket depth 0.
+
+  Weights:
+  - 3 when it is the first content token of the block (strong match)
+  - 1 when found later in the block
+
+  Keyword set is disjoint from FunctionSignal and TestSignal to avoid conflicts.
+  Emits at most one vote per token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    def source(_), do: CodeQA.AST.Signals.Classification.ModuleSignal
+    def group(_), do: :classification
+
+    # Scan state for one token stream; keywords come from the language module.
+    def init(_, lang_mod) do
+      %{
+        at_line_start: true,
+        indent: 0,
+        bracket_depth: 0,
+        is_first: true,
+        voted: false,
+        keywords: CodeQA.Language.module_keywords(lang_mod)
+      }
+    end
+
+    # Once the single vote has been cast, every later token is a no-op.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, %{indent: ind, bracket_depth: bd} = state) do
+      case token.kind do
+        @nl ->
+          {MapSet.new(), %{state | at_line_start: true, indent: 0}}
+
+        # Only whitespace ahead of the line's first content counts as indentation.
+        @ws when state.at_line_start ->
+          {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}}
+
+        @ws ->
+          {MapSet.new(), state}
+
+        v when v in ["(", "[", "{"] ->
+          {MapSet.new(), %{state | bracket_depth: bd + 1, is_first: false, at_line_start: false}}
+
+        v when v in [")", "]", "}"] ->
+          # Clamp at zero so a stray closer cannot push the depth negative.
+          depth = max(0, bd - 1)
+          {MapSet.new(), %{state | bracket_depth: depth, is_first: false, at_line_start: false}}
+
+        _ ->
+          emit_content_token(token, state)
+      end
+    end
+
+    # A keyword at indent 0 and bracket depth 0 casts the vote: weight 3 when it
+    # is the first content token seen, weight 1 otherwise.
+    defp emit_content_token(token, %{indent: ind, bracket_depth: bd} = state) do
+      base_state = %{state | is_first: false, at_line_start: false}
+
+      if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do
+        weight = if state.is_first, do: 3, else: 1
+        {MapSet.new([{:module_vote, weight}]), %{base_state | voted: true}}
+      else
+        {MapSet.new(), base_state}
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/test_signal.ex b/lib/codeqa/ast/signals/classification/test_signal.ex
new file mode 100644
index 00000000..de6abe50
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/test_signal.ex
@@ -0,0 +1,67 @@
+defmodule CodeQA.AST.Signals.Classification.TestSignal do
+  @moduledoc """
+  Classification signal that casts a `:test_vote` when one of the language's
+  test keywords is found at indentation 0.
+
+  The vote weight depends on where the keyword sits in the token stream:
+  - 3 if it is the very first content token (strong match)
+  - 1 if other content preceded it
+
+  Intended coverage: ExUnit (test, describe), RSpec/Jest/Mocha (it, context,
+  describe) and Cucumber (scenario, given, feature). `test` takes priority over
+  FunctionSignal, because Elixir test macros look like function calls but are
+  test blocks. At most one vote is emitted per token stream.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @line_break CodeQA.AST.Lexing.NewlineToken.kind()
+    @blank CodeQA.AST.Lexing.WhitespaceToken.kind()
+
+    def source(_), do: CodeQA.AST.Signals.Classification.TestSignal
+    def group(_), do: :classification
+
+    # Starts at the beginning of a line with nothing seen and no vote cast. The
+    # keyword set is supplied by `CodeQA.Language.test_keywords/1`.
+    def init(_, lang_mod) do
+      %{
+        keywords: CodeQA.Language.test_keywords(lang_mod),
+        voted: false,
+        is_first: true,
+        indent: 0,
+        at_line_start: true
+      }
+    end
+
+    # The signal goes quiet as soon as its single vote has been cast: the state
+    # is handed back untouched and nothing is emitted.
+    def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state}
+
+    def emit(_, {_prev, token, _next}, state), do: advance(token.kind, token, state)
+
+    # Newline: the next token starts a fresh, unindented line.
+    defp advance(@line_break, _token, state),
+      do: {MapSet.new(), %{state | at_line_start: true, indent: 0}}
+
+    # Whitespace ahead of the line's first content adds one indent step per token.
+    defp advance(@blank, _token, %{at_line_start: true, indent: level} = state),
+      do: {MapSet.new(), %{state | indent: level + 1, at_line_start: true}}
+
+    # Whitespace after content is ignored.
+    defp advance(@blank, _token, state), do: {MapSet.new(), state}
+
+    # Content token: it always clears `is_first`/`at_line_start`, and casts the
+    # vote when it is a test keyword sitting at indent 0.
+    defp advance(_kind, token, %{indent: level, is_first: leading?} = state) do
+      seen = %{state | is_first: false, at_line_start: false}
+
+      if level == 0 and MapSet.member?(state.keywords, token.content) do
+        weight = if leading?, do: 3, else: 1
+        {MapSet.new([{:test_vote, weight}]), %{seen | voted: true}}
+      else
+        {MapSet.new(), seen}
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/classification/type_signal.ex b/lib/codeqa/ast/signals/classification/type_signal.ex
new file mode 100644
index 00000000..fc4440f5
--- /dev/null
+++ b/lib/codeqa/ast/signals/classification/type_signal.ex
@@ -0,0 +1,53 @@
+defmodule CodeQA.AST.Signals.Classification.TypeSignal do
+  @moduledoc """
+  Classification signal — emits `{:type_vote, weight}` when `@` at indent 0 is followed by
+  `type`, `typep` or `opaque`; weight is 3 if no other content token came first, else 1.
+  The state then becomes `:halt`, so at most one vote is emitted (NOTE(review): assumes
+  the stream runner stops calling `emit/3` once it sees `:halt` — confirm).
+  Complements `AttributeSignal`, which handles `@spec`, `@doc`, and other attributes.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @nl CodeQA.AST.Lexing.NewlineToken.kind()
+    @ws CodeQA.AST.Lexing.WhitespaceToken.kind()
+    @type_attrs MapSet.new(["type", "typep", "opaque"])
+    def source(_), do: CodeQA.AST.Signals.Classification.TypeSignal
+    def group(_), do: :classification
+
+    def init(_, _lang_mod),
+      do: %{at_line_start: true, indent: 0, saw_at: false, is_first: true}
+
+    def emit(_, {_prev, token, _next}, state) do
+      case token.kind do
+        @nl ->
+          {MapSet.new(), %{state | at_line_start: true, indent: 0, saw_at: false}}
+
+        @ws when state.at_line_start ->
+          {MapSet.new(), %{state | indent: state.indent + 1, at_line_start: true}}
+
+        @ws ->
+          {MapSet.new(), state}
+
+        "@" when state.indent == 0 ->
+          {MapSet.new(), %{state | saw_at: true, at_line_start: false}}
+
+        _ when state.saw_at and state.indent == 0 ->
+          emit_after_at(token, state)
+
+        _ ->
+          {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}}
+      end
+    end
+
+    defp emit_after_at(token, state) do
+      if MapSet.member?(@type_attrs, token.content) do
+        weight = if state.is_first, do: 3, else: 1
+        {MapSet.new([{:type_vote, weight}]), :halt}
+      else
+        {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}}
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/access_modifier_signal.ex b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex
new file mode 100644
index 00000000..43ed0687
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex
@@ -0,0 +1,80 @@
+defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do
+  @moduledoc """
+  Split signal for access modifiers.
+
+  An `{:access_modifier_split, idx}` emission marks a token whose content is one
+  of the language's access modifiers, provided it is the first content token on
+  its line, the tracked bracket depth is 0, and some content token came earlier
+  in the stream. `idx` is the token's position in the stream.
+
+  Unlike `KeywordSignal`, indentation is not inspected, so indented members such
+  as `public void foo()` qualify (bracket depth, as tracked here, must still be 0).
+
+  The modifier set comes from `CodeQA.Language.access_modifiers/1`.
+  """
+
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @openers ["(", "[", "{"]
+    @closers [")", "]", "}"]
+
+    def source(_), do: CodeQA.AST.Signals.Structural.AccessModifierSignal
+    def group(_), do: :split
+
+    def init(_, lang_mod) do
+      %{
+        idx: 0,
+        bracket_depth: 0,
+        at_line_start: true,
+        seen_content: false,
+        modifiers: CodeQA.Language.access_modifiers(lang_mod)
+      }
+    end
+
+    # Every token advances the running index by one; `scan/2` decides what else
+    # changes and whether a split is emitted at the current index.
+    def emit(_, {_, token, _}, %{idx: position} = state) do
+      {emissions, scanned} = scan(token, state)
+      {emissions, %{scanned | idx: position + 1}}
+    end
+
+    # A newline re-arms line-start detection.
+    defp scan(%NewlineToken{}, state), do: {MapSet.new(), %{state | at_line_start: true}}
+
+    # Whitespace never alters the flags: leading whitespace keeps the line-start
+    # flag set and trailing whitespace leaves it cleared.
+    defp scan(%WhitespaceToken{}, state), do: {MapSet.new(), state}
+
+    defp scan(%{kind: kind}, %{bracket_depth: depth} = state) when kind in @openers,
+      do: {MapSet.new(), mark_content(%{state | bracket_depth: depth + 1})}
+
+    # Depth is clamped at zero so unmatched closers cannot make it negative.
+    defp scan(%{kind: kind}, %{bracket_depth: depth} = state) when kind in @closers,
+      do: {MapSet.new(), mark_content(%{state | bracket_depth: max(0, depth - 1)})}
+
+    # Ordinary content: the split test looks at the state from before this token.
+    defp scan(token, state) do
+      emissions =
+        if splits?(token, state),
+          do: MapSet.new([{:access_modifier_split, state.idx}]),
+          else: MapSet.new()
+
+      {emissions, mark_content(state)}
+    end
+
+    # Flags set by every token that is neither whitespace nor a newline.
+    defp mark_content(state), do: %{state | seen_content: true, at_line_start: false}
+
+    defp splits?(
+           %{content: text},
+           %{seen_content: true, bracket_depth: 0, at_line_start: true, modifiers: known}
+         ),
+         do: MapSet.member?(known, text)
+
+    defp splits?(_, _), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/assignment_function_signal.ex b/lib/codeqa/ast/signals/structural/assignment_function_signal.ex
new file mode 100644
index 00000000..a778d55b
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/assignment_function_signal.ex
@@ -0,0 +1,135 @@
+defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `:assignment_function_split` when a top-level assignment to a function
+  is detected at indent 0 and bracket depth 0.
+
+  Covers patterns such as:
+  - `identifier = function(...) {}`
+  - `identifier = async function(...) {}`
+  - `identifier = (...) => {}`
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_), do: CodeQA.AST.Signals.Structural.AssignmentFunctionSignal
+    def group(_), do: :split
+
+    def init(_, _lang_mod) do
+      %{
+        idx: 0,
+        indent: 0,
+        bracket_depth: 0,
+        at_line_start: true,
+        seen_content: false,
+        phase: :idle
+      }
+    end
+
+    # A newline resets indentation and abandons any candidate in progress.
+    def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state),
+      do: {MapSet.new(), %{state | idx: idx + 1, indent: 0, at_line_start: true, phase: :idle}}
+
+    def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, indent: i, at_line_start: true} = state),
+      do: {MapSet.new(), %{state | idx: idx + 1, indent: i + 1, at_line_start: true}}
+
+    def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state),
+      do: {MapSet.new(), %{state | idx: idx + 1}}
+
+    # Opening bracket: deepen the bracket count and let the phase react to it.
+    def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state)
+        when k in ["(", "[", "{"] do
+      {MapSet.new(),
+       %{
+         state
+         | idx: idx + 1,
+           bracket_depth: bd + 1,
+           at_line_start: false,
+           seen_content: true,
+           phase: advance_phase_open(phase, k)
+       }}
+    end
+
+    # Closing bracket: the depth is clamped at zero for unmatched closers.
+    def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state)
+        when k in [")", "]", "}"] do
+      {MapSet.new(),
+       %{
+         state
+         | idx: idx + 1,
+           bracket_depth: max(0, bd - 1),
+           at_line_start: false,
+           seen_content: true,
+           phase: advance_phase_close(phase, k)
+       }}
+    end
+
+    # Ordinary content tokens drive the phase machine in `advance_phase/7`.
+    def emit(
+          _,
+          {_, token, _},
+          %{
+            idx: idx,
+            seen_content: sc,
+            indent: i,
+            bracket_depth: bd,
+            at_line_start: als,
+            phase: phase
+          } = state
+        ) do
+      {emissions, new_phase} = advance_phase(phase, token, idx, sc, i, bd, als)
+
+      {emissions,
+       %{state | idx: idx + 1, at_line_start: false, seen_content: true, phase: new_phase}}
+    end
+
+    # Openers: `(` after `=` begins the parameter list; inside it only `(` deepens the count.
+    defp advance_phase_open({:in_parens, id_idx, pd}, "("), do: {:in_parens, id_idx, pd + 1}
+    defp advance_phase_open({:in_parens, id_idx, pd}, _), do: {:in_parens, id_idx, pd}
+    defp advance_phase_open({:saw_eq, id_idx}, "("), do: {:in_parens, id_idx, 1}
+    defp advance_phase_open(_, _), do: :idle
+
+    # Closers: the `)` that balances the opening `(` yields `:saw_close_paren`.
+    defp advance_phase_close({:in_parens, id_idx, 1}, ")"), do: {:saw_close_paren, id_idx}
+
+    defp advance_phase_close({:in_parens, id_idx, pd}, ")") when pd > 1,
+      do: {:in_parens, id_idx, pd - 1}
+
+    defp advance_phase_close({:in_parens, id_idx, pd}, _), do: {:in_parens, id_idx, pd}
+    defp advance_phase_close(_, _), do: :idle
+
+    # Phase machine for ordinary content tokens (brackets are handled above).
+    # A candidate starts only on an identifier that opens an unindented line.
+    defp advance_phase(:idle, %{kind: "<ID>"}, idx, true, 0, 0, true),
+      do: {MapSet.new(), {:saw_id, idx}}
+
+    defp advance_phase({:saw_id, id_idx}, %{kind: "="}, _, _, _, _, _),
+      do: {MapSet.new(), {:saw_eq, id_idx}}
+
+    # Consecutive identifiers (e.g. `const name`) move the anchor to the latest one.
+    defp advance_phase({:saw_id, _}, %{kind: "<ID>"}, idx, _, _, _, _),
+      do: {MapSet.new(), {:saw_id, idx}}
+
+    defp advance_phase({:saw_id, id_idx}, %{kind: "."}, _, _, _, _, _),
+      do: {MapSet.new(), {:saw_id, id_idx}}
+
+    defp advance_phase({:saw_eq, id_idx}, %{kind: "<ID>", content: "function"}, _, _, _, _, _),
+      do: {MapSet.new([{:assignment_function_split, id_idx}]), :idle}
+
+    defp advance_phase({:saw_eq, id_idx}, %{kind: "<ID>", content: "async"}, _, _, _, _, _),
+      do: {MapSet.new(), {:saw_eq, id_idx}}
+
+    # Parameters inside the parenthesised list must not reset the phase;
+    # otherwise `f = (a, b) => {}` would never reach `:saw_close_paren`.
+    defp advance_phase({:in_parens, _, _} = phase, _, _, _, _, _, _), do: {MapSet.new(), phase}
+
+    defp advance_phase({:saw_close_paren, id_idx}, %{kind: "=>"}, _, _, _, _, _),
+      do: {MapSet.new([{:assignment_function_split, id_idx}]), :idle}
+
+    # Anything else abandons the candidate.
+    defp advance_phase(_, _, _, _, _, _, _), do: {MapSet.new(), :idle}
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/blank_line_signal.ex b/lib/codeqa/ast/signals/structural/blank_line_signal.ex
new file mode 100644
index 00000000..c484e1a1
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/blank_line_signal.ex
@@ -0,0 +1,45 @@
+defmodule CodeQA.AST.Signals.Structural.BlankLineSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:blank_split, idx}` at the first content token after a run of two or
+  more newline tokens, if the content token before the run is one of the
+  language's block-end tokens (`CodeQA.Language.block_end_tokens/1`).
+
+  Whitespace tokens inside the run are skipped and do not interrupt it.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_), do: CodeQA.AST.Signals.Structural.BlankLineSignal
+    def group(_), do: :split
+
+    def init(_, lang_mod) do
+      block_ends = CodeQA.Language.block_end_tokens(lang_mod)
+      %{idx: 0, nl_run: 0, seen_content: false, last_content: nil, block_end_tokens: block_ends}
+    end
+
+    def emit(_, {_, token, _}, %{idx: position} = state) do
+      {emissions, next_state} = observe(token, state)
+      {emissions, %{next_state | idx: position + 1}}
+    end
+
+    # Newlines lengthen the current run; whitespace is transparent.
+    defp observe(%NewlineToken{}, %{nl_run: run} = state),
+      do: {MapSet.new(), %{state | nl_run: run + 1}}
+
+    defp observe(%WhitespaceToken{}, state), do: {MapSet.new(), state}
+
+    # Content ends the run; the split test reads the state from before this token.
+    defp observe(token, %{nl_run: run, last_content: previous} = state) do
+      settled = %{state | nl_run: 0, seen_content: true, last_content: token.content}
+      after_block_end? = state.seen_content == true and run >= 2
+
+      if after_block_end? and MapSet.member?(state.block_end_tokens, previous),
+        do: {MapSet.new([{:blank_split, state.idx}]), settled},
+        else: {MapSet.new(), settled}
+    end
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/bracket_signal.ex b/lib/codeqa/ast/signals/structural/bracket_signal.ex
new file mode 100644
index 00000000..201f66e9
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/bracket_signal.ex
@@ -0,0 +1,51 @@
+defmodule CodeQA.AST.Signals.Structural.BracketSignal do
+  @moduledoc """
+  Emits `{:bracket_enclosure, {start_idx, end_idx}}` when an outermost `()`, `[]`
+  or `{}` pair closes. Replaces `ParseRules.BracketRule`.
+
+  State: running token index, nesting depth, index of the outermost opener, and a
+  stack of opener kinds; a closer that mismatches the top of the stack is skipped.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @opener_for %{")" => "(", "]" => "[", "}" => "{"}
+
+    def source(_), do: CodeQA.AST.Signals.Structural.BracketSignal
+    def group(_), do: :enclosure
+
+    def init(_, _lang_mod), do: %{idx: 0, depth: 0, start_idx: nil, stack: []}
+
+    def emit(_, {_, token, _}, %{idx: position} = state) do
+      {emissions, tracked} = track(token, state)
+      {emissions, %{tracked | idx: position + 1}}
+    end
+
+    defp track(%{kind: kind}, state) when kind in ["(", "[", "{"],
+      do: {MapSet.new(), push(state, kind)}
+
+    # A closer only counts when it pairs with the opener on top of the stack.
+    defp track(%{kind: kind}, %{stack: [top | below]} = state) when kind in [")", "]", "}"] do
+      if Map.get(@opener_for, kind) == top, do: pop(state, below), else: {MapSet.new(), state}
+    end
+
+    defp track(_token, state), do: {MapSet.new(), state}
+
+    # The first opener at depth 0 records where the outermost enclosure begins.
+    defp push(%{depth: 0, idx: at, stack: stack} = state, kind),
+      do: %{state | depth: 1, start_idx: at, stack: [kind | stack]}
+
+    defp push(%{depth: depth, stack: stack} = state, kind),
+      do: %{state | depth: depth + 1, stack: [kind | stack]}
+
+    # Closing back to depth 0 reports the enclosure's inclusive token span.
+    defp pop(%{depth: 1, start_idx: from, idx: to} = state, below) do
+      {MapSet.new([{:bracket_enclosure, {from, to}}]),
+       %{state | depth: 0, start_idx: nil, stack: below}}
+    end
+
+    defp pop(%{depth: depth} = state, below),
+      do: {MapSet.new(), %{state | depth: depth - 1, stack: below}}
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/branch_split_signal.ex b/lib/codeqa/ast/signals/structural/branch_split_signal.ex
new file mode 100644
index 00000000..1d6d2644
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/branch_split_signal.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:branch_split, idx}` for a token whose content is one of the
+  language's branch keywords, as long as the tracked bracket depth is 0 and an
+  earlier content token has already been seen.
+
+  Indentation is deliberately not checked (unlike `KeywordSignal`): branches
+  nested in a function body are meant to be split into sibling child blocks by
+  the parser's recursive phase.
+
+  The keyword set is obtained from `CodeQA.Language.branch_keywords/1`.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @openers ["(", "[", "{"]
+    @closers [")", "]", "}"]
+
+    def source(_), do: CodeQA.AST.Signals.Structural.BranchSplitSignal
+    def group(_), do: :branch_split
+
+    def init(_, lang_mod) do
+      branch_words = CodeQA.Language.branch_keywords(lang_mod)
+      %{idx: 0, bracket_depth: 0, seen_content: false, keywords: branch_words}
+    end
+
+    def emit(_, {_, token, _}, %{idx: position} = state) do
+      {emissions, next_state} =
+        case token do
+          %NewlineToken{} -> {MapSet.new(), state}
+          %WhitespaceToken{} -> {MapSet.new(), state}
+          %{kind: kind} when kind in @openers -> {MapSet.new(), enter(state)}
+          %{kind: kind} when kind in @closers -> {MapSet.new(), leave(state)}
+          _ -> {split_at(token, state), %{state | seen_content: true}}
+        end
+      {emissions, %{next_state | idx: position + 1}}
+    end
+
+    defp enter(%{bracket_depth: depth} = state),
+      do: %{state | bracket_depth: depth + 1, seen_content: true}
+
+    # Clamped at zero so unmatched closers cannot produce a negative depth.
+    defp leave(%{bracket_depth: depth} = state),
+      do: %{state | bracket_depth: max(0, depth - 1), seen_content: true}
+
+    defp split_at(%{content: text}, %{seen_content: true, bracket_depth: 0} = state) do
+      if MapSet.member?(state.keywords, text),
+        do: MapSet.new([{:branch_split, state.idx}]),
+        else: MapSet.new()
+    end
+
+    defp split_at(_token, _state), do: MapSet.new()
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/colon_indent_signal.ex b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex
new file mode 100644
index 00000000..9189b795
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex
@@ -0,0 +1,83 @@
+defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:colon_indent_enclosure, {start_idx, end_idx}}` for colon-indented blocks (Python).
+
+  Only active when the language module's `uses_colon_indent?/0` returns true; otherwise
+  tokens are merely counted. Replaces `ParseRules.ColonIndentationRule`.
+
+  ## Limitation
+
+  The original rule flushes open blocks at EOF via `close_all_open/1`. Since
+  `emit/3` has no end-of-stream callback, open blocks are instead flushed at
+  each `<NL>` token. This correctly handles single-statement blocks; multi-line
+  blocks are closed at the first newline (conservative).
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_), do: CodeQA.AST.Signals.Structural.ColonIndentSignal
+    def group(_), do: :enclosure
+
+    def init(_, lang_mod) do
+      %{
+        enabled: lang_mod.uses_colon_indent?(),
+        idx: 0,
+        ci: 0,
+        last_colon_indent: nil,
+        stack: []
+      }
+    end
+
+    def emit(_, _, %{enabled: false} = state),
+      do: {MapSet.new(), %{state | idx: state.idx + 1}}
+
+    def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state) do
+      {emissions, _} = flush_stack(state.stack)
+      {emissions, %{state | idx: idx + 1, ci: 0, stack: []}}
+    end
+
+    def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, ci: ci} = state),
+      do: {MapSet.new(), %{state | idx: idx + 1, ci: ci + 1}}
+
+    def emit(_, {_, %{kind: ":"}, _}, %{idx: idx, ci: ci} = state),
+      do: {MapSet.new(), %{state | idx: idx + 1, last_colon_indent: ci}}
+
+    def emit(_, {_, _, _}, %{idx: idx, ci: ci} = state) do
+      {dedent_emissions, remaining} = close_dedented(state.stack, ci)
+      new_stack = maybe_open_block(remaining, state.last_colon_indent, ci, idx)
+
+      {dedent_emissions,
+       %{state | idx: idx + 1, last_colon_indent: nil, stack: update_top(new_stack, idx)}}
+    end
+
+    defp close_dedented(stack, ci) do
+      {to_close, keep} = Enum.split_while(stack, fn e -> ci <= e.colon_indent end)
+      {build_emissions(to_close), keep}
+    end
+
+    defp flush_stack(stack), do: {build_emissions(stack), []}
+
+    defp maybe_open_block(stack, colon_indent, ci, idx)
+         when colon_indent != nil and ci > colon_indent,
+         do: [%{colon_indent: colon_indent, sub_start: idx, last_content_idx: idx} | stack]
+
+    defp maybe_open_block(stack, _, _, _), do: stack
+
+    defp build_emissions(entries) do
+      Enum.reduce(entries, MapSet.new(), fn
+        %{sub_start: s, last_content_idx: e}, acc when e != nil ->
+          MapSet.put(acc, {:colon_indent_enclosure, {s, e}})
+
+        _entry, acc ->
+          acc
+      end)
+    end
+
+    defp update_top([], _idx), do: []
+    defp update_top([top | rest], idx), do: [Map.put(top, :last_content_idx, idx) | rest]
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/comment_divider_signal.ex b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex
new file mode 100644
index 00000000..d01e5e83
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex
@@ -0,0 +1,76 @@
+defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:comment_divider_split, idx}` for "visual divider" comment lines such
+  as `# ---`, `// ===` or `-- ---`.
+
+  A token qualifies when it is the first token on an unindented line, its kind
+  is one of the language's comment prefixes, and the `next` token of the triple
+  has a kind listed in the language's divider indicators. Nothing is emitted
+  until some content token has been seen, so a divider opening the stream is ignored.
+
+  Sources: `lang_mod.comment_prefixes/0` and `CodeQA.Language.divider_indicators/1`.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_), do: CodeQA.AST.Signals.Structural.CommentDividerSignal
+    def group(_), do: :split
+
+    def init(_, lang_mod) do
+      %{
+        idx: 0,
+        at_line_start: true,
+        seen_content: false,
+        indent: 0,
+        comment_prefixes: MapSet.new(lang_mod.comment_prefixes()),
+        divider_indicators: CodeQA.Language.divider_indicators(lang_mod)
+      }
+    end
+
+    # Each token is classified by `scan/3`; afterwards the index moves on by one.
+    def emit(_, {_, token, next}, %{idx: position} = state) do
+      {emissions, scanned} = scan(token, next, state)
+      {emissions, %{scanned | idx: position + 1}}
+    end
+
+    # A newline starts a new line with zero indentation.
+    defp scan(%NewlineToken{}, _next, state),
+      do: {MapSet.new(), %{state | at_line_start: true, indent: 0}}
+
+    # Whitespace before the line's first content adds one indent step per token.
+    defp scan(%WhitespaceToken{}, _next, %{at_line_start: true, indent: depth} = state),
+      do: {MapSet.new(), %{state | indent: depth + 1}}
+
+    # Whitespace later on the line is ignored.
+    defp scan(%WhitespaceToken{}, _next, state), do: {MapSet.new(), state}
+
+    # Content token: test for a divider against the state from before this
+    # token, then record that content has been seen on this line.
+    defp scan(token, next, state) do
+      emissions =
+        if divider?(token, next, state),
+          do: MapSet.new([{:comment_divider_split, state.idx}]),
+          else: MapSet.new()
+
+      {emissions, %{state | at_line_start: false, seen_content: true}}
+    end
+
+    # True only for a comment prefix opening an unindented line after earlier content.
+    defp divider?(
+           %{kind: kind},
+           follower,
+           %{seen_content: true, at_line_start: true, indent: 0} = state
+         ) do
+      %{comment_prefixes: prefixes, divider_indicators: indicators} = state
+
+      MapSet.member?(prefixes, kind) and follower != nil and
+        MapSet.member?(indicators, follower.kind)
+    end
+
+    defp divider?(_token, _follower, _state), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/decorator_signal.ex b/lib/codeqa/ast/signals/structural/decorator_signal.ex
new file mode 100644
index 00000000..0dc1f5be
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/decorator_signal.ex
@@ -0,0 +1,81 @@
+defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:decorator_split, idx}` when a decorator/annotation marker is the first
+  non-whitespace token on a line, bracket depth is 0, and earlier content exists.
+
+  Two markers are recognised:
+  - `@` (Python, TypeScript, Java, Elixir decorators/annotations)
+  - `#` immediately followed by `[` (Rust attribute syntax)
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @openers ["(", "[", "{"]
+    @closers [")", "]", "}"]
+
+    def source(_), do: CodeQA.AST.Signals.Structural.DecoratorSignal
+    def group(_), do: :split
+
+    def init(_, _lang_mod) do
+      %{
+        idx: 0,
+        bracket_depth: 0,
+        at_line_start: true,
+        seen_content: false
+      }
+    end
+
+    # Classifies the token with `scan/3`, then advances the running index by one
+    # regardless of the token's type.
+    def emit(_, {_, token, next}, %{idx: position} = state) do
+      {emissions, scanned} = scan(token, next, state)
+      {emissions, %{scanned | idx: position + 1}}
+    end
+
+    # A newline re-arms line-start detection.
+    defp scan(%NewlineToken{}, _next, state),
+      do: {MapSet.new(), %{state | at_line_start: true}}
+
+    # Whitespace leaves every flag as it was.
+    defp scan(%WhitespaceToken{}, _next, state), do: {MapSet.new(), state}
+
+    defp scan(%{kind: kind}, _next, %{bracket_depth: depth} = state) when kind in @openers,
+      do: {MapSet.new(), mark_content(%{state | bracket_depth: depth + 1})}
+
+    # Depth is clamped at zero so unmatched closers cannot make it negative.
+    defp scan(%{kind: kind}, _next, %{bracket_depth: depth} = state) when kind in @closers,
+      do: {MapSet.new(), mark_content(%{state | bracket_depth: max(0, depth - 1)})}
+
+    # Any other token: a split is emitted when the state from before this token
+    # is armed and the token is a decorator marker.
+    defp scan(token, next, state) do
+      emissions =
+        if armed?(state) and decorator?(token, next),
+          do: MapSet.new([{:decorator_split, state.idx}]),
+          else: MapSet.new()
+
+      {emissions, mark_content(state)}
+    end
+
+    # Flags set by every token that is neither whitespace nor a newline.
+    defp mark_content(state), do: %{state | seen_content: true, at_line_start: false}
+
+    # Armed: earlier content exists, depth is 0, and the line has no content yet.
+    defp armed?(%{seen_content: true, bracket_depth: 0, at_line_start: true}),
+      do: true
+
+    defp armed?(_state), do: false
+
+    # `@` is a marker on its own.
+    defp decorator?(%{kind: "@"}, _next), do: true
+
+    # `#` only counts when the next token is `[`.
+    defp decorator?(%{kind: "#"}, next), do: next != nil and next.kind == "["
+
+    defp decorator?(_token, _next), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex
new file mode 100644
index 00000000..d644dad4
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex
@@ -0,0 +1,87 @@
+defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `{:dedent_split, idx}` at the first content token of a line that has no
+  leading whitespace when the most recent line with content did have leading
+  whitespace, i.e. when code returns to indent level 0.
+
+  Lines without content (blank or whitespace-only) do not change what counts as
+  the previous line. Intended as the main split mechanism for Python-like languages.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_), do: CodeQA.AST.Signals.Structural.DedentToZeroSignal
+    def group(_), do: :split
+
+    def init(_, _lang_mod) do
+      %{
+        idx: 0,
+        at_line_start: true,
+        seen_content: false,
+        current_line_has_indent: false,
+        current_line_has_content: false,
+        prev_line_had_indent: false
+      }
+    end
+
+    # Classifies the token with `scan/2`, then advances the running index by one.
+    def emit(_, {_, token, _}, %{idx: position} = state) do
+      {emissions, scanned} = scan(token, state)
+      {emissions, %{scanned | idx: position + 1}}
+    end
+
+    # End of line: if the line held content, its indent flag becomes the
+    # "previous line" flag; an empty line leaves that flag untouched. The
+    # per-line flags are then cleared for the next line.
+    defp scan(%NewlineToken{}, state) do
+      carried =
+        if state.current_line_has_content,
+          do: state.current_line_has_indent,
+          else: state.prev_line_had_indent
+
+      {MapSet.new(),
+       %{
+         state
+         | at_line_start: true,
+           prev_line_had_indent: carried,
+           current_line_has_indent: false,
+           current_line_has_content: false
+       }}
+    end
+
+    # Whitespace ahead of the line's first content marks the line as indented.
+    defp scan(%WhitespaceToken{}, %{at_line_start: true} = state),
+      do: {MapSet.new(), %{state | current_line_has_indent: true}}
+
+    # Whitespace after content is ignored.
+    defp scan(%WhitespaceToken{}, state), do: {MapSet.new(), state}
+
+    # Content token: the dedent test reads the state from before this token,
+    # which is then marked as having content on the current line.
+    defp scan(_token, state) do
+      emissions =
+        if dedent?(state),
+          do: MapSet.new([{:dedent_split, state.idx}]),
+          else: MapSet.new()
+
+      {emissions,
+       %{state | at_line_start: false, seen_content: true, current_line_has_content: true}}
+    end
+
+    # A dedent needs all four: first content on the line, no leading whitespace
+    # on it, leading whitespace on the previous content line, earlier content.
+    defp dedent?(%{
+           at_line_start: true,
+           current_line_has_indent: false,
+           prev_line_had_indent: true,
+           seen_content: true
+         }),
+         do: true
+
+    defp dedent?(_state), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex
new file mode 100644
index 00000000..c5e5c4e3
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex
@@ -0,0 +1,65 @@
+defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `:doc_comment_split` when a doc-comment opener appears at line start.
+
+  Detects:
+  - `///` — Rust/C# XML doc comments: `//` token immediately followed by `/`
+  - `/**` — Java/JS JSDoc: `/` token at line start immediately followed by a `*` or `**` token
+
+  No split is emitted for the first such line (seen_content must be true).
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_signal), do: CodeQA.AST.Signals.Structural.DocCommentLeadSignal
+    def group(_signal), do: :split
+
+    # State: running token index, whether only whitespace has been seen so far
+    # on the current line, and whether any content token has been seen at all.
+    def init(_signal, _lang_mod), do: %{idx: 0, at_line_start: true, seen_content: false}
+
+    # A newline re-opens the line-start window.
+    def emit(_signal, {_prev, %NewlineToken{}, _next}, state),
+      do: {MapSet.new(), %{advance(state) | at_line_start: true}}
+
+    # Whitespace never changes `at_line_start`: leading whitespace keeps it
+    # true, and mid-line whitespace finds it already false.
+    def emit(_signal, {_prev, %WhitespaceToken{}, _next}, state),
+      do: {MapSet.new(), advance(state)}
+
+    # `//` followed by `/` (a `///` opener) at line start, after earlier content.
+    def emit(
+          _signal,
+          {_prev, %{kind: "//"}, next},
+          %{idx: at, at_line_start: true, seen_content: true} = state
+        ) do
+      opener? = next != nil and next.kind == "/"
+      {doc_split(opener?, at), %{advance(state) | at_line_start: false}}
+    end
+
+    # `/` followed by `*` or `**` at line start, after earlier content.
+    def emit(
+          _signal,
+          {_prev, %{kind: "/"}, next},
+          %{idx: at, at_line_start: true, seen_content: true} = state
+        ) do
+      opener? = next != nil and next.kind in ["*", "**"]
+      {doc_split(opener?, at), %{advance(state) | at_line_start: false}}
+    end
+
+    # Any other token is content and closes the line-start window.
+    def emit(_signal, {_prev, _token, _next}, state),
+      do: {MapSet.new(), %{advance(state) | at_line_start: false, seen_content: true}}
+
+    # Moves the token index forward by one.
+    defp advance(%{idx: at} = state), do: %{state | idx: at + 1}
+
+    # Emission set for a confirmed (`true`) or rejected (`false`) opener.
+    defp doc_split(true, at), do: MapSet.new([{:doc_comment_split, at}])
+    defp doc_split(false, _at), do: MapSet.new()
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/keyword_signal.ex b/lib/codeqa/ast/signals/structural/keyword_signal.ex
new file mode 100644
index 00000000..c13d3cf9
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/keyword_signal.ex
@@ -0,0 +1,83 @@
+defmodule CodeQA.AST.Signals.Structural.KeywordSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `:keyword_split` when a declaration keyword appears at bracket depth 0
+  and indentation level 0.
+
+  The keyword set is read once in `init/2` by passing the language module to
+  `CodeQA.Language.declaration_keywords/1`.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @openers ["(", "[", "{"]
+    @closers [")", "]", "}"]
+
+    def source(_signal), do: CodeQA.AST.Signals.Structural.KeywordSignal
+    def group(_signal), do: :split
+
+    # The keyword set is resolved once, from the language module handed to init.
+    def init(_signal, lang_mod) do
+      %{
+        idx: 0,
+        bracket_depth: 0,
+        indent: 0,
+        at_line_start: true,
+        seen_content: false,
+        keywords: CodeQA.Language.declaration_keywords(lang_mod)
+      }
+    end
+
+    # A newline resets the indentation count for the next line.
+    def emit(_signal, {_prev, %NewlineToken{}, _next}, %{idx: at} = state),
+      do: {MapSet.new(), %{state | idx: at + 1, indent: 0, at_line_start: true}}
+
+    # Each whitespace token before the first content token adds one indent unit.
+    def emit(_signal, {_prev, %WhitespaceToken{}, _next}, %{at_line_start: true} = state) do
+      %{idx: at, indent: units} = state
+      {MapSet.new(), %{state | idx: at + 1, indent: units + 1}}
+    end
+
+    # Mid-line whitespace only advances the index.
+    def emit(_signal, {_prev, %WhitespaceToken{}, _next}, %{idx: at} = state),
+      do: {MapSet.new(), %{state | idx: at + 1}}
+
+    # Opening brackets deepen the nesting; keywords inside brackets never split.
+    def emit(_signal, {_prev, %{kind: kind}, _next}, %{bracket_depth: depth} = state)
+        when kind in @openers,
+        do: {MapSet.new(), past_bracket(state, depth + 1)}
+
+    # Unbalanced closers never push the depth below zero.
+    def emit(_signal, {_prev, %{kind: kind}, _next}, %{bracket_depth: depth} = state)
+        when kind in @closers,
+        do: {MapSet.new(), past_bracket(state, max(0, depth - 1))}
+
+    # Everything else is content and a candidate declaration keyword.
+    def emit(_signal, {_prev, token, _next}, %{idx: at} = state) do
+      emitted =
+        case keyword_split?(state, token) do
+          true -> MapSet.new([{:keyword_split, at}])
+          false -> MapSet.new()
+        end
+
+      {emitted, %{state | idx: at + 1, seen_content: true, at_line_start: false}}
+    end
+
+    # Advances past a bracket token, recording the new nesting depth.
+    defp past_bracket(%{idx: at} = state, depth) do
+      %{state | idx: at + 1, bracket_depth: depth, seen_content: true, at_line_start: false}
+    end
+
+    # Judged on the state before the token: earlier content, depth 0, indent 0.
+    defp keyword_split?(
+           %{seen_content: true, bracket_depth: 0, indent: 0, keywords: keywords},
+           %{content: text}
+         ),
+         do: MapSet.member?(keywords, text)
+
+    defp keyword_split?(_state, _token), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/sql_block_signal.ex b/lib/codeqa/ast/signals/structural/sql_block_signal.ex
new file mode 100644
index 00000000..1e376f59
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/sql_block_signal.ex
@@ -0,0 +1,55 @@
+defmodule CodeQA.AST.Signals.Structural.SQLBlockSignal do
+  alias CodeQA.AST.Lexing.NewlineToken
+  alias CodeQA.AST.Lexing.WhitespaceToken
+
+  @moduledoc """
+  Emits `:sql_block_split` when a SQL DDL or DML statement keyword appears
+  at line start after prior content has been seen.
+
+  Recognises uppercase and lowercase SQL statement starters:
+  DDL: CREATE, DROP, ALTER, TRUNCATE
+  DML: INSERT, UPDATE, DELETE, SELECT
+  Procedures/transactions: BEGIN, COMMIT, ROLLBACK, CALL, EXECUTE
+
+  The keyword set is read once in `init/2` by passing the language module to
+  `CodeQA.Language.statement_keywords/1`.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    def source(_signal), do: CodeQA.AST.Signals.Structural.SQLBlockSignal
+    def group(_signal), do: :split
+
+    # The statement keyword set is looked up once, from the language module.
+    def init(_signal, lang_mod) do
+      statement_keywords = CodeQA.Language.statement_keywords(lang_mod)
+      %{idx: 0, at_line_start: true, seen_content: false, keywords: statement_keywords}
+    end
+
+    def emit(_signal, {_prev, %NewlineToken{}, _next}, %{idx: at} = state),
+      do: {MapSet.new(), %{state | idx: at + 1, at_line_start: true}}
+
+    # Whitespace leaves `at_line_start` as it found it, wherever it occurs.
+    def emit(_signal, {_prev, %WhitespaceToken{}, _next}, %{idx: at} = state),
+      do: {MapSet.new(), %{state | idx: at + 1}}
+
+    # Only `<ID>` tokens can match a statement keyword (see `sql_split?/2`);
+    # every non-whitespace token counts as content either way.
+    def emit(_signal, {_prev, token, _next}, %{idx: at} = state) do
+      split? = sql_split?(state, token)
+      emitted = if split?, do: MapSet.new([{:sql_block_split, at}]), else: MapSet.new()
+
+      {emitted, %{state | idx: at + 1, at_line_start: false, seen_content: true}}
+    end
+
+    # Case-insensitive: the token text is downcased before the lookup.
+    defp sql_split?(
+           %{seen_content: true, at_line_start: true, keywords: keywords},
+           %{kind: "<ID>", content: text}
+         ),
+         do: MapSet.member?(keywords, String.downcase(text))
+
+    defp sql_split?(_state, _token), do: false
+  end
+end
diff --git a/lib/codeqa/ast/signals/structural/triple_quote_signal.ex b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex
new file mode 100644
index 00000000..ac5808db
--- /dev/null
+++ b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex
@@ -0,0 +1,31 @@
+defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignal do
+  @moduledoc """
+  Emits `:triple_split` at each `<DOC>` token boundary.
+
+  The first of each pair marks the opening of a heredoc; the second marks the
+  token after the closing delimiter. These split values are used by the Parser
+  to compute protected ranges, preventing other signals' splits from being
+  applied inside heredoc content.
+
+  Replaces `ParseRules.TripleQuoteRule`.
+  """
+
+  defstruct []
+
+  defimpl CodeQA.AST.Parsing.Signal do
+    @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind()
+    def source(_signal), do: CodeQA.AST.Signals.Structural.TripleQuoteSignal
+    def group(_signal), do: :split
+    def init(_signal, _lang_mod), do: %{idx: 0, inside: false}
+
+    # An opening delimiter splits at its own index, a closing one at the index
+    # after it; `inside` flips on every doc-kind token.
+    def emit(_signal, {_prev, %{kind: @doc_kind}, _next}, %{idx: at, inside: inside?} = state) do
+      split_at = if inside?, do: at + 1, else: at
+      {MapSet.new([{:triple_split, split_at}]), %{state | idx: at + 1, inside: not inside?}}
+    end
+
+    def emit(_signal, {_prev, _token, _next}, %{idx: at} = state),
+      do: {MapSet.new(), %{state | idx: at + 1}}
+  end
+end
diff --git a/lib/codeqa/block_impact/codebase_impact.ex b/lib/codeqa/block_impact/codebase_impact.ex
new file mode 100644
index 00000000..50fa5ba3
--- /dev/null
+++ b/lib/codeqa/block_impact/codebase_impact.ex
@@ -0,0 +1,22 @@
+defmodule CodeQA.BlockImpact.CodebaseImpact do
+  @moduledoc """
+  Leave-one-out codebase aggregate: reconstruct file content without a target node,
+  replace the file in the files map, and re-run the codebase aggregate.
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.BlockImpact.FileImpact
+  alias CodeQA.Engine.Analyzer
+
+  @doc """
+  Re-runs the codebase aggregate with `node`'s tokens cut out of the file stored under `path`.
+  """
+  @spec compute(String.t(), String.t(), Node.t(), map()) :: map()
+  def compute(path, content, node, files_map) do
+    tokens = TokenNormalizer.normalize_structural(content)
+    stripped_content = FileImpact.reconstruct_without(tokens, node)
+
+    files_map |> Map.put(path, stripped_content) |> Analyzer.analyze_codebase_aggregate()
+  end
+end
diff --git a/lib/codeqa/block_impact/file_impact.ex b/lib/codeqa/block_impact/file_impact.ex
new file mode 100644
index 00000000..10bd1f9f
--- /dev/null
+++ b/lib/codeqa/block_impact/file_impact.ex
@@ -0,0 +1,46 @@
+defmodule CodeQA.BlockImpact.FileImpact do
+  @moduledoc """
+  Leave-one-out file metrics: reconstruct file content without a target node's tokens
+  and return the re-run file metrics map.
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.Engine.Analyzer
+
+  @min_tokens 10
+
+  @doc """
+  Re-runs file metrics on `content` with the target node's tokens cut out.
+
+  Nodes with fewer than `#{@min_tokens}` tokens are skipped and yield `nil`;
+  otherwise the raw `%{"group" => %{"key" => value}}` metrics map is returned.
+  """
+  @spec compute(String.t(), Node.t()) :: map() | nil
+  def compute(_content, %Node{tokens: tokens}) when length(tokens) < @min_tokens, do: nil
+
+  def compute(content, node) do
+    stripped = content |> TokenNormalizer.normalize_structural() |> reconstruct_without(node)
+
+    Analyzer.analyze_file("", stripped)
+  end
+
+  @spec reconstruct_without([CodeQA.AST.Lexing.Token.t()], Node.t()) :: String.t()
+  def reconstruct_without(root_tokens, %Node{tokens: []}), do: join_contents(root_tokens)
+
+  def reconstruct_without(root_tokens, %{tokens: [head | _] = node_tokens}) do
+    # The node is located by the source position (line, col) of its first token.
+    at_node_start? = fn tok -> tok.line == head.line and tok.col == head.col end
+
+    case Enum.find_index(root_tokens, at_node_start?) do
+      nil ->
+        join_contents(root_tokens)
+
+      start_idx ->
+        {before, rest} = Enum.split(root_tokens, start_idx)
+        join_contents(before ++ Enum.drop(rest, length(node_tokens)))
+    end
+  end
+
+  defp join_contents(tokens), do: Enum.map_join(tokens, "", & &1.content)
+end
diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex
new file mode 100644
index 00000000..4dcceb77
--- /dev/null
+++ b/lib/codeqa/block_impact/refactoring_potentials.ex
@@ -0,0 +1,145 @@
+defmodule CodeQA.BlockImpact.RefactoringPotentials do
+  @moduledoc """
+  Computes named refactoring potentials for a code block using leave-one-out cosine deltas.
+
+  Given baseline and without-node metrics at both file scope and codebase scope,
+  computes the cosine delta per behavior, merges the two scopes via max(), and
+  returns the top N behaviors sorted by delta descending.
+
+  Positive delta = removing the block improved that behavior's cosine → the block
+  is a contributor to that anti-pattern.
+  """
+
+  alias CodeQA.CombinedMetrics.FileScorer
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.CombinedMetrics.Scorer
+
+  @doc """
+  Returns the top N refactoring potentials for a code block.
+
+  ## Parameters
+
+  - `baseline_file_cosines` — pre-computed cosines list from `SampleRunner.diagnose_aggregate/2` for the baseline file
+  - `without_file_metrics` — raw `%{"group" => %{"key" => val}}` with the node's tokens removed
+  - `baseline_codebase_cosines` — pre-computed cosines list for the full codebase baseline
+  - `without_codebase_agg` — `%{"group" => %{"mean_key" => val}}` with the node removed from the codebase
+
+  ## Options
+
+  - `:top` — number of potentials to return (default 3)
+  - `:language` — forwarded to `SampleRunner.diagnose_aggregate/2` for the file scope
+  - `:languages` — forwarded to `SampleRunner.diagnose_aggregate/2` for the codebase scope
+  - `:behavior_map` — forwarded to both diagnoses; also consulted for `"_excludes_block_types"`
+  - `:block_type` — atom; behaviors whose exclusion list names it are dropped (`nil` keeps all)
+
+  ## Result shape
+
+      [%{"category" => "function_design", "behavior" => "cyclomatic_complexity_under_10", "cosine_delta" => 0.41}]
+  """
+  @spec compute([map()], map(), [map()], map(), keyword()) :: [map()]
+  def compute(
+        baseline_file_cosines,
+        without_file_metrics,
+        baseline_codebase_cosines,
+        without_codebase_agg,
+        opts \\ []
+      ) do
+    language = Keyword.get(opts, :language)
+    languages = Keyword.get(opts, :languages)
+    behavior_map = Keyword.get(opts, :behavior_map)
+    block_type = Keyword.get(opts, :block_type)
+
+    file_delta =
+      compute_file_delta(baseline_file_cosines, without_file_metrics, language, behavior_map)
+
+    codebase_delta =
+      compute_codebase_delta(
+        baseline_codebase_cosines,
+        without_codebase_agg,
+        languages,
+        behavior_map
+      )
+
+    keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta))
+
+    scored =
+      for {category, behavior} = key <- keys,
+          not excluded?(category, behavior, block_type, behavior_map) do
+        # A behavior missing from one scope contributes 0.0 for that scope.
+        merged = max(Map.get(file_delta, key, 0.0), Map.get(codebase_delta, key, 0.0))
+        {category, behavior, merged}
+      end
+
+    # Largest delta first.
+    scored
+    |> Enum.sort_by(fn {_category, _behavior, delta} -> delta end, :desc)
+    |> Enum.take(Keyword.get(opts, :top, 3))
+    |> Enum.map(fn {category, behavior, delta} ->
+      rounded = Float.round(delta / 1.0, 4)
+      %{"category" => category, "behavior" => behavior, "cosine_delta" => rounded}
+    end)
+  end
+
+  # File scope: turn the leave-one-out metrics into an aggregate, diagnose it,
+  # and diff the resulting cosines against the baseline.
+  defp compute_file_delta(baseline_cosines, without_metrics, language, behavior_map) do
+    diagnose_opts = [top: 99_999, language: language, behavior_map: behavior_map]
+
+    without_cosines =
+      without_metrics
+      |> FileScorer.file_to_aggregate()
+      |> SampleRunner.diagnose_aggregate(diagnose_opts)
+
+    cosines_to_delta(baseline_cosines, without_cosines)
+  end
+
+  # Codebase scope: `without_agg` is already an aggregate, so it goes to the
+  # diagnosis as-is (no `FileScorer` conversion) and the cosines are diffed
+  # against the baseline.
+  defp compute_codebase_delta(baseline_cosines, without_agg, languages, behavior_map) do
+    diagnose_opts = [top: 99_999, languages: languages, behavior_map: behavior_map]
+
+    without_cosines = SampleRunner.diagnose_aggregate(without_agg, diagnose_opts)
+
+    cosines_to_delta(baseline_cosines, without_cosines)
+  end
+
+  # `{category, behavior}` => cosine without the block minus baseline (missing => 0.0).
+  defp cosines_to_delta(baseline_cosines, without_cosines) do
+    without = Map.new(without_cosines, &{{&1.category, &1.behavior}, &1.cosine})
+
+    Map.new(baseline_cosines, fn row ->
+      key = {row.category, row.behavior}
+      {key, Map.get(without, key, 0.0) - row.cosine}
+    end)
+  end
+
+  # Without a block type there is nothing to exclude on.
+  defp excluded?(_category, _behavior, nil, _behavior_map), do: false
+
+  defp excluded?(category, behavior, block_type, behavior_map),
+    do: Enum.member?(excludes_for(category, behavior, behavior_map), Atom.to_string(block_type))
+
+  defp excludes_for(category, behavior, behavior_map) when is_map(behavior_map) do
+    with [_ | _] = entries <- Map.get(behavior_map, category, []),
+         {^behavior, data} <- Enum.find(entries, fn {name, _data} -> name == behavior end) do
+      list_or_empty(Map.get(data, "_excludes_block_types"))
+    else
+      _ -> []
+    end
+  end
+
+  # No behavior map given: read the category's YAML via `Scorer.all_yamls/0`.
+  defp excludes_for(category, behavior, nil) do
+    with %{} = yamls <- Scorer.all_yamls(),
+         %{} = category_yaml <- Map.get(yamls, "priv/combined_metrics/#{category}.yml"),
+         %{} = behavior_yaml <- Map.get(category_yaml, behavior) do
+      list_or_empty(Map.get(behavior_yaml, "_excludes_block_types"))
+    else
+      _ -> []
+    end
+  end
+
+  defp list_or_empty(value) when is_list(value), do: value
+  defp list_or_empty(_value), do: []
+end
diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex
new file mode 100644
index 00000000..69da2fe7
--- /dev/null
+++ b/lib/codeqa/block_impact_analyzer.ex
@@ -0,0 +1,479 @@
+defmodule CodeQA.BlockImpactAnalyzer do
+  @moduledoc """
+  Orchestrates block impact analysis across all files in a pipeline result.
+
+  For each file, tokenizes its content, parses it into a node tree, and for each
+  node (recursively including children) computes refactoring potentials via
+  leave-one-out impact scoring at both file scope and codebase scope.
+
+  The pipeline result is returned with a `"nodes"` key added to each file entry.
+  All other keys in the result are preserved unchanged.
+
+  ## Telemetry
+
+  Emits the following events (all durations in microseconds):
+
+    - `[:codeqa, :block_impact, :analyze]` — full run
+      measurements: `%{duration: us}`
+      metadata: `%{file_count: n}`
+
+    - `[:codeqa, :block_impact, :codebase_cosines]` — codebase baseline cosine computation
+      measurements: `%{duration: us}`
+      metadata: `%{behavior_count: n}`
+
+    - `[:codeqa, :block_impact, :file]` — per-file node computation
+      measurements: `%{duration: us, tokenize_us: us, parse_us: us, file_cosines_us: us, node_count: n}`
+      metadata: `%{path: string}`
+
+    - `[:codeqa, :block_impact, :node]` — per-node leave-one-out computation
+      measurements: `%{duration: us, reconstruct_us: us, analyze_file_us: us, aggregate_us: us, refactoring_us: us}`
+      metadata: `%{path: string, token_count: n}`
+  """
+
+  alias CodeQA.Analysis.BehaviorConfigServer
+  alias CodeQA.AST.Classification.{NodeClassifier, TypedNodeKind}
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.BlockImpact.{FileImpact, RefactoringPotentials}
+  alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner}
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Languages.Unknown
+
+  @min_tokens 10
+
+  @doc """
+  Analyzes all files in the pipeline result, adding `"nodes"` to each file entry.
+
+  ## Parameters
+
+  - `pipeline_result` — direct return value of `Engine.Analyzer.analyze_codebase/2`,
+    containing `"files"` and `"codebase"` keys
+  - `files_map` — raw `%{path => content}` map used for file-scope leave-one-out
+  - `opts` — keyword options
+
+  ## Options
+
+  - `:nodes_top` — number of refactoring potentials per node (default 3)
+  - `:workers` — parallelism for `Task.async_stream` (default `System.schedulers_online()`)
+  - `:baseline_codebase_agg` — pre-computed codebase aggregate (skips redundant analysis)
+  - `:behavior_config_pid` — pid passed to `BehaviorConfigServer.get_all_behaviors/1` (default `nil`)
+  """
+  @spec analyze(map(), map(), keyword()) :: map()
+  def analyze(pipeline_result, files_map, opts \\ []) do
+    nodes_top = Keyword.get(opts, :nodes_top, 3)
+    workers = Keyword.get(opts, :workers, System.schedulers_online())
+
+    t0 = now()
+
+    baseline_codebase_agg =
+      Keyword.get_lazy(opts, :baseline_codebase_agg, fn ->
+        Analyzer.analyze_codebase_aggregate(files_map)
+      end)
+
+    cached_behaviors =
+      case Keyword.get(opts, :behavior_config_pid) do
+        nil -> nil
+        pid -> BehaviorConfigServer.get_all_behaviors(pid)
+      end
+
+    project_langs = project_languages(files_map)
+
+    filtered_behaviors =
+      if cached_behaviors && project_langs != [] do
+        filter_behaviors_by_languages(cached_behaviors, project_langs)
+      else
+        cached_behaviors
+      end
+
+    {baseline_codebase_cosines, cosines_us} =
+      timed(fn ->
+        SampleRunner.diagnose_aggregate(baseline_codebase_agg,
+          top: 99_999,
+          languages: project_langs,
+          behavior_map: filtered_behaviors
+        )
+      end)
+
+    :telemetry.execute(
+      [:codeqa, :block_impact, :codebase_cosines],
+      %{duration: cosines_us},
+      %{behavior_count: length(baseline_codebase_cosines)}
+    )
+
+    file_results = pipeline_result["files"]
+
+    # The incremental aggregate and the language list depend only on the full
+    # result set, so they are built once here instead of once per file task.
+    inc_agg = build_incremental_agg(file_results)
+    result_langs = project_languages(file_results)
+
+    updated_files =
+      file_results
+      |> Task.async_stream(
+        fn {path, file_data} ->
+          content = Map.get(files_map, path, "")
+          baseline_file_metrics = Map.get(file_data, "metrics", %{})
+
+          {nodes, file_measurements} =
+            compute_nodes_timed(
+              path,
+              content,
+              baseline_file_metrics,
+              %{inc_agg: inc_agg, project_langs: result_langs},
+              baseline_codebase_cosines,
+              nodes_top,
+              filtered_behaviors
+            )
+
+          :telemetry.execute([:codeqa, :block_impact, :file], file_measurements, %{path: path})
+
+          {path, Map.put(file_data, "nodes", nodes)}
+        end,
+        max_concurrency: workers,
+        ordered: false,
+        timeout: :infinity
+      )
+      |> Map.new(fn {:ok, {path, data}} -> {path, data} end)
+
+    :telemetry.execute(
+      [:codeqa, :block_impact, :analyze],
+      %{duration: now() - t0},
+      %{file_count: map_size(file_results)}
+    )
+
+    Map.put(pipeline_result, "files", updated_files)
+  end
+
+  defp compute_nodes_timed(
+         path,
+         content,
+         baseline_file_metrics,
+         shared,
+         baseline_codebase_cosines,
+         nodes_top,
+         cached_behaviors
+       ) do
+    if content == "" do
+      # Same keys as the non-empty branch, so telemetry handlers see one shape.
+      timings = %{duration: 0, tokenize_us: 0, parse_us: 0, file_cosines_us: 0}
+      {[], Map.merge(timings, %{node_count: 0, token_count: 0, bytes: 0})}
+    else
+      t0 = now()
+
+      {root_tokens, tokenize_us} = timed(fn -> TokenNormalizer.normalize_structural(content) end)
+      {top_level_nodes, parse_us} = timed(fn -> Parser.detect_blocks(root_tokens, Unknown) end)
+
+      baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics)
+      lang_mod = CodeQA.Language.detect(path)
+      language = lang_mod.name()
+
+      {baseline_file_cosines, file_cosines_us} =
+        timed(fn ->
+          SampleRunner.diagnose_aggregate(baseline_file_agg,
+            top: 99_999,
+            language: language,
+            behavior_map: cached_behaviors
+          )
+        end)
+
+      node_ctx = %{
+        inc_agg: shared.inc_agg,
+        old_file_triples: file_metrics_to_triples(baseline_file_metrics),
+        project_langs: shared.project_langs,
+        cached_behaviors: cached_behaviors,
+        lang_mod: lang_mod,
+        baseline_file_metrics: baseline_file_metrics
+      }
+
+      nodes =
+        top_level_nodes
+        |> Enum.map(fn node ->
+          serialize_node(
+            node,
+            path,
+            root_tokens,
+            baseline_file_cosines,
+            baseline_codebase_cosines,
+            nodes_top,
+            language,
+            node_ctx
+          )
+        end)
+        |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end)
+
+      measurements = %{
+        duration: now() - t0,
+        tokenize_us: tokenize_us,
+        parse_us: parse_us,
+        file_cosines_us: file_cosines_us,
+        node_count: length(top_level_nodes),
+        token_count: length(root_tokens),
+        bytes: byte_size(content)
+      }
+
+      {nodes, measurements}
+    end
+  end
+
+  defp serialize_node(
+         node,
+         path,
+         root_tokens,
+         baseline_file_cosines,
+         baseline_codebase_cosines,
+         nodes_top,
+         language,
+         node_ctx,
+         parent_context \\ nil
+       ) do
+    token_count = length(node.tokens)
+    classified = NodeClassifier.classify(node, node_ctx.lang_mod, parent_context)
+    block_type = TypedNodeKind.of(classified)
+
+    # Blocks under the token threshold are still listed, just not scored.
+    potentials =
+      if token_count >= @min_tokens do
+        compute_potentials_timed(
+          node,
+          path,
+          root_tokens,
+          baseline_file_cosines,
+          baseline_codebase_cosines,
+          nodes_top,
+          language,
+          node_ctx,
+          block_type
+        )
+      else
+        []
+      end
+
+    # Each child is classified with the parent tokens that precede it on its line.
+    serialize_child = fn child ->
+      serialize_node(
+        child,
+        path,
+        root_tokens,
+        baseline_file_cosines,
+        baseline_codebase_cosines,
+        nodes_top,
+        language,
+        node_ctx,
+        parent_context_for(node.tokens, child)
+      )
+    end
+
+    children =
+      node.children
+      |> Enum.map(serialize_child)
+      |> Enum.sort_by(&{&1["start_line"], &1["column_start"]})
+
+    first = List.first(node.tokens)
+
+    %{
+      "start_line" => node.start_line,
+      "end_line" => node.end_line,
+      "column_start" => (first && first.col) || 0,
+      "char_length" => node.tokens |> Enum.map(&byte_size(&1.content)) |> Enum.sum(),
+      "type" => Atom.to_string(block_type),
+      "token_count" => token_count,
+      "refactoring_potentials" => potentials,
+      "children" => children
+    }
+  end
+
+  # Collects the parent tokens that sit on `child`'s source line before the
+  # child's first token: everything after the last newline, with leading
+  # whitespace dropped so the classification signals see the keyword at
+  # indent 0. This lets NodeClassifier see the keyword that drove the
+  # bracket-split (`alias`, `@name`, etc.) when classifying a sub-block.
+  # A child without tokens gets an empty context.
+  defp parent_context_for(_parent_tokens, %{tokens: []}), do: []
+
+  defp parent_context_for(parent_tokens, %{tokens: [child_first | _]}) do
+    newline = CodeQA.AST.Lexing.NewlineToken.kind()
+    whitespace = CodeQA.AST.Lexing.WhitespaceToken.kind()
+
+    # Walk back from the child to the previous newline, then restore order.
+    same_line_prefix =
+      parent_tokens
+      |> Enum.take_while(&(&1 != child_first))
+      |> Enum.reverse()
+      |> Enum.take_while(&(&1.kind != newline))
+      |> Enum.reverse()
+
+    Enum.drop_while(same_line_prefix, &(&1.kind == whitespace))
+  end
+
+  defp compute_potentials_timed(
+         %Node{tokens: block_tokens} = node,
+         path,
+         root_tokens,
+         baseline_file_cosines,
+         baseline_codebase_cosines,
+         nodes_top,
+         language,
+         node_ctx,
+         block_type
+       ) do
+    started_at = now()
+    %{inc_agg: inc_agg, old_file_triples: old_triples} = node_ctx
+    baseline_metrics = node_ctx.baseline_file_metrics
+
+    {stripped, reconstruct_us} =
+      timed(fn -> FileImpact.reconstruct_without(root_tokens, node) end)
+
+    block_content = Enum.map_join(block_tokens, "", & &1.content)
+
+    {without_file_metrics, analyze_file_us} =
+      timed(fn ->
+        Analyzer.analyze_file_for_loo_partial(path, stripped, baseline_metrics, block_content)
+      end)
+
+    # Swap this file's contribution in the incremental aggregate instead of
+    # re-aggregating the whole codebase.
+    {without_codebase_agg, aggregate_us} =
+      timed(fn ->
+        inc_agg
+        |> swap_file_in_agg(old_triples, file_metrics_to_triples(without_file_metrics))
+        |> incremental_agg_to_aggregate()
+      end)
+
+    scoring_opts = [
+      top: nodes_top,
+      language: language,
+      languages: node_ctx.project_langs,
+      behavior_map: node_ctx.cached_behaviors,
+      block_type: block_type
+    ]
+
+    {potentials, refactoring_us} =
+      timed(fn ->
+        RefactoringPotentials.compute(
+          baseline_file_cosines,
+          without_file_metrics,
+          baseline_codebase_cosines,
+          without_codebase_agg,
+          scoring_opts
+        )
+      end)
+
+    measurements = %{
+      duration: now() - started_at,
+      reconstruct_us: reconstruct_us,
+      analyze_file_us: analyze_file_us,
+      aggregate_us: aggregate_us,
+      refactoring_us: refactoring_us
+    }
+
+    metadata = %{path: path, token_count: length(block_tokens)}
+    :telemetry.execute([:codeqa, :block_impact, :node], measurements, metadata)
+
+    potentials
+  end
+
+  # Flattens `%{metric => %{key => value}}` into `{metric, key, value}` triples.
+  # Metrics whose data is not a map and values that are not numbers are
+  # skipped; `/ 1` turns every kept value into a float.
+  defp file_metrics_to_triples(metrics) when is_map(metrics) do
+    for {metric_name, metric_data} <- metrics,
+        is_map(metric_data),
+        {key, value} <- metric_data,
+        is_number(value) do
+      as_float = value / 1
+      {metric_name, key, as_float}
+    end
+  end
+
+  # Builds per-`{metric, key}` running statistics (sum, sum of squares, min,
+  # max, count) over every file's numeric metrics.
+  defp build_incremental_agg(file_results) do
+    triples =
+      for {_path, file_data} <- file_results,
+          triple <- file_metrics_to_triples(Map.get(file_data, "metrics", %{})),
+          do: triple
+
+    triples
+    |> Enum.group_by(fn {metric, key, _value} -> {metric, key} end, &elem(&1, 2))
+    |> Map.new(fn {metric_key, values} ->
+      sum_sq = Enum.reduce(values, 0.0, fn v, acc -> acc + v * v end)
+      totals = %{sum: Enum.sum(values), sum_sq: sum_sq, count: length(values)}
+      {metric_key, Map.merge(totals, %{min: Enum.min(values), max: Enum.max(values)})}
+    end)
+  end
+
+  # Replaces one file's contribution to the running statistics: old values are
+  # subtracted from the sums and new ones added (a key missing on one side
+  # counts as 0.0). `count` stays as is, and min/max can only widen.
+  defp swap_file_in_agg(inc_agg, old_triples, new_triples) do
+    old_values = Map.new(old_triples, fn {metric, key, val} -> {{metric, key}, val} end)
+    new_values = Map.new(new_triples, fn {metric, key, val} -> {{metric, key}, val} end)
+
+    (Map.keys(old_values) ++ Map.keys(new_values))
+    |> Enum.uniq()
+    |> Enum.filter(&Map.has_key?(inc_agg, &1))
+    |> Enum.reduce(inc_agg, fn stat_key, acc ->
+      removed = Map.get(old_values, stat_key, 0.0)
+      added = Map.get(new_values, stat_key, 0.0)
+      Map.update!(acc, stat_key, fn stats ->
+        %{
+          sum: stats.sum - removed + added,
+          sum_sq: stats.sum_sq - removed * removed + added * added,
+          min: min(stats.min, added),
+          max: max(stats.max, added),
+          count: stats.count
+        }
+      end)
+    end)
+  end
+
+  # Turns running statistics into the `%{metric => %{"mean_<key>" => ...}}` shape
+  # with mean/std/min/max per key, each rounded to 4 decimals. The variance is
+  # `sum_sq / n - mean * mean`, clamped so it never goes below 0.0.
+  defp incremental_agg_to_aggregate(inc_agg) do
+    round4 = &Float.round(&1 * 1.0, 4)
+
+    Enum.reduce(inc_agg, %{}, fn {{metric, key}, %{count: n} = stats}, acc ->
+      mean = if n > 0, do: stats.sum / n, else: 0.0
+      variance = if n > 0, do: max(stats.sum_sq / n - mean * mean, 0.0), else: 0.0
+
+      summary = %{
+        "mean_#{key}" => round4.(mean),
+        "std_#{key}" => round4.(:math.sqrt(variance)),
+        "min_#{key}" => round4.(stats.min),
+        "max_#{key}" => round4.(stats.max)
+      }
+
+      Map.update(acc, metric, summary, &Map.merge(&1, summary))
+    end)
+  end
+
+  # Keeps behaviors with no `"_languages"` list or with a project language in it.
+  defp filter_behaviors_by_languages(behaviors_map, project_langs) do
+    applies? = fn {_behavior, data} ->
+      langs = Map.get(data, "_languages", [])
+      langs == [] or Enum.any?(langs, &(&1 in project_langs))
+    end
+
+    for {category, behaviors} <- behaviors_map, into: %{} do
+      {category, Enum.filter(behaviors, applies?)}
+    end
+  end
+
+  # Distinct detected language names for the map's paths, minus "unknown".
+  defp project_languages(path_keyed_map) do
+    path_keyed_map
+    |> Enum.map(fn {path, _value} -> CodeQA.Language.detect(path).name() end)
+    |> Enum.uniq()
+    |> List.delete("unknown")
+  end
+
+  # Runs `fun` and returns `{result, elapsed_microseconds}`.
+  defp timed(fun) do
+    started_at = now()
+    {fun.(), now() - started_at}
+  end
+
+  defp now, do: System.monotonic_time(:microsecond)
+end
diff --git a/lib/codeqa/cli.ex b/lib/codeqa/cli.ex
index 210654d5..3e36d57c 100644
--- a/lib/codeqa/cli.ex
+++ b/lib/codeqa/cli.ex
@@ -3,27 +3,32 @@ defmodule CodeQA.CLI do
 
   @commands %{
     "analyze" => CodeQA.CLI.Analyze,
-    "compare" => CodeQA.CLI.Compare,
     "history" => CodeQA.CLI.History,
     "correlate" => CodeQA.CLI.Correlate,
-    "stopwords" => CodeQA.CLI.Stopwords,
-    "health-report" => CodeQA.CLI.HealthReport
+    "health-report" => CodeQA.CLI.HealthReport,
+    "diagnose" => CodeQA.CLI.Diagnose
   }
 
   def main(args) do
     case args do
-      [cmd | rest] when is_map_key(@commands, cmd) -> @commands[cmd].run(rest)
-      _ -> print_usage()
+      [cmd | rest] when is_map_key(@commands, cmd) ->
+        output = @commands[cmd].run(rest)
+        unless output == "", do: IO.puts(output)
+        output
+
+      _ ->
+        output = build_usage()
+        IO.puts(output)
+        output
     end
   end
 
-  defp print_usage do
+  defp build_usage do
     command_usages =
       @commands
       |> Enum.sort_by(fn {name, _} -> name end)
-      |> Enum.map(fn {_name, mod} -> mod.usage() end)
-      |> Enum.join("\n")
+      |> Enum.map_join("\n", fn {_name, mod} -> mod.usage() end)
 
-    IO.puts("Usage: codeqa <command> [options]\n\n" <> command_usages)
+    "Usage: codeqa <command> [options]\n\n" <> command_usages
   end
 end
diff --git a/lib/codeqa/cli/analyze.ex b/lib/codeqa/cli/analyze.ex
index 4473011e..9c1f8402 100644
--- a/lib/codeqa/cli/analyze.ex
+++ b/lib/codeqa/cli/analyze.ex
@@ -4,6 +4,9 @@ defmodule CodeQA.CLI.Analyze do
   @behaviour CodeQA.CLI.Command
 
   alias CodeQA.CLI.Options
+  alias CodeQA.Config
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
 
   @version "0.1.0"
 
@@ -32,19 +35,18 @@ defmodule CodeQA.CLI.Analyze do
 
   @impl CodeQA.CLI.Command
   def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
+    usage()
   end
 
   def run(args) do
     {opts, [path], _} =
-      Options.parse(args, [output: :string], [o: :output])
-
-    if opts[:telemetry], do: CodeQA.Telemetry.setup()
+      Options.parse(args, [output: :string], o: :output)
 
     Options.validate_dir!(path)
+    Config.load(path)
 
-    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path)
-    files = CodeQA.Collector.collect_files(path, ignore_patterns: ignore_patterns)
+    files =
+      Collector.collect_files(path, Options.parse_ignore_paths(opts[:ignore_paths]))
 
     if map_size(files) == 0 do
       IO.puts(:stderr, "Warning: no source files found in '#{path}'")
@@ -53,10 +55,11 @@ defmodule CodeQA.CLI.Analyze do
 
     print_progress(opts, files)
 
-    analyze_opts = Options.build_analyze_opts(opts)
+    analyze_opts =
+      Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts()
 
     start_time = System.monotonic_time(:millisecond)
-    results = CodeQA.Analyzer.analyze_codebase(files, analyze_opts)
+    results = Analyzer.analyze_codebase(files, analyze_opts)
     end_time = System.monotonic_time(:millisecond)
 
     IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms")
@@ -80,14 +83,13 @@ defmodule CodeQA.CLI.Analyze do
 
     case opts[:output] do
       nil ->
-        IO.puts(json)
+        json
 
       file ->
         File.write!(file, json)
         IO.puts(:stderr, "Report written to #{file}")
+        ""
     end
-
-    if opts[:telemetry], do: CodeQA.Telemetry.print_report()
   end
 
   defp print_progress(opts, files) do
diff --git a/lib/codeqa/cli/command.ex b/lib/codeqa/cli/command.ex
index e2702a11..c6cd4a19 100644
--- a/lib/codeqa/cli/command.ex
+++ b/lib/codeqa/cli/command.ex
@@ -1,6 +1,6 @@
 defmodule CodeQA.CLI.Command do
   @moduledoc "Behaviour for CLI commands."
 
-  @callback run([String.t()]) :: :ok
+  @callback run([String.t()]) :: String.t()
   @callback usage() :: String.t()
 end
diff --git a/lib/codeqa/cli/compare.ex b/lib/codeqa/cli/compare.ex
deleted file mode 100644
index b86bc32f..00000000
--- a/lib/codeqa/cli/compare.ex
+++ /dev/null
@@ -1,242 +0,0 @@
-defmodule CodeQA.CLI.Compare do
-  @moduledoc false
-
-  @behaviour CodeQA.CLI.Command
-
-  alias CodeQA.CLI.Options
-
-  @version "0.1.0"
-
-  @impl CodeQA.CLI.Command
-  def usage do
-    """
-    Usage: codeqa compare <path> [options]
-
-      Compare code quality metrics between two git refs.
-
-    Options:
-      --base-ref REF        Base git ref to compare from (required)
-      --head-ref REF        Head git ref to compare to (default: HEAD)
-      --changes-only        Only analyze changed files
-      --all-files           Analyze all source files (default)
-      --format FORMAT       Output format: json, markdown, or github (default: json)
-      --output MODE         Output mode: auto, summary, or changes (default: auto)
-      --progress            Show per-file progress on stderr
-      -w, --workers N       Number of parallel workers
-      --cache               Enable caching file metrics
-      --cache-dir DIR       Directory to store cache (default: .codeqa_cache)
-      -t, --timeout MS      Timeout for similarity analysis (default: 5000)
-      --show-ncd            Compute and show NCD similarity metric
-      --ncd-top N           Number of top similar files to show per file
-      --ncd-paths PATHS     Comma-separated list of paths to compute NCD for
-      --show-files          Include individual file metrics in the output
-      --show-file-paths P   Comma-separated list of paths to include in the output
-      --ignore-paths PATHS  Comma-separated list of path patterns to ignore (supports wildcards, e.g. "test/*,docs/*")
-    """
-  end
-
-  @impl CodeQA.CLI.Command
-  def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
-  end
-
-  def run(args) do
-    {opts, [path], _} =
-      Options.parse(args,
-        [
-          base_ref: :string,
-          head_ref: :string,
-          changes_only: :boolean,
-          all_files: :boolean,
-          format: :string,
-          output: :string
-        ],
-        []
-      )
-
-    if opts[:telemetry], do: CodeQA.Telemetry.setup()
-
-    base_ref = opts[:base_ref] || raise "Missing --base-ref"
-    head_ref = opts[:head_ref] || "HEAD"
-    changes_only = if opts[:changes_only], do: true, else: false
-    format = opts[:format] || "json"
-    output_mode = opts[:output] || "auto"
-
-    Options.validate_dir!(path)
-
-    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path)
-    opts = Keyword.put(opts, :ignore_patterns, ignore_patterns)
-
-    {base_result, head_result, changes} =
-      run_comparison(path, base_ref, head_ref, changes_only, opts)
-
-    comparison =
-      CodeQA.Comparator.compare_results(base_result, head_result, changes)
-      |> enrich_metadata(base_ref, head_ref, changes_only)
-      |> filter_files_for_output(opts, format)
-
-    output_comparison(comparison, format, output_mode)
-
-    if opts[:telemetry], do: CodeQA.Telemetry.print_report()
-  end
-
-  defp run_comparison(path, base_ref, head_ref, changes_only, opts) do
-    ignore_patterns = opts[:ignore_patterns] || []
-    changes = CodeQA.Git.changed_files(path, base_ref, head_ref)
-    changes = CodeQA.Collector.reject_ignored(changes, ignore_patterns, & &1.path)
-
-    file_paths =
-      if changes_only do
-        IO.puts(:stderr, "Comparing #{length(changes)} changed files...")
-        Enum.map(changes, & &1.path)
-      else
-        IO.puts(:stderr, "Comparing all source files...")
-        nil
-      end
-
-    empty = %{"files" => %{}, "codebase" => %{"aggregate" => %{}, "similarity" => %{}}}
-
-    if changes_only and length(changes) == 0 do
-      IO.puts(:stderr, "No source files changed — nothing to compare.")
-      {empty, empty, []}
-    else
-      base_files = CodeQA.Git.collect_files_at_ref(path, base_ref, file_paths)
-      head_files = CodeQA.Git.collect_files_at_ref(path, head_ref, file_paths)
-      base_files = CodeQA.Collector.reject_ignored_map(base_files, ignore_patterns)
-      head_files = CodeQA.Collector.reject_ignored_map(head_files, ignore_patterns)
-
-      if map_size(base_files) == 0 and map_size(head_files) == 0 do
-        IO.puts(:stderr, "Warning: no source files found at either ref")
-        exit({:shutdown, 1})
-      end
-
-      print_progress(opts, base_files, head_files)
-
-      analyze_opts = Options.build_analyze_opts(opts)
-
-      base_result =
-        if map_size(base_files) > 0,
-          do: CodeQA.Analyzer.analyze_codebase(base_files, analyze_opts),
-          else: empty
-
-      head_result =
-        if map_size(head_files) > 0,
-          do: CodeQA.Analyzer.analyze_codebase(head_files, analyze_opts),
-          else: empty
-
-      changes = if changes_only, do: changes, else: synthesize_changes(base_files, head_files)
-
-      {base_result, head_result, changes}
-    end
-  end
-
-  defp print_progress(opts, base_files, head_files) do
-    if opts[:progress] do
-      step_prefix = if opts[:show_ncd], do: "1/5 ", else: "1/1 "
-
-      IO.puts(
-        :stderr,
-        "  #{step_prefix}Analyzing base (#{map_size(base_files)} files) and head (#{map_size(head_files)} files)..."
-      )
-    else
-      IO.puts(
-        :stderr,
-        "Analyzing base (#{map_size(base_files)} files) and head (#{map_size(head_files)} files)..."
-      )
-    end
-  end
-
-  defp enrich_metadata(comparison, base_ref, head_ref, changes_only) do
-    comparison
-    |> put_in(["metadata", "base_ref"], base_ref)
-    |> put_in(["metadata", "head_ref"], head_ref)
-    |> put_in(["metadata", "changes_only"], changes_only)
-    |> put_in(["metadata", "version"], @version)
-    |> put_in(["metadata", "timestamp"], DateTime.utc_now() |> DateTime.to_iso8601())
-  end
-
-  defp output_comparison(comparison, "markdown", output_mode) do
-    IO.puts(CodeQA.Formatter.format_markdown(comparison, output_mode))
-  end
-
-  defp output_comparison(comparison, "github", output_mode) do
-    IO.puts(CodeQA.Formatter.format_github(comparison, output_mode))
-  end
-
-  defp output_comparison(comparison, _format, output_mode) do
-    codebase_summary = CodeQA.Summarizer.summarize_codebase(comparison)
-
-    file_summaries =
-      Map.new(Map.get(comparison, "files", %{}), fn {path, data} ->
-        {path, CodeQA.Summarizer.summarize_file(path, data)}
-      end)
-
-    IO.puts(
-      Jason.encode!(build_json_output(comparison, codebase_summary, file_summaries, output_mode),
-        pretty: true
-      )
-    )
-  end
-
-  defp build_json_output(comparison, codebase_summary, file_summaries, output_mode) do
-    result = %{"metadata" => comparison["metadata"]}
-
-    result =
-      if output_mode in ["auto", "summary"] do
-        result
-        |> Map.put("summary", codebase_summary)
-        |> Map.put("codebase", comparison["codebase"])
-      else
-        result
-      end
-
-    if output_mode in ["auto", "changes"] and Map.has_key?(comparison, "files") do
-      files_with_summaries =
-        Map.new(comparison["files"], fn {path, data} ->
-          {path, Map.put(data, "summary", Map.get(file_summaries, path, %{}))}
-        end)
-
-      Map.put(result, "files", files_with_summaries)
-    else
-      result
-    end
-  end
-
-  defp synthesize_changes(base_files, head_files) do
-    all_paths = MapSet.union(MapSet.new(Map.keys(base_files)), MapSet.new(Map.keys(head_files)))
-
-    all_paths
-    |> Enum.sort()
-    |> Enum.map(fn path ->
-      status =
-        cond do
-          Map.has_key?(base_files, path) and Map.has_key?(head_files, path) -> "modified"
-          Map.has_key?(head_files, path) -> "added"
-          true -> "deleted"
-        end
-
-      %CodeQA.Git.ChangedFile{path: path, status: status}
-    end)
-  end
-
-  defp filter_files_for_output(results, _opts, format) when format in ["github", "markdown"],
-    do: results
-
-  defp filter_files_for_output(results, opts, _format) do
-    cond do
-      opts[:show_files] ->
-        results
-
-      opts[:show_file_paths] ->
-        target_paths = String.split(opts[:show_file_paths], ",") |> MapSet.new()
-
-        filtered =
-          Map.filter(results["files"], fn {path, _} -> MapSet.member?(target_paths, path) end)
-
-        Map.put(results, "files", filtered)
-
-      true ->
-        Map.delete(results, "files")
-    end
-  end
-end
diff --git a/lib/codeqa/cli/correlate.ex b/lib/codeqa/cli/correlate.ex
index a3fd2f73..c38a2481 100644
--- a/lib/codeqa/cli/correlate.ex
+++ b/lib/codeqa/cli/correlate.ex
@@ -4,6 +4,7 @@ defmodule CodeQA.CLI.Correlate do
   @behaviour CodeQA.CLI.Command
 
   alias CodeQA.CLI.Options
+  alias CodeQA.CLI.UI
 
   @impl CodeQA.CLI.Command
   def usage do
@@ -25,7 +26,7 @@ defmodule CodeQA.CLI.Correlate do
 
   @impl CodeQA.CLI.Command
   def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
+    usage()
   end
 
   def run(args) do
@@ -82,7 +83,7 @@ defmodule CodeQA.CLI.Correlate do
     sorted = Enum.sort_by(correlations, &abs(&1["correlation"]), :desc)
     top = Enum.take(sorted, top_n)
 
-    IO.puts(Jason.encode!(top, pretty: true))
+    Jason.encode!(top, pretty: true)
   end
 
   defp extract_metric_series(path, files) do
@@ -204,7 +205,16 @@ defmodule CodeQA.CLI.Correlate do
 
     pairs_stream
     |> Task.async_stream(
-      &correlate_pair(&1, counter, total_pairs, update_interval, total_start, series, category_map, opts),
+      &correlate_pair(
+        &1,
+        counter,
+        total_pairs,
+        update_interval,
+        total_start,
+        series,
+        category_map,
+        opts
+      ),
       max_concurrency: System.schedulers_online(),
       timeout: :infinity
     )
@@ -257,9 +267,7 @@ defmodule CodeQA.CLI.Correlate do
       eta_ms = round((total_pairs - current) * avg_time)
 
       output =
-        CodeQA.CLI.UI.progress_bar(current, total_pairs,
-          eta: CodeQA.CLI.UI.format_eta(eta_ms)
-        )
+        UI.progress_bar(current, total_pairs, eta: UI.format_eta(eta_ms))
 
       IO.write(:stderr, "\r" <> output)
       if current == total_pairs, do: IO.puts(:stderr, "")
diff --git a/lib/codeqa/cli/diagnose.ex b/lib/codeqa/cli/diagnose.ex
new file mode 100644
index 00000000..93c2e8d0
--- /dev/null
+++ b/lib/codeqa/cli/diagnose.ex
@@ -0,0 +1,84 @@
+defmodule CodeQA.CLI.Diagnose do
+  @moduledoc false
+
+  @behaviour CodeQA.CLI.Command
+
+  @impl CodeQA.CLI.Command
+  def usage do
+    """
+    Usage: codeqa diagnose [options]
+
+      Diagnose likely code quality issues using cosine similarity against behavior profiles.
+
+    Options:
+      --path PATH           File or directory path to analyze (required)
+      --mode MODE           Output mode: aggregate (default) or per-file
+      --top N               Number of top issues to display (default: 15)
+      --format FORMAT       Output format: plain (default) or json
+      --combined-top N      Number of worst offender files per behavior (default: 2)
+    """
+  end
+
+  @impl CodeQA.CLI.Command
+  def run(args) when args in [["--help"], ["-h"]] do
+    usage()
+  end
+
+  # Parses the switches, validates --path, then delegates to CodeQA.Diagnostics.run/1
+  # and returns its result. Exits with {:shutdown, 1} when --path is missing or the
+  # path does not exist.
+  def run(args) do
+    {opts, _, _} =
+      OptionParser.parse(args,
+        strict: [
+          path: :string,
+          mode: :string,
+          top: :integer,
+          format: :string,
+          combined_top: :integer
+        ]
+      )
+
+    path = opts[:path]
+
+    if is_nil(path) do
+      IO.puts(:stderr, "Error: --path required")
+      exit({:shutdown, 1})
+    end
+
+    if not File.exists?(path) do
+      IO.puts(:stderr, "Error: '#{path}' does not exist")
+      exit({:shutdown, 1})
+    end
+
+    CodeQA.Diagnostics.run(
+      path: path,
+      mode: parse_mode(opts[:mode]),
+      top: opts[:top] || 15,
+      format: parse_format(opts[:format]),
+      combined_top: opts[:combined_top] || 2
+    )
+  end
+
+  # Maps --mode to an atom. Unrecognized values fall back to :aggregate, with a
+  # warning on stderr so a typo is not silently ignored.
+  defp parse_mode(nil), do: :aggregate
+  defp parse_mode("aggregate"), do: :aggregate
+  defp parse_mode("per-file"), do: :per_file
+
+  defp parse_mode(other) do
+    IO.puts(:stderr, "Warning: unknown mode '#{other}', using 'aggregate'")
+    :aggregate
+  end
+
+  # Maps --format to an atom. Unrecognized values fall back to :plain, with a
+  # warning on stderr so a typo is not silently ignored.
+  defp parse_format(nil), do: :plain
+  defp parse_format("plain"), do: :plain
+  defp parse_format("json"), do: :json
+
+  defp parse_format(other) do
+    IO.puts(:stderr, "Warning: unknown format '#{other}', using 'plain'")
+    :plain
+  end
+end
diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex
index 8f39186f..5dc8e6b6 100644
--- a/lib/codeqa/cli/health_report.ex
+++ b/lib/codeqa/cli/health_report.ex
@@ -4,6 +4,11 @@ defmodule CodeQA.CLI.HealthReport do
   @behaviour CodeQA.CLI.Command
 
   alias CodeQA.CLI.Options
+  alias CodeQA.Config
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+  alias CodeQA.Git
+  alias CodeQA.HealthReport
 
   @impl CodeQA.CLI.Command
   def usage do
@@ -24,33 +29,44 @@ defmodule CodeQA.CLI.HealthReport do
       --cache-dir DIR       Directory to store cache (default: .codeqa_cache)
       -t, --timeout MS      Timeout for similarity analysis (default: 5000)
       --ignore-paths PATHS  Comma-separated list of path patterns to ignore (supports wildcards, e.g. "test/*,docs/*")
+      --base-ref REF        Base git ref for PR comparison (enables delta and block scoping)
+      --head-ref REF        Head git ref (default: HEAD)
+      --comment             Multi-part mode: writes numbered part files to TMPDIR for PR comments
     """
   end
 
   @impl CodeQA.CLI.Command
   def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
+    usage()
   end
 
+  @command_options [
+    output: :string,
+    config: :string,
+    detail: :string,
+    top: :integer,
+    format: :string,
+    ignore_paths: :string,
+    base_ref: :string,
+    head_ref: :string,
+    telemetry: :boolean,
+    comment: :boolean
+  ]
+
   def run(args) do
-    {opts, [path], _} =
-      Options.parse(args,
-        [
-          output: :string,
-          config: :string,
-          detail: :string,
-          top: :integer,
-          format: :string
-        ],
-        [o: :output]
-      )
+    {opts, [path], _} = Options.parse(args, @command_options, o: :output)
+    Options.validate_dir!(path)
+    extra_ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths])
 
-    if opts[:telemetry], do: CodeQA.Telemetry.setup()
+    base_ref = opts[:base_ref]
+    head_ref = opts[:head_ref] || "HEAD"
 
-    Options.validate_dir!(path)
+    collect_t0 = System.monotonic_time(:microsecond)
 
-    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path)
-    files = CodeQA.Collector.collect_files(path, ignore_patterns: ignore_patterns)
+    files =
+      Collector.collect_files(path, extra_ignore_patterns)
+
+    collect_us = System.monotonic_time(:microsecond) - collect_t0
 
     if map_size(files) == 0 do
       IO.puts(:stderr, "Warning: no source files found in '#{path}'")
@@ -59,14 +75,21 @@ defmodule CodeQA.CLI.HealthReport do
 
     IO.puts(:stderr, "Analyzing #{map_size(files)} files for health report...")
 
-    analyze_opts = Options.build_analyze_opts(opts)
+    telemetry_pid = if opts[:telemetry], do: attach_telemetry()
+
+    analyze_opts =
+      Options.build_analyze_opts(opts) ++
+        Config.near_duplicate_blocks_opts() ++ [compute_nodes: true]
 
     start_time = System.monotonic_time(:millisecond)
-    results = CodeQA.Analyzer.analyze_codebase(files, analyze_opts)
+    results = Analyzer.analyze_codebase(files, analyze_opts)
     end_time = System.monotonic_time(:millisecond)
 
     IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms")
 
+    if telemetry_pid,
+      do: record_phase(telemetry_pid, :analyze, (end_time - start_time) * 1_000)
+
     total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum()
 
     results =
@@ -77,29 +100,108 @@ defmodule CodeQA.CLI.HealthReport do
         "total_bytes" => total_bytes
       })
 
+    {base_results, changed_files, diff_line_ranges} =
+      if base_ref do
+        IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...")
+        base_files = Git.collect_files_at_ref(path, base_ref)
+        changed = Git.changed_files(path, base_ref, head_ref)
+
+        diff_ranges =
+          case Git.diff_line_ranges(path, base_ref, head_ref) do
+            {:ok, ranges} ->
+              ranges
+
+            {:error, reason} ->
+              IO.puts(:stderr, "Warning: failed to parse diff line ranges: #{inspect(reason)}")
+              IO.puts(:stderr, "Block scoping disabled - showing all blocks in changed files")
+              %{}
+          end
+
+        IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...")
+        base_res = Analyzer.analyze_codebase(base_files, analyze_opts)
+
+        {base_res, changed, diff_ranges}
+      else
+        {nil, [], %{}}
+      end
+
     detail = parse_detail(opts[:detail])
     format = parse_format(opts[:format])
     top_n = opts[:top] || 5
 
+    report_gen_t0 = System.monotonic_time(:microsecond)
+
     report =
-      CodeQA.HealthReport.generate(results,
+      HealthReport.generate(results,
         config: opts[:config],
         detail: detail,
-        top: top_n
+        top: top_n,
+        base_results: base_results,
+        changed_files: changed_files,
+        diff_line_ranges: diff_line_ranges
       )
 
-    markdown = CodeQA.HealthReport.to_markdown(report, detail, format)
+    report_gen_us = System.monotonic_time(:microsecond) - report_gen_t0
 
-    case opts[:output] do
-      nil ->
-        IO.puts(markdown)
+    if telemetry_pid do
+      record_phase(telemetry_pid, :collect, collect_us)
+      record_phase(telemetry_pid, :report_gen, report_gen_us)
+    end
+
+    output =
+      if opts[:comment] do
+        write_comment_parts(report, detail)
+      else
+        render_t0 = System.monotonic_time(:microsecond)
+        markdown = HealthReport.to_markdown(report, detail, format)
+        render_us = System.monotonic_time(:microsecond) - render_t0
+        if telemetry_pid, do: record_phase(telemetry_pid, :render, render_us)
+
+        case opts[:output] do
+          nil ->
+            markdown
+
+          file ->
+            File.write!(file, markdown)
+            IO.puts(:stderr, "Health report written to #{file}")
+            ""
+        end
+      end
+
+    if telemetry_pid, do: print_telemetry(telemetry_pid)
+
+    output
+  end
 
-      file ->
-        File.write!(file, markdown)
-        IO.puts(:stderr, "Health report written to #{file}")
+  defp write_comment_parts(report, detail) do
+    tmpdir = System.get_env("TMPDIR", "/tmp")
+    parts = HealthReport.Formatter.render_parts(report, detail: detail)
+
+    # Write each part to a numbered file
+    Enum.with_index(parts, 1)
+    |> Enum.each(fn {content, n} ->
+      path = Path.join(tmpdir, "codeqa-part-#{n}.md")
+      File.write!(path, content)
+      IO.puts(:stderr, "Part #{n} written to #{path} (#{byte_size(content)} bytes)")
+    end)
+
+    # Ensure at least 3 parts exist for stale cleanup
+    actual_count = length(parts)
+    padded_count = max(actual_count, 3)
+
+    for n <- (actual_count + 1)..padded_count//1 do
+      path = Path.join(tmpdir, "codeqa-part-#{n}.md")
+      placeholder = "> _No content for this section._\n\n<!-- codeqa-health-report-#{n} -->"
+      File.write!(path, placeholder)
+      IO.puts(:stderr, "Part #{n} (placeholder) written to #{path}")
     end
 
-    if opts[:telemetry], do: CodeQA.Telemetry.print_report()
+    # Write part count for run.sh to read
+    count_path = Path.join(tmpdir, "codeqa-part-count.txt")
+    File.write!(count_path, to_string(padded_count))
+    IO.puts(:stderr, "Part count (#{padded_count}) written to #{count_path}")
+
+    ""
   end
 
   defp parse_detail(nil), do: :default
@@ -120,4 +222,329 @@ defmodule CodeQA.CLI.HealthReport do
     IO.puts(:stderr, "Warning: unknown format '#{other}', using 'plain'")
     :plain
   end
+
+  # ---------------------------------------------------------------------------
+  # Pipeline telemetry (block-impact + stage + per-metric + CLI phases)
+  # ---------------------------------------------------------------------------
+
+  @telemetry_handler "codeqa-telemetry-reporter"
+
+  defp attach_telemetry do
+    # Starts the Agent that accumulates telemetry and subscribes handle_event/4 to the
+    # :codeqa events below. Returns the Agent pid.
+    initial_state = %{
+      nodes: [],
+      files: [],
+      codebase_cosines_us: 0,
+      stages: %{},
+      file_metrics: %{},
+      codebase_metrics: %{},
+      phases: %{},
+      loo_breakdown: %{},
+      loo_breakdown_calls: 0,
+      cosine_breakdown: %{},
+      cosine_breakdown_calls: 0
+    }
+
+    # Event names this handler subscribes to.
+    events = [
+      [:codeqa, :block_impact, :codebase_cosines],
+      [:codeqa, :block_impact, :file],
+      [:codeqa, :block_impact, :node],
+      [:codeqa, :stage],
+      [:codeqa, :file_metric],
+      [:codeqa, :codebase_metric],
+      [:codeqa, :loo_breakdown],
+      [:codeqa, :cosine_breakdown]
+    ]
+
+    {:ok, agent} = Agent.start_link(fn -> initial_state end)
+
+    # The Agent pid is the handler config, i.e. the fourth argument of handle_event/4.
+    :telemetry.attach_many(@telemetry_handler, events, &handle_event/4, agent)
+
+    agent
+  end
+
+  defp record_phase(pid, name, duration_us) do
+    # Stores (or overwrites) the duration for `name` inside the state's :phases map.
+    store = fn phases -> Map.put(phases, name, duration_us) end
+    Agent.update(pid, &Map.update!(&1, :phases, store))
+  end
+
+  # Telemetry callback; every clause folds one event into the Agent state behind `pid`.
+  defp handle_event([:codeqa, :block_impact, :codebase_cosines], measurements, _meta, pid) do
+    Agent.update(pid, fn state ->
+      Map.put(state, :codebase_cosines_us, measurements.duration)
+    end)
+  end
+
+  # Per-file and per-node samples are kept as {path, measurements} pairs.
+  defp handle_event([:codeqa, :block_impact, :file], measurements, metadata, pid) do
+    Agent.update(pid, &prepend_sample(&1, :files, metadata, measurements))
+  end
+
+  defp handle_event([:codeqa, :block_impact, :node], measurements, metadata, pid) do
+    Agent.update(pid, &prepend_sample(&1, :nodes, metadata, measurements))
+  end
+
+  defp handle_event([:codeqa, :stage], measurements, metadata, pid) do
+    # Last write wins: one duration per stage name.
+    Agent.update(pid, fn state ->
+      record = &Map.put(&1, metadata.stage, measurements.duration)
+      Map.update!(state, :stages, record)
+    end)
+  end
+
+  defp handle_event([:codeqa, :file_metric], measurements, metadata, pid) do
+    # Keeps a {call_count, total_duration} pair per metric.
+    bump = fn by_metric ->
+      Map.update(by_metric, metadata.metric, {1, measurements.duration}, fn {calls, total} ->
+        {calls + 1, total + measurements.duration}
+      end)
+    end
+
+    Agent.update(pid, &Map.update!(&1, :file_metrics, bump))
+  end
+
+  defp handle_event([:codeqa, :codebase_metric], measurements, metadata, pid) do
+    Agent.update(pid, fn state ->
+      record = &Map.put(&1, metadata.metric, measurements.duration)
+      Map.update!(state, :codebase_metrics, record)
+    end)
+  end
+
+  defp handle_event([:codeqa, :loo_breakdown], measurements, _meta, pid) do
+    Agent.update(pid, fn state ->
+      add_breakdown(state, :loo_breakdown, :loo_breakdown_calls, measurements)
+    end)
+  end
+
+  defp handle_event([:codeqa, :cosine_breakdown], measurements, _meta, pid) do
+    Agent.update(pid, fn state ->
+      add_breakdown(state, :cosine_breakdown, :cosine_breakdown_calls, measurements)
+    end)
+  end
+
+  # Prepends {metadata.path, measurements} to the list stored under `key`.
+  defp prepend_sample(state, key, metadata, measurements) do
+    Map.update!(state, key, &[{metadata.path, measurements} | &1])
+  end
+
+  # Adds each measurement into the running totals under `totals_key` and bumps `calls_key`.
+  defp add_breakdown(state, totals_key, calls_key, measurements) do
+    totals =
+      Enum.reduce(measurements, Map.fetch!(state, totals_key), fn {name, value}, acc ->
+        Map.update(acc, name, value, &(&1 + value))
+      end)
+
+    state
+    |> Map.put(totals_key, totals)
+    |> Map.update!(calls_key, &(&1 + 1))
+  end
+
+  defp print_telemetry(pid) do
+    # Tears down the reporter (handler + Agent) and prints the collected timings to stderr.
+    # Detach first: a handler firing after Agent.stop/1 would call a dead process.
+    :telemetry.detach(@telemetry_handler)
+    state = Agent.get(pid, & &1)
+    Agent.stop(pid)
+    %{nodes: nodes, files: files} = state
+
+    total_nodes = length(nodes)
+    total_files = length(files)
+
+    node_totals = Enum.map(nodes, fn {_, m} -> m end)
+    file_totals = Enum.map(files, fn {_, m} -> m end)
+
+    IO.puts(:stderr, """
+
+    ── CLI Phases ──────────────────────────────────────────
+    #{format_phases(state.phases)}
+
+    ── Top-Level Stages (inside Analyzer.analyze_codebase) ─
+    #{format_stages(state.stages)}
+
+    ── Codebase Metrics (run once over all files) ──────────
+    #{format_codebase_metrics(state.codebase_metrics)}
+
+    ── File Metrics (summed over all files; #{total_files} files) ──
+    #{format_file_metrics(state.file_metrics, total_files)}
+
+    ── Block Impact Telemetry ──────────────────────────────
+    Codebase cosines:     #{us(state.codebase_cosines_us)}
+    Files processed:      #{total_files}
+    Nodes processed:      #{total_nodes}
+
+    Per-file breakdown (avg across #{total_files} files):
+      tokenize:           #{avg_us(file_totals, :tokenize_us)}
+      parse blocks:       #{avg_us(file_totals, :parse_us)}
+      file cosines:       #{avg_us(file_totals, :file_cosines_us)}
+      total/file:         #{avg_us(file_totals, :duration)}
+
+    Per-node breakdown (avg across #{total_nodes} nodes):
+      reconstruct:        #{avg_us(node_totals, :reconstruct_us)}
+      analyze_file:       #{avg_us(node_totals, :analyze_file_us)}
+      aggregate:          #{avg_us(node_totals, :aggregate_us)}
+      refactoring cosine: #{avg_us(node_totals, :refactoring_us)}
+      total/node:         #{avg_us(node_totals, :duration)}
+
+    Top 5 slowest files (total node time):
+    #{top_slow_files(files, nodes)}
+
+    ── LOO breakdown (per analyze_file_for_loo_partial call) ─
+    Calls: #{state.loo_breakdown_calls}
+    #{format_breakdown_avg(state.loo_breakdown, state.loo_breakdown_calls)}
+
+    ── Cosine breakdown (per diagnose_aggregate call) ──────
+    Calls: #{state.cosine_breakdown_calls}
+    #{format_breakdown_avg(state.cosine_breakdown, state.cosine_breakdown_calls)}
+
+    ── File-size scaling (block_impact: total node time) ──
+    #{format_scaling(files, nodes)}
+    ────────────────────────────────────────────────────────
+    """)
+  end
+
+  defp format_breakdown_avg(breakdown, calls) when map_size(breakdown) == 0 or calls == 0,
+    do: "  (no data)"
+
+  defp format_breakdown_avg(breakdown, calls) do
+    # Computed once instead of per row; max/2 keeps an all-zero breakdown from dividing by 0.
+    grand_total = breakdown |> Map.values() |> Enum.sum() |> max(1)
+    breakdown
+    |> Enum.sort_by(fn {_, v} -> -v end)
+    |> Enum.take(25)
+    |> Enum.map_join("\n", fn {key, total_us} ->
+      pct = Float.round(total_us * 100 / grand_total, 1)
+      "  #{String.pad_trailing(to_string(key), 32)} total #{us(total_us)}  avg/call #{us(div(total_us, calls))}  (#{pct}%)"
+    end)
+  end
+
+  defp format_scaling(files, nodes) do
+    # Reports, per file-size bin, averages over the files whose :bytes fall in that bin.
+    samples_by_path = Enum.group_by(nodes, fn {path, _} -> path end, fn {_, m} -> m end)
+
+    rows =
+      Enum.map(files, fn {path, fm} ->
+        path_samples = Map.get(samples_by_path, path, [])
+        total_node_us = path_samples |> Enum.map(& &1.duration) |> Enum.sum()
+
+        %{
+          path: path,
+          bytes: Map.get(fm, :bytes, 0),
+          tokens: Map.get(fm, :token_count, 0),
+          nodes: Map.get(fm, :node_count, 0),
+          file_us: fm.duration,
+          total_node_us: total_node_us
+        }
+      end)
+
+    bins = [
+      {"<2KB ", &(&1.bytes < 2_000)},
+      {"2-8KB", &(&1.bytes >= 2_000 and &1.bytes < 8_000)},
+      {"8-32KB", &(&1.bytes >= 8_000 and &1.bytes < 32_000)},
+      {">32KB", &(&1.bytes >= 32_000)}
+    ]
+
+    mean = fn bucket, field ->
+      div(bucket |> Enum.map(&Map.fetch!(&1, field)) |> Enum.sum(), length(bucket))
+    end
+
+    Enum.map_join(bins, "\n", fn {label, in_bin?} ->
+      case Enum.filter(rows, in_bin?) do
+        [] ->
+          "  #{label}  (none)"
+
+        bucket ->
+          n = length(bucket)
+          avg_bytes = mean.(bucket, :bytes)
+          avg_tokens = mean.(bucket, :tokens)
+          avg_nodes = mean.(bucket, :nodes)
+          avg_node_us = mean.(bucket, :total_node_us)
+          per_node_us = if avg_nodes > 0, do: div(avg_node_us, avg_nodes), else: 0
+
+          "  #{label}  files=#{n}  avg bytes=#{avg_bytes} tokens=#{avg_tokens} nodes=#{avg_nodes}  total_node=#{us(avg_node_us)}  per_node=#{us(per_node_us)}"
+      end
+    end)
+  end
+
+  defp format_phases(phases) when map_size(phases) == 0, do: "  (no phases recorded)"
+
+  defp format_phases(phases) do
+    # Fixed display order; phases that were never recorded are left out.
+    shown = Enum.filter([:collect, :analyze, :report_gen, :render], &is_map_key(phases, &1))
+    Enum.map_join(shown, "\n", fn name ->
+      "  #{String.pad_trailing(Atom.to_string(name), 12)} #{us(phases[name])}"
+    end)
+  end
+
+  defp format_stages(stages) when map_size(stages) == 0, do: "  (no stages recorded)"
+
+  defp format_stages(stages) do
+    # Longest stage first.
+    ordered = Enum.sort_by(stages, fn {_stage, duration} -> -duration end)
+    Enum.map_join(ordered, "\n", fn {stage, duration} ->
+      "  #{String.pad_trailing(Atom.to_string(stage), 20)} #{us(duration)}"
+    end)
+  end
+
+  defp format_codebase_metrics(m) when map_size(m) == 0, do: "  (none recorded)"
+
+  defp format_codebase_metrics(m) do
+    # Slowest metric first.
+    slowest_first = Enum.sort_by(m, fn {_metric, duration} -> -duration end)
+    Enum.map_join(slowest_first, "\n", fn {metric, duration} ->
+      "  #{String.pad_trailing(to_string(metric), 32)} #{us(duration)}"
+    end)
+  end
+
+  defp format_file_metrics(m, _file_count) when map_size(m) == 0, do: "  (none recorded)"
+
+  defp format_file_metrics(m, file_count) do
+    # Guard the per-file average against a zero file count.
+    divisor = max(file_count, 1)
+
+    by_total_desc = Enum.sort_by(m, fn {_metric, {_calls, total}} -> -total end)
+
+    Enum.map_join(by_total_desc, "\n", fn {metric, {calls, total}} ->
+      per_call = if calls > 0, do: div(total, calls), else: 0
+      per_file = div(total, divisor)
+
+      "  #{String.pad_trailing(to_string(metric), 32)} total #{us(total)}  avg/file #{us(per_file)}  (#{calls} calls, avg/call #{us(per_call)})"
+    end)
+  end
+
+  defp top_slow_files(files, nodes) do
+    # Sum node durations per path, then list the five files with the largest sums.
+    node_us =
+      Enum.reduce(nodes, %{}, fn {path, m}, totals ->
+        Map.update(totals, path, m.duration, &(&1 + m.duration))
+      end)
+
+    slowest =
+      files
+      |> Enum.map(fn {path, fm} -> {path, fm.node_count, Map.get(node_us, path, 0)} end)
+      |> Enum.sort_by(fn {_path, _count, node_time} -> -node_time end)
+      |> Enum.take(5)
+
+    Enum.map_join(slowest, "\n", fn {path, node_count, node_time} ->
+      "  #{path}  (#{node_count} nodes, #{us(node_time)} node time)"
+    end)
+  end
+
+  defp avg_us([], _key), do: "n/a"
+
+  defp avg_us(measurements, key) do
+    sum = measurements |> Enum.map(&Map.get(&1, key, 0)) |> Enum.sum()
+    sum |> div(length(measurements)) |> us()
+  end
+
+  defp us(microseconds) do
+    cond do
+      microseconds >= 1_000_000 -> "#{Float.round(microseconds / 1_000_000, 2)}s"
+      microseconds >= 1_000 -> "#{Float.round(microseconds / 1_000, 1)}ms"
+      true -> "#{microseconds}µs"
+    end
+  end
 end
diff --git a/lib/codeqa/cli/history.ex b/lib/codeqa/cli/history.ex
index 4c73acee..ca40669c 100644
--- a/lib/codeqa/cli/history.ex
+++ b/lib/codeqa/cli/history.ex
@@ -4,6 +4,11 @@ defmodule CodeQA.CLI.History do
   @behaviour CodeQA.CLI.Command
 
   alias CodeQA.CLI.Options
+  alias CodeQA.CLI.Progress
+  alias CodeQA.Config
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+  alias CodeQA.Git
 
   @version "0.1.0"
 
@@ -34,18 +39,20 @@ defmodule CodeQA.CLI.History do
 
   @impl CodeQA.CLI.Command
   def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
+    usage()
   end
 
   def run(args) do
     {opts, [path], _} =
-      Options.parse(args,
+      Options.parse(
+        args,
         [
           commits: :integer,
           commit_list: :string,
           output_dir: :string
         ],
-        [n: :commits, o: :output_dir]
+        n: :commits,
+        o: :output_dir
       )
 
     output_dir = opts[:output_dir] || raise "Missing --output-dir"
@@ -56,14 +63,19 @@ defmodule CodeQA.CLI.History do
     commits = resolve_commits(opts, path)
     IO.puts(:stderr, "Found #{length(commits)} commits to analyze.")
 
-    analyze_opts = Options.build_analyze_opts(opts)
-    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path)
+    Config.load(path)
+
+    analyze_opts =
+      Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts()
+
+    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths])
 
     commits
     |> Enum.with_index(1)
     |> Enum.each(&analyze_commit(&1, path, output_dir, analyze_opts, ignore_patterns, opts))
 
     IO.puts(:stderr, "Done writing history to #{output_dir}")
+    ""
   end
 
   defp resolve_commits(opts, path) do
@@ -90,14 +102,13 @@ defmodule CodeQA.CLI.History do
     current_opts =
       if opts[:progress],
         do: [
-          {:on_progress,
-           fn c, t, p, _tt -> CodeQA.CLI.Progress.callback(c, t, p, start_time_progress) end}
+          {:on_progress, fn c, t, p, _tt -> Progress.callback(c, t, p, start_time_progress) end}
           | analyze_opts
         ],
         else: analyze_opts
 
-    files = CodeQA.Git.collect_files_at_ref(path, commit)
-    files = CodeQA.Collector.reject_ignored_map(files, ignore_patterns)
+    files = Git.collect_files_at_ref(path, commit)
+    files = Collector.reject_ignored_map(files, ignore_patterns)
 
     if map_size(files) == 0 do
       IO.puts(:stderr, "Warning: no source files found at commit #{commit}")
@@ -108,7 +119,7 @@ defmodule CodeQA.CLI.History do
 
   defp write_commit_result(commit, path, output_dir, files, analyze_opts) do
     start_time = System.monotonic_time(:millisecond)
-    results = CodeQA.Analyzer.analyze_codebase(files, analyze_opts)
+    results = Analyzer.analyze_codebase(files, analyze_opts)
     end_time = System.monotonic_time(:millisecond)
 
     IO.puts(:stderr, "  Analysis completed in #{end_time - start_time}ms")
diff --git a/lib/codeqa/cli/options.ex b/lib/codeqa/cli/options.ex
index c735d56a..199a95df 100644
--- a/lib/codeqa/cli/options.ex
+++ b/lib/codeqa/cli/options.ex
@@ -1,6 +1,8 @@
 defmodule CodeQA.CLI.Options do
   @moduledoc false
 
+  alias CodeQA.CLI.Progress
+
   @common_strict [
     workers: :integer,
     cache: :boolean,
@@ -10,13 +12,11 @@ defmodule CodeQA.CLI.Options do
     ncd_top: :integer,
     ncd_paths: :string,
     combinations: :boolean,
-    telemetry: :boolean,
-    experimental_stopwords: :boolean,
-    stopwords_threshold: :float,
     show_files: :boolean,
     show_file_paths: :string,
     ignore_paths: :string,
-    progress: :boolean
+    progress: :boolean,
+    nodes_top: :integer
   ]
 
   @common_aliases [w: :workers, t: :timeout]
@@ -27,7 +27,7 @@ defmodule CodeQA.CLI.Options do
   @spec common_aliases() :: keyword()
   def common_aliases, do: @common_aliases
 
-  @spec parse(list(String.t()), keyword()) :: {keyword(), list(String.t()), list()}
+  @spec parse(list(String.t()), keyword(), keyword()) :: {keyword(), list(String.t()), list()}
   def parse(args, extra_strict \\ [], extra_aliases \\ []) do
     OptionParser.parse(args,
       strict: Keyword.merge(@common_strict, extra_strict),
@@ -54,22 +54,6 @@ defmodule CodeQA.CLI.Options do
     |> Enum.map(&String.trim/1)
   end
 
-  @spec load_config_ignore_paths(String.t()) :: [String.t()]
-  def load_config_ignore_paths(path) do
-    config_file = Path.join(path, ".codeqa.yml")
-
-    case File.read(config_file) do
-      {:ok, contents} ->
-        case YamlElixir.read_from_string(contents) do
-          {:ok, %{"ignore_paths" => patterns}} when is_list(patterns) -> patterns
-          _ -> []
-        end
-
-      {:error, _} ->
-        []
-    end
-  end
-
   @spec build_analyze_opts(keyword()) :: keyword()
   def build_analyze_opts(opts) do
     start_time_progress = System.monotonic_time(:millisecond)
@@ -79,17 +63,14 @@ defmodule CodeQA.CLI.Options do
       :show_ncd,
       :ncd_top,
       :combinations,
-      :telemetry,
-      :experimental_stopwords,
-      :stopwords_threshold
+      :nodes_top
     ]
 
     base =
       [{:timeout, opts[:timeout] || 5000}]
       |> maybe_add(
         opts[:progress],
-        {:on_progress,
-         fn c, t, p, _tt -> CodeQA.CLI.Progress.callback(c, t, p, start_time_progress) end}
+        {:on_progress, fn c, t, p, _tt -> Progress.callback(c, t, p, start_time_progress) end}
       )
       |> maybe_add(opts[:cache], {:cache_dir, opts[:cache_dir] || ".codeqa_cache"})
       |> maybe_add(
diff --git a/lib/codeqa/cli/progress.ex b/lib/codeqa/cli/progress.ex
index 6ffdd14d..aa09b05f 100644
--- a/lib/codeqa/cli/progress.ex
+++ b/lib/codeqa/cli/progress.ex
@@ -1,6 +1,8 @@
 defmodule CodeQA.CLI.Progress do
   @moduledoc false
 
+  alias CodeQA.CLI.UI
+
   @spec callback(integer(), integer(), String.t(), integer()) :: :ok
   def callback(completed, total, path, start_time) do
     now = System.monotonic_time(:millisecond)
@@ -11,8 +13,8 @@ defmodule CodeQA.CLI.Progress do
     label = if String.length(path) > 30, do: "..." <> String.slice(path, -27..-1), else: path
 
     output =
-      CodeQA.CLI.UI.progress_bar(completed, total,
-        eta: CodeQA.CLI.UI.format_eta(eta_ms),
+      UI.progress_bar(completed, total,
+        eta: UI.format_eta(eta_ms),
         label: label
       )
 
diff --git a/lib/codeqa/cli/stopwords.ex b/lib/codeqa/cli/stopwords.ex
deleted file mode 100644
index f79027b5..00000000
--- a/lib/codeqa/cli/stopwords.ex
+++ /dev/null
@@ -1,97 +0,0 @@
-defmodule CodeQA.CLI.Stopwords do
-  @moduledoc false
-
-  @behaviour CodeQA.CLI.Command
-
-  alias CodeQA.CLI.Options
-
-  @impl CodeQA.CLI.Command
-  def usage do
-    """
-    Usage: codeqa stopwords <path> [options]
-
-      Print codebase-specific stopwords based on frequency analysis.
-
-    Options:
-      --stopwords-threshold FLOAT  Frequency threshold for stopword detection
-      --progress                   Show per-file progress on stderr
-      -w, --workers N              Number of parallel workers
-      --ignore-paths PATHS         Comma-separated list of path patterns to ignore (supports wildcards, e.g. "test/*,docs/*")
-    """
-  end
-
-  @impl CodeQA.CLI.Command
-  def run(args) when args in [["--help"], ["-h"]] do
-    IO.puts(usage())
-  end
-
-  def run(args) do
-    {opts, [path], _} =
-      OptionParser.parse(args,
-        strict: [
-          workers: :integer,
-          stopwords_threshold: :float,
-          progress: :boolean,
-          ignore_paths: :string
-        ],
-        aliases: [w: :workers]
-      )
-
-    Options.validate_dir!(path)
-
-    ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path)
-    files = CodeQA.Collector.collect_files(path, ignore_patterns: ignore_patterns)
-
-    if map_size(files) == 0 do
-      IO.puts(:stderr, "Warning: no source files found in '#{path}'")
-      exit({:shutdown, 1})
-    end
-
-    IO.puts(:stderr, "Extracting stopwords for #{map_size(files)} files...")
-    start_time = System.monotonic_time(:millisecond)
-
-    word_stopwords = find_word_stopwords(files, opts)
-    fp_stopwords = find_fingerprint_stopwords(files, opts)
-
-    end_time = System.monotonic_time(:millisecond)
-
-    IO.puts(:stderr, "\nAnalysis completed in #{end_time - start_time}ms")
-    print_word_stopwords(word_stopwords)
-    IO.puts(:stderr, "\n--- Fingerprint Stopwords (#{MapSet.size(fp_stopwords)}) ---")
-    IO.puts(:stderr, "Found #{MapSet.size(fp_stopwords)} structural k-gram hashes.")
-  end
-
-  defp find_word_stopwords(files, opts) do
-    word_extractor = fn content ->
-      Regex.scan(~r/\b[a-zA-Z_]\w*\b/u, content) |> List.flatten()
-    end
-
-    CodeQA.Stopwords.find_stopwords(
-      files,
-      word_extractor,
-      Keyword.put(opts, :progress_label, "Words")
-    )
-  end
-
-  defp find_fingerprint_stopwords(files, opts) do
-    fp_extractor = fn content ->
-      CodeQA.Metrics.TokenNormalizer.normalize(content) |> CodeQA.Metrics.Winnowing.kgrams(5)
-    end
-
-    CodeQA.Stopwords.find_stopwords(
-      files,
-      fp_extractor,
-      Keyword.put(opts, :progress_label, "Fingerprints")
-    )
-  end
-
-  defp print_word_stopwords(word_stopwords) do
-    IO.puts(:stderr, "\n--- Word Stopwords (#{MapSet.size(word_stopwords)}) ---")
-
-    word_stopwords
-    |> MapSet.to_list()
-    |> Enum.sort()
-    |> Enum.chunk_every(10)
-    |> Enum.each(fn chunk -> IO.puts(Enum.join(chunk, ", ")) end)
-  end
-end
diff --git a/lib/codeqa/collector.ex b/lib/codeqa/collector.ex
deleted file mode 100644
index 02e6f349..00000000
--- a/lib/codeqa/collector.ex
+++ /dev/null
@@ -1,99 +0,0 @@
-defmodule CodeQA.Collector do
-  @moduledoc false
-
-  @source_extensions MapSet.new(~w[
-    .py .js .ts .jsx .tsx .java .rs .go .c .cpp .h .hpp .rb .ex .exs
-    .swift .kt .scala .sh .css .scss .html .vue .svelte .zig .lua .pl
-    .pm .r .jl .cs .fs .ml .hs .erl .clj .dart
-  ])
-
-  @skip_dirs MapSet.new(~w[
-    .git .hg .svn node_modules __pycache__ _build dist build vendor
-    .tox .venv venv target .mypy_cache .pytest_cache deps .elixir_ls
-    .next coverage
-  ])
-
-  @spec collect_files(String.t(), keyword()) :: %{String.t() => String.t()}
-  def collect_files(root, opts \\ []) do
-    root_path = Path.expand(root)
-    ignore_patterns = Keyword.get(opts, :ignore_patterns, [])
-
-    unless File.dir?(root_path) do
-      raise File.Error, reason: :enoent, path: root, action: "find directory"
-    end
-
-    root_path
-    |> walk_directory()
-    |> Map.new(fn path ->
-      rel = Path.relative_to(path, root_path)
-      {rel, File.read!(path)}
-    end)
-    |> reject_ignored_map(ignore_patterns)
-  end
-
-  def source_extensions, do: @source_extensions
-
-  @doc false
-  def ignored?(path, patterns) do
-    Enum.any?(patterns, fn pattern ->
-      match_pattern?(path, pattern)
-    end)
-  end
-
-  @doc false
-  def reject_ignored_map(files_map, []), do: files_map
-
-  def reject_ignored_map(files_map, patterns) do
-    Map.reject(files_map, fn {path, _} -> ignored?(path, patterns) end)
-  end
-
-  @doc false
-  def reject_ignored(list, [], _key_fn), do: list
-
-  def reject_ignored(list, patterns, key_fn) do
-    Enum.reject(list, fn item -> ignored?(key_fn.(item), patterns) end)
-  end
-
-  defp match_pattern?(path, pattern) do
-    # Convert glob pattern to regex:
-    # - ** matches any number of directories
-    # - * matches anything except /
-    # - ? matches a single character except /
-    regex_str =
-      pattern
-      |> String.replace(".", "\\.")
-      |> String.replace("**", "\0GLOBSTAR\0")
-      |> String.replace("*", "[^/]*")
-      |> String.replace("?", "[^/]")
-      |> String.replace("\0GLOBSTAR\0", ".*")
-
-    case Regex.compile("^#{regex_str}$") do
-      {:ok, regex} -> Regex.match?(regex, path)
-      _ -> false
-    end
-  end
-
-  defp walk_directory(dir) do
-    dir
-    |> File.ls!()
-    |> Enum.flat_map(fn entry ->
-      full_path = Path.join(dir, entry)
-
-      cond do
-        File.dir?(full_path) and not skip_dir?(entry) ->
-          walk_directory(full_path)
-
-        File.regular?(full_path) and source_file?(entry) ->
-          [full_path]
-
-        true ->
-          []
-      end
-    end)
-  end
-
-  defp skip_dir?(name), do: MapSet.member?(@skip_dirs, name) or String.starts_with?(name, ".")
-
-  defp source_file?(name),
-    do: MapSet.member?(@source_extensions, Path.extname(name) |> String.downcase())
-end
diff --git a/lib/codeqa/combined_metrics/category.ex b/lib/codeqa/combined_metrics/category.ex
new file mode 100644
index 00000000..def09ad1
--- /dev/null
+++ b/lib/codeqa/combined_metrics/category.ex
@@ -0,0 +1,40 @@
+defmodule CodeQA.CombinedMetrics.Category do
+  @moduledoc """
+  Macro helper for defining combined-metric category modules.
+
+  Each category module (e.g. `VariableNaming`, `Documentation`) calls
+  `use CodeQA.CombinedMetrics.Category, yaml_path: "priv/..."`.
+
+  This injects:
+  - `@callback score(metrics :: map()) :: float()` — making the caller a behaviour
+  - `compute_score/2` — delegates to `Scorer` with the baked-in yaml path
+
+  ## Example
+
+      defmodule CodeQA.CombinedMetrics.VariableNaming do
+        use CodeQA.CombinedMetrics.Category,
+          yaml_path: "priv/combined_metrics/variable_naming.yml"
+      end
+
+  Leaf modules then declare `@behaviour CodeQA.CombinedMetrics.VariableNaming`
+  and call `VariableNaming.compute_score("key", metrics)`.
+  """
+
+  defmacro __using__(yaml_path: yaml_path) do
+    quote do
+      # Expands inside the module that calls `use`; `compute_score/2` below refers to `Scorer`.
+      alias CodeQA.CombinedMetrics.Scorer
+      @callback score(metrics :: map()) :: float()
+
+      @doc """
+      Computes the score for `metric_name` using scalars from this category's YAML file.
+
+      Delegates to `CodeQA.CombinedMetrics.Scorer.compute_score/3`.
+      """
+      @spec compute_score(String.t(), map()) :: float()
+      def compute_score(metric_name, metrics) do
+        Scorer.compute_score(unquote(yaml_path), metric_name, metrics)
+      end
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/code_smells.ex b/lib/codeqa/combined_metrics/code_smells.ex
new file mode 100644
index 00000000..13586ba5
--- /dev/null
+++ b/lib/codeqa/combined_metrics/code_smells.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.CodeSmells do
+  @moduledoc """
+  Behaviour and submodule registry for code smell detection metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/code_smells.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/code_smells.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.CodeSmells, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.CodeSmells
+      @moduledoc doc
+      @behaviour CodeSmells
+      @score_key key
+      @impl true
+      def score(metrics), do: CodeSmells.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/consistency.ex b/lib/codeqa/combined_metrics/consistency.ex
new file mode 100644
index 00000000..1c4af0c0
--- /dev/null
+++ b/lib/codeqa/combined_metrics/consistency.ex
@@ -0,0 +1,30 @@
+defmodule CodeQA.CombinedMetrics.Consistency do
+  @moduledoc """
+  Behaviour and submodule registry for codebase consistency metrics.
+
+  Covers naming style uniformity, structural patterns, and cross-file coherence.
+  Scalar weights are defined in `priv/combined_metrics/consistency.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/consistency.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.Consistency, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.Consistency
+      @moduledoc doc
+      @behaviour Consistency
+      @score_key key
+      @impl true
+      def score(metrics), do: Consistency.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/cosine_vector.ex b/lib/codeqa/combined_metrics/cosine_vector.ex
new file mode 100644
index 00000000..36bbe23f
--- /dev/null
+++ b/lib/codeqa/combined_metrics/cosine_vector.ex
@@ -0,0 +1,90 @@
+defmodule CodeQA.CombinedMetrics.CosineVector do
+  @moduledoc """
+  Computes cosine similarity between a behavior's scalar weight vector and a
+  log-metric vector derived from an aggregate.
+
+  Pure math — no I/O, no YAML loading. Intended for internal use by `SampleRunner`.
+  """
+
+  alias CodeQA.CombinedMetrics.Scorer
+
+  @doc """
+  Builds the cosine result entry for a single behavior against the given aggregate.
+
+  Returns a one-element list `[result_map]` on success or `[]` when the behavior
+  has no non-zero scalars (no sample data) and should be excluded.
+
+  ## Options
+
+    * `:log_metrics` - precomputed log-metric map `%{group => %{key => log_val}}`.
+      When present, values are looked up directly instead of being recomputed via
+      `:math.log/1`. Falls back to inline computation when absent or when a key is
+      not found in the map.
+  """
+  @spec compute(String.t(), String.t(), map(), map(), String.t(), keyword()) :: [map()]
+  def compute(yaml_path, behavior, behavior_data, aggregate, category, opts \\ []) do
+    scalars = Scorer.scalars_for(yaml_path, behavior)
+
+    if map_size(scalars) == 0 do
+      []
+    else
+      build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars, opts)
+    end
+  end
+
+  # --- Internal helpers ---
+
+  defp build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars, opts) do
+    log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0
+    log_metrics = Keyword.get(opts, :log_metrics)
+
+    {dot, norm_s_sq, norm_v_sq, contributions} =
+      Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar},
+                                                   {d, ns, nv, contribs} ->
+        log_m = lookup_log_metric(log_metrics, aggregate, group, key)
+        contrib = scalar * log_m
+
+        {d + contrib, ns + scalar * scalar, nv + log_m * log_m,
+         [{"#{group}.#{key}", contrib} | contribs]}
+      end)
+
+    cos_sim =
+      if norm_s_sq > 0 and norm_v_sq > 0,
+        do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)),
+        else: 0.0
+
+    raw_score = Scorer.compute_score(yaml_path, behavior, aggregate)
+    calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline
+
+    top_metrics =
+      contributions
+      |> Enum.sort_by(fn {_, c} -> c end)
+      |> Enum.take(5)
+      |> Enum.map(fn {metric, contribution} ->
+        %{metric: metric, contribution: Float.round(contribution, 4)}
+      end)
+
+    [
+      %{
+        category: category,
+        behavior: behavior,
+        cosine: Float.round(cos_sim, 4),
+        score: Float.round(calibrated, 4),
+        top_metrics: top_metrics
+      }
+    ]
+  end
+
+  # Returns a precomputed log value when available, otherwise computes inline.
+  # Both paths apply the same max(val, 1.0e-300) floor guard to ensure identical
+  # results regardless of whether log_metrics was precomputed or not.
+  defp lookup_log_metric(nil, aggregate, group, key),
+    do: :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300))
+
+  defp lookup_log_metric(log_metrics, aggregate, group, key) do
+    case get_in(log_metrics, [group, key]) do
+      nil -> :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300))
+      log_val -> log_val
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/dependencies.ex b/lib/codeqa/combined_metrics/dependencies.ex
new file mode 100644
index 00000000..f0b25aa9
--- /dev/null
+++ b/lib/codeqa/combined_metrics/dependencies.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.Dependencies do
+  @moduledoc """
+  Behaviour and submodule registry for dependency and coupling quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/dependencies.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/dependencies.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.Dependencies, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.Dependencies
+      @moduledoc doc
+      @behaviour Dependencies
+      @score_key key
+      @impl true
+      def score(metrics), do: Dependencies.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/documentation.ex b/lib/codeqa/combined_metrics/documentation.ex
new file mode 100644
index 00000000..94f8a95f
--- /dev/null
+++ b/lib/codeqa/combined_metrics/documentation.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.Documentation do
+  @moduledoc """
+  Behaviour and submodule registry for documentation quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/documentation.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/documentation.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.Documentation, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.Documentation
+      @moduledoc doc
+      @behaviour Documentation
+      @score_key key
+      @impl true
+      def score(metrics), do: Documentation.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/error_handling.ex b/lib/codeqa/combined_metrics/error_handling.ex
new file mode 100644
index 00000000..9039ef61
--- /dev/null
+++ b/lib/codeqa/combined_metrics/error_handling.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.ErrorHandling do
+  @moduledoc """
+  Behaviour and submodule registry for error handling quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/error_handling.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/error_handling.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.ErrorHandling, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.ErrorHandling
+      @moduledoc doc
+      @behaviour ErrorHandling
+      @score_key key
+      @impl true
+      def score(metrics), do: ErrorHandling.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex
new file mode 100644
index 00000000..e7479b08
--- /dev/null
+++ b/lib/codeqa/combined_metrics/file_scorer.ex
@@ -0,0 +1,109 @@
+defmodule CodeQA.CombinedMetrics.FileScorer do
+  @moduledoc """
+  Scores individual files against combined metric behaviors.
+
+  Converts per-file raw metric maps to aggregate-compatible format and
+  identifies which behaviors each file most likely exhibits.
+  """
+
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Config
+  alias CodeQA.HealthReport.Grader
+  alias CodeQA.Language
+
+  @doc """
+  Converts a single file's raw metric map to aggregate format.
+
+  Wraps each key in each group with the `mean_` prefix so the resulting
+  map is compatible with `SampleRunner.diagnose_aggregate/2`.
+
+  ## Example
+
+      iex> CodeQA.CombinedMetrics.FileScorer.file_to_aggregate(%{"halstead" => %{"tokens" => 42.0}})
+      %{"halstead" => %{"mean_tokens" => 42.0}}
+  """
+  @spec file_to_aggregate(map()) :: map()
+  def file_to_aggregate(metrics) do
+    Map.new(metrics, fn {group, keys} ->
+      prefixed_keys = Map.new(keys, fn {key, value} -> {"mean_" <> key, value} end)
+      {group, prefixed_keys}
+    end)
+  end
+
+  @doc """
+  Identifies the worst files per combined metric behavior.
+
+  For each file in `files_map`, converts its metrics to aggregate format and runs
+  `SampleRunner.diagnose_aggregate/2`. Per behavior, only entries whose cosine is at
+  or below the negated `Config.cosine_significance_threshold/0` are kept; these are
+  sorted ascending (most negative = worst first) and truncated to `combined_top`.
+
+  ## Options
+
+    * `:combined_top` - number of worst files to keep per behavior (default: 2)
+
+  ## Result shape
+
+      %{
+        "function_design.no_boolean_parameter" => [
+          %{file: "lib/foo/bar.ex", cosine: -0.71, top_metrics: [...], top_nodes: [...]},
+          %{file: "lib/foo/baz.ex", cosine: -0.44, top_metrics: [...], top_nodes: [...]}
+        ],
+        ...
+      }
+  """
+  @spec worst_files_per_behavior(map(), keyword()) ::
+          %{
+            String.t() => [
+              %{file: String.t(), cosine: float(), top_metrics: list(), top_nodes: list()}
+            ]
+          }
+  def worst_files_per_behavior(files_map, opts \\ []) do
+    # NOTE: cosine similarity is computed at file level; a line-level mapping would require computing a separate
+    # cosine score for each AST node by projecting that node's metric vector against the behavior's
+    # feature-weight vector. This is not currently possible because serialized nodes do not carry their own
+    # metric values.
+    top_n = Keyword.get(opts, :combined_top, 2)
+    # Looked up once here rather than once per behavior inside the final Map.new/2.
+    threshold = Config.cosine_significance_threshold()
+
+    files_map
+    |> Enum.reject(fn {_path, file_data} ->
+      file_data |> Map.get("metrics", %{}) |> map_size() == 0
+    end)
+    |> Enum.reduce(%{}, fn {path, file_data}, acc ->
+      accumulate_file_behaviors(path, file_data, acc)
+    end)
+    |> Map.new(fn {key, entries} ->
+      sorted =
+        entries
+        |> Enum.filter(fn e -> e.cosine <= -threshold end)
+        |> Enum.sort_by(& &1.cosine)
+        |> Enum.take(top_n)
+
+      {key, sorted}
+    end)
+  end
+
+  # Diagnoses a single file's metrics and merges per-behavior entries into the accumulator.
+  defp accumulate_file_behaviors(path, file_data, acc) do
+    top_nodes = Grader.top_3_nodes(Map.get(file_data, "nodes"))
+    language = Language.detect(path).name()
+
+    file_data
+    |> Map.get("metrics", %{})
+    |> file_to_aggregate()
+    |> SampleRunner.diagnose_aggregate(top: 99_999, language: language)
+    |> Enum.reduce(acc, fn %{
+                             category: category,
+                             behavior: behavior,
+                             cosine: cosine,
+                             top_metrics: top_metrics
+                           },
+                           inner_acc ->
+      key = "#{category}.#{behavior}"
+      entry = %{file: path, cosine: cosine, top_metrics: top_metrics, top_nodes: top_nodes}
+      Map.update(inner_acc, key, [entry], &[entry | &1])
+    end)
+  end
+end
diff --git a/lib/codeqa/combined_metrics/file_structure.ex b/lib/codeqa/combined_metrics/file_structure.ex
new file mode 100644
index 00000000..aa6f153a
--- /dev/null
+++ b/lib/codeqa/combined_metrics/file_structure.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.FileStructure do
+  @moduledoc """
+  Behaviour and submodule registry for file structure quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/file_structure.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/file_structure.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.FileStructure, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.FileStructure
+      @moduledoc doc
+      @behaviour FileStructure
+      @score_key key
+      @impl true
+      def score(metrics), do: FileStructure.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/function_design.ex b/lib/codeqa/combined_metrics/function_design.ex
new file mode 100644
index 00000000..3eab5f78
--- /dev/null
+++ b/lib/codeqa/combined_metrics/function_design.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.FunctionDesign do
+  @moduledoc """
+  Behaviour and submodule registry for function design quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/function_design.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/function_design.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.FunctionDesign, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.FunctionDesign
+      @moduledoc doc
+      @behaviour FunctionDesign
+      @score_key key
+      @impl true
+      def score(metrics), do: FunctionDesign.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/naming_conventions.ex b/lib/codeqa/combined_metrics/naming_conventions.ex
new file mode 100644
index 00000000..eafb5dcb
--- /dev/null
+++ b/lib/codeqa/combined_metrics/naming_conventions.ex
@@ -0,0 +1,31 @@
+defmodule CodeQA.CombinedMetrics.NamingConventions do
+  @moduledoc """
+  Behaviour and submodule registry for broader naming convention metrics.
+
+  Covers class, file, and function naming patterns not captured by
+  `VariableNaming`. Scalar weights are defined in
+  `priv/combined_metrics/naming_conventions.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/naming_conventions.yml"
+  # Parsed at compile time (below); tracked so edits to the YAML trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.NamingConventions, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.NamingConventions
+      @moduledoc doc
+      @behaviour NamingConventions
+      @score_key key
+      @impl true
+      def score(metrics), do: NamingConventions.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex
new file mode 100644
index 00000000..318d007a
--- /dev/null
+++ b/lib/codeqa/combined_metrics/sample_runner.ex
@@ -0,0 +1,508 @@
+defmodule CodeQA.CombinedMetrics.SampleRunner do
+  @moduledoc """
+  Discovers sample directories, analyzes them, and scores each behavior formula.
+
+  Returns structured results suitable for rendering a separation table, enabling
+  manual scalar tuning of combined metric formulas.
+  """
+
+  alias CodeQA.CombinedMetrics.{CosineVector, ScalarApplier, Scorer}
+  alias CodeQA.Engine.{Analyzer, Collector}
+
+  @samples_root "priv/combined_metrics/samples"
+
+  # ---------------------------------------------------------------------------
+  # Public API
+  # ---------------------------------------------------------------------------
+
+  @doc """
+  Scores every sample-backed behavior with its combined-metric formula.
+
+  Only behaviors that have both a `bad/` and a `good/` sample directory are
+  scored.
+
+  ## Options
+
+    * `:category` - restrict to one category (e.g. `"variable_naming"`)
+    * `:verbose`  - when `true`, populates `:metric_detail` in each result
+
+  ## Result shape
+
+      %{
+        category:     "variable_naming",
+        behavior:     "name_is_generic",
+        bad_score:    0.074,
+        good_score:   0.550,
+        ratio:        7.43,
+        direction_ok: true,
+        metric_detail: [...]   # empty unless verbose: true
+      }
+  """
+  @spec run(keyword()) :: [map()]
+  def run(opts \\ []) do
+    wanted = opts[:category]
+
+    # Cheap category check first; the directory probe only runs for matches.
+    for {category, behavior} <- list_behaviors(@samples_root),
+        wanted == nil or category == wanted,
+        has_both_dirs?(category, behavior) do
+      score_behavior(category, behavior, opts)
+    end
+  end
+
+  @doc """
+  Builds a per-behavior metric correlation report for scalar tuning.
+
+  Every behavior that has both `bad/` and `good/` samples is analyzed, and each
+  `mean_*` metric is compared between the two sample sets. A scalar in [-2, 2]
+  is then suggested per metric using the log-linear method:
+
+      log_diff = log(good_val) - log(bad_val)
+      suggested_scalar = 2.0 * log_diff / max(|all log_diffs| for this behavior)
+
+  The strongest signal for each behavior maps to ±2.0; all others scale
+  proportionally, so the suggestions can be pasted into the YAML as a starting
+  point and refined from there.
+
+  ## Options
+
+    * `:category` - restrict the report to one category
+
+  ## Result shape (keyed by "category.behavior")
+
+      %{
+        "variable_naming.name_is_generic" => %{
+          "identifier_length_variance.mean_variance" => %{
+            bad: 5.131, good: 25.109,
+            log_bad: 1.635, log_good: 3.224,
+            ratio: 4.895,
+            suggested_scalar: 2.0
+          },
+          ...
+        }
+      }
+  """
+  @spec build_metric_report(keyword()) :: map()
+  def build_metric_report(opts \\ []) do
+    wanted = opts[:category]
+
+    for {category, behavior} <- list_behaviors(@samples_root),
+        wanted == nil or category == wanted,
+        has_both_dirs?(category, behavior),
+        into: %{} do
+      {"#{category}.#{behavior}", behavior_metric_table(category, behavior)}
+    end
+  end
+
+  @doc """
+  Scores all combined metric behaviors against the given codebase aggregate map.
+
+  Uses the compiled YAML configs (`Scorer.all_yamls/0`) and returns one entry per
+  category, with behaviors sorted ascending by score (lowest/worst first).
+
+  ## Options
+
+    * `:languages` - project language list; behaviors with a non-empty `_languages`
+      list that does not overlap it are skipped (`nil` or `[]` disables the filter)
+
+  ## Result shape
+
+      [
+        %{
+          category: "variable_naming",
+          name: "Variable Naming",
+          behaviors: [%{behavior: "name_is_generic", score: 3.45}, ...]
+        },
+        ...
+      ]
+  """
+  @spec score_aggregate(map(), keyword()) :: [map()]
+  def score_aggregate(aggregate, opts \\ []) do
+    languages = Keyword.get(opts, :languages)
+
+    Scorer.all_yamls()
+    |> Enum.sort_by(fn {path, _} -> path end)
+    |> Enum.map(fn {yaml_path, data} ->
+      category = Path.basename(yaml_path, ".yml")
+
+      behaviors =
+        data
+        |> Enum.filter(fn {_behavior, behavior_data} ->
+          is_map(behavior_data) and
+            behavior_language_applies?(Map.get(behavior_data, "_languages", []), nil, languages)
+        end)
+        |> Enum.map(fn {behavior, behavior_data} ->
+          log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0
+          raw_score = Scorer.compute_score(yaml_path, behavior, aggregate)
+          calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline
+          %{behavior: behavior, score: Float.round(calibrated, 4)}
+        end)
+        |> Enum.sort_by(& &1.score)
+
+      %{category: category, name: humanize(category), behaviors: behaviors}
+    end)
+  end
+
+  @doc """
+  Identifies the most likely code quality issues in an aggregate by cosine similarity.
+
+  For each behavior, computes the cosine similarity between its scalar weight vector
+  `s` and the file's log-metric vector `v`: `cos_sim = (s · v) / (|s| × |v|)`.
+  A negative cosine means the file's metric profile anti-aligns with what good code
+  looks like for that behavior — i.e. the file likely exhibits that anti-pattern.
+  Results are sorted by cosine similarity ascending (most negative = most likely
+  issue). Behaviors with no non-zero scalars (no sample data) are excluded.
+
+  Also emits a `[:codeqa, :cosine_breakdown]` telemetry event carrying timing data (µs).
+
+  ## Options
+
+    * `:top`       - number of results to return (default 15)
+    * `:language`  - single language string for per-file filtering; when set, only
+                     behaviors whose `_languages` list includes this language are scored
+    * `:languages` - list of language strings for project-level filtering; when set, only
+                     behaviors whose `_languages` list overlaps with this list are scored
+    * `:behavior_map` - `{category, behaviors}` pairs to diagnose instead of the compiled YAMLs
+
+  ## Result shape
+
+      %{
+        category:  "function_design",
+        behavior:  "no_boolean_parameter",
+        cosine:    -0.83,
+        score:     -13.54,
+        top_metrics: [%{metric: "branching.mean_branching_density", contribution: -4.1}, ...]
+      }
+  """
+  @spec diagnose_aggregate(map(), keyword()) :: [map()]
+  def diagnose_aggregate(aggregate, opts \\ []) do
+    top_n = Keyword.get(opts, :top, 15)
+    language = Keyword.get(opts, :language)
+    languages = Keyword.get(opts, :languages)
+    behavior_map = Keyword.get(opts, :behavior_map)
+
+    {pre_us, log_metrics} = :timer.tc(fn -> precompute_log_metrics(aggregate) end)
+    cosine_opts = [log_metrics: log_metrics]
+
+    # Seed the timing accumulator that track_behavior_us/2 adds per-behavior times to.
+    Process.put(:codeqa_cosine_breakdown, %{precompute_log_us: pre_us})
+
+    behaviors_stream =
+      if behavior_map do
+        behavior_map
+        |> Enum.sort_by(fn {category, _} -> category end)
+        |> Enum.flat_map(
+          &diagnose_from_behavior_map_entry(&1, aggregate, language, languages, cosine_opts)
+        )
+      else
+        Scorer.all_yamls()
+        |> Enum.sort_by(fn {path, _} -> path end)
+        |> Enum.flat_map(&diagnose_from_yaml(&1, aggregate, language, languages, cosine_opts))
+      end
+
+    {sort_us, result} =
+      :timer.tc(fn ->
+        behaviors_stream
+        |> Enum.sort_by(& &1.cosine)
+        |> Enum.take(top_n)
+      end)
+
+    breakdown =
+      Process.get(:codeqa_cosine_breakdown, %{})
+      |> Map.put(:sort_take_us, sort_us)
+
+    Process.delete(:codeqa_cosine_breakdown)
+    :telemetry.execute([:codeqa, :cosine_breakdown], breakdown, %{})
+    result
+  end
+
+  @doc """
+  Writes sample-derived scalar suggestions back to the YAML config files.
+
+  Builds the metric report via `build_metric_report/1` and hands it to
+  `CodeQA.CombinedMetrics.ScalarApplier.apply_scalars/2`. For each behavior that
+  has sample data, its scalar entries are rewritten using the log-linear
+  suggestion method. Metrics whose ratio falls in the deadzone are excluded; all
+  other metrics are written, even if they were not previously in the YAML.
+
+  Behaviors without sample data keep their existing scalars.
+
+  Returns a list of per-category stats maps.
+  """
+  @spec apply_scalars(keyword()) :: [map()]
+  def apply_scalars(opts \\ []) do
+    opts |> build_metric_report() |> ScalarApplier.apply_scalars(opts)
+  end
+
+  @doc """
+  Updates only the `_languages` field in YAML config files based on sample data.
+
+  Delegates to `CodeQA.CombinedMetrics.ScalarApplier.apply_languages/1`, which
+  scans the `bad/` and `good/` sample directories of each behavior, detects
+  languages via `CodeQA.Language.detect/1`, and writes the intersection as
+  `_languages` to the YAML. Behaviors for which no common language is found get
+  no new `_languages` value.
+
+  `:category` in `opts` restricts the update to one category.
+
+  Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`.
+  """
+  @spec apply_languages(keyword()) :: [map()]
+  def apply_languages(opts \\ []), do: ScalarApplier.apply_languages(opts)
+
+  # ---------------------------------------------------------------------------
+  # Sample discovery
+  # ---------------------------------------------------------------------------
+
+  # Lists `{category, behavior}` pairs; stray non-directory entries are skipped.
+  defp list_behaviors(samples_root) do
+    for category <- File.ls!(samples_root),
+        category_dir = Path.join(samples_root, category),
+        File.dir?(category_dir),
+        behavior <- File.ls!(category_dir) do
+      {category, behavior}
+    end
+  end
+
+  # True only when the behavior has both a `bad/` and a `good/` sample directory.
+  defp has_both_dirs?(category, behavior) do
+    Enum.all?(["bad", "good"], &File.dir?(sample_path(category, behavior, &1)))
+  end
+
+  # Path of the `kind` ("bad" or "good") sample directory of a behavior.
+  defp sample_path(category, behavior, kind),
+    do: Path.join([@samples_root, category, behavior, kind])
+
+  # Runs the collector and the codebase analyzer on `dir`; yields the aggregate.
+  defp analyze(dir) do
+    files = Collector.collect_files(dir)
+    report = Analyzer.analyze_codebase(files)
+    get_in(report, ["codebase", "aggregate"])
+  end
+
+  # ---------------------------------------------------------------------------
+  # Sample scoring
+  # ---------------------------------------------------------------------------
+
+  # Scores one behavior's formula on its bad and good samples (ratio = good / bad).
+  defp score_behavior(category, behavior, opts) do
+    yaml_path = "priv/combined_metrics/#{category}.yml"
+
+    [bad_agg, good_agg] =
+      Enum.map(["bad", "good"], &analyze(sample_path(category, behavior, &1)))
+
+    bad_score = Scorer.compute_score(yaml_path, behavior, bad_agg)
+    good_score = Scorer.compute_score(yaml_path, behavior, good_agg)
+    ratio = if bad_score > 0, do: good_score / bad_score, else: 0.0
+
+    detail =
+      if opts[:verbose], do: metric_detail(yaml_path, behavior, bad_agg, good_agg), else: []
+
+    %{
+      category: category,
+      behavior: behavior,
+      bad_score: bad_score,
+      good_score: good_score,
+      ratio: Float.round(ratio, 2),
+      direction_ok: good_score >= bad_score,
+      metric_detail: detail
+    }
+  end
+
+  # Per-metric bad/good values for the behavior's scalars, largest ratio deviation first.
+  defp metric_detail(yaml_path, behavior, bad_agg, good_agg) do
+    for {{group, key}, scalar} <- Scorer.scalars_for(yaml_path, behavior) do
+      bad = Scorer.get(bad_agg, group, key)
+      good = Scorer.get(good_agg, group, key)
+      ratio = if bad > 0, do: Float.round(good / bad, 2), else: 0.0
+      %{group: group, key: key, scalar: scalar, bad: bad, good: good, ratio: ratio}
+    end
+    |> Enum.sort_by(&abs(&1.ratio - 1.0), :desc)
+  end
+
+  # ---------------------------------------------------------------------------
+  # Metric report (vector building)
+  # ---------------------------------------------------------------------------
+
+  # Builds the metric table for one behavior: each registered metric is read from
+  # the bad and the good sample aggregate, and a scalar is suggested from the log
+  # difference, scaled so that the largest |log_diff| of the behavior maps to ±2.0.
+  defp behavior_metric_table(category, behavior) do
+    bad_agg = analyze(sample_path(category, behavior, "bad"))
+    good_agg = analyze(sample_path(category, behavior, "good"))
+
+    rows =
+      for {group, key} <- Map.keys(Scorer.default_scalars()) do
+        # Scorer.get/3 only yields values > 0, so log/1 and the division are safe.
+        bad = Scorer.get(bad_agg, group, key)
+        good = Scorer.get(good_agg, group, key)
+        log_bad = :math.log(bad)
+        log_good = :math.log(good)
+
+        stats = %{
+          bad: r4(bad),
+          good: r4(good),
+          log_bad: r4(log_bad),
+          log_good: r4(log_good),
+          ratio: r4(good / bad)
+        }
+
+        {"#{group}.#{key}", stats, log_good - log_bad}
+      end
+
+    # Divisor guards: an empty table falls back to 1.0, all-zero diffs to 1.0e-10.
+    scale =
+      rows
+      |> Enum.map(fn {_name, _stats, log_diff} -> abs(log_diff) end)
+      |> Enum.max(fn -> 1.0 end)
+      |> max(1.0e-10)
+
+    Map.new(rows, fn {name, stats, log_diff} ->
+      {name, Map.put(stats, :suggested_scalar, Float.round(2.0 * log_diff / scale, 4))}
+    end)
+  end
+
+  defp r4(number), do: (number / 1.0) |> Float.round(4)
+
+  # ---------------------------------------------------------------------------
+  # Cosine diagnosis (delegates vector math to CosineVector)
+  # ---------------------------------------------------------------------------
+
+  # Precomputes `:math.log/1` for every numeric entry of the aggregate, giving
+  # %{group => %{key => :math.log(max(val, 1.0e-300))}}. Groups that are not maps
+  # and values that are not numbers are left out.
+  # Called once per diagnose_aggregate/2 invocation; the result is handed on as
+  # the `:log_metrics` cosine option so the log of a metric is not recomputed for
+  # every (behavior, metric) pair.
+  defp precompute_log_metrics(aggregate) do
+    for {group, sub_map} <- aggregate, is_map(sub_map), into: %{} do
+      logs =
+        for {key, val} <- sub_map,
+            is_number(val),
+            into: %{} do
+          # Clamp so zero or negative values cannot make log/1 raise.
+          {key, :math.log(max(val / 1.0, 1.0e-300))}
+        end
+
+      {group, logs}
+    end
+  end
+
+  # Diagnoses every behavior of one `{category, behaviors}` entry taken from a
+  # caller-supplied `:behavior_map`. The YAML path is derived from the category
+  # name, i.e. `priv/combined_metrics/<category>.yml`.
+  defp diagnose_from_behavior_map_entry(
+         {category, behaviors},
+         aggregate,
+         language,
+         languages,
+         cosine_opts
+       ) do
+    ctx = %{
+      yaml_path: "priv/combined_metrics/#{category}.yml",
+      category: category,
+      aggregate: aggregate,
+      language: language,
+      languages: languages,
+      cosine_opts: cosine_opts
+    }
+
+    Enum.flat_map(behaviors, &maybe_diagnose_behavior(&1, ctx))
+  end
+
+  # Diagnoses every behavior of one compiled `{yaml_path, data}` entry. Top-level
+  # YAML values that are not maps are not behaviors and are skipped.
+  defp diagnose_from_yaml({yaml_path, data}, aggregate, language, languages, cosine_opts) do
+    ctx = %{
+      yaml_path: yaml_path,
+      # Path.basename/2 removes the extension exactly once.
+      category: Path.basename(yaml_path, ".yml"),
+      aggregate: aggregate,
+      language: language,
+      languages: languages,
+      cosine_opts: cosine_opts
+    }
+
+    data
+    |> Enum.filter(fn {_behavior, behavior_data} -> is_map(behavior_data) end)
+    |> Enum.flat_map(&maybe_diagnose_behavior(&1, ctx))
+  end
+
+  # Runs the cosine computation for one `{behavior, behavior_data}` pair when the
+  # behavior applies to the requested language(s); otherwise it contributes no
+  # results. `ctx` bundles the values shared by all behaviors of a category
+  # (`:yaml_path`, `:category`, `:aggregate`, `:language`, `:languages`,
+  # `:cosine_opts`) so they do not travel as a long positional argument list.
+  defp maybe_diagnose_behavior({behavior, behavior_data}, ctx) do
+    behavior_langs = Map.get(behavior_data, "_languages", [])
+
+    if behavior_language_applies?(behavior_langs, ctx.language, ctx.languages) do
+      # Time each computation so diagnose_aggregate/2 can report a breakdown.
+      {us, result} =
+        :timer.tc(fn ->
+          CosineVector.compute(
+            ctx.yaml_path,
+            behavior,
+            behavior_data,
+            ctx.aggregate,
+            ctx.category,
+            ctx.cosine_opts
+          )
+        end)
+
+      track_behavior_us(behavior, us)
+      result
+    else
+      []
+    end
+  end
+
+  # Adds `us` microseconds to the behavior's entry in the process-dictionary
+  # breakdown seeded by diagnose_aggregate/2. When no breakdown is present the
+  # timing is dropped. Entries are keyed by the bare behavior name, so equally
+  # named behaviors of different categories share one entry.
+  defp track_behavior_us(behavior, us) do
+    case Process.get(:codeqa_cosine_breakdown) do
+      nil ->
+        :ok
+
+      breakdown ->
+        updated = Map.update(breakdown, behavior, us, &(&1 + us))
+        Process.put(:codeqa_cosine_breakdown, updated)
+    end
+  end
+
+  # ---------------------------------------------------------------------------
+  # Language filtering
+  # ---------------------------------------------------------------------------
+
+  # Decides whether a behavior takes part in scoring for the requested languages.
+  #
+  #   * behavior_langs - the "_languages" list from the YAML ([] = applies to all)
+  #   * language       - single language string from the :language opt (nil = no filter)
+  #   * languages      - project language list from the :languages opt (nil = no filter)
+  defp behavior_language_applies?(_langs, nil, nil), do: true
+  defp behavior_language_applies?([], _language, _languages), do: true
+  defp behavior_language_applies?(_langs, nil, []), do: true
+
+  defp behavior_language_applies?(langs, language, nil) when is_binary(language),
+    do: Enum.member?(langs, language)
+
+  defp behavior_language_applies?(langs, nil, languages) when is_list(languages),
+    do: Enum.any?(langs, &Enum.member?(languages, &1))
+
+  # With both filters set, matching either one is enough.
+  defp behavior_language_applies?(langs, language, languages)
+       when is_binary(language) and is_list(languages),
+       do: Enum.member?(langs, language) or Enum.any?(langs, &Enum.member?(languages, &1))
+
+  # ---------------------------------------------------------------------------
+  # Misc
+  # ---------------------------------------------------------------------------
+
+  # Turns a snake_case slug into a title: "variable_naming" -> "Variable Naming".
+  defp humanize(slug) do
+    words = String.split(slug, "_")
+    Enum.map_join(words, " ", &String.capitalize/1)
+  end
+end
diff --git a/lib/codeqa/combined_metrics/scalar_applier.ex b/lib/codeqa/combined_metrics/scalar_applier.ex
new file mode 100644
index 00000000..1c8ec4b7
--- /dev/null
+++ b/lib/codeqa/combined_metrics/scalar_applier.ex
@@ -0,0 +1,209 @@
+defmodule CodeQA.CombinedMetrics.ScalarApplier do
+  @moduledoc """
+  Writes suggested scalars and language metadata back to the combined-metrics YAML
+  config files under `priv/combined_metrics/`.
+
+  Intended for internal use by `SampleRunner`. Two entry points:
+
+  * `apply_scalars/2`   — rewrites scalar weights using log-linear suggestions
+  * `apply_languages/2` — rewrites `_languages` based on sample file extensions
+  """
+
+  alias CodeQA.CombinedMetrics.YamlFormatter
+
+  @samples_root "priv/combined_metrics/samples"
+  @yaml_dir "priv/combined_metrics"
+  @deadzone_low 0.995
+  @deadzone_high 1.005
+
+  @doc """
+  Applies suggested scalars from `report` (a `build_metric_report/1` result) to
+  the YAML files under `priv/combined_metrics/`.
+
+  Files are processed in name order; each one is read, updated and written
+  back in place. `:category` in `opts` limits the run to the file named
+  `<category>.yml`.
+
+  Returns a list of per-category stats maps with `:category`, `:updated`,
+  `:deadzoned`, and `:skipped` keys.
+  """
+  @spec apply_scalars(map(), keyword()) :: [map()]
+  def apply_scalars(report, opts \\ []) do
+    only = opts[:category]
+
+    for yml_file <- Enum.sort(File.ls!(@yaml_dir)),
+        String.ends_with?(yml_file, ".yml"),
+        # Path.basename/2 removes the extension exactly once.
+        category = Path.basename(yml_file, ".yml"),
+        only == nil or category == only do
+      yaml_path = Path.join(@yaml_dir, yml_file)
+      {:ok, existing} = YamlElixir.read_from_file(yaml_path)
+
+      {updated_yaml, stats} = apply_to_category(existing, category, report)
+      File.write!(yaml_path, YamlFormatter.format(updated_yaml))
+
+      Map.put(stats, :category, category)
+    end
+  end
+
+  @doc """
+  Updates only the `_languages` field in YAML config files based on sample data.
+
+  For every behavior (map-valued top-level entry) the languages common to its
+  `bad/` and `good/` samples are written as `_languages`; when there are none the
+  behavior's entry is written back unchanged. Top-level entries that are not maps
+  are not carried over to the rewritten file.
+
+  `:category` in `opts` limits the run to the file named `<category>.yml`.
+
+  Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`.
+  """
+  @spec apply_languages(keyword()) :: [map()]
+  def apply_languages(opts \\ []) do
+    only = opts[:category]
+
+    for yml_file <- Enum.sort(File.ls!(@yaml_dir)),
+        String.ends_with?(yml_file, ".yml"),
+        # Path.basename/2 removes the extension exactly once.
+        category = Path.basename(yml_file, ".yml"),
+        only == nil or category == only do
+      yaml_path = Path.join(@yaml_dir, yml_file)
+      {:ok, existing} = YamlElixir.read_from_file(yaml_path)
+
+      updated =
+        for {behavior, %{} = groups} <- existing, into: %{} do
+          {behavior, maybe_put_languages(groups, languages_for_behavior(category, behavior))}
+        end
+
+      File.write!(yaml_path, YamlFormatter.format(updated))
+
+      with_languages =
+        Enum.count(updated, fn {_behavior, groups} -> Map.has_key?(groups, "_languages") end)
+
+      %{category: category, behaviors_with_languages: with_languages}
+    end
+  end
+
+  # ---------------------------------------------------------------------------
+  # Scalar application helpers
+  # ---------------------------------------------------------------------------
+
+  # Folds every behavior (map-valued entry) of a category into the rewritten YAML
+  # map plus running stats; behaviors missing from `report` count as skipped.
+  defp apply_to_category(existing, category, report) do
+    initial = {%{}, %{updated: 0, deadzoned: 0, skipped: 0}}
+
+    for {behavior, %{} = current_groups} <- existing, reduce: initial do
+      {acc_yaml, stats} ->
+        doc = read_behavior_doc(category, behavior)
+        case Map.get(report, "#{category}.#{behavior}") do
+          nil ->
+            groups = maybe_put_doc(current_groups, doc)
+            {Map.put(acc_yaml, behavior, groups), Map.update!(stats, :skipped, &(&1 + 1))}
+
+          metrics ->
+            apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc)
+        end
+    end
+  end
+
+  # Merges the report-derived scalars of one behavior into the category result.
+  # If every metric was deadzoned, the behavior's current groups are kept untouched,
+  # including their `_log_baseline`: the freshly accumulated baseline would be 0.0
+  # and would not belong to the retained scalars.
+  defp apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) do
+    {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics)
+
+    # Only freshly derived scalars get a freshly derived baseline.
+    groups =
+      if map_size(new_groups) > 0,
+        do: Map.put(new_groups, "_log_baseline", Float.round(log_baseline, 6)),
+        else: current_groups
+
+    counts = %{updated: stats.updated + n_updated, deadzoned: stats.deadzoned + n_deadzoned}
+
+    {Map.put(acc_yaml, behavior, maybe_put_doc(groups, doc)), Map.merge(stats, counts)}
+  end
+
+  # Folds the report's metrics into {groups, log_baseline, n_updated, n_deadzoned}.
+  # Metrics whose good/bad ratio lies in the deadzone are only counted.
+  defp groups_from_report(metrics) do
+    Enum.reduce(metrics, {%{}, 0.0, 0, 0}, fn {metric_key, data}, acc ->
+      {groups, log_baseline, n_updated, n_deadzoned} = acc
+      [group, key] = String.split(metric_key, ".", parts: 2)
+
+      if deadzone?(data.ratio),
+        do: {groups, log_baseline, n_updated, n_deadzoned + 1},
+        else: accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data)
+    end)
+  end
+
+  # Records one non-deadzoned metric: stores its suggested scalar under
+  # groups[group][key] and adds scalar * log(geometric mean of bad and good) to
+  # the running log baseline.
+  defp accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) do
+    scalar = data.suggested_scalar
+    group_scalars = groups |> Map.get(group, %{}) |> Map.put(key, scalar)
+
+    # Clamp both values so the geometric mean stays positive for log/1.
+    geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10))
+    baseline = log_baseline + scalar * :math.log(geo_mean)
+
+    {Map.put(groups, group, group_scalars), baseline, n_updated + 1, n_deadzoned}
+  end
+
+  defp deadzone?(ratio), do: @deadzone_low <= ratio and ratio <= @deadzone_high
+
+  # Reads the optional `doc` string from the behavior's sample `config.yml`.
+  # Yields nil when the file is unreadable, is not valid YAML, or has no binary
+  # `doc` entry.
+  defp read_behavior_doc(category, behavior) do
+    config_path = Path.join([@samples_root, category, behavior, "config.yml"])
+
+    # Any failure along the way simply means there is no doc.
+    with {:ok, content} <- File.read(config_path),
+         {:ok, %{"doc" => doc}} when is_binary(doc) <- YamlElixir.read_from_string(content) do
+      doc
+    else
+      _ -> nil
+    end
+  end
+
+  defp maybe_put_doc(groups, doc) when is_nil(doc), do: groups
+  defp maybe_put_doc(groups, doc), do: Map.put(groups, "_doc", doc)
+
+  # ---------------------------------------------------------------------------
+  # Language detection helpers
+  # ---------------------------------------------------------------------------
+
+  # Set of language names reported by `CodeQA.Language.detect/1` for the entries
+  # of `dir`. A directory that cannot be listed (for example a missing one)
+  # yields the empty set.
+  defp dir_languages(dir) do
+    case File.ls(dir) do
+      {:ok, files} ->
+        MapSet.new(files, &CodeQA.Language.detect(&1).name())
+
+      _ ->
+        MapSet.new()
+    end
+  end
+
+  # Sorted language names found in both sample sets, without "unknown".
+  defp languages_for_behavior(category, behavior) do
+    [bad_langs, good_langs] =
+      Enum.map(["bad", "good"], &dir_languages(sample_path(category, behavior, &1)))
+
+    bad_langs
+    |> MapSet.intersection(good_langs)
+    |> Enum.reject(&(&1 == "unknown"))
+    |> Enum.sort()
+  end
+
+  defp maybe_put_languages(groups, langs) when langs == [], do: groups
+  defp maybe_put_languages(groups, langs), do: Map.put(groups, "_languages", langs)
+
+  # Path of the `kind` ("bad" or "good") sample directory of a behavior.
+  defp sample_path(category, behavior, kind),
+    do: Path.join([@samples_root, category, behavior, kind])
+end
diff --git a/lib/codeqa/combined_metrics/scope_and_assignment.ex b/lib/codeqa/combined_metrics/scope_and_assignment.ex
new file mode 100644
index 00000000..0b3e616b
--- /dev/null
+++ b/lib/codeqa/combined_metrics/scope_and_assignment.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.ScopeAndAssignment do
+  @moduledoc """
+  Behaviour and submodule registry for variable scope and assignment quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/scope_and_assignment.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/scope_and_assignment.yml"
+  # Read at compile time below; register it so YAML edits trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(__MODULE__, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.ScopeAndAssignment
+      @moduledoc doc
+      @behaviour ScopeAndAssignment
+      @score_key key
+      @impl true
+      def score(metrics), do: ScopeAndAssignment.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/scorer.ex b/lib/codeqa/combined_metrics/scorer.ex
new file mode 100644
index 00000000..b1220aa9
--- /dev/null
+++ b/lib/codeqa/combined_metrics/scorer.ex
@@ -0,0 +1,106 @@
+defmodule CodeQA.CombinedMetrics.Scorer do
+  alias CodeQA.Engine.Analyzer
+
+  @moduledoc """
+  Pure computation engine for combined metric formulas.
+
+  Loads scalar weights from a YAML file and computes a score as a product of
+  metric powers:
+
+      score = metric_a ^ s_a  *  metric_b ^ s_b  *  ...
+
+  Scalars of 0.0 contribute nothing (x^0 = 1.0) and are the default for all
+  metric keys not listed in the YAML. Negative scalars penalise a metric
+  (higher raw value → lower score).
+  """
+
+  @doc """
+  Computes the score for `metric_name` using scalars from `yaml_path`.
+
+  `metrics` is the `codebase.aggregate` map returned by `codeqa analyze`. Every
+  registered metric starts with scalar 0.0 and the YAML scalars override it.
+  """
+  @spec compute_score(String.t(), String.t(), map()) :: float()
+  def compute_score(yaml_path, metric_name, metrics) do
+    scalars = Map.merge(default_scalars(), scalars_for(yaml_path, metric_name))
+    for {{group, key}, scalar} <- scalars, reduce: 1.0 do
+      product -> product * pow(get(metrics, group, key), scalar)
+    end
+  end
+
+  @doc """
+  Returns the scalar overrides listed for `metric_name` in `yaml_path`.
+
+  Keys are `{group, key}` tuples and values are floats. Entries of the behavior
+  that are not maps (for example `_doc`) carry no scalars and are ignored. An
+  unknown path or behavior yields an empty map.
+  """
+  @spec scalars_for(String.t(), String.t()) :: %{{String.t(), String.t()} => float()}
+  def scalars_for(yaml_path, metric_name) do
+    behavior = yaml_path |> yaml_data() |> Map.get(metric_name, %{})
+
+    for {group, %{} = keys} <- behavior, {key, scalar} <- keys, into: %{} do
+      {{group, key}, scalar / 1.0}
+    end
+  end
+
+  @doc "Returns the default scalars: each registered file metric's `mean_<key>` mapped to 0.0."
+  @spec default_scalars() :: %{{String.t(), String.t()} => float()}
+  def default_scalars do
+    registry = Analyzer.build_registry()
+
+    for mod <- registry.file_metrics, key <- mod.keys(), into: %{} do
+      {{mod.name(), "mean_" <> key}, 0.0}
+    end
+  end
+
+  @doc "Fetches `metrics[group][key]` as a float; missing or non-positive values yield 1.0."
+  @spec get(map(), String.t(), String.t()) :: float()
+  def get(metrics, group, key) do
+    value = get_in(metrics, [group, key])
+
+    # 1.0 is the neutral element of the score product.
+    if is_number(value) and value > 0, do: value / 1.0, else: 1.0
+  end
+
+  @doc "Raises `base` to the power `scalar`; bases that are not positive yield 1.0."
+  @spec pow(float(), float()) :: float()
+  def pow(base, _scalar) when base <= 0, do: 1.0
+  def pow(base, scalar), do: :math.pow(base, scalar)
+
+  # Every category YAML is parsed while compiling and embedded in the module.
+  @yaml_dir "priv/combined_metrics"
+  @yaml_paths Path.wildcard(Path.join(@yaml_dir, "*.yml"))
+  for path <- @yaml_paths, do: @external_resource(path)
+  @compiled_yamls Enum.into(@yaml_paths, %{}, fn path ->
+                    {:ok, data} = YamlElixir.read_from_file(path)
+                    {path, data}
+                  end)
+
+  @doc "Returns the YAML data parsed at compile time as `%{path => parsed_map}`."
+  @spec all_yamls() :: %{String.t() => map()}
+  def all_yamls, do: @compiled_yamls
+
+  # Keys of every behavior body that do not start with "_" are the metric group
+  # names the behavior references; non-map YAML values are skipped on both levels.
+  @referenced_file_metric_names @compiled_yamls
+                                |> Map.values()
+                                |> Enum.filter(&is_map/1)
+                                |> Enum.flat_map(&Map.values/1)
+                                |> Enum.filter(&is_map/1)
+                                |> Enum.flat_map(&Map.keys/1)
+                                |> Enum.reject(&String.starts_with?(&1, "_"))
+                                |> MapSet.new()
+
+  @doc """
+  Returns the set of file-metric module names (e.g. "halstead", "ngram") that
+  at least one behavior in the compiled YAMLs references.
+
+  The set is built at compile time from `@compiled_yamls`. Used by the LOO cache
+  to skip recompute of metrics whose values cannot influence any cosine.
+  """
+  @spec referenced_file_metric_names() :: MapSet.t()
+  def referenced_file_metric_names, do: @referenced_file_metric_names
+
+  defp yaml_data(path), do: Map.get(@compiled_yamls, path, %{})
+end
diff --git a/lib/codeqa/combined_metrics/testing.ex b/lib/codeqa/combined_metrics/testing.ex
new file mode 100644
index 00000000..52b41e40
--- /dev/null
+++ b/lib/codeqa/combined_metrics/testing.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.Testing do
+  @moduledoc """
+  Behaviour and submodule registry for test quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/testing.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/testing.yml"
+  # Read at compile time below; register it so YAML edits trigger recompilation.
+  @external_resource @yaml_path
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(__MODULE__, Macro.camelize(key)) do
+      alias CodeQA.CombinedMetrics.Testing
+      @moduledoc doc
+      @behaviour Testing
+      @score_key key
+      @impl true
+      def score(metrics), do: Testing.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/type_and_value.ex b/lib/codeqa/combined_metrics/type_and_value.ex
new file mode 100644
index 00000000..d461c60b
--- /dev/null
+++ b/lib/codeqa/combined_metrics/type_and_value.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.TypeAndValue do
+  @moduledoc """
+  Behaviour for type safety and value assignment quality metrics, plus one
+  generated scorer submodule per behavior in the category YAML (read at compile time).
+
+  Scalar weights are defined in `priv/combined_metrics/type_and_value.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/type_and_value.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # `{behavior_key, doc}` pairs; top-level YAML entries that are not maps are skipped.
+  @behaviors for {name, spec} <- YamlElixir.read_from_file!(@yaml_path),
+                 is_map(spec),
+                 do: {name, Map.get(spec, "_doc")}
+
+  for {name, text} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.TypeAndValue, Macro.camelize(name)) do
+      alias CodeQA.CombinedMetrics.TypeAndValue
+      @moduledoc text
+      @behaviour TypeAndValue
+      @score_key name
+      @impl true
+      def score(metrics), do: TypeAndValue.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/variable_naming.ex b/lib/codeqa/combined_metrics/variable_naming.ex
new file mode 100644
index 00000000..db49793e
--- /dev/null
+++ b/lib/codeqa/combined_metrics/variable_naming.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.CombinedMetrics.VariableNaming do
+  @moduledoc """
+  Behaviour for variable naming quality metrics, plus one generated scorer
+  submodule per behavior found in the category YAML (read at compile time).
+
+  Scalar weights are defined in `priv/combined_metrics/variable_naming.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/variable_naming.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # `{behavior_key, doc}` pairs; top-level YAML entries that are not maps are skipped.
+  @behaviors for {name, spec} <- YamlElixir.read_from_file!(@yaml_path),
+                 is_map(spec),
+                 do: {name, Map.get(spec, "_doc")}
+
+  for {name, text} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.VariableNaming, Macro.camelize(name)) do
+      alias CodeQA.CombinedMetrics.VariableNaming
+      @moduledoc text
+      @behaviour VariableNaming
+      @score_key name
+      @impl true
+      def score(metrics), do: VariableNaming.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/yaml_formatter.ex b/lib/codeqa/combined_metrics/yaml_formatter.ex
new file mode 100644
index 00000000..8c76a668
--- /dev/null
+++ b/lib/codeqa/combined_metrics/yaml_formatter.ex
@@ -0,0 +1,84 @@
+defmodule CodeQA.CombinedMetrics.YamlFormatter do
+  @moduledoc """
+  Turns a combined-metrics behavior map back into the hand-authored YAML layout.
+
+  Intended for internal use by `SampleRunner`. The output follows the
+  conventions used across `priv/combined_metrics/*.yml`:
+
+  - behaviors in alphabetical order
+  - meta-keys in the order `_doc`, `_excludes_block_types`, `_fix_hint`,
+    `_languages`, `_log_baseline`, ahead of the group sections
+  - groups, and the keys inside each group, in alphabetical order
+  - floats printed with four decimal places, integers as `N.0`
+  """
+
+  # Keys that hold behavior metadata and are never treated as metric groups.
+  @meta_keys ["_doc", "_log_baseline", "_fix_hint", "_languages", "_excludes_block_types"]
+
+  @doc """
+  Renders a `%{behavior => groups}` map as YAML text.
+
+  Each behavior section is followed by a blank line.
+  """
+  @spec format(map()) :: String.t()
+  def format(data) do
+    data
+    |> Enum.sort_by(&elem(&1, 0))
+    |> Enum.flat_map(fn {name, groups} -> behavior_lines(name, groups) end)
+    |> Enum.join("\n")
+    |> Kernel.<>("\n")
+  end
+
+  # --- Behavior-level serialisation ---
+
+  # Header line, meta-key lines, group sections, then an empty separator entry.
+  defp behavior_lines(behavior, groups) do
+    meta = &Map.get(groups, &1)
+
+    List.flatten([
+      "#{behavior}:",
+      quoted_line("_doc", meta.("_doc")),
+      list_line("_excludes_block_types", meta.("_excludes_block_types")),
+      quoted_line("_fix_hint", meta.("_fix_hint")),
+      list_line("_languages", meta.("_languages")),
+      baseline_line(meta.("_log_baseline")),
+      group_lines(groups),
+      ""
+    ])
+  end
+
+  # `key: <inspect of value>`; no line at all when the value is nil.
+  defp quoted_line(_key, nil), do: []
+  defp quoted_line(key, text), do: ["  #{key}: #{inspect(text)}"]
+
+  # Inline `key: [a, b]` list; no line when the value is nil or empty.
+  defp list_line(_key, nil), do: []
+  defp list_line(_key, []), do: []
+  defp list_line(key, items), do: ["  #{key}: [#{Enum.join(items, ", ")}]"]
+
+  # The baseline goes through `fmt_scalar/1`, so it has to be a number.
+  defp baseline_line(nil), do: []
+  defp baseline_line(value), do: ["  _log_baseline: #{fmt_scalar(value)}"]
+
+  # Map-valued, non-meta entries are groups; they are emitted sorted by name.
+  defp group_lines(groups) do
+    groups
+    |> Enum.filter(fn {name, value} -> name not in @meta_keys and is_map(value) end)
+    |> Enum.sort_by(&elem(&1, 0))
+    |> Enum.flat_map(&group_section/1)
+  end
+
+  # A `  group:` header followed by its `    key: value` lines, keys sorted.
+  defp group_section({group, keys}) do
+    entries =
+      for {key, scalar} <- Enum.sort_by(keys, &elem(&1, 0)),
+          do: "    #{key}: #{fmt_scalar(scalar)}"
+
+    ["  #{group}:" | entries]
+  end
+
+  # Numbers only: floats get four decimals, integers a `.0` suffix.
+  # Any other term raises FunctionClauseError.
+  defp fmt_scalar(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 4)
+  defp fmt_scalar(value) when is_integer(value), do: "#{value}.0"
+end
diff --git a/lib/codeqa/comparator.ex b/lib/codeqa/comparator.ex
deleted file mode 100644
index 4fbfa40e..00000000
--- a/lib/codeqa/comparator.ex
+++ /dev/null
@@ -1,109 +0,0 @@
-defmodule CodeQA.Comparator do
-  @moduledoc "Compare two analysis results and compute metric deltas."
-
-  def compare_results(base_result, head_result, changes) do
-    base_files = Map.get(base_result, "files", %{})
-    head_files = Map.get(head_result, "files", %{})
-
-    {file_comparisons, status_counts} =
-      changes
-      |> Enum.reduce({%{}, %{"added" => 0, "modified" => 0, "deleted" => 0}}, fn change,
-                                                                                 {files, counts} ->
-        base_data = Map.get(base_files, change.path)
-        head_data = Map.get(head_files, change.path)
-        delta = compute_file_delta(base_data, head_data)
-
-        file_entry = %{
-          "status" => change.status,
-          "base" => base_data,
-          "head" => head_data,
-          "delta" => delta
-        }
-
-        {Map.put(files, change.path, file_entry), Map.update!(counts, change.status, &(&1 + 1))}
-      end)
-
-    base_agg = get_in(base_result, ["codebase", "aggregate"]) || %{}
-    head_agg = get_in(head_result, ["codebase", "aggregate"]) || %{}
-    agg_delta = compute_aggregate_delta(base_agg, head_agg)
-
-    summary = build_summary(status_counts)
-
-    %{
-      "metadata" => %{
-        "total_files_compared" => length(changes),
-        "summary" => summary
-      },
-      "files" => file_comparisons,
-      "codebase" => %{
-        "base" => %{"aggregate" => base_agg},
-        "head" => %{"aggregate" => head_agg},
-        "delta" => %{"aggregate" => agg_delta}
-      }
-    }
-  end
-
-  defp compute_file_delta(nil, _head), do: nil
-  defp compute_file_delta(_base, nil), do: nil
-
-  defp compute_file_delta(base_data, head_data) do
-    top_delta =
-      ["bytes", "lines"]
-      |> Enum.reduce(%{}, fn key, acc ->
-        case {Map.get(base_data, key), Map.get(head_data, key)} do
-          {b, h} when is_number(b) and is_number(h) -> Map.put(acc, key, h - b)
-          _ -> acc
-        end
-      end)
-
-    base_metrics = Map.get(base_data, "metrics", %{})
-    head_metrics = Map.get(head_data, "metrics", %{})
-
-    metrics_delta =
-      MapSet.new(Map.keys(base_metrics) ++ Map.keys(head_metrics))
-      |> Enum.reduce(%{}, fn metric_name, acc ->
-        base_m = Map.get(base_metrics, metric_name, %{})
-        head_m = Map.get(head_metrics, metric_name, %{})
-        delta = compute_numeric_delta(base_m, head_m)
-        if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta)
-      end)
-
-    Map.put(top_delta, "metrics", metrics_delta)
-  end
-
-  defp compute_aggregate_delta(base_agg, head_agg) do
-    MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg))
-    |> Enum.reduce(%{}, fn metric_name, acc ->
-      base_m = Map.get(base_agg, metric_name, %{})
-      head_m = Map.get(head_agg, metric_name, %{})
-      delta = compute_numeric_delta(base_m, head_m)
-      if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta)
-    end)
-  end
-
-  defp compute_numeric_delta(base, head) do
-    MapSet.new(Map.keys(base) ++ Map.keys(head))
-    |> Enum.reduce(%{}, fn key, acc ->
-      case {Map.get(base, key), Map.get(head, key)} do
-        {b, h} when is_number(b) and is_number(h) ->
-          Map.put(acc, key, Float.round((h - b) / 1, 4))
-
-        _ ->
-          acc
-      end
-    end)
-  end
-
-  defp build_summary(counts) do
-    parts =
-      [
-        {"added", counts["added"]},
-        {"modified", counts["modified"]},
-        {"deleted", counts["deleted"]}
-      ]
-      |> Enum.filter(fn {_, c} -> c > 0 end)
-      |> Enum.map(fn {status, count} -> "#{count} #{status}" end)
-
-    if parts == [], do: "no changes", else: Enum.join(parts, ", ")
-  end
-end
diff --git a/lib/codeqa/config.ex b/lib/codeqa/config.ex
new file mode 100644
index 00000000..5171eacb
--- /dev/null
+++ b/lib/codeqa/config.ex
@@ -0,0 +1,99 @@
+defmodule CodeQA.Config do
+  @moduledoc "Loads and caches .codeqa.yml configuration via :persistent_term."
+
+  @key {__MODULE__, :config}
+
+  @default_impact %{
+    "complexity" => 5,
+    "file_structure" => 4,
+    "function_design" => 4,
+    "code_smells" => 3,
+    "naming_conventions" => 2,
+    "error_handling" => 2,
+    "consistency" => 2,
+    "documentation" => 1,
+    "testing" => 1
+  }
+
+  defstruct ignore_paths: [],
+            impact_map: @default_impact,
+            combined_top: 2,
+            cosine_significance_threshold: 0.15,
+            near_duplicate_blocks: []
+
+  @doc """
+  Parses `<path>/.codeqa.yml` and caches the result; a no-op when already cached.
+  """
+  @spec load(String.t()) :: :ok
+  def load(path) do
+    cached = :persistent_term.get(@key, nil)
+    if is_nil(cached), do: :persistent_term.put(@key, parse(path))
+    :ok
+  end
+
+  @doc "Erases the cached config so the next `load/1` parses the file again."
+  @spec reset() :: :ok
+  def reset do
+    _erased? = :persistent_term.erase(@key)
+    :ok
+  end
+
+  @spec ignore_paths() :: [String.t()]
+  def ignore_paths, do: current().ignore_paths
+
+  @spec impact_map() :: %{String.t() => pos_integer()}
+  def impact_map, do: current().impact_map
+
+  @spec combined_top() :: pos_integer()
+  def combined_top, do: current().combined_top
+
+  @spec cosine_significance_threshold() :: float()
+  def cosine_significance_threshold, do: current().cosine_significance_threshold
+
+  @spec near_duplicate_blocks_opts() :: keyword()
+  def near_duplicate_blocks_opts, do: current().near_duplicate_blocks
+
+  # The cached config, or the struct defaults when nothing has been loaded.
+  defp current, do: :persistent_term.get(@key, %__MODULE__{})
+
+  # Reads `<path>/.codeqa.yml`. Falls back to the struct defaults when the file
+  # cannot be read, is not valid YAML, or its top level is not a mapping: a bare
+  # list or scalar would otherwise reach `Map.get/3` in `from_yaml/1` and raise
+  # BadMapError.
+  defp parse(path) do
+    config_file = Path.join(path, ".codeqa.yml")
+
+    with {:ok, contents} <- File.read(config_file),
+         {:ok, yaml} when is_map(yaml) <- YamlElixir.read_from_string(contents) do
+      from_yaml(yaml)
+    else
+      _ -> %__MODULE__{}
+    end
+  end
+
+  # Builds the config from a decoded YAML mapping; absent keys keep the defaults.
+  defp from_yaml(yaml) do
+    struct!(__MODULE__,
+      ignore_paths: parse_ignore_paths(yaml),
+      impact_map: parse_impact(yaml),
+      combined_top: Map.get(yaml, "combined_top", 2),
+      cosine_significance_threshold: Map.get(yaml, "cosine_significance_threshold", 0.15),
+      near_duplicate_blocks: parse_near_duplicate_blocks(yaml)
+    )
+  end
+
+  defp parse_ignore_paths(%{"ignore_paths" => patterns}) when is_list(patterns), do: patterns
+  defp parse_ignore_paths(_), do: []
+
+  defp parse_impact(%{"impact" => overrides}) when is_map(overrides) do
+    Enum.reduce(overrides, @default_impact, fn {k, v}, acc -> Map.put(acc, to_string(k), v) end)
+  end
+
+  defp parse_impact(_), do: @default_impact
+
+  defp parse_near_duplicate_blocks(%{"near_duplicate_blocks" => %{"max_pairs_per_bucket" => n}})
+       when is_integer(n),
+       do: [max_pairs_per_bucket: n]
+
+  defp parse_near_duplicate_blocks(_), do: []
+end
diff --git a/lib/codeqa/diagnostics.ex b/lib/codeqa/diagnostics.ex
new file mode 100644
index 00000000..f2479e09
--- /dev/null
+++ b/lib/codeqa/diagnostics.ex
@@ -0,0 +1,171 @@
+defmodule CodeQA.Diagnostics do
+  @moduledoc """
+  Diagnoses a codebase by identifying likely code quality issues using
+  cosine similarity against combined metric behavior profiles.
+  """
+
+  alias CodeQA.CombinedMetrics.FileScorer
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+  alias CodeQA.HealthReport.Grader
+
+  @doc """
+  Runs diagnostics on the given path and returns results as a string.
+
+  ## Options
+
+    * `:path` - directory to analyze (required); `Collector.collect_files/2`
+      raises `File.Error` when it is not a directory
+    * `:mode` - `:aggregate` (default) or `:per_file`
+    * `:top` - number of top issues to display (default 15)
+    * `:format` - `:plain` or `:json` (default `:plain`)
+  """
+  @spec run(keyword()) :: String.t()
+  def run(opts) do
+    path = opts[:path]
+    mode = opts[:mode] || :aggregate
+    top = opts[:top] || 15
+    format = opts[:format] || :plain
+
+    files = Collector.collect_files(path)
+    result = Analyzer.analyze_codebase(files, [])
+
+    case mode do
+      :per_file -> run_per_file(result, top, format)
+      _ -> run_aggregate(result, top, format)
+    end
+  end
+
+  # Scores the codebase-wide aggregate. The issue list and the category scores
+  # are computed in two concurrent tasks, then rendered as JSON or markdown.
+  defp run_aggregate(result, top, format) do
+    aggregate = get_in(result, ["codebase", "aggregate"])
+    languages = result |> Map.get("files", %{}) |> project_languages()
+
+    [issues, categories] =
+      [
+        fn -> SampleRunner.diagnose_aggregate(aggregate, top: top, languages: languages) end,
+        fn -> SampleRunner.score_aggregate(aggregate, languages: languages) end
+      ]
+      |> Enum.map(&Task.async/1)
+      |> Enum.map(&Task.await/1)
+
+    case format do
+      :json ->
+        Jason.encode!(%{issues: issues, categories: categories}, pretty: true)
+
+      _ ->
+        IO.iodata_to_binary([
+          "## Diagnose: aggregate\n\n",
+          issues_table(issues),
+          "\n",
+          categories_text(categories)
+        ])
+    end
+  end
+
+  # Diagnoses every analyzed file from its own metrics and renders the result
+  # as JSON (`%{files: [...]}`) or as one markdown table.
+  # NOTE(review): this passes `language:` while run_aggregate/3 passes
+  # `languages:` to the same SampleRunner function; confirm both are honoured.
+  defp run_per_file(result, top, format) do
+    per_file =
+      for {path, data} <- Map.get(result, "files", %{}), into: %{} do
+        aggregate = data |> Map.get("metrics", %{}) |> FileScorer.file_to_aggregate()
+        lang = CodeQA.Language.detect(path).name()
+        {path, SampleRunner.diagnose_aggregate(aggregate, top: top, language: lang)}
+      end
+
+    render_per_file(format, per_file, top)
+  end
+
+  # Output rendering for run_per_file/3, selected by the requested format.
+  defp render_per_file(:json, per_file, _top) do
+    entries =
+      for {path, diagnoses} <- per_file,
+          do: %{file: path, behaviors: Enum.map(diagnoses, &diagnosis_to_map/1)}
+
+    Jason.encode!(%{files: entries}, pretty: true)
+  end
+
+  defp render_per_file(_format, per_file, top) do
+    rows =
+      Enum.flat_map(per_file, fn {path, diagnoses} -> diagnoses_to_rows(path, diagnoses) end)
+
+    "## Diagnose: per-file\n\n" <> per_file_table(rows, top)
+  end
+
+  # JSON-friendly view of one diagnosis. The score is derived from the cosine
+  # via `Grader.score_cosine/1` rather than read from the diagnosis.
+  defp diagnosis_to_map(diagnosis) do
+    key = "#{diagnosis.category}.#{diagnosis.behavior}"
+    cosine = diagnosis.cosine
+
+    %{behavior: key, cosine: cosine, score: Grader.score_cosine(cosine)}
+  end
+
+  # One `{file, "category.behavior", cosine, score}` tuple per diagnosis.
+  defp diagnoses_to_rows(file_path, diagnoses) do
+    Enum.map(diagnoses, fn %{category: c, behavior: b, cosine: cosine, score: score} ->
+      {file_path, "#{c}.#{b}", cosine, score}
+    end)
+  end
+
+  # Distinct detected language names over all file paths, without "unknown".
+  defp project_languages(files_map) do
+    names = for path <- Map.keys(files_map), do: CodeQA.Language.detect(path).name()
+    Enum.uniq(names) -- ["unknown"]
+  end
+
+  # Markdown table of aggregate issues, cosine and score with two decimals.
+  # Every line, including the last row, is terminated by "\n".
+  defp issues_table(issues) do
+    header = "| Behavior | Cosine | Score |\n|----------|--------|-------|\n"
+
+    body =
+      Enum.map_join(issues, fn %{category: cat, behavior: beh, cosine: cosine, score: score} ->
+        "| #{cat}.#{beh} | #{two_decimals(cosine)} | #{two_decimals(score)} |\n"
+      end)
+
+    header <> body
+  end
+
+  # One "### name" section with a score table per category; sections are
+  # separated by a blank line.
+  defp categories_text(categories) do
+    Enum.map_join(categories, "\n", fn %{name: name, behaviors: behaviors} ->
+      rows =
+        Enum.map_join(behaviors, fn %{behavior: beh, score: score} ->
+          "| #{beh} | #{two_decimals(score)} |\n"
+        end)
+
+      "### #{name}\n| Behavior | Score |\n|----------|-------|\n" <> rows
+    end)
+  end
+
+  # Two-decimal rendering; `/ 1.0` turns integers into floats first.
+  defp two_decimals(number), do: :erlang.float_to_binary(number / 1.0, decimals: 2)
+
+  # Markdown table holding, for each file, its `top` lowest-cosine rows in
+  # ascending cosine order. The score column is recomputed from the cosine, so
+  # the score carried in each row tuple is not used. No trailing newline.
+  defp per_file_table(rows, top) do
+    header = ["| File | Behavior | Cosine | Score |", "|------|----------|--------|-------|"]
+
+    body =
+      rows
+      |> Enum.group_by(&elem(&1, 0))
+      |> Enum.flat_map(fn {_file, file_rows} ->
+        file_rows |> Enum.sort_by(&elem(&1, 2)) |> Enum.take(top)
+      end)
+      |> Enum.map(&per_file_row/1)
+
+    Enum.join(header ++ body, "\n")
+  end
+
+  defp per_file_row({file_path, behavior_key, cosine, _score}) do
+    cosine_str = :erlang.float_to_binary(cosine / 1.0, decimals: 2)
+    "| #{file_path} | #{behavior_key} | #{cosine_str} | #{Grader.score_cosine(cosine)} |"
+  end
+end
diff --git a/lib/codeqa/engine/analyzer.ex b/lib/codeqa/engine/analyzer.ex
new file mode 100644
index 00000000..2436581c
--- /dev/null
+++ b/lib/codeqa/engine/analyzer.ex
@@ -0,0 +1,221 @@
+defmodule CodeQA.Engine.Analyzer do
+  @moduledoc "Orchestrates metric computation across files."
+
+  alias CodeQA.Analysis.RunSupervisor
+  alias CodeQA.BlockImpactAnalyzer
+  alias CodeQA.Engine.Parallel
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Engine.Registry
+  alias CodeQA.Metrics.Codebase, as: CodebaseMetrics
+  alias CodeQA.Metrics.File, as: Metrics
+
+  @registry Registry.new()
+            |> Registry.register_file_metric(Metrics.Entropy)
+            |> Registry.register_file_metric(Metrics.Compression)
+            |> Registry.register_file_metric(Metrics.Zipf)
+            |> Registry.register_file_metric(Metrics.Heaps)
+            |> Registry.register_file_metric(Metrics.Vocabulary)
+            |> Registry.register_file_metric(Metrics.Ngram)
+            |> Registry.register_file_metric(Metrics.Halstead)
+            |> Registry.register_file_metric(Metrics.Readability)
+            |> Registry.register_file_metric(Metrics.CasingEntropy)
+            |> Registry.register_file_metric(Metrics.IdentifierLengthVariance)
+            |> Registry.register_file_metric(Metrics.Indentation)
+            |> Registry.register_file_metric(Metrics.Branching)
+            |> Registry.register_file_metric(Metrics.FunctionMetrics)
+            |> Registry.register_file_metric(Metrics.MagicNumberDensity)
+            |> Registry.register_file_metric(Metrics.SymbolDensity)
+            |> Registry.register_file_metric(Metrics.VowelDensity)
+            |> Registry.register_file_metric(Metrics.Brevity)
+            |> Registry.register_file_metric(Metrics.PunctuationDensity)
+            |> Registry.register_file_metric(Metrics.CommentStructure)
+            |> Registry.register_file_metric(Metrics.SeparatorCounts)
+            |> Registry.register_file_metric(Metrics.LinePatterns)
+            |> Registry.register_codebase_metric(CodebaseMetrics.Similarity)
+            |> Registry.register_file_metric(Metrics.NearDuplicateBlocksFile)
+            |> Registry.register_codebase_metric(CodebaseMetrics.NearDuplicateBlocksCodebase)
+
+  def build_registry, do: @registry
+
+  @doc "Runs every registered file metric on `content`; the path argument is unused."
+  @spec analyze_file(String.t(), String.t()) :: map()
+  def analyze_file(_path, content),
+    do: Registry.run_file_metrics(@registry, Pipeline.build_file_context(content), [])
+
+  @doc "Like `analyze_file/2`, with the context built using `skip_structural: true`."
+  @spec analyze_file_for_loo(String.t(), String.t()) :: map()
+  def analyze_file_for_loo(_path, content) do
+    loo_ctx = Pipeline.build_file_context(content, skip_structural: true)
+    Registry.run_file_metrics(@registry, loo_ctx, [])
+  end
+
+  @doc """
+  Like `analyze_file_for_loo/2`, but recomputes only the metrics whose name is in
+  `Scorer.referenced_file_metric_names/0`; every other entry of `baseline_metrics`
+  is returned as is. Modules exporting `analyze_loo/2` derive their value from the
+  baseline and `block_content`; the rest run `analyze/1` on a rebuilt file context.
+  """
+  @spec analyze_file_for_loo_partial(String.t(), String.t(), map(), String.t()) :: map()
+  def analyze_file_for_loo_partial(_path, content, baseline_metrics, block_content \\ "") do
+    referenced = CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names()
+
+    {ctx_us, ctx} =
+      :timer.tc(fn -> Pipeline.build_file_context(content, skip_structural: true) end)
+
+    {pairs, timings} =
+      Enum.map_reduce(baseline_metrics, %{ctx: ctx_us}, fn {name, baseline}, timings ->
+        if MapSet.member?(referenced, name) do
+          mod = registered_module_for(name)
+          {us, value} = timed_loo_value(mod, baseline, block_content, ctx)
+          {{name, value}, Map.put(timings, name, us)}
+        else
+          {{name, baseline}, timings}
+        end
+      end)
+
+    :telemetry.execute([:codeqa, :loo_breakdown], timings, %{})
+    Map.new(pairs)
+  end
+
+  # Times the metric's LOO shortcut when it exports one, else a full `analyze/1`.
+  defp timed_loo_value(mod, baseline, block_content, ctx) do
+    if function_exported?(mod, :analyze_loo, 2) do
+      :timer.tc(fn -> mod.analyze_loo(baseline, block_content) end)
+    else
+      :timer.tc(fn -> mod.analyze(ctx) end)
+    end
+  end
+
+  defp registered_module_for(name) do
+    found = Enum.find(@registry.file_metrics, &(&1.name() == name))
+    found || raise "no registered file metric module for name #{inspect(name)}"
+  end
+
+  @doc "Analyzes all files inside a run context and returns only the aggregate map."
+  @spec analyze_codebase_aggregate(map(), keyword()) :: map()
+  def analyze_codebase_aggregate(files_map, opts \\ []) do
+    with_run_context(opts, fn run_opts ->
+      files_map |> Parallel.analyze_files(run_opts) |> aggregate_file_metrics()
+    end)
+  end
+
+  @doc "Runs the full codebase analysis inside a run context."
+  def analyze_codebase(files, opts \\ []),
+    do: with_run_context(opts, fn run_opts -> do_analyze_codebase(files, run_opts) end)
+
+  # Runs `fun` with opts extended by the run's server pids; always stops the supervisor.
+  defp with_run_context(opts, fun) do
+    {:ok, sup} = RunSupervisor.start_link()
+    ctx = RunSupervisor.run_context(sup)
+    opts = Keyword.put(opts, :file_context_pid, ctx.file_context_pid)
+
+    try do
+      fun.(Keyword.put(opts, :behavior_config_pid, ctx.behavior_config_pid))
+    after
+      Supervisor.stop(sup)
+    end
+  end
+
+  # Pipeline behind `analyze_codebase/2`. Each step is timed through `stage/3`,
+  # in this order: per-file analysis, aggregation, block impact (only when
+  # `opts[:compute_nodes]` is truthy), and finally the codebase-level metrics.
+  defp do_analyze_codebase(files, opts) do
+    file_results =
+      stage(:parallel_files, %{file_count: map_size(files)}, fn ->
+        Parallel.analyze_files(files, opts)
+      end)
+
+    aggregate = stage(:aggregate, %{}, fn -> aggregate_file_metrics(file_results) end)
+    base_result = %{"files" => file_results, "codebase" => %{"aggregate" => aggregate}}
+
+    if Keyword.get(opts, :compute_nodes, false) do
+      with_block_impact(base_result, aggregate, files, opts)
+    else
+      codebase_metrics = codebase_metrics_stage(files, opts)
+      %{base_result | "codebase" => Map.put(codebase_metrics, "aggregate", aggregate)}
+    end
+  end
+
+  # Block-impact variant: the map returned by `BlockImpactAnalyzer.analyze/3`
+  # becomes the result, and on key clashes its "codebase" entries win over the
+  # codebase-level metrics merged into it.
+  defp with_block_impact(base_result, aggregate, files, opts) do
+    nodes_opts =
+      [baseline_codebase_agg: aggregate] ++
+        Keyword.take(opts, [:nodes_top, :workers, :behavior_config_pid])
+
+    impacted =
+      stage(:block_impact, %{file_count: map_size(files)}, fn ->
+        BlockImpactAnalyzer.analyze(base_result, files, nodes_opts)
+      end)
+
+    codebase_metrics = codebase_metrics_stage(files, opts)
+    merged = Map.merge(codebase_metrics, impacted["codebase"])
+
+    Map.put(impacted, "codebase", merged)
+  end
+
+  # Timed run of the registered codebase-level metrics over all files; shared by
+  # both branches of `do_analyze_codebase/2`.
+  defp codebase_metrics_stage(files, opts) do
+    stage(:codebase_metrics, %{file_count: map_size(files)}, fn ->
+      Registry.run_codebase_metrics(@registry, files, opts)
+    end)
+  end
+
+  # Runs `fun` and reports its duration (microseconds) as a `[:codeqa, :stage]` event.
+  defp stage(name, metadata, fun) do
+    started = System.monotonic_time(:microsecond)
+    result = fun.()
+    elapsed = System.monotonic_time(:microsecond) - started
+    :telemetry.execute([:codeqa, :stage], %{duration: elapsed}, Map.put(metadata, :stage, name))
+    result
+  end
+
+  # `{metric, key, float}` for every numeric value of one metric's result map.
+  defp metric_data_to_triples({metric_name, metric_data}) do
+    for {key, value} <- metric_data, is_number(value), do: {metric_name, key, value / 1}
+  end
+
+  @doc """
+  Folds per-file results into one map per metric.
+
+  Every numeric metric key `k` contributes `mean_k`, `std_k`, `min_k` and `max_k`
+  entries, computed over all files that report it.
+  """
+  def aggregate_file_metrics(file_results) do
+    triples =
+      for file_data <- Map.values(file_results),
+          metric <- Map.get(file_data, "metrics", %{}),
+          triple <- metric_data_to_triples(metric),
+          do: triple
+
+    triples
+    |> Enum.group_by(fn {metric, key, _} -> {metric, key} end, &elem(&1, 2))
+    |> Enum.reduce(%{}, fn {{metric, key}, values}, acc ->
+      %{mean: mean, std: std, min: lo, max: hi} = compute_stats(values)
+
+      stats =
+        %{"mean_#{key}" => mean, "std_#{key}" => std, "min_#{key}" => lo, "max_#{key}" => hi}
+
+      Map.update(acc, metric, stats, &Map.merge(&1, stats))
+    end)
+  end
+
+  # Population statistics (variance divides by n), rounded to 4 decimals.
+  defp compute_stats([]), do: %{mean: 0.0, std: 0.0, min: 0.0, max: 0.0}
+
+  defp compute_stats(values) do
+    count = length(values)
+    mean = Enum.sum(values) / count
+    variance = Enum.reduce(values, 0.0, fn v, acc -> acc + (v - mean) ** 2 end) / count
+    round4 = fn number -> Float.round(number * 1.0, 4) end
+
+    %{
+      mean: round4.(mean),
+      std: round4.(:math.sqrt(variance)),
+      min: round4.(Enum.min(values)),
+      max: round4.(Enum.max(values))
+    }
+  end
+end
diff --git a/lib/codeqa/engine/collector.ex b/lib/codeqa/engine/collector.ex
new file mode 100644
index 00000000..3d1b8b41
--- /dev/null
+++ b/lib/codeqa/engine/collector.ex
@@ -0,0 +1,112 @@
+defmodule CodeQA.Engine.Collector do
+  @moduledoc false
+
+  @skip_dirs MapSet.new(~w[
+    .git .hg .svn node_modules __pycache__ _build dist build vendor
+    .tox .venv venv target .mypy_cache .pytest_cache deps .elixir_ls
+    .next coverage
+  ])
+
+  @default_ignore_patterns ~w[**/*.md **/*.mdx]
+
+  @doc "Dot-prefixed extensions of every language returned by `CodeQA.Language.all/0`."
+  @spec source_extensions() :: MapSet.t()
+  def source_extensions do
+    for language <- CodeQA.Language.all(), ext <- language.extensions(), into: MapSet.new() do
+      ".#{ext}"
+    end
+  end
+
+  @doc """
+  Collects the source files below `root` as a `%{relative_path => content}` map.
+  Raises `File.Error` unless `root` is a directory (checked before the config is
+  loaded). Ignored and gitignored paths are dropped before any content is read.
+  """
+  @spec collect_files(String.t(), [String.t()]) :: %{String.t() => String.t()}
+  def collect_files(root, extra_ignore_patterns \\ []) do
+    root_path = Path.expand(root)
+
+    if not File.dir?(root_path) do
+      raise File.Error, reason: :enoent, path: root, action: "find directory"
+    end
+
+    CodeQA.Config.load(root_path)
+    patterns = all_ignore_patterns(extra_ignore_patterns)
+    found = walk_directory(root_path, source_extensions())
+    rel_paths = Enum.map(found, &Path.relative_to(&1, root_path))
+    kept = Enum.reject(rel_paths, &ignored?(&1, patterns))
+    gitignored = CodeQA.Git.gitignored_files(root_path, kept)
+    for rel <- kept, not MapSet.member?(gitignored, rel), into: %{} do
+      {rel, File.read!(Path.join(root_path, rel))}
+    end
+  end
+
+  # Hidden from the docs but callable from other modules: true when any glob
+  # in `patterns` matches `path`.
+  @doc false
+  def ignored?(path, patterns), do: Enum.any?(patterns, &match_pattern?(path, &1))
+
+  # Drops map entries whose key matches the default, configured, or extra patterns.
+  @doc false
+  def reject_ignored_map(files_map, extra_patterns \\ []),
+    do: do_reject_ignored_map(files_map, all_ignore_patterns(extra_patterns))
+
+  # List variant of `reject_ignored_map/2`; `key_fn` extracts the path from an item.
+  @doc false
+  def reject_ignored(list, key_fn, extra_patterns \\ []) do
+    patterns = all_ignore_patterns(extra_patterns)
+    for item <- list, not ignored?(key_fn.(item), patterns), do: item
+  end
+
+  # Caller-supplied patterns first, then the built-in defaults, then the configured ones.
+  defp all_ignore_patterns(extra),
+    do: extra ++ @default_ignore_patterns ++ CodeQA.Config.ignore_paths()
+
+  defp do_reject_ignored_map(files_map, patterns) do
+    Map.reject(files_map, fn {path, _content} -> ignored?(path, patterns) end)
+  end
+
+  # Whole-path glob match: `**/` spans zero or more directories, `**` anything,
+  # `*` / `?` any run / one character without `/`, `[...]` stays a character class.
+  # All other text is escaped, so `+`, `(`, `|` etc. only match themselves.
+  defp match_pattern?(path, pattern) do
+    source =
+      Regex.replace(~r{\*\*/|\*\*|\*|\?|\[[^\]/]+\]|\[|[^*?\[]+}, pattern, fn
+        "**/" -> "(?:.*/)?"
+        "**" -> ".*"
+        "*" -> "[^/]*"
+        "?" -> "[^/]"
+        "[" <> rest = class when rest != "" -> class
+        literal -> Regex.escape(literal)
+      end)
+    case Regex.compile("^#{source}$") do
+      {:ok, regex} -> Regex.match?(regex, path)
+      _ -> false
+    end
+  end
+
+  # Recursively lists source files below `dir`. Directories named in @skip_dirs
+  # or starting with "." are not entered; files need a known extension and must
+  # not start with ".".
+  defp walk_directory(dir, extensions) do
+    Enum.flat_map(File.ls!(dir), fn entry ->
+      full_path = Path.join(dir, entry)
+
+      cond do
+        File.dir?(full_path) and not skip_dir?(entry) -> walk_directory(full_path, extensions)
+        source_entry?(entry, full_path, extensions) -> [full_path]
+        true -> []
+      end
+    end)
+  end
+
+  defp source_entry?(entry, full_path, extensions) do
+    File.regular?(full_path) and source_file?(entry, extensions) and
+      not String.starts_with?(entry, ".")
+  end
+
+  defp skip_dir?(name), do: String.starts_with?(name, ".") or MapSet.member?(@skip_dirs, name)
+
+  defp source_file?(name, extensions),
+    do: MapSet.member?(extensions, name |> Path.extname() |> String.downcase())
+end
diff --git a/lib/codeqa/engine/file_context.ex b/lib/codeqa/engine/file_context.ex
new file mode 100644
index 00000000..6e1da6ba
--- /dev/null
+++ b/lib/codeqa/engine/file_context.ex
@@ -0,0 +1,29 @@
+defmodule CodeQA.Engine.FileContext do
+  @moduledoc "Immutable pre-computed data shared across all file metrics; `path` and `blocks` are optional."
+  @enforce_keys [
+    :content,
+    :tokens,
+    :token_counts,
+    :words,
+    :identifiers,
+    :lines,
+    :encoded,
+    :byte_count,
+    :line_count
+  ]
+  defstruct @enforce_keys ++ [:path, :blocks]
+
+  @type t :: %__MODULE__{
+          content: String.t(),
+          tokens: [CodeQA.Engine.Pipeline.Token.t()],
+          token_counts: map(),
+          words: list(),
+          identifiers: list(),
+          lines: list(),
+          encoded: String.t(),
+          byte_count: non_neg_integer(),
+          line_count: non_neg_integer(),
+          path: String.t() | nil,
+          blocks: [CodeQA.AST.Enrichment.Node.t()] | nil
+        }
+end
diff --git a/lib/codeqa/parallel.ex b/lib/codeqa/engine/parallel.ex
similarity index 66%
rename from lib/codeqa/parallel.ex
rename to lib/codeqa/engine/parallel.ex
index 0e2cc460..f5a8da15 100644
--- a/lib/codeqa/parallel.ex
+++ b/lib/codeqa/engine/parallel.ex
@@ -1,4 +1,8 @@
-defmodule CodeQA.Parallel do
+defmodule CodeQA.Engine.Parallel do
+  alias CodeQA.Analysis.FileContextServer
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Registry
+
   @moduledoc "Parallel file analysis using Flow (GenStage-based)."
 
   def analyze_files(files, opts \\ []) when is_map(files) do
@@ -22,7 +26,7 @@ defmodule CodeQA.Parallel do
     |> Flow.map(fn {path, content} ->
       start_time = System.monotonic_time(:millisecond)
 
-      result = maybe_cached_analyze(content, cache_dir, opts)
+      result = maybe_cached_analyze(path, content, cache_dir, opts)
 
       end_time = System.monotonic_time(:millisecond)
       time_taken = end_time - start_time
@@ -38,9 +42,10 @@ defmodule CodeQA.Parallel do
     |> Enum.into(%{})
   end
 
-  defp maybe_cached_analyze(content, nil, opts), do: analyze_single_file(content, opts)
+  defp maybe_cached_analyze(path, content, nil, opts),
+    do: analyze_single_file(path, content, opts)
 
-  defp maybe_cached_analyze(content, cache_dir, opts) do
+  defp maybe_cached_analyze(path, content, cache_dir, opts) do
     hash = :crypto.hash(:sha256, content) |> Base.encode16(case: :lower)
     cache_file = Path.join(cache_dir, hash <> ".json")
 
@@ -51,30 +56,25 @@ defmodule CodeQA.Parallel do
             data
 
           _ ->
-            data = analyze_single_file(content, opts)
+            data = analyze_single_file(path, content, opts)
             File.write!(cache_file, Jason.encode!(data))
             data
         end
 
       _ ->
-        data = analyze_single_file(content, opts)
+        data = analyze_single_file(path, content, opts)
         File.write!(cache_file, Jason.encode!(data))
         data
     end
   end
 
-  defp analyze_single_file(content, opts) do
-    registry = CodeQA.Analyzer.build_registry()
-
-    ctx =
-      CodeQA.Telemetry.time(:pipeline_build_context, fn ->
-        CodeQA.Pipeline.build_file_context(content, opts)
-      end)
+  defp analyze_single_file(path, content, opts) do
+    registry = Analyzer.build_registry()
+    file_opts = Keyword.put(opts, :path, path)
+    pid = Keyword.fetch!(opts, :file_context_pid)
 
-    metrics =
-      CodeQA.Telemetry.time(:registry_run_metrics, fn ->
-        CodeQA.Registry.run_file_metrics(registry, ctx, opts)
-      end)
+    ctx = FileContextServer.get(pid, content, file_opts)
+    metrics = Registry.run_file_metrics(registry, ctx, opts)
 
     %{
       "bytes" => ctx.byte_count,
diff --git a/lib/codeqa/engine/pipeline.ex b/lib/codeqa/engine/pipeline.ex
new file mode 100644
index 00000000..53e25b4f
--- /dev/null
+++ b/lib/codeqa/engine/pipeline.ex
@@ -0,0 +1,114 @@
+defmodule CodeQA.Engine.Pipeline do
+  @moduledoc "Pre-computed shared context for file-level metrics."
+
+  defmodule Token do
+    @moduledoc "Token text, kind tag (e.g. `<ID>`, `<NUM>`, `<PUNCT>`) and 1-based source line."
+    defstruct [:content, :kind, :line]
+
+    @type t :: %__MODULE__{
+            content: String.t(),
+            kind: String.t(),
+            line: pos_integer()
+          }
+  end
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Engine.FileContext
+  alias CodeQA.Language
+
+  @word_re ~r/\b[a-zA-Z_]\w*\b/u
+
+  @doc """
+  Builds the `FileContext` for one file's `content`.
+
+  Options:
+
+    * `:path` - stored on the context and used to detect the language for
+      structural block parsing; without it `path` and `blocks` are both `nil`.
+    * `:skip_structural` - when `true`, `blocks` stays `nil` even if a path is given.
+  """
+  @spec build_file_context(String.t(), keyword()) :: FileContext.t()
+  def build_file_context(content, opts \\ []) when is_binary(content) do
+    tokens = tokenize(content)
+    keyword_set = cached_keywords()
+    words = @word_re |> Regex.scan(content) |> List.flatten()
+    source_lines = trim_trailing_empty(String.split(content, "\n"))
+
+    path = Keyword.get(opts, :path)
+    blocks = structural_blocks(content, path, Keyword.get(opts, :skip_structural, false))
+
+    %FileContext{
+      content: content,
+      tokens: tokens,
+      token_counts: Enum.frequencies_by(tokens, & &1.content),
+      words: words,
+      identifiers: Enum.reject(words, &MapSet.member?(keyword_set, &1)),
+      lines: source_lines,
+      # `encoded` is currently just the unmodified content.
+      encoded: content,
+      byte_count: byte_size(content),
+      line_count: length(source_lines),
+      path: path,
+      blocks: blocks
+    }
+  end
+
+  # Structural blocks are only computed when a path is known (it drives language
+  # detection) and the caller did not pass `skip_structural: true`.
+  defp structural_blocks(_content, nil, _skip), do: nil
+  defp structural_blocks(_content, _path, true), do: nil
+
+  defp structural_blocks(content, path, _skip) do
+    lang_mod = Language.detect(path)
+    structural_tokens = TokenNormalizer.normalize_structural(content)
+    Parser.detect_blocks(structural_tokens, lang_mod)
+  end
+
+  # Matches identifiers, integer/float literals, and single non-whitespace chars.
+  @token_re ~r/[a-zA-Z_]\w*|[0-9]+(?:\.[0-9]+)?|[^\s]/u
+
+  # Tokenizes line by line so that every token records the 1-based line it came
+  # from.
+  defp tokenize(content) do
+    for {line, line_num} <- Enum.with_index(String.split(content, "\n"), 1),
+        [text] <- Regex.scan(@token_re, line) do
+      %Token{content: text, kind: classify(text), line: line_num}
+    end
+  end
+
+  # Tags a token produced by @token_re. The identifier pattern needs the same `u`
+  # flag: without it `\w` is ASCII-only, so a token like "café" (one identifier
+  # for @token_re) fell through to "<PUNCT>".
+  defp classify(tok) do
+    cond do
+      Regex.match?(~r/^[a-zA-Z_]\w*$/u, tok) -> "<ID>"
+      Regex.match?(~r/^[0-9]/, tok) -> "<NUM>"
+      true -> "<PUNCT>"
+    end
+  end
+
+  # Memoizes the all-languages keyword MapSet in :persistent_term. Rebuilding it
+  # with MapSet.new(Language.all_keywords()) ran ~150ms per call (driven by the
+  # :application.get_key reflection in Language.all/0); multiplied by every
+  # block-impact LOO call, that dominated the analyzer hot path.
+  defp cached_keywords do
+    cache_key = {__MODULE__, :keywords}
+
+    # `with` passes any already-stored set straight through; only a miss (nil)
+    # builds and stores a fresh one.
+    with nil <- :persistent_term.get(cache_key, nil) do
+      fresh = MapSet.new(Language.all_keywords())
+      :persistent_term.put(cache_key, fresh)
+      fresh
+    end
+  end
+
+  # Drops one trailing "" (present when the text ends in "\n"), like Python's splitlines().
+  defp trim_trailing_empty(lines) do
+    case Enum.reverse(lines) do
+      ["" | kept_reversed] -> Enum.reverse(kept_reversed)
+      _ -> lines
+    end
+  end
+end
diff --git a/lib/codeqa/registry.ex b/lib/codeqa/engine/registry.ex
similarity index 59%
rename from lib/codeqa/registry.ex
rename to lib/codeqa/engine/registry.ex
index 76dfe23b..135385ac 100644
--- a/lib/codeqa/registry.ex
+++ b/lib/codeqa/engine/registry.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Registry do
+defmodule CodeQA.Engine.Registry do
   @moduledoc "Metric registration and execution."
 
   defstruct file_metrics: [], codebase_metrics: []
@@ -16,15 +16,22 @@ defmodule CodeQA.Registry do
   def run_file_metrics(%__MODULE__{} = reg, ctx, opts \\ []) do
     base_metrics =
       Map.new(reg.file_metrics, fn mod ->
-        {mod.name(),
-         CodeQA.Telemetry.time(String.to_atom("metric_" <> mod.name()), fn -> mod.analyze(ctx) end)}
+        t0 = System.monotonic_time(:microsecond)
+        result = mod.analyze(ctx)
+        duration = System.monotonic_time(:microsecond) - t0
+
+        :telemetry.execute(
+          [:codeqa, :file_metric],
+          %{duration: duration},
+          %{metric: mod.name()}
+        )
+
+        {mod.name(), result}
       end)
 
     if Keyword.get(opts, :combinations, false) do
-      CodeQA.Telemetry.time(:registry_combinations, fn ->
-        combinations = generate_combinations(flat_numeric_metrics(base_metrics), [])
-        Map.merge(base_metrics, Map.new(combinations))
-      end)
+      combinations = generate_combinations(flat_numeric_metrics(base_metrics), [])
+      Map.merge(base_metrics, Map.new(combinations))
     else
       base_metrics
     end
@@ -60,6 +67,26 @@ defmodule CodeQA.Registry do
   end
 
   def run_codebase_metrics(%__MODULE__{} = reg, files, opts \\ []) do
-    Map.new(reg.codebase_metrics, fn mod -> {mod.name(), mod.analyze(files, opts)} end)
+    # Progress lines go to stderr only when the caller passed :on_progress.
+    announce? = Keyword.has_key?(opts, :on_progress)
+    total = length(reg.codebase_metrics)
+
+    for {mod, idx} <- Enum.with_index(reg.codebase_metrics, 1), into: %{} do
+      name = mod.name()
+      if announce?, do: IO.puts(:stderr, "\nCODEBASE #{idx}/#{total}: #{name}...")
+
+      started_at = System.monotonic_time(:microsecond)
+      result = mod.analyze(files, opts)
+      elapsed = System.monotonic_time(:microsecond) - started_at
+
+      # Wall-clock duration in microseconds, tagged with the metric name.
+      :telemetry.execute(
+        [:codeqa, :codebase_metric],
+        %{duration: elapsed},
+        %{metric: name}
+      )
+
+      {name, result}
+    end
   end
 end
diff --git a/lib/codeqa/formatter.ex b/lib/codeqa/formatter.ex
deleted file mode 100644
index 55ba6efe..00000000
--- a/lib/codeqa/formatter.ex
+++ /dev/null
@@ -1,344 +0,0 @@
-defmodule CodeQA.Formatter do
-  @moduledoc false
-
-  @summary_metrics [
-    {"entropy", "char_entropy", "Entropy"},
-    {"halstead", "volume", "Halstead Vol."},
-    {"halstead", "difficulty", "Difficulty"},
-    {"readability", "flesch_adapted", "Readability"},
-    {"compression", "redundancy", "Redundancy"}
-  ]
-
-  @bar_width 20
-  @filled "█"
-  @empty "░"
-
-  def format_github(comparison, output_mode \\ "auto") do
-    metadata = comparison["metadata"]
-    files = comparison["files"] || %{}
-    codebase = comparison["codebase"] || %{}
-
-    if metadata["total_files_compared"] == 0 do
-      "## Code Quality: PR Comparison\n\nNo file changes detected."
-    else
-      build_github_report(metadata, files, codebase, output_mode)
-    end
-  end
-
-  defp build_github_report(metadata, files, codebase, output_mode) do
-    categories = CodeQA.HealthReport.Categories.defaults()
-    scale = CodeQA.HealthReport.Categories.default_grade_scale()
-
-    base_agg = get_in(codebase, ["base", "aggregate"]) || %{}
-    head_agg = get_in(codebase, ["head", "aggregate"]) || %{}
-
-    base_grades = CodeQA.HealthReport.Grader.grade_aggregate(categories, base_agg, scale)
-    head_grades = CodeQA.HealthReport.Grader.grade_aggregate(categories, head_agg, scale)
-
-    paired = Enum.zip(base_grades, head_grades)
-
-    lines =
-      [
-        "## Code Quality: PR Comparison",
-        "",
-        "**#{metadata["total_files_compared"]} files compared** (#{metadata["summary"]})",
-        ""
-      ] ++
-        mermaid_chart(head_grades) ++
-        progress_bars(paired) ++
-        [""] ++
-        file_details(files, codebase, output_mode) ++
-        aggregate_details(codebase)
-
-    Enum.join(lines, "\n")
-  end
-
-  defp mermaid_chart(head_grades) do
-    names = Enum.map(head_grades, fn g -> ~s("#{g.name}") end) |> Enum.join(", ")
-    scores = Enum.map(head_grades, fn g -> to_string(g.score) end) |> Enum.join(", ")
-
-    [
-      "```mermaid",
-      "%%{init: {'theme': 'neutral'}}%%",
-      "xychart-beta",
-      "    title \"Code Health After PR\"",
-      "    x-axis [#{names}]",
-      "    y-axis \"Score\" 0 --> 100",
-      "    bar [#{scores}]",
-      "```",
-      ""
-    ]
-  end
-
-  defp progress_bars(paired) do
-    max_name_len =
-      Enum.reduce(paired, 0, fn {_base, head}, acc ->
-        max(acc, String.length(head.name))
-      end)
-
-    rows =
-      Enum.map(paired, fn {base, head} ->
-        name = String.pad_trailing(head.name, max_name_len)
-        base_bar = build_bar(base.score)
-        head_bar = build_bar(head.score)
-        emoji = grade_emoji(head.grade)
-        delta = head.score - base.score
-        delta_str = if delta >= 0, do: "+#{delta}", else: to_string(delta)
-        "#{name}  #{base_bar} #{base.score} → #{head_bar} #{head.score}  #{emoji} #{delta_str}"
-      end)
-
-    ["```"] ++ rows ++ ["```"]
-  end
-
-  defp file_details(files, codebase, _output_mode) do
-    codebase_summary = CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase})
-
-    file_summaries =
-      Map.new(files, fn {path, data} ->
-        {path, CodeQA.Summarizer.summarize_file(path, data)}
-      end)
-
-    inner =
-      (format_file_table(files, file_summaries) ++ [""])
-      |> Enum.join("\n")
-
-    [
-      "<details>",
-      "<summary><strong>File changes — #{codebase_summary["gist"]}</strong></summary>",
-      "",
-      inner,
-      "</details>",
-      ""
-    ]
-  end
-
-  defp aggregate_details(codebase) do
-    inner =
-      format_aggregate_table(codebase, build_direction_map())
-      |> Enum.join("\n")
-
-    if inner == "" do
-      []
-    else
-      [
-        "<details>",
-        "<summary><strong>Aggregate metrics</strong></summary>",
-        "",
-        inner,
-        "",
-        "</details>",
-        ""
-      ]
-    end
-  end
-
-  defp build_bar(score) do
-    filled = round(score / 100 * @bar_width)
-    filled = min(max(filled, 0), @bar_width)
-    empty = @bar_width - filled
-    String.duplicate(@filled, filled) <> String.duplicate(@empty, empty)
-  end
-
-  defp grade_emoji(grade) do
-    cond do
-      grade in ["A", "A-"] -> "🟢"
-      grade in ["B+", "B", "B-"] -> "🟡"
-      grade in ["C+", "C", "C-"] -> "🟠"
-      true -> "🔴"
-    end
-  end
-
-  def format_markdown(comparison, output_mode \\ "auto") do
-    metadata = comparison["metadata"]
-    files = comparison["files"] || %{}
-    codebase = comparison["codebase"]
-
-    if metadata["total_files_compared"] == 0 do
-      "## Code Quality: PR Comparison\n\nNo file changes detected."
-    else
-      build_report(metadata, files, codebase, output_mode)
-    end
-  end
-
-  defp build_report(metadata, files, codebase, output_mode) do
-    codebase_summary =
-      CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase})
-
-    lines = [
-      "## Code Quality: PR Comparison",
-      "",
-      "**#{metadata["total_files_compared"]} files compared** (#{metadata["summary"]})",
-      ""
-    ]
-
-    lines =
-      if output_mode in ["auto", "summary"] do
-        lines ++ ["> #{codebase_summary["gist"]}", ""]
-      else
-        lines
-      end
-
-    lines =
-      if output_mode in ["auto", "changes"] do
-        file_summaries =
-          Map.new(files, fn {path, data} ->
-            {path, CodeQA.Summarizer.summarize_file(path, data)}
-          end)
-
-        lines ++ format_file_table(files, file_summaries) ++ [""]
-      else
-        lines
-      end
-
-    lines =
-      if output_mode in ["auto", "summary"] do
-        lines ++ format_aggregate_table(codebase)
-      else
-        lines
-      end
-
-    Enum.join(lines, "\n")
-  end
-
-  defp format_file_table(files, file_summaries) do
-    columns = detect_columns(files)
-
-    if columns == [],
-      do: ["No metric data available."],
-      else: build_file_rows(files, file_summaries, columns)
-  end
-
-  defp build_file_rows(files, file_summaries, columns) do
-    header =
-      "| File | Status | Summary | " <>
-        Enum.map_join(columns, " | ", fn {_, _, label} -> label end) <> " |"
-
-    separator =
-      "|------|--------|---------|" <> Enum.map_join(columns, "", fn _ -> "--------|" end)
-
-    rows =
-      files
-      |> Enum.sort_by(fn {path, _} -> path end)
-      |> Enum.map(fn {path, data} ->
-        gist = get_in(file_summaries, [path, "gist"]) || ""
-        cells = format_file_row(data, columns)
-        "| `#{path}` | #{data["status"]} | #{gist} | " <> Enum.join(cells, " | ") <> " |"
-      end)
-
-    [header, separator | rows]
-  end
-
-  defp format_file_row(data, columns) do
-    Enum.map(columns, fn {metric_name, key, _label} ->
-      case data["status"] do
-        "modified" -> format_modified_cell(data, metric_name, key)
-        "added" -> format_added_cell(data, metric_name, key)
-        "deleted" -> format_deleted_cell(data, metric_name, key)
-        _ -> "—"
-      end
-    end)
-  end
-
-  defp format_modified_cell(data, metric_name, key) do
-    case get_in(data, ["delta", "metrics", metric_name, key]) do
-      nil -> "—"
-      val -> format_delta(val)
-    end
-  end
-
-  defp format_added_cell(data, metric_name, key) do
-    case get_in(data, ["head", "metrics", metric_name, key]) do
-      nil -> "—"
-      val -> "*#{format_value(val)}*"
-    end
-  end
-
-  defp format_deleted_cell(data, metric_name, key) do
-    case get_in(data, ["base", "metrics", metric_name, key]) do
-      nil -> "—"
-      val -> "~~#{format_value(val)}~~"
-    end
-  end
-
-  defp format_aggregate_table(codebase, direction_map \\ %{}) do
-    base_agg = get_in(codebase, ["base", "aggregate"]) || %{}
-    head_agg = get_in(codebase, ["head", "aggregate"]) || %{}
-    delta_agg = get_in(codebase, ["delta", "aggregate"]) || %{}
-
-    if base_agg == %{} and head_agg == %{},
-      do: [],
-      else: build_aggregate_rows(base_agg, head_agg, delta_agg, direction_map)
-  end
-
-  defp build_aggregate_rows(base_agg, head_agg, delta_agg, direction_map) do
-    header = [
-      "### Aggregate Metrics",
-      "",
-      "| Metric | Base | Head | Delta |",
-      "|--------|------|------|-------|"
-    ]
-
-    rows =
-      MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg))
-      |> Enum.sort()
-      |> Enum.flat_map(fn metric_name ->
-        base_m = Map.get(base_agg, metric_name, %{})
-        head_m = Map.get(head_agg, metric_name, %{})
-        delta_m = Map.get(delta_agg, metric_name, %{})
-
-        MapSet.new(Map.keys(base_m) ++ Map.keys(head_m))
-        |> Enum.sort()
-        |> Enum.map(fn key ->
-          direction = Map.get(direction_map, "#{metric_name}.#{key}")
-          delta_cell = format_delta_with_direction(delta_m[key], direction)
-          "| #{metric_name}.#{key} | #{format_value(base_m[key])} | #{format_value(head_m[key])} | #{delta_cell} |"
-        end)
-      end)
-
-    header ++ rows
-  end
-
-  defp build_direction_map do
-    CodeQA.HealthReport.Categories.defaults()
-    |> Enum.flat_map(fn cat ->
-      Enum.map(cat.metrics, fn m -> {"#{m.source}.mean_#{m.name}", m.good} end)
-    end)
-    |> Map.new()
-  end
-
-  defp format_delta_with_direction(nil, _direction), do: "—"
-
-  defp format_delta_with_direction(value, direction) do
-    formatted = format_delta(value)
-    emoji = delta_emoji(value, direction)
-    if emoji, do: "#{emoji} #{formatted}", else: formatted
-  end
-
-  defp delta_emoji(_value, nil), do: nil
-  defp delta_emoji(value, :high) when value > 0, do: "🟢"
-  defp delta_emoji(value, :high) when value < 0, do: "🔴"
-  defp delta_emoji(value, :low) when value < 0, do: "🟢"
-  defp delta_emoji(value, :low) when value > 0, do: "🔴"
-  defp delta_emoji(_value, _direction), do: nil
-
-  defp detect_columns(files) do
-    Enum.filter(@summary_metrics, fn {metric_name, key, _label} ->
-      Enum.any?(files, fn {_path, data} ->
-        source = data["head"] || data["base"]
-        source && get_in(source, ["metrics", metric_name, key]) != nil
-      end)
-    end)
-  end
-
-  defp format_delta(nil), do: "—"
-
-  defp format_delta(value) when value > 0,
-    do: "+#{:erlang.float_to_binary(value / 1, decimals: 2)}"
-
-  defp format_delta(value) when value < 0, do: :erlang.float_to_binary(value / 1, decimals: 2)
-  defp format_delta(_), do: "0.00"
-
-  defp format_value(nil), do: "—"
-  defp format_value(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2)
-  defp format_value(value), do: to_string(value)
-end
diff --git a/lib/codeqa/git.ex b/lib/codeqa/git.ex
index 78c0bdb8..44892058 100644
--- a/lib/codeqa/git.ex
+++ b/lib/codeqa/git.ex
@@ -8,8 +8,26 @@ defmodule CodeQA.Git do
     defstruct @enforce_keys
   end
 
+  alias CodeQA.Engine.Collector
+
   @status_map %{"A" => "added", "M" => "modified", "D" => "deleted"}
 
+  @doc "Returns the members of `paths` that `git check-ignore` reports as ignored."
+  @spec gitignored_files(String.t(), [String.t()]) :: MapSet.t()
+  def gitignored_files(_repo_path, []), do: MapSet.new()
+
+  def gitignored_files(repo_path, paths) do
+    # `-z`: paths come back verbatim and NUL-separated (git C-quotes unusual names
+    # otherwise, so they would never equal the input). `--`: a path starting with
+    # "-" is not read as an option. Exit status 1 just means "nothing ignored".
+    args = ["check-ignore", "--no-index", "-z", "--" | paths]
+    {output, _exit_code} = System.cmd("git", args, cd: repo_path)
+
+    output
+    |> String.split("\0", trim: true)
+    |> MapSet.new()
+  end
+
   def changed_files(repo_path, base_ref, head_ref) do
     {output, 0} =
       System.cmd(
@@ -25,6 +43,78 @@ defmodule CodeQA.Git do
     |> Enum.flat_map(&parse_change_line/1)
   end
 
+  @doc """
+  Returns a map of file paths to lists of changed line ranges in the head version.
+
+  Each range is a tuple `{start_line, end_line}` representing lines that were
+  added or modified in the diff between base_ref and head_ref.
+  """
+  @spec diff_line_ranges(String.t(), String.t(), String.t()) ::
+          {:ok, %{String.t() => [{pos_integer(), pos_integer()}]}} | {:error, term()}
+  def diff_line_ranges(repo_path, base_ref, head_ref) do
+    # Pin the output format the hunk parser depends on: user git config can
+    # otherwise colorize it, run an external diff driver, change or drop the
+    # a/ b/ prefixes, or C-quote non-ASCII paths so that the file header is
+    # missed and its hunks are attributed to the previous file.
+    args =
+      ~w(-c core.quotePath=false diff -U0 --no-color --no-ext-diff) ++
+        ~w(--src-prefix=a/ --dst-prefix=b/) ++ ["#{base_ref}..#{head_ref}"]
+
+    case System.cmd("git", args, cd: repo_path) do
+      {output, 0} -> {:ok, parse_diff_hunks(output)}
+      {_output, code} -> {:error, "git diff exited with code #{code}"}
+    end
+  end
+
+  @typep parse_state :: {String.t() | nil, %{String.t() => [{pos_integer(), pos_integer()}]}}
+
+  # Folds parse_diff_line/2 over the diff text. Ranges are prepended while parsing,
+  # so each file's list is reversed once at the end to restore hunk order.
+  @spec parse_diff_hunks(String.t()) :: %{String.t() => [{pos_integer(), pos_integer()}]}
+  defp parse_diff_hunks(diff_output) do
+    lines = String.split(diff_output, "\n")
+    {_last_file, ranges_by_file} = Enum.reduce(lines, {nil, %{}}, &parse_diff_line/2)
+    Map.new(ranges_by_file, fn {path, ranges} -> {path, Enum.reverse(ranges)} end)
+  end
+
+  # State is {current_file, ranges_by_file}; each diff line either switches the
+  # current file, records a hunk's head-side range, or leaves the state untouched.
+  @spec parse_diff_line(String.t(), parse_state()) :: parse_state()
+  defp parse_diff_line("diff --git a/" <> rest, {_current_file, acc}) do
+    # The head-side path is whatever follows " b/" in the file header.
+    case Regex.run(~r/ b\/(.+)$/, rest) do
+      [_, path] -> {path, acc}
+      nil -> {nil, acc}
+    end
+  end
+
+  defp parse_diff_line("@@ " <> rest, {current_file, acc}) when is_binary(current_file) do
+    # Hunk header: @@ -old_start,old_count +new_start,new_count @@
+    # A missing new_count means the hunk covers exactly one line.
+    new_side =
+      case Regex.run(~r/\+(\d+)(?:,(\d+))?/, rest) do
+        [_, start_str] -> {String.to_integer(start_str), 1}
+        [_, start_str, count_str] ->
+          {String.to_integer(start_str), String.to_integer(count_str)}
+        nil -> nil
+      end
+
+    case new_side do
+      nil ->
+        {current_file, acc}
+
+      {_start, 0} ->
+        # Deletion only: nothing exists on the head side.
+        {current_file, acc}
+
+      {start, count} ->
+        range = {start, start + count - 1}
+        {current_file, Map.update(acc, current_file, [range], &[range | &1])}
+    end
+  end
+
+  defp parse_diff_line(_line, state), do: state
+
   def read_file_at_ref(repo_path, ref, path) do
     case System.cmd("git", ["show", "#{ref}:#{path}"], cd: repo_path, stderr_to_stdout: true) do
       {output, 0} -> output
@@ -66,6 +156,6 @@ defmodule CodeQA.Git do
 
   defp source_file?(path) do
     ext = path |> Path.extname() |> String.downcase()
-    MapSet.member?(CodeQA.Collector.source_extensions(), ext)
+    MapSet.member?(Collector.source_extensions(), ext)
   end
 end
diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex
index 982b4698..183b737a 100644
--- a/lib/codeqa/health_report.ex
+++ b/lib/codeqa/health_report.ex
@@ -1,46 +1,116 @@
 defmodule CodeQA.HealthReport do
   @moduledoc "Orchestrates health report generation from analysis results."
 
-  alias CodeQA.HealthReport.{Config, Grader, Formatter}
+  alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner}
+  alias CodeQA.HealthReport.{Config, Delta, Formatter, Grader, TopBlocks}
 
   @spec generate(map(), keyword()) :: map()
   def generate(analysis_results, opts \\ []) do
     config_path = Keyword.get(opts, :config)
-    detail = Keyword.get(opts, :detail, :default)
-    top_n = Keyword.get(opts, :top, 5)
+    base_results = Keyword.get(opts, :base_results)
+    changed_files = Keyword.get(opts, :changed_files, [])
+    diff_line_ranges = Keyword.get(opts, :diff_line_ranges, %{})
+
+    %{
+      categories: categories,
+      grade_scale: grade_scale,
+      impact_map: impact_map,
+      combined_top: combined_top,
+      block_min_lines: block_min_lines,
+      block_max_lines: block_max_lines
+    } =
+      Config.load(config_path)
 
-    %{categories: categories, grade_scale: grade_scale} = Config.load(config_path)
     aggregate = get_in(analysis_results, ["codebase", "aggregate"]) || %{}
     files = Map.get(analysis_results, "files", %{})
+    project_langs = project_languages(files)
 
-    category_grades = Grader.grade_aggregate(categories, aggregate, grade_scale)
-
-    category_grades =
-      Enum.zip(categories, category_grades)
-      |> Enum.map(fn {cat_def, graded} ->
+    threshold_grades =
+      categories
+      |> Grader.grade_aggregate(aggregate, grade_scale)
+      |> Enum.zip(categories)
+      |> Enum.map(fn {graded, _cat_def} ->
         summary = build_category_summary(graded)
 
-        cat_top = Map.get(cat_def, :top, top_n)
+        graded
+        |> Map.put(:type, :threshold)
+        |> Map.merge(%{summary: summary, worst_offenders: []})
+      end)
+
+    worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top)
+
+    all_cosines =
+      SampleRunner.diagnose_aggregate(aggregate, top: 99_999, languages: project_langs)
 
-        worst =
-          case detail do
-            :summary -> []
-            :full -> Grader.worst_offenders(cat_def, files, map_size(files), grade_scale)
-            _default -> Grader.worst_offenders(cat_def, files, cat_top, grade_scale)
-          end
+    cosines_by_category = Enum.group_by(all_cosines, & &1.category)
 
-        Map.merge(graded, %{summary: summary, worst_offenders: worst})
+    cosine_grades =
+      Grader.grade_cosine_categories(cosines_by_category, worst_files_map, grade_scale)
+
+    all_categories =
+      (threshold_grades ++ cosine_grades)
+      |> Enum.map(fn cat ->
+        Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1))
       end)
 
-    {overall_score, overall_grade} = Grader.overall_score(category_grades, grade_scale)
+    {overall_score, overall_grade} = Grader.overall_score(all_categories, grade_scale, impact_map)
 
     metadata = build_metadata(analysis_results)
 
+    top_issues = Enum.take(all_cosines, 10)
+
+    codebase_cosine_lookup =
+      Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end)
+
+    block_opts = [
+      block_min_lines: block_min_lines,
+      block_max_lines: block_max_lines,
+      diff_line_ranges: diff_line_ranges
+    ]
+
+    top_blocks =
+      TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup, block_opts)
+
+    worst_blocks_by_category =
+      TopBlocks.worst_per_category(
+        analysis_results,
+        changed_files,
+        codebase_cosine_lookup,
+        block_opts
+      )
+
+    grading_cfg = %{
+      category_defs: categories,
+      grade_scale: grade_scale,
+      impact_map: impact_map,
+      combined_top: combined_top
+    }
+
+    {codebase_delta, pr_summary} =
+      if base_results do
+        build_delta_and_summary(
+          base_results,
+          analysis_results,
+          overall_score,
+          overall_grade,
+          grading_cfg,
+          changed_files,
+          top_blocks
+        )
+      else
+        {nil, nil}
+      end
+
     %{
       metadata: metadata,
+      pr_summary: pr_summary,
       overall_score: overall_score,
       overall_grade: overall_grade,
-      categories: category_grades
+      codebase_delta: codebase_delta,
+      categories: all_categories,
+      top_issues: top_issues,
+      top_blocks: top_blocks,
+      worst_blocks_by_category: worst_blocks_by_category
     }
   end
 
@@ -49,6 +119,77 @@ defmodule CodeQA.HealthReport do
     Formatter.format_markdown(report, detail, format)
   end
 
+  # Grades the base snapshot with the same category definitions, grade scale and
+  # impact weights as the head run, then returns `{delta, summary}`: the metric
+  # delta from Delta.compute/2 plus a before/after overview (scores, grades and
+  # file/block counts) of the change set.
+  defp build_delta_and_summary(
+         base_results,
+         head_results,
+         head_score,
+         head_grade,
+         %{
+           category_defs: category_defs,
+           grade_scale: grade_scale,
+           impact_map: impact_map,
+           combined_top: combined_top
+         },
+         changed_files,
+         top_blocks
+       ) do
+    delta = Delta.compute(base_results, head_results)
+
+    base_aggregate = get_in(base_results, ["codebase", "aggregate"]) || %{}
+    base_files = Map.get(base_results, "files", %{})
+    base_langs = project_languages(base_files)
+
+    # Threshold categories, graded straight from the base aggregate.
+    threshold_grades =
+      category_defs
+      |> Grader.grade_aggregate(base_aggregate, grade_scale)
+      |> Enum.zip(category_defs)
+      |> Enum.map(fn {graded, _cat_def} ->
+        # Summary and offenders stay empty: these grades only feed the base score.
+        Map.merge(graded, %{type: :threshold, summary: "", worst_offenders: []})
+      end)
+
+    # Cosine categories, graded from the behavior diagnoses grouped by category.
+    worst_files =
+      FileScorer.worst_files_per_behavior(base_files, combined_top: combined_top)
+
+    cosine_grades =
+      base_aggregate
+      |> SampleRunner.diagnose_aggregate(top: 99_999, languages: base_langs)
+      |> Enum.group_by(& &1.category)
+      |> Grader.grade_cosine_categories(worst_files, grade_scale)
+
+    # Every category carries its configured impact weight (1 when not configured).
+    base_categories =
+      for cat <- threshold_grades ++ cosine_grades do
+        Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1))
+      end
+
+    {base_score, base_grade} =
+      Grader.overall_score(base_categories, grade_scale, impact_map)
+
+    # Tally the change set by status in a single pass.
+    status_counts = Enum.frequencies_by(changed_files, & &1.status)
+
+    summary = %{
+      base_score: base_score,
+      head_score: head_score,
+      score_delta: head_score - base_score,
+      base_grade: base_grade,
+      head_grade: head_grade,
+      blocks_flagged: length(top_blocks),
+      files_changed: length(changed_files),
+      files_added: Map.get(status_counts, "added", 0),
+      files_modified: Map.get(status_counts, "modified", 0)
+    }
+
+    {delta, summary}
+  end
+
   defp build_metadata(analysis_results) do
     meta = Map.get(analysis_results, "metadata", %{})
 
@@ -59,6 +200,16 @@ defmodule CodeQA.HealthReport do
     }
   end
 
+  # Distinct language names detected from the analysed file paths, with the
+  # "unknown" placeholder dropped.
+  defp project_languages(files_map) do
+    detected = for path <- Map.keys(files_map), do: CodeQA.Language.detect(path).name()
+
+    detected |> Enum.uniq() |> List.delete("unknown")
+  end
+
+  defp build_category_summary(%{type: :cosine}), do: ""
+
   defp build_category_summary(graded) do
     low_scorers =
       graded.metric_scores
diff --git a/lib/codeqa/health_report/behavior_labels.ex b/lib/codeqa/health_report/behavior_labels.ex
new file mode 100644
index 00000000..3cd4f94b
--- /dev/null
+++ b/lib/codeqa/health_report/behavior_labels.ex
@@ -0,0 +1,77 @@
+defmodule CodeQA.HealthReport.BehaviorLabels do
+  @moduledoc "Maps category/behavior pairs to human-readable labels and action items."
+
+  alias CodeQA.CombinedMetrics.Scorer
+
+  @labels %{
+    {"function_design", "no_boolean_parameter"} =>
+      {"Boolean parameter increases coupling", "Use separate functions or options map"},
+    {"function_design", "boolean_function_has_question_mark"} =>
+      {"Boolean function missing ? suffix", "Rename to use question mark convention"},
+    {"function_design", "has_verb_in_name"} =>
+      {"Function name lacks verb", "Use action verbs in function names"},
+    {"function_design", "no_magic_numbers"} =>
+      {"Magic numbers detected", "Extract constants with descriptive names"},
+    {"function_design", "uses_ternary_expression"} =>
+      {"Ternary expression overuse", "Use pattern matching or if/else"},
+    {"code_smells", "cyclomatic_complexity_under_10"} =>
+      {"High cyclomatic complexity", "Reduce branching or extract guard clauses"},
+    {"code_smells", "no_deeply_nested_code"} =>
+      {"Deeply nested code", "Extract helper functions to reduce nesting"},
+    {"code_smells", "function_length_under_25"} =>
+      {"Long function likely untestable", "Split into smaller functions"},
+    {"code_smells", "no_duplicate_code"} => {"Duplicate logic detected", "Extract shared helper"},
+    {"code_smells", "no_debug_print_statements"} =>
+      {"Debug print left in code", "Remove `IO.puts`/`IO.inspect`/`console.log` or use a logger"},
+    {"scope_and_assignment", "used_only_once"} =>
+      {"Variable used only once", "Inline the expression unless the name aids readability"},
+    {"consistency", "consistent_error_return_shape"} =>
+      {"Mixed error-return shapes",
+       "Return errors in one shape (e.g. `{:error, reason}` everywhere)"},
+    {"file_structure", "single_module_per_file"} =>
+      {"Multiple modules in one file", "Split into separate files"},
+    {"file_structure", "file_length_under_300"} =>
+      {"File too long", "Split into focused modules"},
+    {"dependencies", "no_circular_dependencies"} =>
+      {"Circular dependency detected", "Reorganize module boundaries"},
+    {"error_handling", "uses_tagged_tuples"} =>
+      {"Missing tagged tuple returns", "Use {:ok, val} / {:error, reason} pattern"},
+    {"naming_conventions", "filename_matches_module"} =>
+      {"Filename doesn't match module", "Rename file to match module"},
+    {"scope_and_assignment", "no_unused_variables"} =>
+      {"Unused variables", "Remove or prefix with underscore"},
+    {"testing", "test_file_exists"} => {"Missing test file", "Add tests for this module"},
+    {"documentation", "has_moduledoc"} => {"Missing @moduledoc", "Add module documentation"}
+  }
+
+  @doc "Returns the curated label for the pair, or the behavior name capitalized word by word."
+  @spec label(String.t(), String.t()) :: String.t()
+  def label(category, behavior) do
+    case Map.get(@labels, {category, behavior}) do
+      {label, _action} -> label
+      nil -> humanize(behavior)
+    end
+  end
+
+  @doc "Returns the curated action, else the category YAML's `_fix_hint`, else a generic hint."
+  @spec action(String.t(), String.t()) :: String.t()
+  def action(category, behavior) do
+    case Map.get(@labels, {category, behavior}) do
+      {_label, action} -> action
+      nil -> fix_hint_fallback(category, behavior)
+    end
+  end
+
+  # Path.basename/2 drops one ".yml" suffix; String.trim_trailing/2 also ate repeated ones.
+  defp fix_hint_fallback(category, behavior) do
+    Enum.find_value(Scorer.all_yamls(), "Review this code block", fn {yaml_path, data} ->
+      if Path.basename(yaml_path, ".yml") == category, do: get_in(data, [behavior, "_fix_hint"])
+    end)
+  end
+
+  defp humanize(behavior) do
+    words = behavior |> String.replace("_", " ") |> String.split()
+    Enum.map_join(words, " ", &String.capitalize/1)
+  end
+end
diff --git a/lib/codeqa/health_report/categories.ex b/lib/codeqa/health_report/categories.ex
index 69970beb..98b2e972 100644
--- a/lib/codeqa/health_report/categories.ex
+++ b/lib/codeqa/health_report/categories.ex
@@ -36,28 +36,36 @@ defmodule CodeQA.HealthReport.Categories do
             source: "readability",
             weight: 0.4,
             good: :high,
-            thresholds: %{a: 70, b: 50, c: 35, d: 20}
+            thresholds: %{a: 70, b: 50, c: 35, d: 20},
+            fix_hint:
+              "Low readability score — simplify sentences, prefer short identifiers, avoid deeply nested expressions"
           },
           %{
             name: "fog_adapted",
             source: "readability",
             weight: 0.3,
             good: :low,
-            thresholds: %{a: 6, b: 10, c: 15, d: 22}
+            thresholds: %{a: 6, b: 10, c: 15, d: 22},
+            fix_hint:
+              "High fog index — reduce complex multi-word identifiers and long compound expressions"
           },
           %{
             name: "avg_tokens_per_line",
             source: "readability",
             weight: 0.2,
             good: :low,
-            thresholds: %{a: 6, b: 10, c: 14, d: 20}
+            thresholds: %{a: 6, b: 10, c: 14, d: 20},
+            fix_hint:
+              "Too many tokens per line — break long lines into multiple shorter statements"
           },
           %{
             name: "avg_line_length",
             source: "readability",
             weight: 0.1,
             good: :low,
-            thresholds: %{a: 40, b: 60, c: 80, d: 100}
+            thresholds: %{a: 40, b: 60, c: 80, d: 100},
+            fix_hint:
+              "Lines too long — wrap at 80–120 characters and extract intermediate variables"
           }
         ]
       },
@@ -70,28 +78,35 @@ defmodule CodeQA.HealthReport.Categories do
             source: "halstead",
             weight: 0.35,
             good: :low,
-            thresholds: %{a: 10, b: 20, c: 35, d: 50}
+            thresholds: %{a: 10, b: 20, c: 35, d: 50},
+            fix_hint:
+              "High operator/operand ratio — extract repeated sub-expressions into named variables"
           },
           %{
             name: "effort",
             source: "halstead",
             weight: 0.30,
             good: :low,
-            thresholds: %{a: 5000, b: 20000, c: 50000, d: 100_000}
+            thresholds: %{a: 5000, b: 20_000, c: 50_000, d: 100_000},
+            fix_hint:
+              "High implementation effort — simplify logic by extracting helpers and reducing branching"
           },
           %{
             name: "volume",
             source: "halstead",
             weight: 0.20,
             good: :low,
-            thresholds: %{a: 300, b: 1000, c: 3000, d: 8000}
+            thresholds: %{a: 300, b: 1000, c: 3000, d: 8000},
+            fix_hint:
+              "High token volume — extract helper functions to reduce the total operation count"
           },
           %{
             name: "estimated_bugs",
             source: "halstead",
             weight: 0.15,
             good: :low,
-            thresholds: %{a: 0.1, b: 0.5, c: 1.0, d: 3.0}
+            thresholds: %{a: 0.1, b: 0.5, c: 1.0, d: 3.0},
+            fix_hint: "High defect estimate — reduce complexity; simpler code has fewer bugs"
           }
         ]
       },
@@ -104,56 +119,69 @@ defmodule CodeQA.HealthReport.Categories do
             source: "branching",
             weight: 0.25,
             good: :low,
-            thresholds: %{a: 0.08, b: 0.17, c: 0.30, d: 0.45}
+            thresholds: %{a: 0.08, b: 0.17, c: 0.30, d: 0.45},
+            fix_hint:
+              "Too many branches per line — flatten conditionals using guard clauses or early returns"
           },
           %{
             name: "mean_depth",
             source: "indentation",
             weight: 0.2,
             good: :low,
-            thresholds: %{a: 3.5, b: 7, c: 10, d: 15}
+            thresholds: %{a: 3.5, b: 7, c: 10, d: 15},
+            fix_hint: "High average nesting — extract inner blocks into helper functions"
           },
           %{
             name: "avg_function_lines",
             source: "function_metrics",
             weight: 0.2,
             good: :low,
-            thresholds: %{a: 8, b: 15, c: 30, d: 65}
+            thresholds: %{a: 8, b: 15, c: 30, d: 65},
+            fix_hint:
+              "Functions too long on average — split into smaller single-purpose functions"
           },
           %{
             name: "max_depth",
             source: "indentation",
             weight: 0.1,
             good: :low,
-            thresholds: %{a: 8, b: 16, c: 25, d: 35}
+            thresholds: %{a: 8, b: 16, c: 25, d: 35},
+            fix_hint: "Deep nesting — restructure using early returns or extract nested logic"
           },
           %{
             name: "max_function_lines",
             source: "function_metrics",
             weight: 0.1,
             good: :low,
-            thresholds: %{a: 20, b: 50, c: 100, d: 200}
+            thresholds: %{a: 20, b: 50, c: 100, d: 200},
+            fix_hint:
+              "Largest function too long — decompose the longest function into focused helpers"
           },
           %{
             name: "variance",
             source: "indentation",
             weight: 0.1,
             good: :low,
-            thresholds: %{a: 7, b: 20, c: 40, d: 65}
+            thresholds: %{a: 7, b: 20, c: 40, d: 65},
+            fix_hint:
+              "Inconsistent indentation depth — standardize nesting by flattening or restructuring"
           },
           %{
             name: "avg_param_count",
             source: "function_metrics",
             weight: 0.03,
             good: :low,
-            thresholds: %{a: 2, b: 3, c: 5, d: 7}
+            thresholds: %{a: 2, b: 3, c: 5, d: 7},
+            fix_hint: "Too many parameters on average — group related params into a struct or map"
           },
           %{
             name: "max_param_count",
             source: "function_metrics",
             weight: 0.02,
             good: :low,
-            thresholds: %{a: 3, b: 5, c: 7, d: 10}
+            thresholds: %{a: 3, b: 5, c: 7, d: 10},
+            fix_hint:
+              "Function has too many parameters — introduce a parameter object or options map"
           }
         ]
       },
@@ -166,21 +194,27 @@ defmodule CodeQA.HealthReport.Categories do
             source: "compression",
             weight: 0.5,
             good: :low,
-            thresholds: %{a: 0.3, b: 0.5, c: 0.65, d: 0.8}
+            thresholds: %{a: 0.3, b: 0.5, c: 0.65, d: 0.8},
+            fix_hint:
+              "High redundancy — extract repeated patterns into shared helpers or abstractions"
           },
           %{
             name: "bigram_repetition_rate",
             source: "ngram",
             weight: 0.3,
             good: :low,
-            thresholds: %{a: 0.15, b: 0.30, c: 0.45, d: 0.60}
+            thresholds: %{a: 0.15, b: 0.30, c: 0.45, d: 0.60},
+            fix_hint:
+              "Repeated two-token sequences — consolidate duplicated patterns into named functions"
           },
           %{
             name: "trigram_repetition_rate",
             source: "ngram",
             weight: 0.2,
             good: :low,
-            thresholds: %{a: 0.05, b: 0.15, c: 0.30, d: 0.45}
+            thresholds: %{a: 0.05, b: 0.15, c: 0.30, d: 0.45},
+            fix_hint:
+              "Repeated three-token sequences — extract duplicated logic into reusable abstractions"
           }
         ]
       },
@@ -193,28 +227,34 @@ defmodule CodeQA.HealthReport.Categories do
             source: "casing_entropy",
             weight: 0.3,
             good: :low,
-            thresholds: %{a: 1.0, b: 1.5, c: 2.0, d: 2.3}
+            thresholds: %{a: 1.0, b: 1.5, c: 2.0, d: 2.3},
+            fix_hint:
+              "Mixed casing styles — use a single consistent casing convention throughout the file"
           },
           %{
             name: "mean",
             source: "identifier_length_variance",
             weight: 0.25,
             good: :low,
-            thresholds: %{a: 12, b: 18, c: 25, d: 35}
+            thresholds: %{a: 12, b: 18, c: 25, d: 35},
+            fix_hint: "Identifiers too long on average — prefer concise, intent-revealing names"
           },
           %{
             name: "variance",
             source: "identifier_length_variance",
             weight: 0.25,
             good: :low,
-            thresholds: %{a: 15, b: 30, c: 50, d: 80}
+            thresholds: %{a: 15, b: 30, c: 50, d: 80},
+            fix_hint: "High identifier length variance — standardize name length conventions"
           },
           %{
             name: "avg_sub_words_per_id",
             source: "readability",
             weight: 0.2,
             good: :low,
-            thresholds: %{a: 3, b: 4, c: 5, d: 7}
+            thresholds: %{a: 3, b: 4, c: 5, d: 7},
+            fix_hint:
+              "Identifiers have too many sub-words — simplify to 2–3 word names where possible"
           }
         ]
       },
@@ -227,7 +267,8 @@ defmodule CodeQA.HealthReport.Categories do
             source: "magic_number_density",
             weight: 1.0,
             good: :low,
-            thresholds: %{a: 0.02, b: 0.05, c: 0.10, d: 0.20}
+            thresholds: %{a: 0.02, b: 0.05, c: 0.10, d: 0.20},
+            fix_hint: "Too many magic numbers — replace literal values with named constants"
           }
         ]
       }
diff --git a/lib/codeqa/health_report/config.ex b/lib/codeqa/health_report/config.ex
index 15bf125f..7c457b29 100644
--- a/lib/codeqa/health_report/config.ex
+++ b/lib/codeqa/health_report/config.ex
@@ -3,9 +3,24 @@ defmodule CodeQA.HealthReport.Config do
 
   alias CodeQA.HealthReport.Categories
 
-  @spec load(String.t() | nil) :: %{categories: [map()], grade_scale: [{number(), String.t()}]}
-  def load(nil),
-    do: %{categories: Categories.defaults(), grade_scale: Categories.default_grade_scale()}
+  @spec load(String.t() | nil) :: %{
+          categories: [map()],
+          grade_scale: [{number(), String.t()}],
+          impact_map: %{String.t() => pos_integer()},
+          combined_top: pos_integer(),
+          block_min_lines: pos_integer(),
+          block_max_lines: pos_integer()
+        }
+  def load(nil) do
+    %{
+      categories: Categories.defaults(),
+      grade_scale: Categories.default_grade_scale(),
+      impact_map: CodeQA.Config.impact_map(),
+      combined_top: CodeQA.Config.combined_top(),
+      block_min_lines: 3,
+      block_max_lines: 20
+    }
+  end
 
   def load(path) do
     yaml = YamlElixir.read_from_file!(path)
@@ -30,8 +45,26 @@ defmodule CodeQA.HealthReport.Config do
       end)
 
     grade_scale = parse_grade_scale(Map.get(yaml, "grade_scale"))
+    impact_map = parse_impact(Map.get(yaml, "impact"))
+    combined_top = Map.get(yaml, "combined_top", 2)
+    block_min_lines = Map.get(yaml, "block_min_lines", 3)
+    block_max_lines = Map.get(yaml, "block_max_lines", 20)
+
+    %{
+      categories: categories,
+      grade_scale: grade_scale,
+      impact_map: impact_map,
+      combined_top: combined_top,
+      block_min_lines: block_min_lines,
+      block_max_lines: block_max_lines
+    }
+  end
+
+  defp parse_impact(nil), do: CodeQA.Config.impact_map()
 
-    %{categories: categories, grade_scale: grade_scale}
+  defp parse_impact(overrides) when is_map(overrides) do
+    string_overrides = Map.new(overrides, fn {k, v} -> {to_string(k), v} end)
+    Map.merge(CodeQA.Config.impact_map(), string_overrides)
   end
 
   defp parse_grade_scale(nil), do: Categories.default_grade_scale()
diff --git a/lib/codeqa/health_report/delta.ex b/lib/codeqa/health_report/delta.ex
new file mode 100644
index 00000000..52b0085e
--- /dev/null
+++ b/lib/codeqa/health_report/delta.ex
@@ -0,0 +1,42 @@
+defmodule CodeQA.HealthReport.Delta do
+  @moduledoc "Computes aggregate metric delta between two codebase analysis results."
+
+  @doc "Returns the base and head aggregates together with their numeric differences."
+  @spec compute(map(), map()) :: %{
+          base: %{aggregate: map()},
+          head: %{aggregate: map()},
+          delta: %{aggregate: map()}
+        }
+  def compute(base_results, head_results) do
+    base = get_in(base_results, ["codebase", "aggregate"]) || %{}
+    head = get_in(head_results, ["codebase", "aggregate"]) || %{}
+    diff = compute_aggregate_delta(base, head)
+
+    %{base: %{aggregate: base}, head: %{aggregate: head}, delta: %{aggregate: diff}}
+  end
+
+  # Builds a metric => delta map, leaving out metrics whose delta map is empty.
+  defp compute_aggregate_delta(base_agg, head_agg) do
+    metric_names = MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg))
+
+    for name <- metric_names,
+        diff = metric_delta(base_agg, head_agg, name),
+        diff != %{},
+        into: %{},
+        do: {name, diff}
+  end
+
+  # A metric absent from one side is compared against an empty map.
+  defp metric_delta(base_agg, head_agg, name) do
+    compute_numeric_delta(Map.get(base_agg, name, %{}), Map.get(head_agg, name, %{}))
+  end
+
+  # head - base for keys that are numeric on both sides, as floats rounded to 4 places.
+  defp compute_numeric_delta(base, head) do
+    for key <- MapSet.new(Map.keys(base) ++ Map.keys(head)),
+        {b, h} = {Map.get(base, key), Map.get(head, key)},
+        is_number(b) and is_number(h),
+        into: %{},
+        do: {key, Float.round((h - b) * 1.0, 4)}
+  end
+end
diff --git a/lib/codeqa/health_report/formatter.ex b/lib/codeqa/health_report/formatter.ex
index df17d8d9..d166f145 100644
--- a/lib/codeqa/health_report/formatter.ex
+++ b/lib/codeqa/health_report/formatter.ex
@@ -8,4 +8,23 @@ defmodule CodeQA.HealthReport.Formatter do
 
   def format_markdown(report, detail, :plain, _opts), do: Plain.render(report, detail)
   def format_markdown(report, detail, :github, opts), do: Github.render(report, detail, opts)
+
+  @doc """
+  Splits the GitHub rendering of `report` into separately postable comment bodies.
+  `opts` is passed unchanged to each `Github` part renderer.
+
+  The result is a flat list of strings, in order:
+
+    * part 1 - `Github.render_part_1/2` (PR summary, header, legend, delta, chart, bars)
+    * part 2 - `Github.render_part_2/2` (top issues and category detail sections)
+    * remaining parts - whatever `Github.render_parts_3/2` returns (blocks section)
+
+  Every part ends with a sentinel HTML comment used to identify the sticky comment.
+  """
+  @spec render_parts(map(), keyword()) :: [String.t()]
+  def render_parts(report, opts \\ []) do
+    leading = [Github.render_part_1(report, opts), Github.render_part_2(report, opts)]
+
+    leading ++ Github.render_parts_3(report, opts)
+  end
 end
diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex
index 72bb9ee8..5bf9f7f2 100644
--- a/lib/codeqa/health_report/formatter/github.ex
+++ b/lib/codeqa/health_report/formatter/github.ex
@@ -8,18 +8,120 @@ defmodule CodeQA.HealthReport.Formatter.Github do
   @spec render(map(), atom(), keyword()) :: String.t()
   def render(report, detail, opts \\ []) do
     chart? = Keyword.get(opts, :chart, true)
+    display_categories = merge_cosine_categories(report.categories)
+    worst_blocks = Map.get(report, :worst_blocks_by_category, %{})
 
     [
+      pr_summary_section(Map.get(report, :pr_summary)),
       header(report),
-      if(chart?, do: mermaid_chart(report.categories), else: []),
-      progress_bars(report.categories),
-      category_sections(report.categories, detail),
+      cosine_legend(),
+      delta_section(Map.get(report, :codebase_delta)),
+      if(chart?, do: mermaid_chart(display_categories), else: []),
+      progress_bars(display_categories),
+      top_issues_section(Map.get(report, :top_issues, []), detail),
+      blocks_section(Map.get(report, :top_blocks, [])),
+      category_sections(display_categories, detail, worst_blocks),
       footer()
     ]
     |> List.flatten()
     |> Enum.join("\n")
   end
 
+  @doc """
+  Renders part 1: PR summary, header, cosine legend, metric delta, optional mermaid chart
+  (`opts[:chart]`, default `true`), progress bars, and the closing part-1 sentinel comment.
+  """
+  @spec render_part_1(map(), keyword()) :: String.t()
+  def render_part_1(report, opts \\ []) do
+    categories = merge_cosine_categories(report.categories)
+    chart = if Keyword.get(opts, :chart, true), do: mermaid_chart(categories), else: []
+
+    sections = [
+      pr_summary_section(Map.get(report, :pr_summary)),
+      header(report),
+      cosine_legend(),
+      delta_section(Map.get(report, :codebase_delta)),
+      chart,
+      progress_bars(categories),
+      sentinel(1)
+    ]
+
+    Enum.join(List.flatten(sections), "\n")
+  end
+
+  @doc """
+  Renders part 2: the top-issues section, then the category sections, then the part-2
+  sentinel. `opts[:detail]` (default `:default`) is forwarded to both section renderers.
+  """
+  @spec render_part_2(map(), keyword()) :: String.t()
+  def render_part_2(report, opts \\ []) do
+    detail = Keyword.get(opts, :detail, :default)
+    categories = merge_cosine_categories(report.categories)
+    worst_blocks = Map.get(report, :worst_blocks_by_category, %{})
+
+    issue_lines = top_issues_section(Map.get(report, :top_issues, []), detail)
+    category_lines = category_sections(categories, detail, worst_blocks)
+
+    [issue_lines, category_lines, sentinel(2)]
+    |> List.flatten()
+    |> Enum.join("\n")
+  end
+
+  @doc """
+  Renders part 3, the blocks section built from `report[:top_blocks]` (default `[]`).
+  Always returns a one-element list: the section text, a blank line, then the sentinel.
+  """
+  @spec render_parts_3(map(), keyword()) :: [String.t()]
+  def render_parts_3(report, _opts \\ []) do
+    section =
+      report |> Map.get(:top_blocks, []) |> blocks_section() |> List.flatten() |> Enum.join("\n")
+    ["#{section}\n\n#{sentinel_str(3)}"]
+  end
+
+  defp sentinel(part_no), do: List.wrap(sentinel_str(part_no))
+
+  defp sentinel_str(part_no), do: "<!-- codeqa-health-report-" <> to_string(part_no) <> " -->"
+
+  # Folds every :cosine category into one trailing "Combined Metrics" group (impact-weighted).
+  defp merge_cosine_categories(categories) do
+    # Map.get/2 lets categories without a :type key count as non-cosine instead of raising.
+    case Enum.split_with(categories, &(Map.get(&1, :type) == :cosine)) do
+      {[], threshold} ->
+        threshold
+
+      {cosine, threshold} ->
+        total_impact = Enum.sum(Enum.map(cosine, & &1.impact))
+        weighted_sum = Enum.sum(Enum.map(cosine, &(&1.score * &1.impact)))
+        # max/2 keeps the divisor at 1 or more, so an all-zero impact total cannot divide by 0.
+        combined_score = round(weighted_sum / max(total_impact, 1))
+
+        combined = %{
+          type: :cosine_group,
+          key: "combined_metrics",
+          name: "Combined Metrics",
+          score: combined_score,
+          grade: grade_letter_from_score(combined_score),
+          categories: cosine
+        }
+
+        threshold ++ [combined]
+    end
+  end
+
+  # Letter grade for a numeric score. Floors are checked from the highest down and the
+  # first one the score reaches decides; anything below the lowest floor is "F".
+  defp grade_letter_from_score(score) do
+    floors = [97, 93, 90, 87, 83, 80, 77, 73, 70, 67, 63, 60]
+    letters = ~w(A+ A A- B+ B B- C+ C C- D+ D D-)
+
+    floors
+    |> Enum.zip(letters)
+    |> Enum.find_value("F", fn {floor, letter} ->
+      # Same >= comparison the per-grade guard clauses used.
+      if score >= floor, do: letter
+    end)
+  end
+
   defp header(report) do
     emoji = grade_emoji(report.overall_grade)
 
@@ -31,9 +133,16 @@ defmodule CodeQA.HealthReport.Formatter.Github do
     ]
   end
 
+  # Static blockquote explaining how cosine scores map to grades, plus a spacer line.
+  defp cosine_legend do
+    note =
+      "> *Combined metric scores use cosine similarity: +1 = metric profile perfectly matches healthy pattern for this behavior, 0 = no signal, −1 = anti-pattern detected. Mapped to 0–100 using breakpoints (approx: ≥0.5→A, ≥0.2→B, ≥0.0→C, ≥−0.3→D, <−0.3→F); actual letter grades use the full 15-step scale.*"
+    [note, ""]
+  end
+
   defp mermaid_chart(categories) do
-    names = Enum.map(categories, fn c -> ~s("#{c.name}") end) |> Enum.join(", ")
-    scores = Enum.map(categories, fn c -> to_string(c.score) end) |> Enum.join(", ")
+    names = Enum.map_join(categories, ", ", fn c -> ~s("#{c.name}") end)
+    scores = Enum.map_join(categories, ", ", fn c -> to_string(c.score) end)
 
     [
       "```mermaid",
@@ -74,35 +183,159 @@ defmodule CodeQA.HealthReport.Formatter.Github do
     String.duplicate(@filled, filled) <> String.duplicate(@empty, empty)
   end
 
-  defp category_sections(_categories, :summary), do: []
+  # No per-category sections at :summary detail; otherwise each category's rendered lines.
+  defp category_sections(_categories, :summary, _worst_blocks), do: []
+
+  defp category_sections(categories, detail, worst_blocks),
+    do: for(cat <- categories, line <- render_category(cat, detail, worst_blocks), do: line)
+
+  defp render_category(%{type: :cosine_group} = group, detail, worst_blocks) do
+    emoji = grade_emoji(group.grade)
+    summary_line = "#{emoji} #{group.name} — #{group.grade} (#{group.score}/100)"
+    # Collapsible <details> wrapper; the group body is built by cosine_group_content/3.
+    inner =
+      cosine_group_content(group, detail, worst_blocks)
+      |> List.flatten()
+      |> Enum.join("\n")
+
+    [
+      "<details>",
+      "<summary><strong>#{summary_line}</strong></summary>",
+      "",
+      inner,
+      "",
+      "</details>",
+      ""
+    ]
+  end
+
+  defp render_category(%{type: :cosine} = cat, detail, worst_blocks) do
+    emoji = grade_emoji(cat.grade)
+    summary_line = "#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)"
+    # Same wrapper; a single cosine category's body comes from cosine_section_content/3.
+    inner =
+      cosine_section_content(cat, detail, worst_blocks)
+      |> List.flatten()
+      |> Enum.join("\n")
+
+    [
+      "<details>",
+      "<summary><strong>#{summary_line}</strong></summary>",
+      "",
+      inner,
+      "",
+      "</details>",
+      ""
+    ]
+  end
+
+  defp render_category(cat, detail, _worst_blocks) do
+    emoji = grade_emoji(cat.grade)
+    summary_line = "#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)"
+    # Any other category uses section_content/2; worst_blocks is not consulted here.
+    inner =
+      section_content(cat, detail)
+      |> List.flatten()
+      |> Enum.join("\n")
+
+    [
+      "<details>",
+      "<summary><strong>#{summary_line}</strong></summary>",
+      "",
+      inner,
+      "",
+      "</details>",
+      ""
+    ]
+  end
+
+  defp cosine_group_content(group, detail, worst_blocks) do
+    rows =
+      Enum.map(group.categories, fn cat ->
+        emoji = grade_emoji(cat.grade)
+        "| #{cat.name} | #{cat.score} | #{emoji} #{cat.grade} |"
+      end)
+
+    summary_table = [
+      "| Category | Score | Grade |",
+      "|----------|-------|-------|"
+      | rows
+    ]
+
+    sub_sections =
+      Enum.flat_map(group.categories, fn cat ->
+        emoji = grade_emoji(cat.grade)
 
-  defp category_sections(categories, detail) do
-    Enum.flat_map(categories, fn cat ->
-      emoji = grade_emoji(cat.grade)
-      summary_line = "#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)"
+        inner =
+          cosine_section_content(cat, detail, worst_blocks)
+          |> List.flatten()
+          |> Enum.join("\n")
+
+        [
+          "<details>",
+          "<summary><strong>#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)</strong></summary>",
+          "",
+          inner,
+          "",
+          "</details>",
+          ""
+        ]
+      end)
+
+    summary_table ++ [""] ++ sub_sections
+  end
 
-      inner =
-        section_content(cat, detail)
-        |> List.flatten()
-        |> Enum.join("\n")
+  # Behaviors table for one cosine category, followed by the worst block recorded for
+  # its key in `worst_blocks` (nothing is appended when there is none).
+  defp cosine_section_content(cat, _detail, worst_blocks) do
+    category_key = to_string(cat.key)
+
+    heading = [
+      "> Cosine similarity scores for #{length(cat.behaviors)} behaviors.",
+      "",
+      "| Behavior | Cosine | Score | Grade |",
+      "|----------|--------|-------|-------|"
+    ]
+
+    rows =
+      for b <- cat.behaviors do
+        "| #{b.behavior} | #{format_num(b.cosine)} | #{b.score} | #{b.grade} |"
+      end
+
+    offender =
+      case Map.fetch(worst_blocks, category_key) do
+        {:ok, block} when not is_nil(block) -> render_worst_block(block)
+        _ -> []
+      end
+
+    heading ++ rows ++ [""] ++ offender
+  end
+
+  defp render_worst_block(block) do
+    line_count = (block.end_line || block.start_line) - block.start_line + 1
+    # A nil end_line falls back to start_line, as in line_count, so no "path:12-" label.
+    location = "#{block.path}:#{block.start_line}-#{block.end_line || block.start_line}"
+    if line_count >= 1 and line_count <= 15 and block.source do
+      lang = block.language || ""
 
       [
-        "<details>",
-        "<summary><strong>#{summary_line}</strong></summary>",
-        "",
-        inner,
-        "",
-        "</details>",
+        "> **Worst offender** (`#{location}`):",
+        "> ```#{lang}",
+        block.source |> String.split("\n") |> Enum.map_join("\n", &"> #{&1}"),
+        "> ```",
         ""
       ]
-    end)
+    else
+      [
+        "> **Worst offender**: `#{location}` (#{line_count} lines)",
+        ""
+      ]
+    end
   end
 
   defp section_content(cat, _detail) do
     metric_summary =
-      cat.metric_scores
-      |> Enum.map(fn m -> "#{m.name}=#{format_num(m.value)}" end)
-      |> Enum.join(", ")
+      Enum.map_join(cat.metric_scores, ", ", fn m -> "#{m.name}=#{format_num(m.value)}" end)
 
     metrics_table =
       if cat.metric_scores != [] do
@@ -124,42 +357,35 @@ defmodule CodeQA.HealthReport.Formatter.Github do
       "Codebase averages: #{metric_summary}",
       ""
       | metrics_table
-    ] ++ [""] ++ worst_offenders(cat)
+    ] ++ [""]
   end
 
-  defp worst_offenders(cat) do
-    offenders = Map.get(cat, :worst_offenders, [])
+  defp top_issues_section([], _detail), do: []
+  defp top_issues_section(_issues, :summary), do: []
 
-    if offenders == [] do
-      []
-    else
-      averages = Map.new(cat.metric_scores, &{&1.name, &1.value})
-
-      rows =
-        Enum.map(offenders, fn f ->
-          issues =
-            f.metric_scores
-            |> Enum.map(fn m ->
-              avg = Map.get(averages, m.name)
-              avg_str = if avg, do: " (avg: #{format_num(avg)})", else: ""
-              "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}"
-            end)
-            |> Enum.join("<br>")
-
-          "| #{format_path(f.path)}<br>#{format_lines(f[:lines])} lines · #{format_size(f[:bytes])} | #{f.grade} (#{f.score}) | #{issues} |"
-        end)
+  defp top_issues_section(issues, _detail) do
+    rows =
+      Enum.map_join(issues, "\n", fn i ->
+        "| `#{i.category}.#{i.behavior}` | #{format_num(i.cosine)} | #{format_num(i.score)} |"
+      end)
 
-      [
-        "**Worst Offenders**",
-        "",
-        "| File | Grade | Issues |",
-        "|------|-------|--------|"
-        | rows
-      ]
-    end
+    table = "| Behavior | Cosine | Score |\n|----------|--------|-------|\n#{rows}"
+
+    [
+      "<details>",
+      "<summary><strong>🔍 Top Likely Issues (cosine similarity)</strong></summary>",
+      "",
+      "> Most negative cosine = file's metric profile best matches this anti-pattern.",
+      "",
+      table,
+      "",
+      "</details>",
+      ""
+    ]
   end
 
   defp footer do
+    # Legacy footer for single-part render/3 (used by --output file mode)
     ["<!-- Sticky Pull Request Commentcodeqa-health-report -->", ""]
   end
 
@@ -179,29 +405,244 @@ defmodule CodeQA.HealthReport.Formatter.Github do
 
   defp extract_project_name(_), do: "unknown"
 
-  defp format_path(path) when byte_size(path) < 80, do: "`#{path}`"
+  defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2)
+  defp format_num(value) when is_integer(value), do: to_string(value)
+  defp format_num(value), do: to_string(value)
+
+  defp format_date(timestamp) when is_binary(timestamp), do: String.slice(timestamp, 0, 10)
+  defp format_date(_), do: "unknown"
+
+  defp pr_summary_section(nil), do: []
+
+  defp pr_summary_section(summary) do
+    delta_str =
+      if summary.score_delta >= 0,
+        do: "+#{summary.score_delta}",
+        else: "#{summary.score_delta}"
+
+    status_str = "#{summary.files_modified} modified, #{summary.files_added} added"
+
+    [
+      "> **Score:** #{summary.base_grade} → #{summary.head_grade}  |  **Δ** #{delta_str} pts  |  **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files  |  #{status_str}",
+      ""
+    ]
+  end
+
+  defp delta_section(nil), do: []
+
+  defp delta_section(delta) do
+    base_agg = delta.base.aggregate
+    head_agg = delta.head.aggregate
 
-  defp format_path(path) do
-    case String.split(path, "/") do
-      [file] -> "`#{file}`"
-      parts -> Enum.join(Enum.drop(parts, -1), "/") <> "/<br>`#{List.last(parts)}`"
+    metrics = [
+      {"Readability", "readability", "mean_flesch_adapted"},
+      {"Complexity", "halstead", "mean_difficulty"},
+      {"Duplication", "compression", "mean_redundancy"},
+      {"Structure", "branching", "mean_branch_count"}
+    ]
+
+    rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg))
+
+    if rows == [] do
+      []
+    else
+      [
+        "## Metric Changes",
+        "",
+        "| Category | Base | Head | Δ |",
+        "|----------|------|------|---|"
+        | rows
+      ] ++ [""]
     end
   end
 
-  defp direction(:high), do: "↑ "
-  defp direction(_), do: "↓ "
+  # One "Metric Changes" table row, or [] unless both sides hold a number (ints become floats).
+  defp format_metric_row({label, group, key}, base_agg, head_agg) do
+    {base_val, head_val} = {get_in(base_agg, [group, key]), get_in(head_agg, [group, key])}
 
-  defp format_lines(nil), do: "—"
-  defp format_lines(n), do: to_string(n)
+    if is_number(base_val) and is_number(head_val) do
+      diff = Float.round(:erlang.float(head_val - base_val), 2)
+      diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}"
+      ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"]
+    else
+      []
+    end
+  end
 
-  defp format_size(nil), do: "—"
-  defp format_size(bytes) when bytes < 1024, do: "#{bytes} B"
-  defp format_size(bytes), do: "#{Float.round(bytes / 1024, 1)} KB"
+  defp blocks_section([]) do
+    ["> 🟢 **No block-level issues detected**", ""]
+  end
 
-  defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2)
-  defp format_num(value) when is_integer(value), do: to_string(value)
-  defp format_num(value), do: to_string(value)
+  defp blocks_section(top_blocks) do
+    alias CodeQA.HealthReport.BehaviorLabels
 
-  defp format_date(timestamp) when is_binary(timestamp), do: String.slice(timestamp, 0, 10)
-  defp format_date(_), do: "unknown"
+    severity_counts = count_severities(top_blocks)
+    worst = worst_severity(severity_counts)
+    {icon, verdict} = verdict_text(worst, severity_counts)
+
+    {actionable, medium_blocks} =
+      Enum.split_with(top_blocks, fn b ->
+        top = List.first(b.potentials)
+        top && top.severity in [:critical, :high]
+      end)
+
+    verdict_box = [
+      "> ### #{icon} #{verdict}",
+      "> #{severity_summary(severity_counts)}",
+      ""
+    ]
+
+    action_table =
+      if actionable != [] do
+        rows =
+          Enum.map(actionable, fn block ->
+            top = List.first(block.potentials)
+            sev_icon = severity_icon(top.severity)
+            label = BehaviorLabels.label(top.category, top.behavior)
+            location = "`#{block.path}:#{block.start_line}-#{block.end_line || block.start_line}`"
+            action = BehaviorLabels.action(top.category, top.behavior)
+            "| #{sev_icon} #{label} | #{location} | #{action} |"
+          end)
+
+        [
+          "| What | Where | Action |",
+          "|------|-------|--------|"
+          | rows
+        ] ++ [""]
+      else
+        []
+      end
+
+    actionable_details = Enum.flat_map(actionable, &format_block_card/1)
+
+    medium_section =
+      if medium_blocks != [] do
+        n = length(medium_blocks)
+        word = if n == 1, do: "block", else: "blocks"
+        inner = Enum.flat_map(medium_blocks, &format_block_card/1) |> Enum.join("\n")
+
+        [
+          "<details>",
+          "<summary>#{n} medium-severity #{word} (expand)</summary>",
+          "",
+          inner,
+          "",
+          "</details>",
+          ""
+        ]
+      else
+        []
+      end
+
+    verdict_box ++ action_table ++ actionable_details ++ medium_section
+  end
+
+  defp count_severities(blocks) do
+    blocks
+    |> Enum.map(fn b -> (List.first(b.potentials) || %{severity: :medium}).severity end)
+    |> Enum.frequencies()
+  end
+
+  defp worst_severity(counts) do
+    cond do
+      Map.get(counts, :critical, 0) > 0 -> :critical
+      Map.get(counts, :high, 0) > 0 -> :high
+      Map.get(counts, :medium, 0) > 0 -> :medium
+      true -> :none
+    end
+  end
+
+  defp verdict_text(:critical, counts) do
+    n = Map.get(counts, :critical, 0)
+    {"🔴", "#{n} critical #{pl(n, "block")} — review required before merge"}
+  end
+
+  defp verdict_text(:high, counts) do
+    n = Map.get(counts, :high, 0) + Map.get(counts, :critical, 0)
+    {"🟠", "#{n} #{pl(n, "block")} need attention before merge"}
+  end
+
+  defp verdict_text(:medium, counts) do
+    n = Map.get(counts, :medium, 0)
+    {"🟡", "#{n} #{pl(n, "block")} with minor issues (safe to merge)"}
+  end
+
+  defp verdict_text(:none, _), do: {"🟢", "No block-level issues detected"}
+
+  defp pl(1, word), do: word
+  defp pl(_, word), do: word <> "s"
+
+  defp severity_summary(counts) do
+    [:critical, :high, :medium]
+    |> Enum.map(fn sev -> {sev, Map.get(counts, sev, 0)} end)
+    |> Enum.reject(fn {_, n} -> n == 0 end)
+    |> Enum.map_join(" · ", fn {sev, n} -> "**#{n} #{sev}**" end)
+  end
+
+  defp format_block_card(block) do
+    alias CodeQA.HealthReport.BehaviorLabels
+
+    end_line = block.end_line || block.start_line
+    top_potential = List.first(block.potentials)
+    icon = severity_icon(top_potential.severity)
+    label = BehaviorLabels.label(top_potential.category, top_potential.behavior)
+
+    summary_line = "#{icon} #{block.path}:#{block.start_line}-#{end_line} — #{label}"
+
+    issues = format_block_issues(block.potentials)
+    code_block = format_code_block(block)
+
+    [
+      "<details>",
+      "<summary>#{summary_line}</summary>",
+      "",
+      "**Issues:**",
+      ""
+      | issues
+    ] ++ ["", code_block, "", "</details>", ""]
+  end
+
+  defp format_block_issues(potentials) do
+    Enum.flat_map(potentials, fn p ->
+      icon = severity_icon(p.severity)
+      label = String.upcase(to_string(p.severity))
+      delta_str = format_num(p.cosine_delta)
+      line = "- #{icon} **#{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})"
+      fix = if p.fix_hint, do: ["  > #{p.fix_hint}"], else: []
+      [line | fix]
+    end)
+  end
+
+  defp format_code_block(%{source: nil}), do: "_Source code not available_"
+
+  defp format_code_block(%{source: source, language: lang, start_line: start_line}) do
+    lang_hint = code_fence_lang(lang)
+    # Add line number comments for context
+    lines = String.split(source, "\n")
+
+    numbered_lines =
+      lines
+      |> Enum.with_index(start_line)
+      |> Enum.map(fn {line, num} -> "#{String.pad_leading(to_string(num), 4)} │ #{line}" end)
+      |> Enum.join("\n")
+
+    "```#{lang_hint}\n#{numbered_lines}\n```"
+  end
+
+  defp code_fence_lang("elixir"), do: "elixir"
+  defp code_fence_lang("ruby"), do: "ruby"
+  defp code_fence_lang("javascript"), do: "javascript"
+  defp code_fence_lang("typescript"), do: "typescript"
+  defp code_fence_lang("python"), do: "python"
+  defp code_fence_lang("swift"), do: "swift"
+  defp code_fence_lang("kotlin"), do: "kotlin"
+  defp code_fence_lang("java"), do: "java"
+  defp code_fence_lang("go"), do: "go"
+  defp code_fence_lang("rust"), do: "rust"
+  defp code_fence_lang(_), do: ""
+
+  defp severity_icon(:critical), do: "🔴"
+  defp severity_icon(:high), do: "🟠"
+  defp severity_icon(:medium), do: "🟡"
+  defp severity_icon(_), do: "⚪"
 end
diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex
index 8471aef5..517fc5f8 100644
--- a/lib/codeqa/health_report/formatter/plain.ex
+++ b/lib/codeqa/health_report/formatter/plain.ex
@@ -4,8 +4,13 @@ defmodule CodeQA.HealthReport.Formatter.Plain do
   @spec render(map(), atom()) :: String.t()
   def render(report, detail) do
     [
+      pr_summary_section(Map.get(report, :pr_summary)),
       header(report),
+      cosine_legend(),
+      delta_section(Map.get(report, :codebase_delta)),
       overall_table(report),
+      top_issues_section(Map.get(report, :top_issues, []), detail),
+      blocks_section(Map.get(report, :top_blocks, [])),
       category_sections(report.categories, detail)
     ]
     |> List.flatten()
@@ -23,16 +28,24 @@ defmodule CodeQA.HealthReport.Formatter.Plain do
     ]
   end
 
+  defp cosine_legend do
+    [
+      "> *Combined metric scores use cosine similarity: +1 = metric profile perfectly matches healthy pattern for this behavior, 0 = no signal, −1 = anti-pattern detected. Mapped to 0–100 using breakpoints (approx: ≥0.5→A, ≥0.2→B, ≥0.0→C, ≥−0.3→D, <−0.3→F); actual letter grades use the full 15-step scale.*",
+      ""
+    ]
+  end
+
   defp overall_table(report) do
     rows =
       Enum.map(report.categories, fn cat ->
         summary = Map.get(cat, :summary, "")
-        "| #{cat.name} | #{cat.grade} | #{cat.score} | #{summary} |"
+        impact = Map.get(cat, :impact, "")
+        "| #{cat.name} | #{cat.grade} | #{cat.score} | #{impact} | #{summary} |"
       end)
 
     [
-      "| Category | Grade | Score | Summary |",
-      "|----------|-------|-------|---------|"
+      "| Category | Grade | Score | Impact | Summary |",
+      "|----------|-------|-------|--------|---------|"
       | rows
     ] ++ [""]
   end
@@ -41,15 +54,45 @@ defmodule CodeQA.HealthReport.Formatter.Plain do
 
   defp category_sections(categories, detail) do
     Enum.flat_map(categories, fn cat ->
-      section_header(cat) ++ metric_detail(cat) ++ worst_offenders_section(cat, detail)
+      render_category(cat, detail)
     end)
   end
 
+  defp render_category(%{type: :cosine} = cat, _detail) do
+    cosine_section_header(cat) ++ cosine_behaviors_table(cat)
+  end
+
+  defp render_category(cat, _detail) do
+    section_header(cat) ++ metric_detail(cat)
+  end
+
+  defp cosine_section_header(cat) do
+    n = length(cat.behaviors)
+
+    [
+      "## #{cat.name} — #{cat.grade}",
+      "",
+      "> Cosine similarity scores for #{n} behaviors.",
+      ""
+    ]
+  end
+
+  defp cosine_behaviors_table(cat) do
+    rows =
+      Enum.map(cat.behaviors, fn b ->
+        "| #{b.behavior} | #{format_num(b.cosine)} | #{b.score} | #{b.grade} |"
+      end)
+
+    [
+      "| Behavior | Cosine | Score | Grade |",
+      "|----------|--------|-------|-------|"
+      | rows
+    ] ++ [""]
+  end
+
   defp section_header(cat) do
     metric_summary =
-      cat.metric_scores
-      |> Enum.map(fn m -> "#{m.name}=#{format_num(m.value)}" end)
-      |> Enum.join(", ")
+      Enum.map_join(cat.metric_scores, ", ", fn m -> "#{m.name}=#{format_num(m.value)}" end)
 
     [
       "## #{cat.name} — #{cat.grade}",
@@ -76,66 +119,209 @@ defmodule CodeQA.HealthReport.Formatter.Plain do
     end
   end
 
-  defp worst_offenders_section(_cat, :summary), do: []
+  defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2)
+  defp format_num(value) when is_integer(value), do: to_string(value)
+  defp format_num(value), do: to_string(value)
+
+  defp format_date(timestamp) when is_binary(timestamp) do
+    timestamp |> String.slice(0, 10)
+  end
+
+  defp format_date(_), do: "unknown"
+
+  defp top_issues_section([], _detail), do: []
+  defp top_issues_section(_issues, :summary), do: []
+
+  defp top_issues_section(issues, _detail) do
+    rows =
+      Enum.map(issues, fn i ->
+        "| #{i.category}.#{i.behavior} | #{format_num(i.cosine)} | #{format_num(i.score)} |"
+      end)
+
+    [
+      "## Top Likely Issues",
+      "",
+      "> Ranked by cosine similarity — most negative means the file's metric profile best matches this anti-pattern.",
+      "",
+      "| Behavior | Cosine | Score |",
+      "|----------|--------|-------|"
+      | rows
+    ] ++ [""]
+  end
+
+  defp pr_summary_section(nil), do: []
 
-  defp worst_offenders_section(cat, _detail) do
-    offenders = Map.get(cat, :worst_offenders, [])
+  defp pr_summary_section(summary) do
+    delta_str =
+      if summary.score_delta >= 0,
+        do: "+#{summary.score_delta}",
+        else: "#{summary.score_delta}"
 
-    if offenders == [] do
+    status_str = "#{summary.files_modified} modified, #{summary.files_added} added"
+
+    [
+      "> **Score:** #{summary.base_grade} → #{summary.head_grade}  |  **Δ** #{delta_str} pts  |  **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files  |  #{status_str}",
+      ""
+    ]
+  end
+
+  defp delta_section(nil), do: []
+
+  defp delta_section(delta) do
+    base_agg = delta.base.aggregate
+    head_agg = delta.head.aggregate
+
+    metrics = [
+      {"Readability", "readability", "mean_flesch_adapted"},
+      {"Complexity", "halstead", "mean_difficulty"},
+      {"Duplication", "compression", "mean_redundancy"},
+      {"Structure", "branching", "mean_branch_count"}
+    ]
+
+    rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg))
+
+    if rows == [] do
       []
     else
-      averages = Map.new(cat.metric_scores, &{&1.name, &1.value})
-
-      rows =
-        Enum.map(offenders, fn f ->
-          issues =
-            f.metric_scores
-            |> Enum.map(fn m ->
-              avg = Map.get(averages, m.name)
-              avg_str = if avg, do: " (avg: #{format_num(avg)})", else: ""
-              "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}"
-            end)
-            |> Enum.join("<br>")
-
-          "| #{format_path(f.path)}<br>#{format_lines(f[:lines])} lines · #{format_size(f[:bytes])} | #{f.grade} | #{issues} |"
-        end)
-
       [
-        "### Worst Offenders",
+        "## Metric Changes",
         "",
-        "| File | Grade | Issues |",
-        "|------|-------|--------|"
+        "| Category | Base | Head | Δ |",
+        "|----------|------|------|---|"
         | rows
       ] ++ [""]
     end
   end
 
-  defp format_path(path) when byte_size(path) < 80, do: "`#{path}`"
+  # One "Metric Changes" table row, or [] unless both sides hold a number (ints become floats).
+  defp format_metric_row({label, group, key}, base_agg, head_agg) do
+    {base_val, head_val} = {get_in(base_agg, [group, key]), get_in(head_agg, [group, key])}
 
-  defp format_path(path) do
-    case String.split(path, "/") do
-      [file] -> "`#{file}`"
-      parts -> Enum.join(Enum.drop(parts, -1), "/") <> "/<br>`#{List.last(parts)}`"
+    if is_number(base_val) and is_number(head_val) do
+      diff = Float.round(:erlang.float(head_val - base_val), 2)
+      diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}"
+      ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"]
+    else
+      []
     end
   end
 
-  defp direction(:high), do: "↑ "
-  defp direction(_), do: "↓ "
+  defp blocks_section([]), do: ["## Code Blocks: 🟢 No block-level issues detected", ""]
 
-  defp format_lines(nil), do: "—"
-  defp format_lines(n), do: to_string(n)
+  defp blocks_section(top_blocks) do
+    alias CodeQA.HealthReport.BehaviorLabels
 
-  defp format_size(nil), do: "—"
-  defp format_size(bytes) when bytes < 1024, do: "#{bytes} B"
-  defp format_size(bytes), do: "#{Float.round(bytes / 1024, 1)} KB"
+    severity_counts = count_severities(top_blocks)
+    worst = worst_severity(severity_counts)
+    {icon, verdict} = verdict_text(worst, severity_counts)
 
-  defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2)
-  defp format_num(value) when is_integer(value), do: to_string(value)
-  defp format_num(value), do: to_string(value)
+    {actionable, medium_blocks} =
+      Enum.split_with(top_blocks, fn b ->
+        top = List.first(b.potentials)
+        top && top.severity in [:critical, :high]
+      end)
 
-  defp format_date(timestamp) when is_binary(timestamp) do
-    timestamp |> String.slice(0, 10)
+    header = ["## Code Blocks: #{icon} #{verdict}", ""]
+
+    action_table =
+      if actionable != [] do
+        rows =
+          Enum.map(actionable, fn block ->
+            top = List.first(block.potentials)
+            label = BehaviorLabels.label(top.category, top.behavior)
+            location = "#{block.path}:#{block.start_line}-#{block.end_line || block.start_line}"
+            action = BehaviorLabels.action(top.category, top.behavior)
+            "| #{label} | #{location} | #{action} |"
+          end)
+
+        [
+          "| What | Where | Action |",
+          "|------|-------|--------|"
+          | rows
+        ] ++ [""]
+      else
+        []
+      end
+
+    block_details = Enum.flat_map(actionable ++ medium_blocks, &format_block/1)
+
+    header ++ action_table ++ block_details
   end
 
-  defp format_date(_), do: "unknown"
+  defp count_severities(blocks) do
+    blocks
+    |> Enum.map(fn b -> (List.first(b.potentials) || %{severity: :medium}).severity end)
+    |> Enum.frequencies()
+  end
+
+  defp worst_severity(counts) do
+    cond do
+      Map.get(counts, :critical, 0) > 0 -> :critical
+      Map.get(counts, :high, 0) > 0 -> :high
+      Map.get(counts, :medium, 0) > 0 -> :medium
+      true -> :none
+    end
+  end
+
+  defp verdict_text(:critical, counts) do
+    n = Map.get(counts, :critical, 0)
+    {"🔴", "#{n} critical #{pl(n, "block")} — review required before merge"}
+  end
+
+  defp verdict_text(:high, counts) do
+    n = Map.get(counts, :high, 0) + Map.get(counts, :critical, 0)
+    {"🟠", "#{n} #{pl(n, "block")} need attention before merge"}
+  end
+
+  defp verdict_text(:medium, counts) do
+    n = Map.get(counts, :medium, 0)
+    {"🟡", "#{n} #{pl(n, "block")} with minor issues (safe to merge)"}
+  end
+
+  defp verdict_text(:none, _), do: {"🟢", "No block-level issues detected"}
+
+  defp pl(1, word), do: word
+  defp pl(_, word), do: word <> "s"
+
+  defp format_block(block) do
+    end_line = block.end_line || block.start_line
+    status_str = if block.status, do: " [#{block.status}]", else: ""
+
+    header =
+      "### #{block.path}:#{block.start_line}-#{end_line}#{status_str}"
+
+    subheader =
+      "#{block.type} · #{block.token_count} tokens"
+
+    potential_lines = Enum.flat_map(block.potentials, &format_potential/1)
+    code_lines = format_code_block(block)
+    [header, subheader, "" | potential_lines] ++ ["" | code_lines] ++ [""]
+  end
+
+  defp format_code_block(%{source: nil}), do: ["_Source code not available_"]
+
+  defp format_code_block(%{source: source, start_line: start_line}) do
+    lines = String.split(source, "\n")
+
+    numbered_lines =
+      lines
+      |> Enum.with_index(start_line)
+      |> Enum.map(fn {line, num} -> "  #{String.pad_leading(to_string(num), 4)} │ #{line}" end)
+
+    ["```" | numbered_lines] ++ ["```"]
+  end
+
+  defp format_potential(p) do
+    icon = severity_icon(p.severity)
+    delta_str = format_num(p.cosine_delta)
+    label = String.upcase(to_string(p.severity))
+    line = "  #{icon} #{label}  #{p.category} / #{p.behavior}  (Δ #{delta_str})"
+    fix = if p.fix_hint, do: ["    → #{p.fix_hint}"], else: []
+    [line | fix]
+  end
+
+  defp severity_icon(:critical), do: "🔴"
+  defp severity_icon(:high), do: "🟠"
+  defp severity_icon(:medium), do: "🟡"
+  defp severity_icon(_), do: "⚪"
 end
diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex
index 864cad32..d671a0bf 100644
--- a/lib/codeqa/health_report/grader.ex
+++ b/lib/codeqa/health_report/grader.ex
@@ -1,6 +1,9 @@
 defmodule CodeQA.HealthReport.Grader do
   @moduledoc "Scores metrics and assigns letter grades."
 
+  alias CodeQA.Config
+  alias CodeQA.HealthReport.Categories
+
   @doc """
   Score a single metric value (0-100) based on thresholds and direction.
 
@@ -9,35 +12,60 @@ defmodule CodeQA.HealthReport.Grader do
   """
   @spec score_metric(map(), number()) :: integer()
   def score_metric(%{good: :high, thresholds: t}, value) do
-    value |> score_high_is_good(t) |> clamp(0, 100)
+    score_by_direction(:high, value, t) |> clamp(0, 100)
   end
 
   def score_metric(%{good: _, thresholds: t}, value) do
-    value |> score_low_is_good(t) |> clamp(0, 100)
+    score_by_direction(:low, value, t) |> clamp(0, 100)
   end
 
-  # Lower values are better: below A = 100, A = 90, A-B = 70-90, etc.
-  defp score_low_is_good(val, t) do
+  @doc """
+  Maps cosine similarity [-1, +1] to a score [0, 100] with linear interpolation
+  within each band. Result is clamped to [0, 100] and rounded to an integer.
+
+  | Cosine range  | Score range |
+  |---------------|-------------|
+  | [0.5, 1.0]    | [90, 100]   |
+  | [0.2, 0.5)    | [70, 90)    |
+  | [0.0, 0.2)    | [50, 70)    |
+  | [-0.3, 0.0)   | [30, 50)    |
+  | [-1.0, -0.3)  | [0, 30)     |
+  """
+  @spec score_cosine(float()) :: integer()
+  def score_cosine(cosine) do
+    cosine
+    |> cosine_to_score()
+    |> clamp(0, 100)
+    |> round()
+  end
+
+  defp cosine_to_score(c) when c >= 0.5, do: interpolate_between(c, 0.5, 90, 1.0, 100)
+  defp cosine_to_score(c) when c >= 0.2, do: interpolate_between(c, 0.2, 70, 0.5, 90)
+  defp cosine_to_score(c) when c >= 0.0, do: interpolate_between(c, 0.0, 50, 0.2, 70)
+  defp cosine_to_score(c) when c >= -0.3, do: interpolate_between(c, -0.3, 30, 0.0, 50)
+  defp cosine_to_score(c), do: interpolate_between(c, -1.0, 0, -0.3, 30)
+
+  # :low  — lower values are better (t.a < t.b < t.c < t.d); below t.a = 100
+  # :high — higher values are better (t.a > t.b > t.c > t.d); above t.a = 100
+  defp score_by_direction(:low, val, t) do
     cond do
       val < t.a -> 100
       val == t.a -> 90
       val <= t.b -> interpolate_between(val, t.a, 90, t.b, 70)
       val <= t.c -> interpolate_between(val, t.b, 70, t.c, 50)
       val <= t.d -> interpolate_between(val, t.c, 50, t.d, 30)
-      true -> interpolate_below_d(val, t.d, 30)
+      true -> interpolate_beyond_d(val, t.d, 30)
     end
   end
 
-  # Higher values are better: above A = 100, A = 90, A-B = 70-90, etc.
-  # Thresholds are in descending order (a > b > c > d)
-  defp score_high_is_good(val, t) do
+  defp score_by_direction(:high, val, t) do
     cond do
       val > t.a -> 100
       val == t.a -> 90
       val >= t.b -> interpolate_between(val, t.a, 90, t.b, 70)
       val >= t.c -> interpolate_between(val, t.b, 70, t.c, 50)
       val >= t.d -> interpolate_between(val, t.c, 50, t.d, 30)
-      true -> interpolate_below_d_high(val, t.d, 30)
+      true -> interpolate_beyond_d(val, t.d, 30)
     end
   end
 
@@ -52,27 +80,22 @@ defmodule CodeQA.HealthReport.Grader do
     end
   end
 
-  # Value beyond D threshold (low is good): score degrades below 30
-  defp interpolate_below_d(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0
+  # Score decays below 30 past the D threshold; abs/1 covers :low overshoot and :high undershoot.
+  # Guard with `== 0` (not a literal 0 pattern) so a 0.0 threshold never reaches the division.
+  defp interpolate_beyond_d(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0
 
-  defp interpolate_below_d(val, threshold_d, score_at_d) do
-    overshoot = (val - threshold_d) / threshold_d
-    round(Kernel.max(0, score_at_d - overshoot * score_at_d))
+  defp interpolate_beyond_d(val, threshold_d, score_at_d) do
+    deviation = abs(val - threshold_d) / threshold_d
+    round(Kernel.max(0, score_at_d - deviation * score_at_d))
   end
 
-  # Value below D threshold (high is good): score degrades below 30
-  defp interpolate_below_d_high(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0
-
-  defp interpolate_below_d_high(val, threshold_d, score_at_d) do
-    undershoot = (threshold_d - val) / threshold_d
-    round(Kernel.max(0, score_at_d - undershoot * score_at_d))
+  defp clamp(val, min_val, max_val) do
+    val |> Kernel.max(min_val) |> Kernel.min(max_val)
   end
 
-  defp clamp(val, min_val, max_val), do: val |> Kernel.max(min_val) |> Kernel.min(max_val)
-
   @doc "Convert a numeric score (0-100) to a letter grade using the given scale."
   @spec grade_letter(number(), [{number(), String.t()}]) :: String.t()
-  def grade_letter(score, scale \\ CodeQA.HealthReport.Categories.default_grade_scale()) do
+  def grade_letter(score, scale \\ Categories.default_grade_scale()) do
     Enum.find_value(scale, "F", fn {min, letter} ->
       if score >= min, do: letter
     end)
@@ -86,35 +109,14 @@ defmodule CodeQA.HealthReport.Grader do
   def grade_category(
         category,
         file_metrics,
-        scale \\ CodeQA.HealthReport.Categories.default_grade_scale()
+        scale \\ Categories.default_grade_scale()
       ) do
     scored =
       category.metrics
-      |> Enum.map(fn metric_def ->
-        value = get_in(file_metrics, [metric_def.source, metric_def.name])
-
-        if value do
-          %{
-            name: metric_def.name,
-            source: metric_def.source,
-            weight: metric_def.weight,
-            good: metric_def.good,
-            value: value,
-            score: score_metric(metric_def, value)
-          }
-        end
-      end)
+      |> Enum.map(&score_metric_entry(&1, file_metrics))
       |> Enum.reject(&is_nil/1)
 
-    total_weight = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.weight end)
-
-    score =
-      if total_weight > 0 do
-        weighted = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.score * s.weight end)
-        round(weighted / total_weight)
-      else
-        0
-      end
+    score = weighted_category_score(scored)
 
     %{
       key: category.key,
@@ -125,6 +127,34 @@ defmodule CodeQA.HealthReport.Grader do
     }
   end
 
+  defp score_metric_entry(metric_def, file_metrics) do
+    value = get_in(file_metrics, [metric_def.source, metric_def.name])
+
+    if value do
+      %{
+        name: metric_def.name,
+        source: metric_def.source,
+        weight: metric_def.weight,
+        good: metric_def.good,
+        value: value,
+        score: score_metric(metric_def, value)
+      }
+    end
+  end
+
+  defp weighted_category_score([]), do: 0
+
+  defp weighted_category_score(scored) do
+    total_weight = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.weight end)
+
+    if total_weight > 0 do
+      weighted = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.score * s.weight end)
+      round(weighted / total_weight)
+    else
+      0
+    end
+  end
+
   @doc """
   Grade a file's metrics against all categories.
   `file_metrics` is the `%{"entropy" => %{...}, "halstead" => %{...}}` map from analysis.
@@ -133,7 +163,7 @@ defmodule CodeQA.HealthReport.Grader do
   def grade_file(
         categories,
         file_metrics,
-        scale \\ CodeQA.HealthReport.Categories.default_grade_scale()
+        scale \\ Categories.default_grade_scale()
       ) do
     Enum.map(categories, &grade_category(&1, file_metrics, scale))
   end
@@ -145,7 +175,7 @@ defmodule CodeQA.HealthReport.Grader do
   def grade_aggregate(
         categories,
         aggregate,
-        scale \\ CodeQA.HealthReport.Categories.default_grade_scale()
+        scale \\ Categories.default_grade_scale()
       ) do
     # Convert aggregate format (mean_X keys) to file-metric-like format
     file_like =
@@ -161,23 +191,114 @@ defmodule CodeQA.HealthReport.Grader do
     Enum.map(categories, &grade_category(&1, file_like, scale))
   end
 
-  @doc "Compute overall score as average of category scores."
-  @spec overall_score(list(), [{number(), String.t()}]) :: {integer(), String.t()}
+  @doc """
+  Compute overall score as a weighted average of category scores.
+
+  Each category's weight is looked up from `impact_map` by converting
+  `category.key` (atom) to string. Defaults to `1` if the key is absent.
+
+  With the default empty `impact_map` every category weighs 1 (plain mean). The mean is
+  rounded (the old `/2` truncated with `div/2`); a total impact of zero scores 0.
+  """
+  @spec overall_score(
+          categories :: [map()],
+          grade_scale :: [{number(), String.t()}],
+          impact_map :: %{String.t() => pos_integer()}
+        ) :: {integer(), String.t()}
   def overall_score(
         category_grades,
-        scale \\ CodeQA.HealthReport.Categories.default_grade_scale()
+        scale \\ Categories.default_grade_scale(),
+        impact_map \\ %{}
       ) do
     if category_grades == [] do
       {0, "F"}
     else
-      avg =
-        Enum.reduce(category_grades, 0, fn g, acc -> acc + g.score end)
-        |> div(length(category_grades))
+      {weighted_sum, total_impact} =
+        Enum.reduce(category_grades, {0, 0}, fn g, {ws, ti} ->
+          impact = Map.get(impact_map, to_string(g.key), 1)
+          {ws + g.score * impact, ti + impact}
+        end)
 
+      avg = if total_impact > 0, do: round(weighted_sum / total_impact), else: 0
       {avg, grade_letter(avg, scale)}
     end
   end
 
+  @doc """
+  Grade codebase aggregate metrics using cosine similarity.
+
+  Accepts `cosines_by_category`, a map of category string keys to lists of
+  behavior cosine entries as returned by
+  `Enum.group_by(SampleRunner.diagnose_aggregate(...), & &1.category)`.
+
+  The caller is responsible for computing `cosines_by_category` so that
+  `diagnose_aggregate/2` is invoked only once across the report pipeline.
+
+  A category whose behaviors all fall below the significance threshold is kept with score 50.
+  """
+  @spec grade_cosine_categories(
+          cosines_by_category :: %{String.t() => [map()]},
+          worst_files :: %{String.t() => [map()]},
+          grade_scale :: [{number(), String.t()}]
+        ) :: [map()]
+  def grade_cosine_categories(
+        cosines_by_category,
+        worst_files,
+        scale \\ Categories.default_grade_scale()
+      ) do
+    threshold = Config.cosine_significance_threshold()
+
+    cosines_by_category
+    |> Enum.map(fn {category, behaviors} ->
+      behavior_entries =
+        score_behavior_entries(behaviors, threshold, worst_files, scale, category)
+
+      category_score = average_behavior_score(behavior_entries)
+      build_cosine_category(category, category_score, behavior_entries, scale)
+    end)
+  end
+
+  defp score_behavior_entries(behaviors, threshold, worst_files, scale, category) do
+    behaviors
+    |> Enum.reject(fn b -> abs(b.cosine) < threshold end)
+    |> Enum.map(&score_behavior_entry(&1, worst_files, scale, category))
+  end
+
+  defp score_behavior_entry(b, worst_files, scale, category) do
+    cosine_score = score_cosine(b.cosine)
+
+    %{
+      behavior: b.behavior,
+      cosine: b.cosine,
+      score: cosine_score,
+      grade: grade_letter(cosine_score, scale),
+      worst_offenders: Map.get(worst_files, "#{category}.#{b.behavior}", [])
+    }
+  end
+
+  defp average_behavior_score([]), do: 50
+
+  defp average_behavior_score(entries) do
+    round(Enum.sum(Enum.map(entries, & &1.score)) / length(entries))
+  end
+
+  defp build_cosine_category(category, category_score, behavior_entries, scale) do
+    %{
+      type: :cosine,
+      key: category,
+      name: humanize_category(category),
+      score: category_score,
+      grade: grade_letter(category_score, scale),
+      behaviors: behavior_entries
+    }
+  end
+
+  defp humanize_category(slug) do
+    slug
+    |> String.split("_")
+    |> Enum.map_join(" ", &String.capitalize/1)
+  end
+
   @doc """
   Find worst offender files for a category. Returns top N files sorted by worst score.
   `all_file_metrics` is `%{"path" => %{"metrics" => %{...}}}` from analysis results.
@@ -187,23 +308,51 @@ defmodule CodeQA.HealthReport.Grader do
         category,
         all_file_metrics,
         top_n,
-        scale \\ CodeQA.HealthReport.Categories.default_grade_scale()
+        scale \\ Categories.default_grade_scale()
       ) do
+    # NOTE: threshold metric scores are file-level aggregates; line-level attribution would require
+    # each AST node to carry its own per-metric values so that the node with the highest
+    # contribution to the bad metric score could be identified and reported directly.
     all_file_metrics
     |> Enum.map(fn {path, file_data} ->
       metrics = Map.get(file_data, "metrics", %{})
       graded = grade_category(category, metrics, scale)
+
       %{
         path: path,
         score: graded.score,
         grade: graded.grade,
         metric_scores: graded.metric_scores,
         lines: file_data["lines"],
-        bytes: file_data["bytes"]
+        bytes: file_data["bytes"],
+        top_nodes: top_3_nodes(Map.get(file_data, "nodes"))
       }
     end)
     |> Enum.filter(fn f -> f.metric_scores != [] end)
     |> Enum.sort_by(& &1.score, :asc)
     |> Enum.take(top_n)
   end
+
+  @doc """
+  Returns up to 3 nodes ranked by refactoring impact (sum of their `cosine_delta` values).
+
+  Only considers top-level nodes; children are not traversed. Returns `[]` for `nil` or
+  `[]`; nodes without a non-empty `refactoring_potentials` list rank with a score of 0.0.
+  """
+  @spec top_3_nodes(list() | nil) :: list()
+  def top_3_nodes(nil), do: []
+  def top_3_nodes([]), do: []
+
+  def top_3_nodes(nodes) when is_list(nodes) do
+    nodes
+    |> Enum.sort_by(&node_impact_score/1, :desc)
+    |> Enum.take(3)
+  end
+
+  # Impact of a node = sum of cosine_delta across its refactoring potentials.
+  # Nodes without a non-empty potentials list score 0.0.
+  defp node_impact_score(%{"refactoring_potentials" => [_ | _] = potentials}),
+    do: potentials |> Enum.map(& &1["cosine_delta"]) |> Enum.sum()
+
+  defp node_impact_score(_node), do: 0.0
 end
diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex
new file mode 100644
index 00000000..5ceddf52
--- /dev/null
+++ b/lib/codeqa/health_report/top_blocks.ex
@@ -0,0 +1,222 @@
+defmodule CodeQA.HealthReport.TopBlocks do
+  @moduledoc "Assembles the top_blocks report section from analysis node data."
+
+  alias CodeQA.CombinedMetrics.Scorer
+
+  @min_tokens 10
+  @severity_critical 0.50
+  @severity_high 0.25
+  @severity_medium 0.10
+  @gap_floor 0.01
+  @top_n 10
+  @default_min_lines 3
+  @default_max_lines 20
+
+  # Builds a `{category, behavior} => fix_hint` map from every behavior YAML; the
+  # category is the YAML file's base name without its ".yml" extension.
+  defp build_fix_hint_lookup do
+    Scorer.all_yamls()
+    |> Enum.flat_map(fn {yaml_path, data} ->
+      category = Path.basename(yaml_path, ".yml")
+      Enum.flat_map(data, &hints_for_behavior(category, &1))
+    end)
+    |> Map.new()
+  end
+
+  # Returns a one-element list for a behavior whose data map declares a non-nil
+  # "_fix_hint", and [] for every other entry (no hint, or data that is not a map).
+  defp hints_for_behavior(category, {behavior, %{"_fix_hint" => hint}}) when hint != nil,
+    do: [{{category, behavior}, hint}]
+
+  defp hints_for_behavior(_category, _entry), do: []
+
+  @doc "Returns the top blocks ranked by their largest `cosine_delta`, with source code attached."
+  @spec build(map(), [struct()], map(), keyword()) :: [map()]
+  def build(analysis_results, changed_files, codebase_cosine_lookup, opts \\ []) do
+    root = get_in(analysis_results, ["metadata", "path"]) || "."
+
+    # Sort descending, keep the first @top_n, and read source only for the survivors.
+    analysis_results
+    |> collect_enriched_blocks(changed_files, codebase_cosine_lookup, opts)
+    |> Enum.sort_by(&max_delta/1, :desc)
+    |> Enum.take(@top_n)
+    |> Enum.map(fn block -> add_source_code(block, root) end)
+  end
+
+  @doc """
+  Returns a `category => block` map holding the worst offending block of each category.
+
+  A block competes in every category named by its potentials; the one with the highest
+  `cosine_delta` wins. The `:diff_line_ranges` option, when given, limits the candidates
+  to blocks overlapping the diff.
+  """
+  @spec worst_per_category(map(), [struct()], map(), keyword()) :: %{String.t() => map()}
+  def worst_per_category(analysis_results, changed_files, codebase_cosine_lookup, opts \\ []) do
+    root = get_in(analysis_results, ["metadata", "path"]) || "."
+
+    blocks =
+      collect_enriched_blocks(analysis_results, changed_files, codebase_cosine_lookup, opts)
+
+    # One {category, {block, delta}} candidate per potential of every block.
+    candidates =
+      for block <- blocks, potential <- block.potentials do
+        {potential.category, {block, potential.cosine_delta}}
+      end
+
+    candidates
+    |> Enum.group_by(&elem(&1, 0), &elem(&1, 1))
+    |> Map.new(fn {category, scored} ->
+      # max_by keeps the first of equally-scored blocks
+      {worst, _delta} = Enum.max_by(scored, &elem(&1, 1))
+      {category, add_source_code(worst, root)}
+    end)
+  end
+
+  # Collects blocks from the selected files, filters them, and tags each with path/status.
+  defp collect_enriched_blocks(analysis_results, changed_files, codebase_cosine_lookup, opts) do
+    files = Map.get(analysis_results, "files", %{})
+    fix_hints = build_fix_hint_lookup()
+
+    min_lines = Keyword.get(opts, :block_min_lines, @default_min_lines)
+    max_lines = Keyword.get(opts, :block_max_lines, @default_max_lines)
+    diff_line_ranges = Keyword.get(opts, :diff_line_ranges, %{})
+
+    file_entries =
+      if changed_files == [] do
+        Enum.map(files, fn {path, data} -> {path, nil, data} end)
+      else
+        changed_index = Map.new(changed_files, &{&1.path, &1.status})
+
+        files
+        |> Enum.filter(fn {path, _} -> Map.has_key?(changed_index, path) end)
+        |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end)
+      end
+
+    file_entries
+    |> Enum.flat_map(fn {path, status, file_data} ->
+      path_diff_ranges = Map.get(diff_line_ranges, path, [])
+
+      file_data
+      |> Map.get("nodes", [])
+      |> Enum.flat_map(&collect_nodes/1)
+      # A missing token_count counts as 0; a bare `nil >= n` is true in Erlang term order.
+      |> Enum.filter(&((&1["token_count"] || 0) >= @min_tokens))
+      |> Enum.filter(&block_in_line_range?(&1, min_lines, max_lines))
+      |> filter_by_diff_overlap(path_diff_ranges, diff_line_ranges)
+      |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints))
+      |> Enum.reject(&(&1.potentials == []))
+      |> Enum.map(&Map.merge(&1, %{path: path, status: status}))
+    end)
+  end
+
+  # True when the node's inclusive line count lies within [min_lines, max_lines].
+  @spec block_in_line_range?(map(), pos_integer(), pos_integer()) :: boolean()
+  defp block_in_line_range?(node, min_lines, max_lines) do
+    first = node["start_line"] || 1
+    span = (node["end_line"] || first) - first + 1
+    min_lines <= span and span <= max_lines
+  end
+
+  # An empty `diff_line_ranges` map means no diff filter is active: keep every block.
+  @spec filter_by_diff_overlap([map()], [{pos_integer(), pos_integer()}], map()) :: [map()]
+  defp filter_by_diff_overlap(blocks, _path_ranges, diff_line_ranges)
+       when diff_line_ranges == %{},
+       do: blocks
+
+  # Otherwise keep only the blocks that overlap one of this file's diff ranges.
+  defp filter_by_diff_overlap(blocks, path_ranges, _diff_line_ranges) do
+    Enum.filter(blocks, fn block -> block_overlaps_diff?(block, path_ranges) end)
+  end
+
+  # True when the block's line span intersects at least one of the file's diff ranges.
+  # A missing start_line defaults to 1 and a missing end_line to the start line.
+  @spec block_overlaps_diff?(map(), [{pos_integer(), pos_integer()}]) :: boolean()
+  defp block_overlaps_diff?(_node, []), do: false
+
+  defp block_overlaps_diff?(node, path_ranges) do
+    first = node["start_line"] || 1
+    last = node["end_line"] || first
+
+    Enum.any?(path_ranges, fn {lo, hi} -> ranges_overlap?(first, last, lo, hi) end)
+  end
+
+  # Closed intervals [start1, end1] and [start2, end2] share at least one line.
+  @spec ranges_overlap?(pos_integer(), pos_integer(), pos_integer(), pos_integer()) :: boolean()
+  defp ranges_overlap?(start1, end1, start2, end2),
+    do: not (end2 < start1 or end1 < start2)
+
+  # Flattens a node tree depth-first: the node itself comes first, followed by every
+  # descendant reachable through its "children" lists.
+  defp collect_nodes(node),
+    do: [node | Enum.flat_map(Map.get(node, "children", []), &collect_nodes/1)]
+
+  # Shapes a raw node into a report block; kept potentials are sorted by descending delta.
+  defp enrich_block(node, cosine_lookup, fix_hints) do
+    ranked =
+      node
+      |> Map.get("refactoring_potentials", [])
+      |> Enum.flat_map(&List.wrap(enrich_potential(&1, cosine_lookup, fix_hints)))
+      |> Enum.sort_by(& &1.cosine_delta, :desc)
+
+    %{
+      start_line: node["start_line"],
+      end_line: node["end_line"],
+      type: node["type"],
+      token_count: node["token_count"],
+      potentials: ranked
+    }
+  end
+
+  # Rates one potential: its cosine_delta relative to the codebase's remaining gap
+  # (1.0 - codebase cosine, floored at @gap_floor). Returns nil when it rates :filtered.
+  defp enrich_potential(p, cosine_lookup, fix_hints) do
+    key = {p["category"], p["behavior"]}
+    delta = p["cosine_delta"]
+    gap = max(@gap_floor, 1.0 - Map.get(cosine_lookup, key, 0.0))
+
+    case classify(delta / gap) do
+      :filtered ->
+        nil
+
+      severity ->
+        %{
+          category: p["category"],
+          behavior: p["behavior"],
+          cosine_delta: delta,
+          severity: severity,
+          fix_hint: Map.get(fix_hints, key)
+        }
+    end
+  end
+
+  defp classify(ratio) when ratio > @severity_critical, do: :critical
+  defp classify(ratio) when ratio > @severity_high, do: :high
+  defp classify(ratio) when ratio > @severity_medium, do: :medium
+  defp classify(_ratio), do: :filtered
+
+  # Largest cosine_delta among the block's potentials; 0.0 for a block with none.
+  defp max_delta(%{potentials: []}), do: 0.0
+
+  defp max_delta(%{potentials: ps}), do: ps |> Enum.map(& &1.cosine_delta) |> Enum.max()
+
+  # Attaches the block's source lines (nil when the file cannot be read) and language name.
+  # A missing start_line falls back to 1, as in block_in_line_range?/3, instead of raising.
+  defp add_source_code(block, base_path) do
+    start_line = block.start_line || 1
+    end_line = block.end_line || start_line
+
+    source =
+      case File.read(Path.join(base_path, block.path)) do
+        {:ok, content} ->
+          content
+          |> String.split("\n")
+          |> Enum.slice((start_line - 1)..(end_line - 1)//1)
+          |> Enum.join("\n")
+
+        {:error, _} ->
+          nil
+      end
+
+    Map.merge(block, %{source: source, language: CodeQA.Language.detect(block.path).name()})
+  end
+end
diff --git a/lib/codeqa/languages/code/native/cpp.ex b/lib/codeqa/languages/code/native/cpp.ex
new file mode 100644
index 00000000..31cbb4e1
--- /dev/null
+++ b/lib/codeqa/languages/code/native/cpp.ex
@@ -0,0 +1,49 @@
+defmodule CodeQA.Languages.Code.Native.Cpp do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "cpp"
+
+  @impl true
+  def extensions, do: ~w[c cpp cc cxx hpp h hh]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while do class struct namespace using include template typename
+    return new delete this public private protected virtual override static
+    const constexpr inline extern try catch throw switch case break continue
+    default auto void true false nullptr
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= -> ::
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ # *
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[class struct namespace template]
+
+  @impl true
+  def branch_keywords, do: ~w[else catch case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: ~w[public private protected static virtual override inline]
+
+  @impl true
+  def module_keywords, do: ~w[class struct namespace enum]
+end
diff --git a/lib/codeqa/languages/code/native/go.ex b/lib/codeqa/languages/code/native/go.ex
new file mode 100644
index 00000000..b728aab4
--- /dev/null
+++ b/lib/codeqa/languages/code/native/go.ex
@@ -0,0 +1,51 @@
+defmodule CodeQA.Languages.Code.Native.Go do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "go"
+
+  @impl true
+  def extensions, do: ~w[go]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for func type struct interface package import return var const
+    map chan go defer select switch case break continue default fallthrough
+    range make new append len cap close nil true false
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= :=
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ;
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[func type struct interface]
+
+  @impl true
+  def branch_keywords, do: ~w[else case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: []
+
+  @impl true
+  def function_keywords, do: ~w[func]
+
+  @impl true
+  def import_keywords, do: ~w[import package]
+end
diff --git a/lib/codeqa/languages/code/native/haskell.ex b/lib/codeqa/languages/code/native/haskell.ex
new file mode 100644
index 00000000..48cd6462
--- /dev/null
+++ b/lib/codeqa/languages/code/native/haskell.ex
@@ -0,0 +1,57 @@
+defmodule CodeQA.Languages.Code.Native.Haskell do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "haskell"
+
+  @impl true
+  def extensions, do: ~w[hs lhs]
+
+  @impl true
+  def comment_prefixes, do: ~w[--]
+
+  @impl true
+  def block_comments, do: [{"{-", "-}"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else then for do let in where module import data type newtype class
+    instance deriving case of return True False Nothing Just
+    infixl infixr infix qualified as hiding
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == /= <= >= + - * / ^ && || ! $ . <$> <*> >>= >> -> <- :: = | @ ~
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; | @ -> <- ::
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[data type newtype class instance]
+
+  @impl true
+  def branch_keywords, do: ~w[else]
+
+  @impl true
+  def block_end_tokens, do: []
+
+  @impl true
+  def function_keywords, do: ~w[where let]
+
+  @impl true
+  def module_keywords, do: ~w[module class instance]
+
+  @impl true
+  def import_keywords, do: ~w[import]
+
+  @impl true
+  def test_keywords, do: ~w[test it describe prop]
+
+  @impl true
+  def uses_colon_indent?, do: true
+end
diff --git a/lib/codeqa/languages/code/native/ocaml.ex b/lib/codeqa/languages/code/native/ocaml.ex
new file mode 100644
index 00000000..d1e8b213
--- /dev/null
+++ b/lib/codeqa/languages/code/native/ocaml.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Native.Ocaml do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "ocaml"
+
+  @impl true
+  def extensions, do: ~w[ml mli]
+
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: [{"(*", "*)"}]
+
+  @impl true
+  def keywords, do: ~w[
+    let rec fun if then else for while do done begin end match with type module
+    open struct sig functor val mutable exception raise try when and or not in
+    of as include class object method inherit new virtual
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == = != <> <= >= + - * / mod << >> & | ^ ~ && || @ :: |> -> <- := !
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; | @ ->
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[let type module class]
+
+  @impl true
+  def branch_keywords, do: ~w[else with when]
+
+  @impl true
+  def block_end_tokens, do: ~w[end]
+
+  @impl true
+  def access_modifiers, do: ~w[mutable virtual]
+
+  @impl true
+  def function_keywords, do: ~w[let fun]
+
+  @impl true
+  def module_keywords, do: ~w[module struct functor class]
+
+  @impl true
+  def import_keywords, do: ~w[open include]
+end
diff --git a/lib/codeqa/languages/code/native/rust.ex b/lib/codeqa/languages/code/native/rust.ex
new file mode 100644
index 00000000..0616834d
--- /dev/null
+++ b/lib/codeqa/languages/code/native/rust.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Native.Rust do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "rust"
+
+  @impl true
+  def extensions, do: ~w[rs]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while loop fn struct enum trait impl use mod pub let mut const
+    static return match type where as in ref move async await dyn unsafe extern
+    crate self super true false
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= -> => ::
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ # |
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[fn struct enum trait impl mod]
+
+  @impl true
+  def branch_keywords, do: ~w[else match]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: ~w[pub]
+
+  @impl true
+  def function_keywords, do: ~w[fn]
+
+  @impl true
+  def module_keywords, do: ~w[impl trait struct enum]
+
+  @impl true
+  def import_keywords, do: ~w[use extern]
+end
diff --git a/lib/codeqa/languages/code/native/swift.ex b/lib/codeqa/languages/code/native/swift.ex
new file mode 100644
index 00000000..04225287
--- /dev/null
+++ b/lib/codeqa/languages/code/native/swift.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Native.Swift do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "swift"
+
+  @impl true
+  def extensions, do: ~w[swift]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while repeat func class struct enum protocol extension import
+    return let var guard defer do try catch throw switch case break continue
+    default in as is init self super nil true false async await
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % << >> & | ^ ~ && || ?? = += -= *= /= %= -> =>
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ # |
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[func class struct enum protocol extension]
+
+  @impl true
+  def branch_keywords, do: ~w[else catch case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: ~w[public private internal fileprivate open]
+
+  @impl true
+  def function_keywords, do: ~w[func]
+
+  @impl true
+  def module_keywords, do: ~w[class struct protocol extension enum]
+
+  @impl true
+  def import_keywords, do: ~w[import]
+end
diff --git a/lib/codeqa/languages/code/native/zig.ex b/lib/codeqa/languages/code/native/zig.ex
new file mode 100644
index 00000000..f3e13f85
--- /dev/null
+++ b/lib/codeqa/languages/code/native/zig.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Native.Zig do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "zig"
+
+  @impl true
+  def extensions, do: ~w[zig]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    const var fn if else for while switch return pub try catch error defer errdefer
+    comptime inline struct enum union test break continue null undefined unreachable
+    async await suspend resume orelse anytype anyerror bool void noreturn type
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= orelse catch
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[fn struct enum union]
+
+  @impl true
+  def branch_keywords, do: ~w[else]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: ~w[pub inline comptime]
+
+  @impl true
+  def function_keywords, do: ~w[fn]
+
+  @impl true
+  def module_keywords, do: ~w[struct enum union]
+
+  @impl true
+  def test_keywords, do: ~w[test]
+end
diff --git a/lib/codeqa/languages/code/scripting/julia.ex b/lib/codeqa/languages/code/scripting/julia.ex
new file mode 100644
index 00000000..8f859d1d
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/julia.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Scripting.Julia do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "julia"
+
+  @impl true
+  def extensions, do: ~w[jl]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: [{"#=", "=#"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else elseif for while do end function return module import using export
+    struct mutable abstract type primitive begin let local global const try catch
+    finally throw macro quote true false nothing
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % ^ << >> & | ~ && || = += -= *= /= ÷ → ← |>
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ |
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function struct macro module]
+
+  @impl true
+  def branch_keywords, do: ~w[else elseif catch finally]
+
+  @impl true
+  def block_end_tokens, do: ~w[end]
+
+  @impl true
+  def function_keywords, do: ~w[function macro]
+
+  @impl true
+  def module_keywords, do: ~w[module struct]
+
+  @impl true
+  def import_keywords, do: ~w[import using]
+
+  @impl true
+  def test_keywords, do: ~w[@test @testset]
+end
diff --git a/lib/codeqa/languages/code/scripting/lua.ex b/lib/codeqa/languages/code/scripting/lua.ex
new file mode 100644
index 00000000..7ae8e9d3
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/lua.ex
@@ -0,0 +1,47 @@
+defmodule CodeQA.Languages.Code.Scripting.Lua do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "lua"
+
+  @impl true
+  def extensions, do: ~w[lua]
+
+  @impl true
+  def comment_prefixes, do: ~w[--]
+
+  @impl true
+  def block_comments, do: [{"--[[", "]]"}]
+
+  @impl true
+  def keywords, do: ~w[
+    and break do else elseif end false for function goto if in local nil not or
+    repeat return then true until while
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == ~= <= >= + - * / % ^ # & | ~ << >> // .. = and or not
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ;
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function local]
+
+  @impl true
+  def branch_keywords, do: ~w[else elseif]
+
+  @impl true
+  def block_end_tokens, do: ~w[end]
+
+  @impl true
+  def function_keywords, do: ~w[function]
+
+  @impl true
+  def import_keywords, do: ~w[require]
+end
diff --git a/lib/codeqa/languages/code/scripting/perl.ex b/lib/codeqa/languages/code/scripting/perl.ex
new file mode 100644
index 00000000..3155f1c3
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/perl.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Scripting.Perl do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "perl"
+
+  @impl true
+  def extensions, do: ~w[pl pm t]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else elsif unless for foreach while do until sub my our local use require
+    package return last next redo goto print say die warn eval and or not defined
+    undef true false
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= eq ne lt gt le ge + - * / % ** . x = += -= *= /= .= && || ! ~ & |
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ $ %
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[sub package]
+
+  @impl true
+  def branch_keywords, do: ~w[else elsif]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def function_keywords, do: ~w[sub]
+
+  @impl true
+  def module_keywords, do: ~w[package]
+
+  @impl true
+  def import_keywords, do: ~w[use require]
+
+  @impl true
+  def test_keywords, do: ~w[ok is isnt like unlike cmp_ok]
+end
diff --git a/lib/codeqa/languages/code/scripting/php.ex b/lib/codeqa/languages/code/scripting/php.ex
new file mode 100644
index 00000000..294b9a1f
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/php.ex
@@ -0,0 +1,55 @@
+defmodule CodeQA.Languages.Code.Scripting.PHP do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "php"
+
+  @impl true
+  def extensions, do: ~w[php phtml php3 php4 php5 php7 php8]
+
+  @impl true
+  def comment_prefixes, do: ~w[// #]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else elseif for foreach while do function class interface trait namespace
+    use return new echo print public private protected static abstract final
+    try catch finally throw switch case break continue default include require
+    include_once require_once extends implements null true false
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == === != !== <= >= + - * / % ** << >> & | ^ ~ && || ?? = += -= *= /= %= -> :: =>
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ # $
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function class interface trait namespace]
+
+  @impl true
+  def branch_keywords, do: ~w[else elseif catch finally case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[} endif endfor endforeach endwhile endswitch]
+
+  @impl true
+  def access_modifiers, do: ~w[public private protected static abstract final]
+
+  @impl true
+  def function_keywords, do: ~w[function fn]
+
+  @impl true
+  def module_keywords, do: ~w[class interface trait namespace]
+
+  @impl true
+  def import_keywords, do: ~w[use namespace]
+end
diff --git a/lib/codeqa/languages/code/scripting/python.ex b/lib/codeqa/languages/code/scripting/python.ex
new file mode 100644
index 00000000..e1c4bb4f
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/python.ex
@@ -0,0 +1,57 @@
+defmodule CodeQA.Languages.Code.Scripting.Python do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "python"
+
+  @impl true
+  def extensions, do: ~w[py pyi]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else elif for while def class import from return pass break continue
+    not and or in is lambda with as try except finally raise yield async await
+    global nonlocal del assert True False None
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % ** // << >> & | ^ ~ = += -= *= /= %= **= //=
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ #
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[def class async]
+
+  @impl true
+  def branch_keywords, do: ~w[elif else except finally]
+
+  @impl true
+  def block_end_tokens, do: []
+
+  @impl true
+  def access_modifiers, do: []
+
+  @impl true
+  def function_keywords, do: ~w[def async]
+
+  @impl true
+  def module_keywords, do: ~w[class]
+
+  @impl true
+  def import_keywords, do: ~w[import from]
+
+  @impl true
+  def uses_colon_indent?, do: true
+end
diff --git a/lib/codeqa/languages/code/scripting/r.ex b/lib/codeqa/languages/code/scripting/r.ex
new file mode 100644
index 00000000..d735d2b1
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/r.ex
@@ -0,0 +1,49 @@
+defmodule CodeQA.Languages.Code.Scripting.R do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "r"
+
+  @impl true
+  def extensions, do: ~w[r R Rmd rmd]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while repeat break next return function TRUE FALSE NULL NA Inf NaN
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / ^ %% %/% %in% <- -> = & | ! && || ~ : ::
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ;
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function]
+
+  @impl true
+  def branch_keywords, do: ~w[else]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def function_keywords, do: ~w[function]
+
+  @impl true
+  def import_keywords, do: ~w[library require source]
+
+  @impl true
+  def test_keywords, do: ~w[test_that expect_equal expect_true describe it]
+end
diff --git a/lib/codeqa/languages/code/scripting/ruby.ex b/lib/codeqa/languages/code/scripting/ruby.ex
new file mode 100644
index 00000000..d1e9761e
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/ruby.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.Languages.Code.Scripting.Ruby do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "ruby"
+
+  @impl true
+  def extensions, do: ~w[rb rake gemspec]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else elsif unless for while until def class module do end return begin
+    rescue ensure raise yield include extend require require_relative
+    attr_accessor attr_reader attr_writer then case when next break in
+    and or not true false nil self super
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % ** << >> & | ^ ~ = += -= *= /= %= **= <=> === =~
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ | # ?
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[def class module]
+
+  @impl true
+  def branch_keywords, do: ~w[else elsif rescue ensure when]
+
+  @impl true
+  def block_end_tokens, do: ~w[end]
+
+  @impl true
+  def access_modifiers, do: []
+
+  @impl true
+  def function_keywords, do: ~w[def]
+
+  @impl true
+  def module_keywords, do: ~w[class module]
+
+  @impl true
+  def import_keywords, do: ~w[require require_relative include]
+
+  @impl true
+  def test_keywords, do: ~w[it describe context scenario feature given]
+end
diff --git a/lib/codeqa/languages/code/scripting/shell.ex b/lib/codeqa/languages/code/scripting/shell.ex
new file mode 100644
index 00000000..710d28aa
--- /dev/null
+++ b/lib/codeqa/languages/code/scripting/shell.ex
@@ -0,0 +1,47 @@
+defmodule CodeQA.Languages.Code.Scripting.Shell do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "shell"
+
+  @impl true
+  def extensions, do: ~w[sh bash zsh fish]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else elif fi for while do done case esac function return then in until
+    select break continue exit local export readonly unset
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % && || | & > < >> << = += -= *= /= %= -eq -ne -lt -gt -le -ge
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { } , . : ; @ # $ ! ? |
+  ] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function]
+
+  @impl true
+  def branch_keywords, do: ~w[else elif case]
+
+  @impl true
+  def block_end_tokens, do: ~w[fi done esac]
+
+  @impl true
+  def access_modifiers, do: []
+
+  @impl true
+  def function_keywords, do: ~w[function]
+end
diff --git a/lib/codeqa/languages/code/vm/clojure.ex b/lib/codeqa/languages/code/vm/clojure.ex
new file mode 100644
index 00000000..5dd149be
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/clojure.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Vm.Clojure do
+  @moduledoc false
+  # Token tables for Clojure sources (`clj`, `cljs`, `cljc` and `edn` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "clojure"
+
+  @impl true
+  def extensions, do: ~w(clj cljs cljc edn)
+
+  # `;` starts a line comment; no block-comment pairs are declared.
+  @impl true
+  def comment_prefixes, do: [";"]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    def defn defmacro let fn if do when cond case for loop recur ns require use
+    import try catch finally throw quote defprotocol defrecord deftype reify
+    extend-type extend-protocol nil true false and or not
+  ]
+
+  @impl true
+  def operators, do: ~w[= == not= < > <= >= + - * / mod rem quot and or not]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; # @ ^] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(def defn defmacro defprotocol defrecord deftype)
+
+  @impl true
+  def branch_keywords, do: ["else"]
+
+  # The closing parenthesis is the only block terminator declared.
+  @impl true
+  def block_end_tokens, do: [")"]
+
+  @impl true
+  def function_keywords, do: ~w(defn fn)
+
+  @impl true
+  def module_keywords, do: ~w(ns defprotocol defrecord)
+
+  @impl true
+  def import_keywords, do: ~w(ns require use import)
+
+  @impl true
+  def test_keywords, do: ~w(deftest is testing)
+end
diff --git a/lib/codeqa/languages/code/vm/csharp.ex b/lib/codeqa/languages/code/vm/csharp.ex
new file mode 100644
index 00000000..85edce73
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/csharp.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Vm.CSharp do
+  @moduledoc false
+  # Token tables for C# sources (`cs` and `csx` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "csharp"
+
+  @impl true
+  def extensions, do: ~w(cs csx)
+
+  # `//` line comments plus `/* ... */` block comments.
+  @impl true
+  def comment_prefixes, do: ["//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for foreach while do class interface struct enum namespace using
+    return var new this base public private protected internal static abstract
+    virtual override sealed async await try catch finally throw switch case
+    break continue default in out ref void true false null readonly const
+  ]
+
+  @impl true
+  def operators, do: ~w[== != <= >= + - * / % << >> & | ^ ~ && || ?? = += -= *= /= %=]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # =>] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(class interface struct enum namespace)
+
+  @impl true
+  def branch_keywords, do: ~w(else catch finally case default)
+
+  @impl true
+  def block_end_tokens, do: ["}"]
+
+  # Modifier words; no `function_keywords` override exists, so its default applies.
+  @impl true
+  def access_modifiers,
+    do:
+      ~w[public private protected internal static abstract virtual override sealed readonly const async]
+
+  @impl true
+  def module_keywords, do: ~w(class interface struct enum namespace)
+
+  @impl true
+  def import_keywords, do: ~w(using namespace)
+end
diff --git a/lib/codeqa/languages/code/vm/dart.ex b/lib/codeqa/languages/code/vm/dart.ex
new file mode 100644
index 00000000..e821e226
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/dart.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.Languages.Code.Vm.Dart do
+  @moduledoc false
+  # Token tables for Dart sources (`dart` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "dart"
+
+  @impl true
+  def extensions, do: ["dart"]
+
+  @impl true
+  def comment_prefixes, do: ["//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while do switch case break continue return class extends implements
+    with new final const var void null true false import export part library
+    abstract static dynamic async await yield try catch finally throw rethrow
+    enum typedef mixin factory is as in
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == != <= >= + - * / % ~/ << >> & | ^ ~ && || ?? = += -= *= /= %= ??= -> =>
+  ]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # =>] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(class enum typedef mixin)
+
+  @impl true
+  def branch_keywords, do: ~w(else catch finally case)
+
+  @impl true
+  def block_end_tokens, do: ["}"]
+
+  @impl true
+  def access_modifiers, do: ~w(static final const abstract)
+
+  @impl true
+  def function_keywords, do: ~w(void async)
+
+  @impl true
+  def module_keywords, do: ~w(class enum mixin)
+
+  @impl true
+  def import_keywords, do: ~w(import export)
+
+  @impl true
+  def test_keywords, do: ~w(test group setUp tearDown expect)
+end
diff --git a/lib/codeqa/languages/code/vm/elixir.ex b/lib/codeqa/languages/code/vm/elixir.ex
new file mode 100644
index 00000000..2eab0274
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/elixir.ex
@@ -0,0 +1,59 @@
+defmodule CodeQA.Languages.Code.Vm.Elixir do
+  @moduledoc false
+  # Token tables for Elixir sources (`ex`/`exs`); unset callbacks keep the `use` defaults.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "elixir"
+
+  @impl true
+  def extensions, do: ~w[ex exs]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else unless for do end def defp defmodule defmacro defmacrop defprotocol
+    defimpl defguard defdelegate defstruct case cond with when fn try rescue
+    catch after raise receive in not and or true false nil
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == === != !== <= >= + - * / % << >> & | ^ ~ && || |> <> <- -> = ! not and or in
+  ]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ |] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords,
+    do:
+      ~w[def defp defmodule defmacro defmacrop defprotocol defimpl defdelegate defoverridable defguard]
+
+  # `after` (not Ruby's `ensure`) is the cleanup clause of Elixir's `try` and `receive`.
+  @impl true
+  def branch_keywords, do: ~w[else rescue catch after cond when case]
+
+  @impl true
+  def block_end_tokens, do: ~w[end]
+
+  @impl true
+  def access_modifiers, do: []
+
+  @impl true
+  def function_keywords, do: ~w[def defp defmacro defmacrop defdelegate defguard]
+
+  @impl true
+  def module_keywords, do: ~w[defmodule defprotocol defimpl]
+
+  @impl true
+  def import_keywords, do: ~w[import require use alias]
+
+  @impl true
+  def test_keywords, do: ~w[test describe]
+end
diff --git a/lib/codeqa/languages/code/vm/erlang.ex b/lib/codeqa/languages/code/vm/erlang.ex
new file mode 100644
index 00000000..c835dd63
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/erlang.ex
@@ -0,0 +1,54 @@
+defmodule CodeQA.Languages.Code.Vm.Erlang do
+  @moduledoc false
+  # Token tables for Erlang sources (`erl` and `hrl` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "erlang"
+
+  @impl true
+  def extensions, do: ~w(erl hrl)
+
+  # `%` starts a line comment; no block-comment pairs are declared.
+  @impl true
+  def comment_prefixes, do: ["%"]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if case when of begin end receive after fun try catch throw error exit
+    module export import define record true false ok undefined andalso orelse
+    not band bor bxor bnot bsl bsr div rem
+  ]
+
+  @impl true
+  def operators, do: ~w[== /= =< >= =:= =/= + - * / ! <- -> :: | . , ; :]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; | ->] ++ ["[", "]"]
+
+  # Module attributes are listed together with their leading `-`.
+  @impl true
+  def declaration_keywords, do: ~w(-module -record -define)
+
+  @impl true
+  def branch_keywords, do: ~w(of after catch)
+
+  @impl true
+  def block_end_tokens, do: ["end"]
+
+  @impl true
+  def function_keywords, do: ["fun"]
+
+  @impl true
+  def module_keywords, do: ["-module"]
+
+  @impl true
+  def import_keywords, do: ~w(-import -include)
+
+  @impl true
+  def test_keywords, do: ~w(_test_ _test)
+end
diff --git a/lib/codeqa/languages/code/vm/fsharp.ex b/lib/codeqa/languages/code/vm/fsharp.ex
new file mode 100644
index 00000000..9c7792f3
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/fsharp.ex
@@ -0,0 +1,60 @@
+defmodule CodeQA.Languages.Code.Vm.Fsharp do
+  @moduledoc false
+  # Token tables for F# sources (`fs`, `fsi` and `fsx` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "fsharp"
+
+  @impl true
+  def extensions, do: ~w[fs fsi fsx]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"(*", "*)"}]
+
+  @impl true
+  def keywords, do: ~w[
+    let rec if then else for while do match with type module open namespace val
+    mutable abstract member override new return yield async await try finally
+    raise true false null and or not in when downto to
+  ]
+
+  # Every operator is listed exactly once, so the raw list has no duplicates.
+  @impl true
+  def operators, do: ~w[== != <= >= + - * / % << >> & | ^ ~ && || = |> <| -> <- :: @ ?]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; | @ # ->] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[let type module]
+
+  @impl true
+  def branch_keywords, do: ~w[else with]
+
+  @impl true
+  def block_end_tokens, do: []
+
+  @impl true
+  def access_modifiers, do: ~w[public private protected internal static abstract override]
+
+  @impl true
+  def function_keywords, do: ~w[let fun]
+
+  @impl true
+  def module_keywords, do: ~w[module namespace type]
+
+  @impl true
+  def import_keywords, do: ~w[open]
+
+  @impl true
+  def test_keywords, do: ~w[testCase test testProperty]
+
+  # NOTE(review): presumably tells consumers that blocks are indentation-based — confirm.
+  @impl true
+  def uses_colon_indent?, do: true
+end
diff --git a/lib/codeqa/languages/code/vm/java.ex b/lib/codeqa/languages/code/vm/java.ex
new file mode 100644
index 00000000..fa018e0c
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/java.ex
@@ -0,0 +1,52 @@
+defmodule CodeQA.Languages.Code.Vm.Java do
+  @moduledoc false
+  # Token tables for Java sources (`java` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "java"
+
+  @impl true
+  def extensions, do: ~w[java]
+
+  # `//` line comments plus `/* ... */` block comments.
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while do class interface enum extends implements import package
+    return new this super public private protected static abstract final
+    synchronized volatile try catch finally throw throws switch case break
+    continue default void true false null instanceof
+  ]
+
+  @impl true
+  def operators, do: ~w[== != <= >= + - * / % << >> >>> & | ^ ~ && || = += -= *= /= %=]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @] ++ ~w( [ ] )
+
+  # `enum` is declared here too, matching `keywords/0` and `module_keywords/0`.
+  @impl true
+  def declaration_keywords, do: ~w[class interface enum]
+
+  @impl true
+  def branch_keywords, do: ~w[else catch finally case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  @impl true
+  def access_modifiers, do: ~w[public private protected static abstract final synchronized]
+
+  @impl true
+  def module_keywords, do: ~w[class interface enum]
+
+  @impl true
+  def import_keywords, do: ~w[import package]
+end
diff --git a/lib/codeqa/languages/code/vm/kotlin.ex b/lib/codeqa/languages/code/vm/kotlin.ex
new file mode 100644
index 00000000..4c286c27
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/kotlin.ex
@@ -0,0 +1,55 @@
+defmodule CodeQA.Languages.Code.Vm.Kotlin do
+  @moduledoc false
+  # Token tables for Kotlin sources (`kt` and `kts` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "kotlin"
+
+  @impl true
+  def extensions, do: ~w(kt kts)
+
+  @impl true
+  def comment_prefixes, do: ["//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while do fun class object interface data sealed abstract enum
+    companion import package return val var when is as in out by override open
+    final private protected public internal suspend inline reified crossinline
+    noinline try catch finally throw break continue null true false this super init
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == === != !== <= >= + - * / % << >> & | ^ ~ && || ?: = += -= *= /= %= -> => ::
+  ]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # |] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(fun class object interface data sealed abstract enum)
+
+  @impl true
+  def branch_keywords, do: ~w(else when catch finally)
+
+  @impl true
+  def block_end_tokens, do: ["}"]
+
+  @impl true
+  def access_modifiers, do: ~w(public private protected internal override open abstract final)
+
+  @impl true
+  def function_keywords, do: ["fun"]
+
+  @impl true
+  def module_keywords, do: ~w(class interface object)
+
+  @impl true
+  def import_keywords, do: ~w(import package)
+end
diff --git a/lib/codeqa/languages/code/vm/scala.ex b/lib/codeqa/languages/code/vm/scala.ex
new file mode 100644
index 00000000..08ac7ab1
--- /dev/null
+++ b/lib/codeqa/languages/code/vm/scala.ex
@@ -0,0 +1,58 @@
+defmodule CodeQA.Languages.Code.Vm.Scala do
+  @moduledoc false
+  # Token tables for Scala sources (`scala` and `sc` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "scala"
+
+  @impl true
+  def extensions, do: ~w(scala sc)
+
+  # `//` line comments plus `/* ... */` block comments.
+  @impl true
+  def comment_prefixes, do: ["//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while do def class object trait extends with new return import
+    package val var type match case sealed abstract override final protected
+    private implicit lazy yield try catch finally throw true false null this super
+  ]
+
+  @impl true
+  def operators, do: ~w[== != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= => <- <: >: :]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # =>] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(def class object trait type)
+
+  @impl true
+  def branch_keywords, do: ~w(else catch case finally)
+
+  # The closing brace is the only block terminator declared.
+  @impl true
+  def block_end_tokens, do: ["}"]
+
+  @impl true
+  def access_modifiers,
+    do: ~w(public private protected override abstract final sealed implicit lazy)
+
+  @impl true
+  def function_keywords, do: ["def"]
+
+  @impl true
+  def module_keywords, do: ~w(class object trait package)
+
+  @impl true
+  def import_keywords, do: ~w(import package)
+
+  @impl true
+  def test_keywords, do: ~w(test it describe should)
+end
diff --git a/lib/codeqa/languages/code/web/javascript.ex b/lib/codeqa/languages/code/web/javascript.ex
new file mode 100644
index 00000000..87f48f59
--- /dev/null
+++ b/lib/codeqa/languages/code/web/javascript.ex
@@ -0,0 +1,57 @@
+defmodule CodeQA.Languages.Code.Web.JavaScript do
+  @moduledoc false
+  # Token tables for JavaScript sources (`js`, `mjs`, `cjs`, `jsx`, `vue`, `svelte`).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "javascript"
+
+  @impl true
+  def extensions, do: ~w(js mjs cjs jsx vue svelte)
+
+  @impl true
+  def comment_prefixes, do: ["//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while function class return var let const import export from
+    new this typeof instanceof try catch finally throw switch case break
+    continue default delete in of async await yield true false null undefined
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == === != !== <= >= + - * / % ** << >> >>> & | ^ ~ && || ?? = += -= *= /= %=
+  ]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # =>] ++ ["[", "]"]
+
+  @impl true
+  def declaration_keywords, do: ~w(function class async)
+
+  @impl true
+  def branch_keywords, do: ~w(else catch finally case default)
+
+  @impl true
+  def block_end_tokens, do: ["}"]
+
+  @impl true
+  def access_modifiers, do: ~w(export static)
+
+  @impl true
+  def function_keywords, do: ~w(function async)
+
+  @impl true
+  def module_keywords, do: ["class"]
+
+  @impl true
+  def import_keywords, do: ["import"]
+
+  @impl true
+  def test_keywords, do: ~w(test it describe context scenario feature given)
+end
diff --git a/lib/codeqa/languages/code/web/typescript.ex b/lib/codeqa/languages/code/web/typescript.ex
new file mode 100644
index 00000000..b8a422af
--- /dev/null
+++ b/lib/codeqa/languages/code/web/typescript.ex
@@ -0,0 +1,60 @@
+defmodule CodeQA.Languages.Code.Web.TypeScript do
+  @moduledoc false
+  # Token tables for TypeScript sources (`ts` and `tsx` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "typescript"
+
+  @impl true
+  def extensions, do: ~w[ts tsx]
+
+  @impl true
+  def comment_prefixes, do: ~w[//]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    if else for while function class return var let const import export from
+    new this typeof instanceof try catch finally throw switch case break
+    continue default delete in of async await yield true false null undefined
+    type interface enum namespace declare abstract override readonly implements
+    extends satisfies as keyof infer never unknown any void
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == === != !== <= >= + - * / % ** << >> >>> & | ^ ~ && || ?? = += -= *= /= %=
+  ]
+
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ # => <] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[function class async interface enum namespace type declare]
+
+  @impl true
+  def branch_keywords, do: ~w[else catch finally case default]
+
+  @impl true
+  def block_end_tokens, do: ~w[}]
+
+  # Limited to modifiers TypeScript has; `sealed` (a C# modifier) is not one of them.
+  @impl true
+  def access_modifiers, do: ~w[export public private protected static abstract override readonly]
+
+  @impl true
+  def function_keywords, do: ~w[function async]
+
+  @impl true
+  def module_keywords, do: ~w[class interface enum namespace]
+
+  @impl true
+  def import_keywords, do: ~w[import]
+
+  @impl true
+  def test_keywords, do: ~w[test it describe context scenario feature given]
+end
diff --git a/lib/codeqa/languages/config/dockerfile.ex b/lib/codeqa/languages/config/dockerfile.ex
new file mode 100644
index 00000000..e1ed3a69
--- /dev/null
+++ b/lib/codeqa/languages/config/dockerfile.ex
@@ -0,0 +1,35 @@
+defmodule CodeQA.Languages.Config.Dockerfile do
+  @moduledoc false
+  # Token tables for Dockerfiles (`extensions/0` lists the bare file name `Dockerfile`).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "dockerfile"
+
+  @impl true
+  def extensions, do: ~w[Dockerfile]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    FROM RUN CMD LABEL EXPOSE ENV ADD COPY ENTRYPOINT VOLUME USER WORKDIR ARG
+    ONBUILD STOPSIGNAL HEALTHCHECK SHELL AS
+  ]
+
+  # Plain list on purpose: inside `~w[...]` a backslash followed by a newline is an
+  # escaped line break, which silently removes the `\` entry from the word list.
+  @impl true
+  def operators, do: ["=", "\\"]
+
+  @impl true
+  def delimiters, do: ~w[( ) , : #] ++ ~w( [ ] )
+
+  @impl true
+  def declaration_keywords, do: ~w[FROM]
+end
diff --git a/lib/codeqa/languages/config/makefile.ex b/lib/codeqa/languages/config/makefile.ex
new file mode 100644
index 00000000..ffb45221
--- /dev/null
+++ b/lib/codeqa/languages/config/makefile.ex
@@ -0,0 +1,32 @@
+defmodule CodeQA.Languages.Config.Makefile do
+  @moduledoc false
+  # Token tables for Makefiles (`Makefile`, `GNUmakefile` and `mk` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "makefile"
+
+  @impl true
+  def extensions, do: ~w[Makefile GNUmakefile mk]
+
+  @impl true
+  def comment_prefixes, do: ~w[#]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    ifeq ifneq ifdef ifndef else endif define endef include export unexport
+    override private vpath all clean install
+  ]
+
+  @impl true
+  def operators, do: ~w[= := ::= ?= += !=]
+
+  # `\` is appended as a plain string: inside `~w[...]` a backslash followed by a
+  # newline is an escaped line break, which silently drops it from the word list.
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; @ $ % #] ++ ["\\", "[", "]"]
+end
diff --git a/lib/codeqa/languages/config/terraform.ex b/lib/codeqa/languages/config/terraform.ex
new file mode 100644
index 00000000..c35cb9f2
--- /dev/null
+++ b/lib/codeqa/languages/config/terraform.ex
@@ -0,0 +1,33 @@
+defmodule CodeQA.Languages.Config.Terraform do
+  @moduledoc false
+  # Token tables for Terraform sources (`tf` and `tfvars` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "terraform"
+
+  @impl true
+  def extensions, do: ~w(tf tfvars)
+
+  # Both `#` and `//` start a line comment; `/* ... */` is the block form.
+  @impl true
+  def comment_prefixes, do: ["#", "//"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    resource data variable output locals module provider terraform
+    required_providers backend for_each count depends_on lifecycle
+    source version true false null for if
+  ]
+
+  @impl true
+  def operators, do: ~w[= == != <= >= && || ! ? :]
+
+  # `=` and `:` appear here as well as in `operators/0`.
+  @impl true
+  def delimiters, do: ~w[{ } ( ) , . : = " # //] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/data/graphql.ex b/lib/codeqa/languages/data/graphql.ex
new file mode 100644
index 00000000..47dbc51f
--- /dev/null
+++ b/lib/codeqa/languages/data/graphql.ex
@@ -0,0 +1,32 @@
+defmodule CodeQA.Languages.Data.GraphQL do
+  @moduledoc false
+  # Token tables for GraphQL documents (`graphql` and `gql` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "graphql"
+
+  @impl true
+  def extensions, do: ~w(graphql gql)
+
+  # `#` starts a line comment; no block-comment pairs are declared.
+  @impl true
+  def comment_prefixes, do: ["#"]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    query mutation subscription fragment on type interface union enum input
+    scalar schema directive extend implements true false null
+  ]
+
+  @impl true
+  def operators, do: ~w[= : ! | &]
+
+  # `:` and `!` appear here as well as in `operators/0`.
+  @impl true
+  def delimiters, do: ~w[{ } ( ) , . : # @ !] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/data/json.ex b/lib/codeqa/languages/data/json.ex
new file mode 100644
index 00000000..0b1909e5
--- /dev/null
+++ b/lib/codeqa/languages/data/json.ex
@@ -0,0 +1,31 @@
+defmodule CodeQA.Languages.Data.Json do
+  @moduledoc false
+  # Token tables for JSON documents (`json` and `jsonc` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "json"
+
+  @impl true
+  def extensions, do: ~w(json jsonc)
+
+  # No comment syntax is declared, not even for `jsonc` files.
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    true false null
+  ]
+
+  @impl true
+  def operators, do: [":"]
+
+  # Both quote characters are listed as delimiters.
+  @impl true
+  def delimiters, do: ~w[{ } , " '] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/data/sql.ex b/lib/codeqa/languages/data/sql.ex
new file mode 100644
index 00000000..ddc40181
--- /dev/null
+++ b/lib/codeqa/languages/data/sql.ex
@@ -0,0 +1,40 @@
+defmodule CodeQA.Languages.Data.Sql do
+  @moduledoc false
+  # Token tables for SQL scripts (`sql` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "sql"
+
+  @impl true
+  def extensions, do: ["sql"]
+
+  # `--` line comments plus `/* ... */` block comments.
+  @impl true
+  def comment_prefixes, do: ["--"]
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    SELECT FROM WHERE INSERT INTO UPDATE DELETE SET CREATE DROP ALTER TABLE
+    INDEX VIEW JOIN LEFT RIGHT INNER OUTER FULL CROSS ON AND OR NOT IN EXISTS
+    AS GROUP BY ORDER HAVING LIMIT OFFSET DISTINCT NULL TRUE FALSE PRIMARY KEY
+    FOREIGN REFERENCES CASCADE UNIQUE DEFAULT VALUES RETURNING WITH UNION
+    INTERSECT EXCEPT CASE WHEN THEN ELSE END IF BEGIN COMMIT ROLLBACK
+  ]
+
+  @impl true
+  def operators, do: ~w[= != <> <= >= + - * / % LIKE BETWEEN IS IN]
+
+  @impl true
+  def delimiters, do: ~w[( ) , . ; ' " -- /*] ++ ["[", "]"]
+
+  # Unlike `keywords/0`, the statement keywords are spelled in lowercase.
+  @impl true
+  def statement_keywords,
+    do:
+      ~w[select insert update delete create drop alter truncate begin commit rollback call execute]
+end
diff --git a/lib/codeqa/languages/data/toml.ex b/lib/codeqa/languages/data/toml.ex
new file mode 100644
index 00000000..1051c0dc
--- /dev/null
+++ b/lib/codeqa/languages/data/toml.ex
@@ -0,0 +1,31 @@
+defmodule CodeQA.Languages.Data.Toml do
+  @moduledoc false
+  # Token tables for TOML documents (`toml` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "toml"
+
+  @impl true
+  def extensions, do: ["toml"]
+
+  # `#` starts a line comment; no block-comment pairs are declared.
+  @impl true
+  def comment_prefixes, do: ["#"]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    true false
+  ]
+
+  @impl true
+  def operators, do: ["="]
+
+  # Both quote characters are listed as delimiters.
+  @impl true
+  def delimiters, do: ~w[{ } , . : # " '] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/data/yaml.ex b/lib/codeqa/languages/data/yaml.ex
new file mode 100644
index 00000000..8beb0cbb
--- /dev/null
+++ b/lib/codeqa/languages/data/yaml.ex
@@ -0,0 +1,31 @@
+defmodule CodeQA.Languages.Data.Yaml do
+  @moduledoc false
+  # Token tables for YAML documents (`yml` and `yaml` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "yaml"
+
+  @impl true
+  def extensions, do: ~w(yml yaml)
+
+  # `#` starts a line comment; no block-comment pairs are declared.
+  @impl true
+  def comment_prefixes, do: ["#"]
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    true false null yes no on off
+  ]
+
+  @impl true
+  def operators, do: ~w[: | > & * !]
+
+  # `---` is listed as a delimiter next to the single-character entries.
+  @impl true
+  def delimiters, do: ~w[{ } , . # @ ---] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/language.ex b/lib/codeqa/languages/language.ex
new file mode 100644
index 00000000..3ccd1728
--- /dev/null
+++ b/lib/codeqa/languages/language.ex
@@ -0,0 +1,181 @@
+defmodule CodeQA.Language do
+  @moduledoc false
+  # Behaviour implemented by every language definition, plus helpers to list
+  # the languages, look one up by name or path, and strip comments from source.
+  @callback name() :: String.t()
+  @callback extensions() :: [String.t()]
+  @callback comment_prefixes() :: [String.t()]
+  @callback block_comments() :: [{String.t(), String.t()}]
+  @callback keywords() :: [String.t()]
+  @callback operators() :: [String.t()]
+  @callback delimiters() :: [String.t()]
+
+  @callback declaration_keywords() :: [String.t()]
+  @callback branch_keywords() :: [String.t()]
+  @callback block_end_tokens() :: [String.t()]
+  @callback access_modifiers() :: [String.t()]
+  @callback statement_keywords() :: [String.t()]
+
+  @callback function_keywords() :: [String.t()]
+  @callback module_keywords() :: [String.t()]
+  @callback import_keywords() :: [String.t()]
+  @callback test_keywords() :: [String.t()]
+  @callback uses_colon_indent?() :: boolean()
+  @callback divider_indicators() :: [String.t()]
+
+  # Everything listed below is optional: `__using__/1` injects overridable defaults.
+  @optional_callbacks [
+    declaration_keywords: 0,
+    branch_keywords: 0,
+    block_end_tokens: 0,
+    access_modifiers: 0,
+    statement_keywords: 0,
+    function_keywords: 0,
+    module_keywords: 0,
+    import_keywords: 0,
+    test_keywords: 0,
+    uses_colon_indent?: 0,
+    divider_indicators: 0
+  ]
+
+  defmacro __using__(_opts) do
+    quote do
+      @behaviour CodeQA.Language
+      def declaration_keywords, do: []
+      def branch_keywords, do: []
+      def block_end_tokens, do: []
+      def access_modifiers, do: []
+      def statement_keywords, do: []
+      def function_keywords, do: []
+      def module_keywords, do: []
+      def import_keywords, do: []
+      def test_keywords, do: []
+      def uses_colon_indent?, do: false
+      def divider_indicators, do: ~w[-- - == === ~ * ** # // / =]
+
+      defoverridable declaration_keywords: 0,
+                     branch_keywords: 0,
+                     block_end_tokens: 0,
+                     access_modifiers: 0,
+                     statement_keywords: 0,
+                     function_keywords: 0,
+                     module_keywords: 0,
+                     import_keywords: 0,
+                     test_keywords: 0,
+                     uses_colon_indent?: 0,
+                     divider_indicators: 0
+    end
+  end
+
+  # Raises a `MatchError` when the `:codeqa` application spec is not loaded.
+  @spec all() :: [module()]
+  def all do
+    {:ok, modules} = :application.get_key(:codeqa, :modules)
+    Enum.filter(modules, &implements?/1)
+  end
+
+  @spec all_keywords() :: [String.t()]
+  def all_keywords do
+    all()
+    |> Enum.flat_map(& &1.keywords())
+    |> Enum.uniq()
+  end
+
+  # Name-based lookups. `find/1` never returns `nil` (unknown names resolve to
+  # `CodeQA.Languages.Unknown`), so these read the token table directly.
+  @spec keywords(atom() | String.t()) :: MapSet.t()
+  def keywords(language), do: MapSet.new(find(language).keywords())
+
+  @spec operators(atom() | String.t()) :: MapSet.t()
+  def operators(language), do: MapSet.new(find(language).operators())
+
+  @spec delimiters(atom() | String.t()) :: MapSet.t()
+  def delimiters(language), do: MapSet.new(find(language).delimiters())
+
+  # Module-based accessors: each wraps the callback's list in a `MapSet`.
+  @spec declaration_keywords(module()) :: MapSet.t()
+  def declaration_keywords(mod), do: MapSet.new(mod.declaration_keywords())
+
+  @spec branch_keywords(module()) :: MapSet.t()
+  def branch_keywords(mod), do: MapSet.new(mod.branch_keywords())
+
+  @spec block_end_tokens(module()) :: MapSet.t()
+  def block_end_tokens(mod), do: MapSet.new(mod.block_end_tokens())
+
+  @spec access_modifiers(module()) :: MapSet.t()
+  def access_modifiers(mod), do: MapSet.new(mod.access_modifiers())
+
+  @spec statement_keywords(module()) :: MapSet.t()
+  def statement_keywords(mod), do: MapSet.new(mod.statement_keywords())
+
+  @spec function_keywords(module()) :: MapSet.t()
+  def function_keywords(mod), do: MapSet.new(mod.function_keywords())
+
+  @spec module_keywords(module()) :: MapSet.t()
+  def module_keywords(mod), do: MapSet.new(mod.module_keywords())
+
+  @spec import_keywords(module()) :: MapSet.t()
+  def import_keywords(mod), do: MapSet.new(mod.import_keywords())
+
+  @spec test_keywords(module()) :: MapSet.t()
+  def test_keywords(mod), do: MapSet.new(mod.test_keywords())
+
+  @spec divider_indicators(module()) :: MapSet.t()
+  def divider_indicators(mod), do: MapSet.new(mod.divider_indicators())
+
+  # Resolves a language by its `name/0`; unknown names yield the Unknown module.
+  @spec find(atom() | String.t()) :: module()
+  def find(language) do
+    name = to_string(language)
+    Enum.find(all(), CodeQA.Languages.Unknown, &(&1.name() == name))
+  end
+
+  # Resolves a language from a path: by extension, or by basename when the path
+  # has no extension (e.g. `Makefile`). Falls back to the Unknown module.
+  @spec detect(String.t()) :: module()
+  def detect(path) do
+    basename = Path.basename(path)
+    ext = path |> Path.extname() |> String.trim_leading(".")
+
+    Enum.find(all(), CodeQA.Languages.Unknown, fn mod ->
+      known = mod.extensions()
+      ext in known or (ext == "" and basename in known)
+    end)
+  end
+
+  # Removes block comments first, then line comments, using the module's tables.
+  @spec strip_comments(String.t(), module()) :: String.t()
+  def strip_comments(content, language_mod) do
+    content
+    |> strip_block_comments(language_mod.block_comments())
+    |> strip_line_comments(language_mod.comment_prefixes())
+  end
+
+  defp strip_block_comments(content, []), do: content
+
+  defp strip_block_comments(content, pairs) do
+    Enum.reduce(pairs, content, fn {open, close}, acc ->
+      regex = Regex.compile!(Regex.escape(open) <> ".*?" <> Regex.escape(close), [:dotall])
+
+      # Blank the comment but keep its newlines so the line count is unchanged.
+      Regex.replace(regex, acc, fn match ->
+        String.replace(match, ~r/[^\n]/, "")
+      end)
+    end)
+  end
+
+  defp strip_line_comments(content, []), do: content
+
+  # Purely textual: a prefix inside a string literal is treated as a comment too.
+  defp strip_line_comments(content, prefixes) do
+    pattern = Enum.map_join(prefixes, "|", &Regex.escape/1)
+    Regex.replace(Regex.compile!("(#{pattern}).*$", [:multiline]), content, "")
+  end
+
+  # A module's attribute list may hold several `:behaviour` entries (one per
+  # declaration), so all of them are collected rather than only the first.
+  defp implements?(module) do
+    attributes = if Code.ensure_loaded?(module), do: module.module_info(:attributes), else: []
+    CodeQA.Language in List.flatten(Keyword.get_values(attributes, :behaviour))
+  end
+end
diff --git a/lib/codeqa/languages/markup/css.ex b/lib/codeqa/languages/markup/css.ex
new file mode 100644
index 00000000..0b0af143
--- /dev/null
+++ b/lib/codeqa/languages/markup/css.ex
@@ -0,0 +1,32 @@
+defmodule CodeQA.Languages.Markup.Css do
+  @moduledoc false
+  # Token tables for stylesheets (`css`, `scss`, `sass` and `less` files).
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "css"
+
+  @impl true
+  def extensions, do: ~w(css scss sass less)
+
+  # Only the `/* ... */` block form is declared; there is no line-comment prefix.
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: [{"/*", "*/"}]
+
+  @impl true
+  def keywords, do: ~w[
+    media keyframes import charset supports layer font-face from to
+    auto none inherit initial unset normal bold italic
+  ]
+
+  @impl true
+  def operators, do: ~w[: ; > + ~ * = ^= $= *= ~= |=]
+
+  # `:` and `;` appear here as well as in `operators/0`.
+  @impl true
+  def delimiters, do: ~w[{ } ( ) , . # : ; @] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/markup/html.ex b/lib/codeqa/languages/markup/html.ex
new file mode 100644
index 00000000..31a0fe8d
--- /dev/null
+++ b/lib/codeqa/languages/markup/html.ex
@@ -0,0 +1,34 @@
+defmodule CodeQA.Languages.Markup.Html do
+  @moduledoc false
+  # Token tables for HTML and the template formats listed in `extensions/0`.
+  # Callbacks not defined here keep the defaults from `use CodeQA.Language`.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "html"
+
+  @impl true
+  def extensions, do: ~w[html htm heex eex leex erb htmlbars hbs mustache jinja jinja2 njk liquid]
+
+  # Only the `<!-- ... -->` block form is declared; there is no line-comment prefix.
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: [{"<!--", "-->"}]
+
+  @impl true
+  def keywords, do: ~w[
+    html head body div span p a img input form button select option textarea
+    script style link meta title h1 h2 h3 h4 h5 h6 ul ol li table tr td th
+    header footer nav main section article aside figure figcaption
+    class id href src type name value rel action method placeholder
+  ]
+
+  @impl true
+  def operators, do: ~w[< > / = &]
+
+  # Both quote characters are listed as delimiters.
+  @impl true
+  def delimiters, do: ~w[( ) { } , . : ; " ' # ! ?] ++ ["[", "]"]
+end
diff --git a/lib/codeqa/languages/markup/markdown.ex b/lib/codeqa/languages/markup/markdown.ex
new file mode 100644
index 00000000..ee75d60c
--- /dev/null
+++ b/lib/codeqa/languages/markup/markdown.ex
@@ -0,0 +1,37 @@
+defmodule CodeQA.Languages.Markup.Markdown do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "markdown"
+
+  @impl true
+  def extensions, do: ~w[md mdx]
+
+  @impl true
+  def comment_prefixes, do: []
+
+  # NOTE(review): Markdown sources can embed HTML comments (`<!-- -->`), but no comment
+  # syntax is declared here — confirm that is intended.
+  @impl true
+  def block_comments, do: []
+
+  # Annotation markers that show up in prose, rather than language keywords.
+  @impl true
+  def keywords, do: ~w[
+    TODO NOTE FIXME WARNING IMPORTANT
+  ]
+
+  # Heading prefixes (`#` to `######`), blockquote, code fence, and emphasis/strikethrough markers.
+  @impl true
+  def operators, do: ~w[
+    # ## ### #### ##### ###### > ``` ** * _ __ ~~
+  ]
+
+  # `[ ]` come from a separate paren-delimited sigil: a bare `]` would close the
+  # bracket-delimited `~w[...]` early.
+  @impl true
+  def delimiters, do: ~w[
+    ( ) . ! ? ` * _ ~
+  ] ++ ~w( [ ] )
+end
diff --git a/lib/codeqa/languages/markup/xml.ex b/lib/codeqa/languages/markup/xml.ex
new file mode 100644
index 00000000..85c76687
--- /dev/null
+++ b/lib/codeqa/languages/markup/xml.ex
@@ -0,0 +1,34 @@
+defmodule CodeQA.Languages.Markup.Xml do
+  @moduledoc false
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "xml"
+
+  @impl true
+  def extensions, do: ~w[xml svg xsl xslt xsd wsdl plist]
+
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: [{"<!--", "-->"}]
+
+  # The `xmlns` namespace attribute plus the pseudo-attributes of the XML declaration.
+  @impl true
+  def keywords, do: ~w[
+    xmlns version encoding standalone
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    < > / = &
+  ]
+
+  # `[ ]` come from a separate paren-delimited sigil: a bare `]` would close the
+  # bracket-delimited `~w[...]` early.
+  @impl true
+  def delimiters, do: ~w[
+    ( ) , . : ; " ' # ! ?
+  ] ++ ~w( [ ] )
+end
diff --git a/lib/codeqa/languages/unknown.ex b/lib/codeqa/languages/unknown.ex
new file mode 100644
index 00000000..11a0f7ac
--- /dev/null
+++ b/lib/codeqa/languages/unknown.ex
@@ -0,0 +1,33 @@
+defmodule CodeQA.Languages.Unknown do
+  @moduledoc false
+  # Declares no file extensions and no comment syntax, and only a minimal token set.
+  # Presumably the fallback when no other language matches — confirm against the language lookup.
+  use CodeQA.Language
+
+  @impl true
+  def name, do: "unknown"
+
+  @impl true
+  def extensions, do: []
+
+  @impl true
+  def comment_prefixes, do: []
+
+  @impl true
+  def block_comments, do: []
+
+  @impl true
+  def keywords, do: ~w[
+    if else
+  ]
+
+  @impl true
+  def operators, do: ~w[
+    == !=
+  ]
+
+  @impl true
+  def delimiters, do: ~w[
+    ( ) { }
+  ]
+end
diff --git a/lib/codeqa/metrics/codebase/codebase_metric.ex b/lib/codeqa/metrics/codebase/codebase_metric.ex
new file mode 100644
index 00000000..8b275c82
--- /dev/null
+++ b/lib/codeqa/metrics/codebase/codebase_metric.ex
@@ -0,0 +1,20 @@
+defmodule CodeQA.Metrics.Codebase.CodebaseMetric do
+  @moduledoc """
+  Behaviour for metrics that operate across an entire codebase.
+
+  Unlike `FileMetric`, which analyzes a single file, codebase metrics receive
+  a map of all source files and can compute cross-file properties such as
+  duplication or structural similarity.
+
+  See [software metrics](https://en.wikipedia.org/wiki/Software_metric).
+  """
+
+  @doc "Returns the metric's name as a string."
+  @callback name() :: String.t()
+
+  @doc """
+  Analyzes the source files (a map with string keys and string values) and returns a
+  map of results. The keyword list carries implementation-specific options.
+  """
+  @callback analyze(%{String.t() => String.t()}, keyword()) :: map()
+end
diff --git a/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex b/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex
new file mode 100644
index 00000000..2e821e98
--- /dev/null
+++ b/lib/codeqa/metrics/codebase/near_duplicate_blocks_codebase.ex
@@ -0,0 +1,49 @@
+defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebase do
+  @moduledoc """
+  Codebase-wide count of near-duplicate and exact-duplicate natural code blocks.
+
+  Each file's blocks are labelled with the file's path and pooled into one list;
+  duplicate pairs are then looked for across all files. Pair source lists are
+  included in the result (capped by max_pairs_per_bucket).
+
+  `analyze/2` needs a `:file_context_pid` option and raises `KeyError` without it.
+
+  Configure in .codeqa.yml:
+      near_duplicate_blocks:
+        max_pairs_per_bucket: 50
+  """
+
+  @behaviour CodeQA.Metrics.Codebase.CodebaseMetric
+
+  alias CodeQA.Analysis.FileContextServer
+  alias CodeQA.Metrics.File.NearDuplicateBlocks
+
+  @impl true
+  def name, do: "near_duplicate_blocks_codebase"
+
+  @impl true
+  def analyze(files, opts \\ []) do
+    bucket_cap =
+      opts
+      |> Keyword.get(:near_duplicate_blocks, [])
+      |> Keyword.get(:max_pairs_per_bucket)
+
+    worker_count = Keyword.get(opts, :workers, System.schedulers_online())
+    progress_opts = Keyword.take(opts, [:on_progress])
+
+    ndb_opts =
+      [include_pairs: true, max_pairs_per_bucket: bucket_cap, workers: worker_count] ++
+        progress_opts
+
+    # Raises KeyError when the caller did not pass the :file_context_pid option.
+    context_pid = Keyword.fetch!(opts, :file_context_pid)
+
+    files
+    |> Enum.flat_map(fn {path, content} ->
+      context = FileContextServer.get(context_pid, content, path: path)
+      NearDuplicateBlocks.label_blocks(context.blocks, path)
+    end)
+    |> NearDuplicateBlocks.analyze_from_blocks(ndb_opts)
+    |> Map.drop(["block_count", "sub_block_count"])
+  end
+end
diff --git a/lib/codeqa/metrics/similarity.ex b/lib/codeqa/metrics/codebase/similarity.ex
similarity index 65%
rename from lib/codeqa/metrics/similarity.ex
rename to lib/codeqa/metrics/codebase/similarity.ex
index 910e631d..e20e556f 100644
--- a/lib/codeqa/metrics/similarity.ex
+++ b/lib/codeqa/metrics/codebase/similarity.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Similarity do
+defmodule CodeQA.Metrics.Codebase.Similarity do
   @moduledoc """
   Detects cross-file code duplication at the codebase level.
 
@@ -9,23 +9,19 @@ defmodule CodeQA.Metrics.Similarity do
   See [winnowing](https://theory.stanford.edu/~aiken/publications/papers/sigmod03.pdf),
   [locality-sensitive hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing),
   and [normalized compression distance](https://en.wikipedia.org/wiki/Normalized_compression_distance).
-
-  ## Options
-
-    - `:show_ncd` — boolean, whether to compute per-pair NCD scores (default: `false`)
-    - `:ncd_paths` — list of file paths to compute similarity for (default: all files)
-    - `:ncd_top` — integer, max similar files to return per path (default: all)
-    - `:ncd_threshold` — minimum Jaccard similarity to consider as candidate (default: `0.20`)
-    - `:workers` — number of parallel workers (default: `System.schedulers_online/0`)
-    - `:on_progress` — include this key (any value) to enable progress output to stderr
-    - `:fp_stopwords` — `MapSet` of fingerprint hashes to exclude (default: empty)
   """
 
-  @behaviour CodeQA.Metrics.CodebaseMetric
+  @behaviour CodeQA.Metrics.Codebase.CodebaseMetric
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.CLI.UI
+  alias CodeQA.Metrics.File.Winnowing
 
   @impl true
   def name, do: "similarity"
 
+  def keys, do: ["ncd_pairs", "cross_file_density"]
+
   @spec analyze(map(), keyword()) :: map()
   @impl true
   def analyze(files, opts \\ [])
@@ -37,6 +33,9 @@ defmodule CodeQA.Metrics.Similarity do
   def analyze(files, opts) do
     names = Map.keys(files)
     contents = Map.values(files)
+    has_progress = Keyword.has_key?(opts, :on_progress)
+
+    if has_progress, do: IO.puts(:stderr, "  Computing cross-file density...")
 
     result = %{
       "cross_file_density" => cross_file_density(contents)
@@ -80,38 +79,45 @@ defmodule CodeQA.Metrics.Similarity do
     if has_progress, do: IO.puts(:stderr, "  2/5 Computing Winnowing fingerprints...")
 
     result =
-      CodeQA.Telemetry.time(:ncd_fingerprinting, fn ->
-        contents
-        |> Enum.with_index()
-        |> Task.async_stream(
-          fn {content, i} ->
-            fp = compute_fingerprints(content, opts)
-            {i, fp}
-          end, max_concurrency: workers, timeout: :infinity)
-        |> Enum.map(fn {:ok, {i, fp}} ->
-          print_bar_progress(has_progress, i, length(contents), "Fingerprinting")
+      contents
+      |> Enum.with_index()
+      |> Task.async_stream(
+        fn {content, i} ->
+          fp = compute_fingerprints(content, opts)
           {i, fp}
-        end)
-        |> Map.new()
+        end,
+        max_concurrency: workers,
+        timeout: :infinity
+      )
+      |> Enum.map(fn {:ok, {i, fp}} ->
+        maybe_print_fingerprint_progress(has_progress, i, length(contents))
+        {i, fp}
       end)
+      |> Map.new()
 
     if has_progress, do: IO.puts(:stderr, "")
     result
   end
 
+  defp maybe_print_fingerprint_progress(false, _i, _total), do: :ok
+
+  defp maybe_print_fingerprint_progress(true, i, total) do
+    if rem(i + 1, max(1, div(total, 20))) == 0 do
+      IO.write(:stderr, "\r" <> UI.progress_bar(i + 1, total, label: "Fingerprinting"))
+    end
+  end
+
   defp build_inverted_index(fingerprints_by_id, has_progress) do
     if has_progress, do: IO.puts(:stderr, "  3/5 Building inverted index...")
 
     total = map_size(fingerprints_by_id)
 
     result =
-      CodeQA.Telemetry.time(:ncd_build_index, fn ->
-        fingerprints_by_id
-        |> Enum.with_index()
-        |> Enum.reduce(%{}, fn {{i, set}, idx}, acc ->
-          print_bar_progress(has_progress, idx, total, "Indexing")
-          index_fingerprint_set(set, i, acc)
-        end)
+      fingerprints_by_id
+      |> Enum.with_index()
+      |> Enum.reduce(%{}, fn {{i, set}, idx}, acc ->
+        maybe_print_index_progress(has_progress, idx, total)
+        index_fingerprint_set(set, i, acc)
       end)
 
     if has_progress, do: IO.puts(:stderr, "")
@@ -124,6 +130,14 @@ defmodule CodeQA.Metrics.Similarity do
     end)
   end
 
+  defp maybe_print_index_progress(false, _idx, _total), do: :ok
+
+  defp maybe_print_index_progress(true, idx, total) do
+    if rem(idx + 1, max(1, div(total, 20))) == 0 do
+      IO.write(:stderr, "\r" <> UI.progress_bar(idx + 1, total, label: "Indexing"))
+    end
+  end
+
   defp find_candidate_pairs(
          fingerprints_by_id,
          inverted_index,
@@ -136,37 +150,37 @@ defmodule CodeQA.Metrics.Similarity do
     if has_progress, do: IO.puts(:stderr, "  4/5 Identifying candidate pairs...")
 
     total = map_size(fingerprints_by_id)
-    names_tuple = List.to_tuple(names)
 
     candidates =
-      CodeQA.Telemetry.time(:ncd_lsh_filter, fn ->
-        fingerprints_by_id
-        |> Enum.with_index()
-        |> Task.async_stream(
-          fn {{i, set}, idx} ->
-            valid_pairs =
-              collect_valid_pairs(
-                i,
-                set,
-                inverted_index,
-                fingerprints_by_id,
-                names_tuple,
-                target_set,
-                threshold
-              )
-
-            {idx, valid_pairs}
-          end, max_concurrency: workers, timeout: :infinity)
-        |> Enum.reduce(%{}, fn {:ok, {idx, valid_pairs}}, acc ->
-          print_bar_progress(has_progress, idx, total, "LSH Filter")
-          merge_valid_pairs(valid_pairs, acc)
-        end)
+      fingerprints_by_id
+      |> Enum.with_index()
+      |> Task.async_stream(
+        fn {{i, set}, idx} ->
+          valid_pairs =
+            collect_valid_pairs(
+              i,
+              set,
+              inverted_index,
+              fingerprints_by_id,
+              names,
+              target_set,
+              threshold
+            )
+
+          {idx, valid_pairs}
+        end,
+        max_concurrency: workers,
+        timeout: :infinity
+      )
+      |> Enum.reduce(%{}, fn {:ok, {idx, valid_pairs}}, acc ->
+        maybe_print_lsh_progress(has_progress, idx, total)
+        merge_valid_pairs(valid_pairs, acc)
       end)
 
     if has_progress, do: IO.puts(:stderr, "")
 
     Enum.map(candidates, fn {{i, j}, jaccard} ->
-      {elem(names_tuple, i), i, elem(names_tuple, j), j, jaccard}
+      {Enum.at(names, i), i, Enum.at(names, j), j, jaccard}
     end)
   end
 
@@ -175,19 +189,19 @@ defmodule CodeQA.Metrics.Similarity do
          set,
          inverted_index,
          fingerprints_by_id,
-         names_tuple,
+         names,
          target_set,
          threshold
        ) do
     collisions = count_collisions(set, inverted_index, i)
 
     size_a = MapSet.size(set)
-    name_a = elem(names_tuple, i)
+    name_a = Enum.at(names, i)
 
     is_target_a = MapSet.member?(target_set, name_a)
 
     collisions
-    |> Enum.filter(fn {j, _} -> is_target_a or MapSet.member?(target_set, elem(names_tuple, j)) end)
+    |> Enum.filter(fn {j, _} -> is_target_a or MapSet.member?(target_set, Enum.at(names, j)) end)
     |> Enum.reduce([], fn {j, intersection}, acc_pairs ->
       jaccard = compute_jaccard(size_a, MapSet.size(Map.get(fingerprints_by_id, j)), intersection)
       if jaccard >= threshold, do: [{{i, j}, jaccard} | acc_pairs], else: acc_pairs
@@ -217,11 +231,11 @@ defmodule CodeQA.Metrics.Similarity do
     end)
   end
 
-  defp print_bar_progress(false, _current, _total, _label), do: :ok
+  defp maybe_print_lsh_progress(false, _idx, _total), do: :ok
 
-  defp print_bar_progress(true, current, total, label) do
-    if rem(current + 1, max(1, div(total, 20))) == 0 do
-      IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(current + 1, total, label: label))
+  defp maybe_print_lsh_progress(true, idx, total) do
+    if rem(idx + 1, max(1, div(total, 20))) == 0 do
+      IO.write(:stderr, "\r" <> UI.progress_bar(idx + 1, total, label: "LSH Filter"))
     end
   end
 
@@ -240,26 +254,25 @@ defmodule CodeQA.Metrics.Similarity do
     counter = :counters.new(1, [:atomics])
     start_time_ncd = System.monotonic_time(:millisecond)
 
-    CodeQA.Telemetry.time(:ncd_exact_compression_phase, fn ->
-      filtered_pairs
-      |> Task.async_stream(
-        fn {name_a, i, name_b, j, _jaccard} ->
-          ncd = compute_single_ncd(precomputed, i, j)
-          maybe_print_ncd_progress(has_progress, counter, total_pairs, start_time_ncd)
-          {name_a, name_b, ncd}
-        end, max_concurrency: workers, timeout: :infinity)
-      |> Enum.map(fn {:ok, res} -> res end)
-    end)
+    filtered_pairs
+    |> Task.async_stream(
+      fn {name_a, i, name_b, j, _jaccard} ->
+        ncd = compute_single_ncd(precomputed, i, j)
+        maybe_print_ncd_progress(has_progress, counter, total_pairs, start_time_ncd)
+        {name_a, name_b, ncd}
+      end,
+      max_concurrency: workers,
+      timeout: :infinity
+    )
+    |> Enum.map(fn {:ok, res} -> res end)
   end
 
   defp compute_single_ncd(precomputed, i, j) do
-    CodeQA.Telemetry.time(:ncd_single_compression, fn ->
-      {a, ca} = elem(precomputed, i)
-      {b, cb} = elem(precomputed, j)
-      cab = byte_size(:zlib.compress([a, b]))
-      ncd = if max(ca, cb) > 0, do: (cab - min(ca, cb)) / max(ca, cb), else: 0.0
-      Float.round(ncd, 4)
-    end)
+    {a, ca} = elem(precomputed, i)
+    {b, cb} = elem(precomputed, j)
+    cab = byte_size(:zlib.compress([a, b]))
+    ncd = if max(ca, cb) > 0, do: (cab - min(ca, cb)) / max(ca, cb), else: 0.0
+    Float.round(ncd, 4)
   end
 
   defp maybe_print_ncd_progress(false, _counter, _total_pairs, _start_time), do: :ok
@@ -275,8 +288,8 @@ defmodule CodeQA.Metrics.Similarity do
       eta_ms = round((total_pairs - c) * avg_time)
 
       output =
-        CodeQA.CLI.UI.progress_bar(c, total_pairs,
-          eta: CodeQA.CLI.UI.format_eta(eta_ms),
+        UI.progress_bar(c, total_pairs,
+          eta: UI.format_eta(eta_ms),
           label: "NCD Compression"
         )
 
@@ -316,13 +329,11 @@ defmodule CodeQA.Metrics.Similarity do
     end
   end
 
-  defp compute_fingerprints(content, opts) do
-    fp_stopwords = Keyword.get(opts, :fp_stopwords, MapSet.new())
-
+  defp compute_fingerprints(content, _opts) do
     content
-    |> CodeQA.Metrics.TokenNormalizer.normalize()
-    |> CodeQA.Metrics.Winnowing.kgrams(5)
-    |> Enum.reject(&MapSet.member?(fp_stopwords, &1))
+    |> TokenNormalizer.normalize_structural()
+    |> Enum.map(& &1.kind)
+    |> Winnowing.kgrams(5)
     |> MapSet.new()
   end
 
diff --git a/lib/codeqa/metrics/codebase_metric.ex b/lib/codeqa/metrics/codebase_metric.ex
deleted file mode 100644
index 0b1284d6..00000000
--- a/lib/codeqa/metrics/codebase_metric.ex
+++ /dev/null
@@ -1,42 +0,0 @@
-defmodule CodeQA.Metrics.CodebaseMetric do
-  @moduledoc """
-  Behaviour for metrics that operate across an entire codebase.
-
-  Unlike `FileMetric`, which analyzes a single file, codebase metrics receive
-  a map of all source files and can compute cross-file properties such as
-  duplication or structural similarity.
-
-  ## Common opts keys
-
-  Implementations may accept keyword options including:
-  - `:workers` — number of parallel workers (default: `System.schedulers_online/0`)
-  - `:on_progress` — progress callback key (presence enables progress output)
-
-  ## Minimal implementation
-
-      defmodule MyCodebaseMetric do
-        @behaviour CodeQA.Metrics.CodebaseMetric
-
-        @impl true
-        def name, do: "my_metric"
-
-        @impl true
-        def analyze(files, _opts) do
-          %{"file_count" => map_size(files)}
-        end
-      end
-
-  See [software metrics](https://en.wikipedia.org/wiki/Software_metric).
-  """
-
-  @typedoc "Map of file path to file content string."
-  @type file_map :: %{required(String.t()) => String.t()}
-
-  @callback name() :: String.t()
-  @callback analyze(file_map(), keyword()) :: map()
-
-  @doc "Human-readable description of what this metric measures."
-  @callback description() :: String.t()
-
-  @optional_callbacks [description: 0]
-end
diff --git a/lib/codeqa/metrics/file/bradford.ex b/lib/codeqa/metrics/file/bradford.ex
new file mode 100644
index 00000000..22b7bcee
--- /dev/null
+++ b/lib/codeqa/metrics/file/bradford.ex
@@ -0,0 +1,99 @@
+defmodule CodeQA.Metrics.File.Bradford do
+  @moduledoc """
+  Applies Bradford's concentration law to token density across lines.
+
+  Lines are ranked by token count (densest first), then grouped into three
+  zones of equal total tokens. The ratio between zone sizes gives Bradford's
+  k values: how many more lines each successive zone needs to match the
+  token yield of the previous one.
+
+      k ≈ 1    uniform density — tokens spread evenly across lines
+      k = 3–5  Bradford-like — a small dense core, long sparse tail
+      k >> 5   extreme concentration — a few lines carry almost all tokens
+
+  k1 = zone2_lines / zone1_lines  (core → middle transition)
+  k2 = zone3_lines / zone2_lines  (middle → tail transition)
+  k_ratio = k2 / k1               (> 1 means tail is more stretched than core)
+
+  In a perfect Bradford distribution k1 ≈ k2. In practice k2 > k1 is common
+  (moderate core, very stretched tail); k1 > k2 suggests extreme concentration
+  that levels off quickly.
+
+  See [Bradford's law](https://en.wikipedia.org/wiki/Bradford%27s_law).
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @impl true
+  def name, do: "bradford"
+
+  @impl true
+  def keys, do: ["k1", "k2", "k_ratio"]
+
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{tokens: []}) do
+    %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0}
+  end
+
+  def analyze(%{tokens: tokens}) do
+    # Count tokens per line using the .line field, then rank densest-first —
+    # this is Bradford's "sort sources by yield" step.
+    counts =
+      tokens
+      |> Enum.group_by(& &1.line)
+      |> Enum.map(fn {_line, toks} -> length(toks) end)
+      |> Enum.sort(:desc)
+
+    total = Enum.sum(counts)
+
+    # Need at least 3 lines and 3 tokens to form meaningful zones.
+    if total < 3 or length(counts) < 3 do
+      %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0}
+    else
+      # Each zone should contain one third of all tokens.
+      # We find zone boundaries by walking the ranked list until each third is filled.
+      third = total / 3
+
+      # n1: lines in zone 1 (the dense core — fewest lines, highest token density)
+      # n2: lines in zone 2 (middle tier)
+      # n3: all remaining lines (the sparse tail)
+      {n1, rest} = count_until(counts, third)
+      {n2, _} = count_until(rest, third)
+      n3 = length(counts) - n1 - n2
+
+      # k1 is at least 1.0 here, not strictly above it: uniform counts such as [2, 2, 2]
+      # give exactly 1.0. Higher k1 = more concentration in the core (fewer lines do more work).
+      k1 = if n1 > 0, do: Float.round(n2 / n1, 4), else: 0.0
+
+      # k2 can be 0.0: when zones 1 and 2 consume every line (e.g. counts [10, 1, 1]), n3 is 0.
+      # Otherwise, higher k2 = longer sparse tail relative to the middle zone.
+      k2 = if n2 > 0, do: Float.round(n3 / n2, 4), else: 0.0
+
+      # k_ratio = k2 / k1
+      # > 1: the tail is more stretched than the core is concentrated (common — many trivial lines)
+      # < 1: the core is more extreme than the tail is sparse (god-function pattern)
+      # ≈ 1: a clean Bradford distribution where each zone multiplies evenly
+      k_ratio = if k1 > 0, do: Float.round(k2 / k1, 4), else: 0.0
+
+      %{"k1" => k1, "k2" => k2, "k_ratio" => k_ratio}
+    end
+  end
+
+  # Consumes counts from the front of the density-ranked list until their running
+  # total reaches `target`. Returns {lines_consumed, remaining_counts}.
+  # The remainder is what the next zone starts from, so the zones come out of a
+  # single pass over the list. If the list runs out before the target is met,
+  # every line is consumed and the remainder is [].
+  defp count_until(counts, target), do: take_zone(counts, target, 0, 0)
+
+  defp take_zone([], _target, taken, _sum), do: {taken, []}
+
+  # This line fills the zone: stop and hand back the untouched tail.
+  defp take_zone([count | tail], target, taken, sum) when sum + count >= target,
+    do: {taken + 1, tail}
+
+  # Still short of the target: consume the line and keep going.
+  defp take_zone([count | tail], target, taken, sum),
+    do: take_zone(tail, target, taken + 1, sum + count)
+end
diff --git a/lib/codeqa/metrics/branching.ex b/lib/codeqa/metrics/file/branching.ex
similarity index 70%
rename from lib/codeqa/metrics/branching.ex
rename to lib/codeqa/metrics/file/branching.ex
index 2cfdbe1d..ce5e20a0 100644
--- a/lib/codeqa/metrics/branching.ex
+++ b/lib/codeqa/metrics/file/branching.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Branching do
+defmodule CodeQA.Metrics.File.Branching do
   @moduledoc """
   Measures branching density as a proxy for cyclomatic complexity.
 
@@ -12,7 +12,7 @@ defmodule CodeQA.Metrics.Branching do
   See [cyclomatic complexity](https://en.wikipedia.org/wiki/Cyclomatic_complexity).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   # Python:     if elif else for while try except finally with match case
   # Ruby:       if elsif else unless for while until case when begin rescue ensure
@@ -43,18 +43,14 @@ defmodule CodeQA.Metrics.Branching do
   @impl true
   def name, do: "branching"
 
-  @spec analyze(map()) :: map()
   @impl true
-  def analyze(%{lines: lines, tokens: tokens}) do
-    non_blank_count =
-      lines
-      |> Tuple.to_list()
-      |> Enum.count(&(String.trim(&1) != ""))
+  def keys, do: ["branching_density", "branch_count", "non_blank_count", "max_nesting_depth"]
 
-    branch_count =
-      tokens
-      |> Tuple.to_list()
-      |> Enum.count(&MapSet.member?(@branching_keywords, &1))
+  @spec analyze(CodeQA.Engine.FileContext.t()) :: map()
+  @impl true
+  def analyze(%{lines: lines, tokens: tokens, content: content}) do
+    non_blank_count = Enum.count(lines, &(String.trim(&1) != ""))
+    branch_count = Enum.count(tokens, &MapSet.member?(@branching_keywords, &1.content))
 
     density =
       if non_blank_count > 0,
@@ -64,7 +60,19 @@ defmodule CodeQA.Metrics.Branching do
     %{
       "branching_density" => density,
       "branch_count" => branch_count,
-      "non_blank_count" => non_blank_count
+      "non_blank_count" => non_blank_count,
+      "max_nesting_depth" => max_nesting_depth(content)
     }
   end
+
+  # Deepest bracket nesting seen in `content`; unmatched closers never push the depth below zero.
+  defp max_nesting_depth(content) do
+    {_open, deepest} =
+      for grapheme <- String.graphemes(content), reduce: {0, 0} do
+        {open, deepest} when grapheme in ["(", "[", "{"] -> {open + 1, max(open + 1, deepest)}
+        {open, deepest} when grapheme in [")", "]", "}"] -> {max(open - 1, 0), deepest}
+        unchanged -> unchanged
+      end
+    deepest
+  end
 end
diff --git a/lib/codeqa/metrics/file/brevity.ex b/lib/codeqa/metrics/file/brevity.ex
new file mode 100644
index 00000000..bc0d9a62
--- /dev/null
+++ b/lib/codeqa/metrics/file/brevity.ex
@@ -0,0 +1,57 @@
+defmodule CodeQA.Metrics.File.Brevity do
+  @moduledoc """
+  Measures how strongly the brevity law shows up in a file's token distribution.
+
+  Reports the Pearson correlation between token length and token frequency:
+  negative when shorter tokens occur more often (the law holds), positive when
+  longer tokens occur more often (the law is violated). A log-log linear
+  regression over the same data supplies the power-law slope.
+
+  With fewer than three distinct tokens both values are reported as 0.0.
+
+  See [Brevity law](https://en.wikipedia.org/wiki/Brevity_law).
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @impl true
+  def name, do: "brevity"
+
+  @impl true
+  def keys, do: ["correlation", "slope", "sample_size"]
+
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{token_counts: token_counts}) when map_size(token_counts) < 3,
+    do: %{"correlation" => 0.0, "slope" => 0.0, "sample_size" => map_size(token_counts)}
+
+  def analyze(%{token_counts: token_counts}) do
+    {lengths, freqs} =
+      token_counts
+      |> Enum.map(fn {token, freq} -> {String.length(token), freq} end)
+      |> Enum.unzip()
+
+    %{
+      "correlation" => CodeQA.Math.pearson_correlation_list(lengths, freqs),
+      "slope" => log_log_slope(lengths, freqs),
+      "sample_size" => map_size(token_counts)
+    }
+  end
+
+  # Slope reported by CodeQA.Math.linear_regression for log(length) vs log(frequency),
+  # rounded to four decimals; 0.0 when the result is not a float.
+  defp log_log_slope(lengths, freqs) do
+    {slope, _intercept, _r_squared} =
+      CodeQA.Math.linear_regression(log_tensor(lengths), log_tensor(freqs))
+
+    case Nx.to_number(slope) do
+      number when is_float(number) -> Float.round(number, 4)
+      _not_a_float -> 0.0
+    end
+  end
+
+  # Natural log of each value (clamped to at least 1 before the log) as an f64 tensor.
+  defp log_tensor(values) do
+    values |> Enum.map(&:math.log(max(&1, 1))) |> Nx.tensor(type: :f64)
+  end
+end
diff --git a/lib/codeqa/metrics/casing_entropy.ex b/lib/codeqa/metrics/file/casing_entropy.ex
similarity index 60%
rename from lib/codeqa/metrics/casing_entropy.ex
rename to lib/codeqa/metrics/file/casing_entropy.ex
index cb380112..4256e0e6 100644
--- a/lib/codeqa/metrics/casing_entropy.ex
+++ b/lib/codeqa/metrics/file/casing_entropy.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.CasingEntropy do
+defmodule CodeQA.Metrics.File.CasingEntropy do
   @moduledoc """
   Measures Shannon entropy of identifier casing styles in a file.
 
@@ -12,31 +12,45 @@ defmodule CodeQA.Metrics.CasingEntropy do
   - `"pascal_case_count"`, `"camel_case_count"`, `"snake_case_count"`,
     `"macro_case_count"`, `"kebab_case_count"`, `"other_count"` — per-style
     counts (only keys for styles that appear are included)
+  - `"screaming_snake_density"` — ratio of MACRO_CASE identifiers to total identifiers
 
   See [Shannon entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory))
   and [naming conventions](https://en.wikipedia.org/wiki/Naming_convention_(programming)).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  alias CodeQA.Metrics.File.Inflector
 
   @impl true
   def name, do: "casing_entropy"
 
+  @impl true
+  def keys,
+    do: [
+      "entropy",
+      "pascal_case_count",
+      "camel_case_count",
+      "snake_case_count",
+      "macro_case_count",
+      "kebab_case_count",
+      "other_count",
+      "screaming_snake_density"
+    ]
+
   @spec analyze(map()) :: map()
   @impl true
-  def analyze(%{identifiers: identifiers}) when tuple_size(identifiers) == 0 do
-    %{"entropy" => 0.0}
+  def analyze(%{identifiers: []}) do
+    %{"entropy" => 0.0, "screaming_snake_density" => 0.0}
   end
 
   def analyze(%{identifiers: identifiers}) do
-    identifiers_list = Tuple.to_list(identifiers)
-
     counts =
-      identifiers_list
-      |> Enum.map(&CodeQA.Metrics.Inflector.detect_casing/1)
+      identifiers
+      |> Enum.map(&Inflector.detect_casing/1)
       |> Enum.frequencies()
 
-    total = length(identifiers_list)
+    total = length(identifiers)
 
     entropy =
       counts
@@ -46,7 +60,10 @@ defmodule CodeQA.Metrics.CasingEntropy do
         acc - p * :math.log2(p)
       end)
 
-    %{"entropy" => Float.round(entropy, 4)}
+    macro_count = Map.get(counts, :macro_case, 0)
+    screaming_density = Float.round(macro_count / total, 4)
+
+    %{"entropy" => Float.round(entropy, 4), "screaming_snake_density" => screaming_density}
     |> Map.merge(counts_to_output(counts))
   end
 
diff --git a/lib/codeqa/metrics/file/comment_structure.ex b/lib/codeqa/metrics/file/comment_structure.ex
new file mode 100644
index 00000000..65bc0e0a
--- /dev/null
+++ b/lib/codeqa/metrics/file/comment_structure.ex
@@ -0,0 +1,53 @@
+defmodule CodeQA.Metrics.File.CommentStructure do
+  @moduledoc """
+  Reports how much of a file is comments and how many work-deferral markers it holds.
+
+  A non-blank line counts as a comment line when, after optional leading
+  whitespace, it starts with `#`, `//`, `/*`, or `*` (a language-agnostic
+  heuristic). TODO/FIXME/HACK/XXX markers are counted anywhere in the content.
+
+  ## Output keys
+
+  - `"comment_line_ratio"` — comment lines / non-blank lines (0.0 when there are none)
+  - `"comment_line_count"` — raw count of comment lines
+  - `"todo_fixme_count"` — occurrences of TODO, FIXME, HACK, or XXX
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @comment_line ~r/^\s*(?:#|\/\/|\/\*|\*)/
+  @todo_marker ~r/\b(?:TODO|FIXME|HACK|XXX)\b/
+
+  @impl true
+  def name, do: "comment_structure"
+
+  @impl true
+  def keys, do: ["comment_line_ratio", "comment_line_count", "todo_fixme_count"]
+
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{content: content, lines: lines}) do
+    # One pass over the lines: blank lines are ignored, every other line bumps the
+    # non-blank tally, and comment-looking ones bump the comment tally as well.
+    {non_blank_total, comment_total} =
+      Enum.reduce(lines, {0, 0}, fn line, {non_blank, comments} = tallies ->
+        cond do
+          String.trim(line) == "" -> tallies
+          Regex.match?(@comment_line, line) -> {non_blank + 1, comments + 1}
+          true -> {non_blank + 1, comments}
+        end
+      end)
+
+    ratio =
+      case non_blank_total do
+        0 -> 0.0
+        total -> Float.round(comment_total / total, 4)
+      end
+
+    %{
+      "comment_line_ratio" => ratio,
+      "comment_line_count" => comment_total,
+      "todo_fixme_count" => length(Regex.scan(@todo_marker, content))
+    }
+  end
+end
diff --git a/lib/codeqa/metrics/compression.ex b/lib/codeqa/metrics/file/compression.ex
similarity index 56%
rename from lib/codeqa/metrics/compression.ex
rename to lib/codeqa/metrics/file/compression.ex
index fe687059..9f0981b9 100644
--- a/lib/codeqa/metrics/compression.ex
+++ b/lib/codeqa/metrics/file/compression.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Compression do
+defmodule CodeQA.Metrics.File.Compression do
   @moduledoc """
   Measures file redundancy via zlib compression ratio.
 
@@ -6,34 +6,49 @@ defmodule CodeQA.Metrics.Compression do
   original. A high compression ratio signals repetitive or boilerplate-heavy
   code.
 
-  `ctx.encoded` is the binary representation of the file content used for
-  compression, distinct from `ctx.content` which is the UTF-8 string.
-
   See [Kolmogorov complexity](https://en.wikipedia.org/wiki/Kolmogorov_complexity)
   and [data compression ratio](https://en.wikipedia.org/wiki/Data_compression_ratio).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "compression"
 
+  @impl true
+  def keys, do: ["raw_bytes", "zlib_bytes", "zlib_ratio", "redundancy", "unique_line_ratio"]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{content: "", byte_count: 0}) do
-    %{"raw_bytes" => 0, "zlib_bytes" => 0, "zlib_ratio" => 0.0, "redundancy" => 0.0}
+    %{
+      "raw_bytes" => 0,
+      "zlib_bytes" => 0,
+      "zlib_ratio" => 0.0,
+      "redundancy" => 0.0,
+      "unique_line_ratio" => 0.0
+    }
   end
 
   def analyze(ctx) do
     raw_size = ctx.byte_count
-    zlib_data = :zlib.compress(ctx.encoded)
+    zlib_data = :zlib.compress(ctx.content)
     zlib_size = byte_size(zlib_data)
 
+    non_blank = ctx.lines |> Enum.reject(&(String.trim(&1) == ""))
+
+    unique_line_ratio =
+      case length(non_blank) do
+        0 -> 0.0
+        n -> Float.round(length(Enum.uniq(non_blank)) / n, 4)
+      end
+
     %{
       "raw_bytes" => raw_size,
       "zlib_bytes" => zlib_size,
       "zlib_ratio" => Float.round(raw_size / max(1, zlib_size), 4),
-      "redundancy" => Float.round(max(0.0, 1.0 - zlib_size / raw_size), 4)
+      "redundancy" => Float.round(max(0.0, 1.0 - zlib_size / raw_size), 4),
+      "unique_line_ratio" => unique_line_ratio
     }
   end
 end
diff --git a/lib/codeqa/metrics/entropy.ex b/lib/codeqa/metrics/file/entropy.ex
similarity index 82%
rename from lib/codeqa/metrics/entropy.ex
rename to lib/codeqa/metrics/file/entropy.ex
index 47564715..6533a21a 100644
--- a/lib/codeqa/metrics/entropy.ex
+++ b/lib/codeqa/metrics/file/entropy.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Entropy do
+defmodule CodeQA.Metrics.File.Entropy do
   @moduledoc """
   Computes Shannon entropy at both character and token levels.
 
@@ -11,11 +11,24 @@ defmodule CodeQA.Metrics.Entropy do
   See [Shannon entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "entropy"
 
+  @impl true
+  def keys,
+    do: [
+      "char_entropy",
+      "char_max_entropy",
+      "char_normalized",
+      "token_entropy",
+      "token_max_entropy",
+      "token_normalized",
+      "vocab_size",
+      "total_tokens"
+    ]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(ctx) do
@@ -30,13 +43,12 @@ defmodule CodeQA.Metrics.Entropy do
     compute_entropy(counts, total, "char")
   end
 
-  defp token_entropy(%{tokens: tokens, token_counts: _token_counts})
-       when tuple_size(tokens) == 0 do
+  defp token_entropy(%{tokens: [], token_counts: _token_counts}) do
     Map.merge(zero_entropy_map("token"), %{"vocab_size" => 0, "total_tokens" => 0})
   end
 
   defp token_entropy(%{tokens: tokens, token_counts: token_counts}) do
-    total = tuple_size(tokens)
+    total = length(tokens)
     vocab_size = map_size(token_counts)
 
     entropy_map = compute_entropy(token_counts, total, "token")
diff --git a/lib/codeqa/metrics/file/file_metric.ex b/lib/codeqa/metrics/file/file_metric.ex
new file mode 100644
index 00000000..5d127163
--- /dev/null
+++ b/lib/codeqa/metrics/file/file_metric.ex
@@ -0,0 +1,61 @@
+defmodule CodeQA.Metrics.File.FileMetric do
+  @moduledoc """
+  Behaviour for metrics that analyze a single source file.
+
+  Implementations receive a `CodeQA.Engine.FileContext` struct containing
+  pre-parsed data (tokens, identifiers, lines, etc.) and return a map of
+  metric key-value pairs. On error, return an empty map `%{}` rather than
+  raising.
+
+  ## Minimal implementation
+
+      defmodule MyMetric do
+        @behaviour CodeQA.Metrics.File.FileMetric
+
+        @impl true
+        def name, do: "my_metric"
+
+        @impl true
+        def keys, do: ["value"]
+
+        @impl true
+        def analyze(ctx) do
+          %{"value" => compute(ctx)}
+        end
+      end
+
+  `name/0`, `keys/0` and `analyze/1` are required; `description/0`, `enabled?/0`
+  and `analyze_loo/2` are optional.
+
+  See [software metrics](https://en.wikipedia.org/wiki/Software_metric).
+  """
+
+  @doc "Name identifying this metric, for example `entropy`."
+  @callback name() :: String.t()
+
+  @doc "Analyzes one file context and returns a map of metric key-value pairs."
+  @callback analyze(CodeQA.Engine.FileContext.t()) :: map()
+
+  @doc "List of metric keys returned by analyze/1."
+  @callback keys() :: [String.t()]
+
+  @doc "Human-readable description of what this metric measures."
+  @callback description() :: String.t()
+
+  @doc "Whether this metric is enabled. Defaults to true when not implemented."
+  @callback enabled?() :: boolean()
+
+  @doc """
+  Subtractive leave-one-out path. When implemented, the block-impact analyzer
+  uses this instead of a full re-run on the file-minus-block reconstruction:
+  it derives the new metric values from the unchanged baseline values for the
+  whole file plus the content of the block being removed.
+
+  Must return the same map shape as `analyze/1` and produce values bit-equal
+  to what `analyze/1` would yield on the file-minus-block content. A goldfile
+  test asserts this.
+  """
+  @callback analyze_loo(baseline :: map(), block_content :: String.t()) :: map()
+
+  @optional_callbacks [description: 0, enabled?: 0, analyze_loo: 2]
+end
diff --git a/lib/codeqa/metrics/function_metrics.ex b/lib/codeqa/metrics/file/function_metrics.ex
similarity index 92%
rename from lib/codeqa/metrics/function_metrics.ex
rename to lib/codeqa/metrics/file/function_metrics.ex
index 7fd22627..6a9bb0c6 100644
--- a/lib/codeqa/metrics/function_metrics.ex
+++ b/lib/codeqa/metrics/file/function_metrics.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.FunctionMetrics do
+defmodule CodeQA.Metrics.File.FunctionMetrics do
   @moduledoc """
   Estimates function-level structure metrics from source text.
 
@@ -13,7 +13,7 @@ defmodule CodeQA.Metrics.FunctionMetrics do
   - C#: lines starting with access modifiers (`public`, `private`, etc.)
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   # Python, Ruby, Elixir: `def` family
   # JavaScript: `function`
@@ -39,14 +39,23 @@ defmodule CodeQA.Metrics.FunctionMetrics do
   @impl true
   def name, do: "function_metrics"
 
+  @impl true
+  def keys,
+    do: [
+      "function_count",
+      "avg_function_lines",
+      "max_function_lines",
+      "avg_param_count",
+      "max_param_count"
+    ]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{lines: lines}) do
-    lines_list = Tuple.to_list(lines)
-    total = length(lines_list)
+    total = length(lines)
 
     {func_indices, param_counts} =
-      lines_list
+      lines
       |> Enum.with_index()
       |> Enum.filter(fn {line, _} ->
         Regex.match?(@func_keyword_re, line) or Regex.match?(@csharp_method_re, line)
diff --git a/lib/codeqa/metrics/halstead.ex b/lib/codeqa/metrics/file/halstead.ex
similarity index 92%
rename from lib/codeqa/metrics/halstead.ex
rename to lib/codeqa/metrics/file/halstead.ex
index ca38665f..157f67b5 100644
--- a/lib/codeqa/metrics/halstead.ex
+++ b/lib/codeqa/metrics/file/halstead.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Halstead do
+defmodule CodeQA.Metrics.File.Halstead do
   @moduledoc """
   Implements Halstead software-science complexity metrics.
 
@@ -9,11 +9,27 @@ defmodule CodeQA.Metrics.Halstead do
   See [Halstead complexity measures](https://en.wikipedia.org/wiki/Halstead_complexity_measures).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "halstead"
 
+  @impl true
+  def keys,
+    do: [
+      "n1_unique_operators",
+      "n2_unique_operands",
+      "N1_total_operators",
+      "N2_total_operands",
+      "vocabulary",
+      "length",
+      "volume",
+      "difficulty",
+      "effort",
+      "estimated_bugs",
+      "time_to_implement_seconds"
+    ]
+
   # Keyword operators for:
   # Python, Ruby, JavaScript, Elixir, C#,
   # Java, C++, Go, Rust, PHP, Swift, Shell, Kotlin
diff --git a/lib/codeqa/metrics/heaps.ex b/lib/codeqa/metrics/file/heaps.ex
similarity index 84%
rename from lib/codeqa/metrics/heaps.ex
rename to lib/codeqa/metrics/file/heaps.ex
index edc390bc..b7cae9c3 100644
--- a/lib/codeqa/metrics/heaps.ex
+++ b/lib/codeqa/metrics/file/heaps.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Heaps do
+defmodule CodeQA.Metrics.File.Heaps do
   @moduledoc """
   Fits Heaps' law to vocabulary growth in a file.
 
@@ -9,25 +9,27 @@ defmodule CodeQA.Metrics.Heaps do
   See [Heaps' law](https://en.wikipedia.org/wiki/Heaps%27_law).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "heaps"
 
+  @impl true
+  def keys, do: ["k", "beta", "r_squared"]
+
   @max_samples 50
 
   @spec analyze(map()) :: map()
   @impl true
-  def analyze(%{tokens: tokens}) when tuple_size(tokens) == 0 do
+  def analyze(%{tokens: []}) do
     %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0}
   end
 
   def analyze(%{tokens: tokens}) do
-    token_list = Tuple.to_list(tokens)
-    total = length(token_list)
+    total = length(tokens)
     interval = max(1, div(total, @max_samples))
 
-    data_points = sample_vocabulary_growth(token_list, interval)
+    data_points = sample_vocabulary_growth(tokens, interval)
 
     if length(data_points) < 5 do
       %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0}
@@ -40,7 +42,7 @@ defmodule CodeQA.Metrics.Heaps do
     tokens
     |> Enum.with_index(1)
     |> Enum.reduce({MapSet.new(), []}, fn {token, i}, {seen, points} ->
-      seen = MapSet.put(seen, token)
+      seen = MapSet.put(seen, token.content)
 
       if rem(i, interval) == 0 do
         {seen, [{i, MapSet.size(seen)} | points]}
diff --git a/lib/codeqa/metrics/identifier_length_variance.ex b/lib/codeqa/metrics/file/identifier_length_variance.ex
similarity index 81%
rename from lib/codeqa/metrics/identifier_length_variance.ex
rename to lib/codeqa/metrics/file/identifier_length_variance.ex
index 2203b100..424b95b5 100644
--- a/lib/codeqa/metrics/identifier_length_variance.ex
+++ b/lib/codeqa/metrics/file/identifier_length_variance.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.IdentifierLengthVariance do
+defmodule CodeQA.Metrics.File.IdentifierLengthVariance do
   @moduledoc """
   Measures the mean, variance, and maximum length of identifiers.
 
@@ -11,20 +11,22 @@ defmodule CodeQA.Metrics.IdentifierLengthVariance do
   and [variance](https://en.wikipedia.org/wiki/Variance).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "identifier_length_variance"
 
+  @impl true
+  def keys, do: ["mean", "variance", "std_dev", "max"]
+
   @spec analyze(map()) :: map()
   @impl true
-  def analyze(%{identifiers: identifiers}) when tuple_size(identifiers) == 0 do
+  def analyze(%{identifiers: []}) do
     %{"mean" => 0.0, "variance" => 0.0, "std_dev" => 0.0, "max" => 0}
   end
 
   def analyze(%{identifiers: identifiers}) do
-    list = Tuple.to_list(identifiers)
-    lengths = Enum.map(list, &String.length/1)
+    lengths = Enum.map(identifiers, &String.length/1)
     n = length(lengths)
     mean = Enum.sum(lengths) / n
 
diff --git a/lib/codeqa/metrics/indentation.ex b/lib/codeqa/metrics/file/indentation.ex
similarity index 60%
rename from lib/codeqa/metrics/indentation.ex
rename to lib/codeqa/metrics/file/indentation.ex
index ab44743b..75923b98 100644
--- a/lib/codeqa/metrics/indentation.ex
+++ b/lib/codeqa/metrics/file/indentation.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Indentation do
+defmodule CodeQA.Metrics.File.Indentation do
   @moduledoc """
   Analyzes indentation depth patterns across non-blank lines.
 
@@ -10,20 +10,27 @@ defmodule CodeQA.Metrics.Indentation do
   See [indentation style](https://en.wikipedia.org/wiki/Indentation_style).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "indentation"
 
+  @impl true
+  def keys, do: ["mean_depth", "variance", "max_depth", "uses_tabs", "blank_line_ratio"]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{lines: lines}) do
-    lines_list = Tuple.to_list(lines)
+    uses_tabs = Enum.any?(lines, &String.match?(&1, ~r/^\t/))
+
+    total_lines = length(lines)
+    blank_count = Enum.count(lines, &(String.trim(&1) == ""))
 
-    uses_tabs = Enum.any?(lines_list, &String.match?(&1, ~r/^\t/))
+    blank_line_ratio =
+      if total_lines > 0, do: Float.round(blank_count / total_lines, 4), else: 0.0
 
     depths =
-      lines_list
+      lines
       |> Enum.reject(&(String.trim(&1) == ""))
       |> Enum.map(fn line ->
         [leading] = Regex.run(~r/^\s*/, line)
@@ -31,7 +38,13 @@ defmodule CodeQA.Metrics.Indentation do
       end)
 
     if depths == [] do
-      %{"mean_depth" => 0.0, "max_depth" => 0, "variance" => 0.0, "uses_tabs" => uses_tabs}
+      %{
+        "mean_depth" => 0.0,
+        "max_depth" => 0,
+        "variance" => 0.0,
+        "uses_tabs" => uses_tabs,
+        "blank_line_ratio" => blank_line_ratio
+      }
     else
       n = length(depths)
       mean = Enum.sum(depths) / n
@@ -45,7 +58,8 @@ defmodule CodeQA.Metrics.Indentation do
         "mean_depth" => Float.round(mean, 4),
         "variance" => Float.round(variance, 4),
         "max_depth" => Enum.max(depths),
-        "uses_tabs" => uses_tabs
+        "uses_tabs" => uses_tabs,
+        "blank_line_ratio" => blank_line_ratio
       }
     end
   end
diff --git a/lib/codeqa/metrics/inflector.ex b/lib/codeqa/metrics/file/inflector.ex
similarity index 89%
rename from lib/codeqa/metrics/inflector.ex
rename to lib/codeqa/metrics/file/inflector.ex
index 7c495314..04e732cb 100644
--- a/lib/codeqa/metrics/inflector.ex
+++ b/lib/codeqa/metrics/file/inflector.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Inflector do
+defmodule CodeQA.Metrics.File.Inflector do
   @moduledoc """
   Utility for detecting identifier casing styles.
 
@@ -30,7 +30,8 @@ defmodule CodeQA.Metrics.Inflector do
       iex> CodeQA.Metrics.Inflector.detect_casing("FOO_BAR")
       :macro_case
   """
-  @spec detect_casing(String.t()) :: :pascal_case | :camel_case | :snake_case | :macro_case | :kebab_case | :other
+  @spec detect_casing(String.t()) ::
+          :pascal_case | :camel_case | :snake_case | :macro_case | :kebab_case | :other
   def detect_casing(identifier) do
     cond do
       identifier =~ ~r/^[A-Z][a-zA-Z0-9]*$/ -> :pascal_case
diff --git a/lib/codeqa/metrics/file/line_patterns.ex b/lib/codeqa/metrics/file/line_patterns.ex
new file mode 100644
index 00000000..e8b2b452
--- /dev/null
+++ b/lib/codeqa/metrics/file/line_patterns.ex
@@ -0,0 +1,91 @@
+defmodule CodeQA.Metrics.File.LinePatterns do
+  @moduledoc """
+  Structural line-level and nesting metrics.
+
+  ## Output keys
+
+  - `"blank_line_ratio"` — blank lines / total lines (spacing/organisation signal)
+  - `"unique_line_ratio"` — distinct non-blank trimmed lines / total non-blank lines
+    (low values indicate repetition or boilerplate)
+  - `"max_nesting_depth"` — maximum bracket nesting depth across `()`, `[]`, `{}`
+    (complexity proxy independent of branching keywords)
+  - `"string_literal_ratio"` — quoted string literal spans / total tokens
+    (high values may indicate magic strings or hardcoded data)
+
+  Brackets and quotes are matched purely textually, so occurrences inside
+  comments are counted like any other.
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @impl true
+  def name, do: "line_patterns"
+
+  @impl true
+  def keys,
+    do: ["blank_line_ratio", "unique_line_ratio", "max_nesting_depth", "string_literal_ratio"]
+
+  # A double- or single-quoted span. A backslash escape is consumed as a unit so
+  # an escaped quote (`\"`, `\'`) does not end the literal early.
+  @string_literal ~r/"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/
+
+  @open_brackets [?(, ?[, ?{]
+  @close_brackets [?), ?], ?}]
+
+  @doc """
+  Computes the line-pattern metrics for one file context.
+
+  Uses `:content` (the file text), `:lines` (list of line strings) and `:tokens`
+  (only its length matters). With no lines at all the result is `0.0` for the
+  blank and string-literal ratios, `1.0` for `unique_line_ratio` and depth `0`.
+  """
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{content: _content, lines: [], tokens: _tokens}) do
+    %{
+      "blank_line_ratio" => 0.0,
+      "unique_line_ratio" => 1.0,
+      "max_nesting_depth" => 0,
+      "string_literal_ratio" => 0.0
+    }
+  end
+
+  def analyze(%{content: content, lines: lines, tokens: tokens}) do
+    # Trim once; the trimmed lines feed both the blank and the uniqueness ratios.
+    non_blank = lines |> Enum.map(&String.trim/1) |> Enum.reject(&(&1 == ""))
+    total_lines = length(lines)
+    string_count = @string_literal |> Regex.scan(content) |> length()
+
+    %{
+      "blank_line_ratio" => ratio(total_lines - length(non_blank), total_lines, 0.0),
+      "unique_line_ratio" => ratio(length(Enum.uniq(non_blank)), length(non_blank), 1.0),
+      "max_nesting_depth" => max_nesting_depth(content),
+      "string_literal_ratio" => ratio(string_count, length(tokens), 0.0)
+    }
+  end
+
+  # `numerator / denominator` rounded to 4 decimals, or `default` when the
+  # denominator is zero.
+  defp ratio(_numerator, 0, default), do: default
+  defp ratio(numerator, denominator, _default), do: Float.round(numerator / denominator, 4)
+
+  # Deepest bracket nesting reached while scanning the content byte by byte.
+  # All bracket characters are ASCII, so they can never be part of a multi-byte
+  # UTF-8 sequence and no grapheme segmentation is needed. Unbalanced closers
+  # never push the depth below zero.
+  defp max_nesting_depth(content) do
+    {_depth, deepest} =
+      for <<byte <- content>>, reduce: {0, 0} do
+        {depth, deepest} when byte in @open_brackets ->
+          {depth + 1, max(deepest, depth + 1)}
+
+        {depth, deepest} when byte in @close_brackets ->
+          {max(depth - 1, 0), deepest}
+
+        acc ->
+          acc
+      end
+
+    deepest
+  end
+end
diff --git a/lib/codeqa/metrics/magic_number_density.ex b/lib/codeqa/metrics/file/magic_number_density.ex
similarity index 51%
rename from lib/codeqa/metrics/magic_number_density.ex
rename to lib/codeqa/metrics/file/magic_number_density.ex
index 3e28bb4f..20428df6 100644
--- a/lib/codeqa/metrics/magic_number_density.ex
+++ b/lib/codeqa/metrics/file/magic_number_density.ex
@@ -1,10 +1,10 @@
-defmodule CodeQA.Metrics.MagicNumberDensity do
+defmodule CodeQA.Metrics.File.MagicNumberDensity do
   @moduledoc """
-  Measures the density of magic numbers in source code.
+  Measures the density of magic numbers and string literals in source code.
 
-  Counts numeric literals (excluding common constants 0, 1, 0.0, 1.0) as a
-  proportion of total tokens. A high density suggests unexplained constants
-  that should be extracted into named values.
+  Counts numeric literals (excluding the idiomatic constants 0, 1, 2, 0.0, 0.5
+  and 1.0) and double-quoted string literals as proportions of total tokens. High
+  densities suggest unexplained constants or hardcoded values worth extracting.
 
   Note: negative numbers (e.g. `-42`) are not detected since the minus sign
   is a separate token.
@@ -12,22 +12,25 @@ defmodule CodeQA.Metrics.MagicNumberDensity do
   See [magic number](<https://en.wikipedia.org/wiki/Magic_number_(programming)>).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "magic_number_density"
 
+  @impl true
+  def keys, do: ["density", "magic_number_count", "string_literal_ratio"]
+
   @number_re ~r/\b\d+\.?\d*(?:[eE][+-]?\d+)?\b/
   @idiomatic_constants ~w[0 1 2 0.0 1.0 0.5]
+  @string_literal_re ~r/"(?:[^"\\]|\\.)*"/
 
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{content: content, tokens: tokens}) do
-    token_list = Tuple.to_list(tokens)
-    total_tokens = length(token_list)
+    total_tokens = length(tokens)
 
     if total_tokens == 0 do
-      %{"density" => 0.0, "magic_number_count" => 0}
+      %{"density" => 0.0, "magic_number_count" => 0, "string_literal_ratio" => 0.0}
     else
       numbers =
         @number_re
@@ -36,10 +39,12 @@ defmodule CodeQA.Metrics.MagicNumberDensity do
         |> Enum.reject(&(&1 in @idiomatic_constants))
 
       magic_count = length(numbers)
+      string_count = @string_literal_re |> Regex.scan(content) |> length()
 
       %{
         "density" => Float.round(magic_count / total_tokens, 4),
-        "magic_number_count" => magic_count
+        "magic_number_count" => magic_count,
+        "string_literal_ratio" => Float.round(string_count / total_tokens, 4)
       }
     end
   end
diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks.ex b/lib/codeqa/metrics/file/near_duplicate_blocks.ex
new file mode 100644
index 00000000..e1e0c08a
--- /dev/null
+++ b/lib/codeqa/metrics/file/near_duplicate_blocks.ex
@@ -0,0 +1,198 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocks do
+  @moduledoc """
+  Near-duplicate block detection using natural code blocks.
+
+  Detects blocks via blank-line boundaries and sub-blocks via bracket/indentation rules.
+  Compares structurally similar blocks by token-level edit distance, bucketed as a
+  percentage of the smaller block's token count.
+
+  Distance buckets:
+    d0 = exact (0%), d1 ≤ 5%, d2 ≤ 10%, d3 ≤ 15%, d4 ≤ 20%,
+    d5 ≤ 25%, d6 ≤ 30%, d7 ≤ 40%, d8 ≤ 50%
+  """
+
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Language
+  alias CodeQA.Metrics.File.NearDuplicateBlocks.Candidates
+  alias CodeQA.Metrics.File.NearDuplicateBlocks.Distance
+
+  @max_bucket 8
+
+  # ---------------------------------------------------------------------------
+  # Public API — distance helpers delegated to Distance submodule
+  # ---------------------------------------------------------------------------
+
+  @doc "Standard Levenshtein distance between two token lists."
+  @spec token_edit_distance([String.t()], [String.t()]) :: non_neg_integer()
+  defdelegate token_edit_distance(a, b), to: Distance
+
+  @doc "Map an edit distance and min token count to a percentage bucket 0–8, or nil if > 50%."
+  @spec percent_bucket(non_neg_integer(), non_neg_integer()) :: 0..8 | nil
+  defdelegate percent_bucket(ed, min_count), to: Distance
+
+  # ---------------------------------------------------------------------------
+  # Public API — analysis entry points
+  # ---------------------------------------------------------------------------
+
+  @doc """
+  Detects near-duplicate blocks across a list of `{path, content}` pairs. Each file
+  is tokenized, split into blocks and labeled with its path; the combined blocks are
+  then passed to `analyze_from_blocks/2`. Returns count keys `near_dup_block_d0..d8`,
+  `block_count`, `sub_block_count`, plus `_pairs` keys with `include_pairs: true`.
+  """
+  @dialyzer {:nowarn_function, analyze: 2}
+  @spec analyze([{String.t(), String.t()}], keyword()) :: map()
+  def analyze(labeled_content, opts) do
+    labeled_content
+    |> Enum.flat_map(fn {path, content} ->
+      language = Language.detect(path)
+      content
+      |> TokenNormalizer.normalize_structural()
+      |> Parser.detect_blocks(language)
+      |> label_blocks(path)
+    end)
+    |> analyze_from_blocks(opts)
+  end
+
+  @doc """
+  Analyze a pre-built list of labeled `Node.t()` structs for near-duplicate blocks.
+  Skips tokenization and block detection — use when blocks are already available.
+  Returns the same keys as `analyze/2`.
+
+  ## Options
+
+    * `:workers` — number of Flow stages (default: `System.schedulers_online/0`)
+    * `:max_pairs_per_bucket` — cap on pairs kept per bucket (default: `nil`, no cap)
+    * `:include_pairs` — when truthy, adds the `near_dup_block_d<N>_pairs` keys
+    * `:on_progress`, `:idf_max_freq` — passed through to the pair finder
+  """
+  @dialyzer {:nowarn_function, analyze_from_blocks: 2}
+  @spec analyze_from_blocks([Node.t()], keyword()) :: map()
+  def analyze_from_blocks(all_blocks, opts) do
+    find_pairs_opts =
+      [
+        workers: Keyword.get(opts, :workers, System.schedulers_online()),
+        max_pairs_per_bucket: Keyword.get(opts, :max_pairs_per_bucket, nil)
+      ] ++ Keyword.take(opts, [:on_progress, :idf_max_freq])
+
+    # sub_block_count comes from the decorated list do_find_pairs already builds.
+    {buckets, sub_block_count} = do_find_pairs(all_blocks, find_pairs_opts)
+
+    counts =
+      for d <- 0..@max_bucket, into: %{} do
+        {"near_dup_block_d#{d}", Map.get(buckets, d, %{count: 0}).count}
+      end
+
+    result =
+      Map.merge(counts, %{
+        "block_count" => length(all_blocks),
+        "sub_block_count" => sub_block_count
+      })
+
+    # `if`, not `case true/false`: nil or a truthy non-boolean must not raise.
+    if Keyword.get(opts, :include_pairs, false) do
+      for d <- 0..@max_bucket, into: result do
+        {"near_dup_block_d#{d}_pairs", format_pairs(Map.get(buckets, d, %{pairs: []}).pairs)}
+      end
+    else
+      result
+    end
+  end
+
+  @doc "Returns the bucket map of near-duplicate pairs among the given `%Node{}` structs."
+  @spec find_pairs([Node.t()], keyword()) :: map()
+  def find_pairs(blocks, opts) do
+    # The sub-block count computed alongside the buckets is not needed here.
+    blocks |> do_find_pairs(opts) |> elem(0)
+  end
+
+  @doc false
+  def label_blocks(blocks, path) do
+    for block <- blocks do
+      line = block.start_line
+      %{block | label: if(line, do: "#{path}:#{line}", else: path)}
+    end
+  end
+
+  # ---------------------------------------------------------------------------
+  # Internal pair-finding pipeline
+  # ---------------------------------------------------------------------------
+
+  # Shared engine behind `find_pairs/2` and `analyze_from_blocks/2`. Returns
+  # `{buckets, sub_block_count}`; the count is summed from the children-count
+  # slot of the decorated tuples, so callers need no extra children pass.
+  # NOTE(review): with fewer than two blocks nothing is decorated, so
+  # sub_block_count is 0 even when that single block has children — confirm
+  # this is intended.
+  defp do_find_pairs([], _opts), do: {%{}, 0}
+  defp do_find_pairs([_only], _opts), do: {%{}, 0}
+
+  defp do_find_pairs(blocks, opts) do
+    stages = Keyword.get(opts, :workers, System.schedulers_online())
+    pair_cap = Keyword.get(opts, :max_pairs_per_bucket, nil)
+    max_freq = Keyword.get(opts, :idf_max_freq, 1.0)
+
+    initial = Candidates.decorate(blocks)
+
+    sub_block_count =
+      initial
+      |> Enum.map(fn {_, _, _, _, _, children_count, _, _} -> children_count end)
+      |> Enum.sum()
+
+    # IDF pruning: bigrams found in more than `max_freq` of all blocks are
+    # structural noise (e.g. "end nil", "return false") that inflates the
+    # candidate set without helping to identify true duplicates.
+    frequent = Candidates.compute_frequent_bigrams(initial, max_freq)
+
+    decorated =
+      case MapSet.size(frequent) do
+        0 -> initial
+        _ -> Enum.map(initial, &Candidates.prune_bigrams(&1, frequent))
+      end
+
+    {exact_index, shingle_index} = Candidates.build_indexes(decorated)
+    # A tuple gives O(1) indexed lookup inside the hot comparison loop.
+    lookup = List.to_tuple(decorated)
+
+    if Keyword.has_key?(opts, :on_progress) do
+      IO.puts(:stderr, "  Comparing #{length(decorated)} blocks for near-duplicates...")
+    end
+
+    buckets =
+      decorated
+      |> Flow.from_enumerable(max_demand: 10, stages: stages)
+      |> Flow.flat_map(&Candidates.find_pairs_for_block(&1, lookup, exact_index, shingle_index))
+      |> Enum.to_list()
+      |> bucket_pairs(pair_cap)
+
+    {buckets, sub_block_count}
+  end
+
+  # Groups `{bucket, pair}` tuples into `%{bucket => %{count: n, pairs: [...]}}`.
+  # `count` tallies every pair seen for the bucket, while `pairs` stops growing
+  # once `max_pairs` entries are kept (no limit unless `max_pairs` is an integer).
+  # Pairs are prepended, so the kept list is in reverse arrival order.
+  defp bucket_pairs(raw_pairs, max_pairs) do
+    empty = %{count: 0, pairs: []}
+
+    Enum.reduce(raw_pairs, %{}, fn {bucket, pair}, acc ->
+      %{count: seen, pairs: kept} = Map.get(acc, bucket, empty)
+      updated = %{count: seen + 1, pairs: maybe_append(kept, pair, max_pairs, seen)}
+      Map.put(acc, bucket, updated)
+    end)
+  end
+
+  # The already-tracked count is passed in so the cap check avoids an O(n)
+  # length(list) walk.
+  defp maybe_append(list, pair, max, count) do
+    if is_integer(max) and count >= max, do: list, else: [pair | list]
+  end
+
+  # Converts each `{label_a, label_b}` tuple into a string-keyed map.
+  defp format_pairs(pairs), do: Enum.map(pairs, &format_pair/1)
+
+  defp format_pair({source_a, source_b}),
+    do: %{"source_a" => source_a, "source_b" => source_b}
+end
diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex
new file mode 100644
index 00000000..522f5481
--- /dev/null
+++ b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex
@@ -0,0 +1,238 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do
+  @moduledoc """
+  Block fingerprinting, indexing, and candidate-pair matching for near-duplicate detection.
+
+  Handles:
+  - Canonical token-value extraction (stripping leading/trailing whitespace tokens)
+  - Exact-hash and shingle indexes for fast candidate lookup
+  - IDF-based bigram pruning to reduce structural-noise candidates
+  - Structural compatibility checks (child-count and line-ratio guards)
+  - Pair scoring and bucketing
+  """
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken}
+  alias CodeQA.Metrics.File.NearDuplicateBlocks.Distance
+
+  # Pre-compute token kind strings to avoid repeated function calls in the hot path.
+  @nl_kind NewlineToken.kind()
+  @ws_kind WhitespaceToken.kind()
+
+  @doc """
+  Pre-computes, for every block, the data the matching pipeline needs: canonical
+  token values, their hash and length, child and newline counts, and bigrams.
+  Each entry is an 8-tuple whose `index` is the block's position in the input:
+
+      {index, block, values, hash, len_values, children_count, newline_count, bigrams}
+  """
+  @spec decorate([term()]) :: [tuple()]
+  def decorate(blocks) do
+    for {block, index} <- Enum.with_index(blocks) do
+      values = block |> NodeProtocol.flat_tokens() |> canonical_values()
+      hash = :erlang.phash2(values)
+      sub_blocks = block |> NodeProtocol.children() |> length()
+      newlines = Enum.count(values, fn kind -> kind == @nl_kind end)
+      # Overlapping adjacent pairs; a trailing lone value forms no bigram.
+      shingles = Enum.chunk_every(values, 2, 1, :discard)
+
+      {index, block, values, hash, length(values), sub_blocks, newlines, shingles}
+    end
+  end
+
+  @doc """
+  Builds the exact index (values hash → block indexes) and the shingle index
+  (bigram hash → block indexes) in a single pass over the decorated list.
+  Indexes are prepended, so each list holds later blocks first.
+  """
+  @spec build_indexes([tuple()]) :: {map(), map()}
+  def build_indexes(decorated) do
+    Enum.reduce(decorated, {%{}, %{}}, fn entry, {by_hash, by_shingle} ->
+      {idx, _block, _values, hash, _len, _children, _newlines, bigrams} = entry
+
+      by_shingle =
+        Enum.reduce(bigrams, by_shingle, fn bigram, acc ->
+          index_put(acc, :erlang.phash2(bigram), idx)
+        end)
+
+      {index_put(by_hash, hash, idx), by_shingle}
+    end)
+  end
+
+  # Prepends `idx` to the list stored under `key`, creating it when absent.
+  defp index_put(index, key, idx), do: Map.update(index, key, [idx], &[idx | &1])
+
+  @doc """
+  Returns the set of bigram hashes occurring in more than `max_freq` fraction of blocks.
+
+  A bigram is counted at most once per block. The cut-off is never below 2, so a
+  bigram has to show up in at least 3 blocks to be pruned — this avoids
+  over-pruning when there are only a handful of blocks.
+  """
+  @spec compute_frequent_bigrams([tuple()], float()) :: MapSet.t()
+  def compute_frequent_bigrams(decorated, max_freq) do
+    cutoff = max(2, round(length(decorated) * max_freq))
+
+    block_frequencies =
+      decorated
+      |> Enum.flat_map(fn {_, _, _, _, _, _, _, bigrams} ->
+        bigrams |> Enum.map(&:erlang.phash2/1) |> Enum.uniq()
+      end)
+      |> Enum.reduce(%{}, fn hash, acc -> Map.update(acc, hash, 1, &(&1 + 1)) end)
+
+    for {hash, blocks_with_bigram} <- block_frequencies,
+        blocks_with_bigram > cutoff,
+        into: MapSet.new(),
+        do: hash
+  end
+
+  @doc "Drops from a decorated tuple every bigram whose hash belongs to `pruned`."
+  @spec prune_bigrams(tuple(), MapSet.t()) :: tuple()
+  def prune_bigrams({_, _, _, _, _, _, _, bigrams} = entry, pruned) do
+    put_elem(entry, 7, Enum.filter(bigrams, &(not MapSet.member?(pruned, :erlang.phash2(&1)))))
+  end
+
+  @doc """
+  Find all near-duplicate pairs for a single block against the full decorated array.
+
+  Only partners with a higher index than this block are considered. Exact
+  matches are reported as bucket 0; other candidates are gathered through the
+  shingle index and then scored by token edit distance.
+  Returns a list of `{bucket, {label_a, label_b}}` pairs.
+  """
+  @spec find_pairs_for_block(tuple(), tuple(), map(), map()) :: list()
+  def find_pairs_for_block(
+        {i, block_a, values_a, hash_a, len_a, children_a, newlines_a, bigrams_a},
+        decorated_arr,
+        exact_index,
+        shingle_index
+      ) do
+    # Up to 3 exact matches: plain list membership beats building a MapSet.
+    exact_list = Map.get(exact_index, hash_a, [])
+    exact_set = if length(exact_list) > 3, do: MapSet.new(exact_list), else: nil
+
+    # d0 (exact): confirm hash matches by value equality to rule out phash2 collisions.
+    exact_pairs =
+      exact_list
+      |> Enum.filter(&(&1 > i))
+      |> Enum.map(fn j ->
+        {_j, block_b, values_b, _hash_b, _len_b, children_b, newlines_b, _bigrams_b} =
+          elem(decorated_arr, j)
+
+        if values_b == values_a and
+             structure_compatible?(children_a, newlines_a, children_b, newlines_b) do
+          {0, {block_a.label, block_b.label}}
+        else
+          nil
+        end
+      end)
+      |> Enum.reject(&is_nil/1)
+
+    # For d1-d8 (near), use shingle index to find candidates.
+    min_shared = max(0, round(len_a * 0.5) - 1)
+    counter = :counters.new(tuple_size(decorated_arr), [])
+
+    # Count shared bigrams per candidate in the counter array (slot j + 1, since
+    # :counters indexes from 1) and remember each candidate on first touch so the
+    # post-pass visits only blocks that share at least one bigram.
+    touched =
+      Enum.reduce(bigrams_a, [], fn bigram, touched_acc ->
+        h = :erlang.phash2(bigram)
+
+        shingle_index
+        |> Map.get(h, [])
+        |> Enum.reduce(touched_acc, fn
+          j, acc when j > i ->
+            idx = j + 1
+            old = :counters.get(counter, idx)
+            :counters.add(counter, idx, 1)
+            if old == 0, do: [j | acc], else: acc
+
+          _j, acc ->
+            acc
+        end)
+      end)
+
+    in_exact? = fn j ->
+      if exact_set, do: MapSet.member?(exact_set, j), else: j in exact_list
+    end
+
+    near_pairs =
+      Enum.flat_map(touched, fn j ->
+        count = :counters.get(counter, j + 1)
+
+        if count >= min_shared and not in_exact?.(j) do
+          near_pair_for_candidate(
+            j,
+            decorated_arr,
+            block_a,
+            values_a,
+            len_a,
+            children_a,
+            newlines_a
+          )
+        else
+          []
+        end
+      end)
+
+    exact_pairs ++ near_pairs
+  end
+
+  # ---------------------------------------------------------------------------
+  # Private helpers
+  # ---------------------------------------------------------------------------
+
+  # Extracts the token kinds of `tokens`, minus any leading or trailing <NL>/<WS>
+  # kinds; newline and whitespace kinds between content tokens are kept.
+  #
+  # Kept to three steps over the data: skip the leading run, collect the
+  # remaining kinds in reverse, then drop the trailing run (which now sits at
+  # the head of the reversed list) before :lists.reverse restores source
+  # order.
+  defp canonical_values(tokens) do
+    reversed_kinds =
+      tokens
+      |> Enum.drop_while(&edge_noise?(&1.kind))
+      |> Enum.reduce([], fn token, kinds -> [token.kind | kinds] end)
+
+    reversed_kinds
+    |> Enum.drop_while(&edge_noise?/1)
+    |> :lists.reverse()
+  end
+
+  # True for the newline and whitespace kinds that get trimmed at both edges.
+  defp edge_noise?(kind), do: kind == @nl_kind or kind == @ws_kind
+
+  # Scores candidate `j` against block A: `[{bucket, {label_a, label_b}}]` for a
+  # bucket above 0, else `[]` (distance 0 is already reported by exact_pairs).
+  defp near_pair_for_candidate(j, decorated_arr, block_a, values_a, len_a, children_a, newlines_a) do
+    {_j, block_b, values_b, _hash_b, len_b, children_b, newlines_b, _bigrams_b} =
+      elem(decorated_arr, j)
+
+    shorter = min(len_a, len_b)
+    budget = round(shorter * 0.5)
+
+    comparable? =
+      structure_compatible?(children_a, newlines_a, children_b, newlines_b) and
+        abs(len_a - len_b) <= budget
+
+    with true <- comparable?,
+         distance = Distance.token_edit_distance_bounded(values_a, values_b, budget),
+         bucket when is_integer(bucket) and bucket > 0 <- Distance.percent_bucket(distance, shorter) do
+      [{bucket, {block_a.label, block_b.label}}]
+    else
+      _ -> []
+    end
+  end
+
+  # Two blocks are comparable when their sub-block counts differ by at most one
+  # and their line counts (newline tokens + 1) differ by at most 30% of the
+  # larger one. Takes the counts pre-computed in the decorated tuples, so
+  # NodeProtocol.children/1 and Enum.count/2 are not called per candidate pair.
+  defp structure_compatible?(children_a, newlines_a, children_b, newlines_b) do
+    more = max(newlines_a + 1, newlines_b + 1)
+    fewer = min(newlines_a + 1, newlines_b + 1)
+    line_ratio = if more > 0, do: (more - fewer) / more, else: 0.0
+    abs(children_a - children_b) <= 1 and line_ratio <= 0.30
+  end
+end
diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex
new file mode 100644
index 00000000..475aa3e2
--- /dev/null
+++ b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex
@@ -0,0 +1,114 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Distance do
+  @moduledoc """
+  Token-level edit distance and percentage-bucket classification for near-duplicate detection.
+
+  Provides standard Levenshtein distance, a bounded variant that short-circuits
+  when the distance already exceeds a threshold, and a bucket classifier that maps
+  an edit distance + minimum token count to a similarity bucket (d0–d8).
+
+  Distance buckets:
+    d0 = exact (0%), d1 ≤ 5%, d2 ≤ 10%, d3 ≤ 15%, d4 ≤ 20%,
+    d5 ≤ 25%, d6 ≤ 30%, d7 ≤ 40%, d8 ≤ 50%
+  """
+
+  @bucket_thresholds [
+    {0, 0.0},
+    {1, 0.05},
+    {2, 0.10},
+    {3, 0.15},
+    {4, 0.20},
+    {5, 0.25},
+    {6, 0.30},
+    {7, 0.40},
+    {8, 0.50}
+  ]
+
+  @doc "Unbounded Levenshtein distance (insert, delete, substitute) between two token lists."
+  @spec token_edit_distance([String.t()], [String.t()]) :: non_neg_integer()
+  def token_edit_distance([], b), do: length(b)
+  def token_edit_distance(a, []), do: length(a)
+
+  def token_edit_distance(a, b) do
+    # Tuples give constant-time positional reads inside the row/column loops.
+    {source, target} = {List.to_tuple(a), List.to_tuple(b)}
+    {rows, cols} = {tuple_size(source), tuple_size(target)}
+    first_row = List.to_tuple(Enum.to_list(0..cols))
+
+    source |> levenshtein_rows(target, rows, cols, first_row, 1) |> elem(cols)
+  end
+
+  defp levenshtein_rows(_a, _b, la, _lb, prev, i) when i > la, do: prev
+
+  defp levenshtein_rows(a, b, la, lb, prev, i) do
+    ai = elem(a, i - 1)
+    curr_reversed = levenshtein_cols(b, lb, prev, ai, [i], 1)
+    curr = List.to_tuple(:lists.reverse(curr_reversed))
+    levenshtein_rows(a, b, la, lb, curr, i + 1)
+  end
+
+  defp levenshtein_cols(_b, lb, _prev, _ai, acc, j) when j > lb, do: acc
+
+  defp levenshtein_cols(b, lb, prev, ai, [last_val | _] = acc, j) do
+    cost = if ai == elem(b, j - 1), do: 0, else: 1
+    val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost))
+    levenshtein_cols(b, lb, prev, ai, [val | acc], j + 1)
+  end
+
+  # Bounded Levenshtein: returns the exact edit distance when it is <= max_distance,
+  # otherwise max_distance + 1. The row loop bails early once a row minimum exceeds
+  # the bound (row minima never decrease); every result is clamped to the bound + 1.
+  @doc false
+  @spec token_edit_distance_bounded([String.t()], [String.t()], non_neg_integer()) ::
+          non_neg_integer()
+  def token_edit_distance_bounded([], b, max_distance), do: min(length(b), max_distance + 1)
+  def token_edit_distance_bounded(a, [], max_distance), do: min(length(a), max_distance + 1)
+
+  def token_edit_distance_bounded(a, b, max_distance) do
+    {a_arr, b_arr} = {List.to_tuple(a), List.to_tuple(b)}
+    {la, lb} = {tuple_size(a_arr), tuple_size(b_arr)}
+    init_row = List.to_tuple(Enum.to_list(0..lb))
+    distance = levenshtein_rows_bounded(a_arr, b_arr, la, lb, init_row, max_distance, 1)
+    min(distance, max_distance + 1)
+  end
+
+  defp levenshtein_rows_bounded(_a, _b, la, lb, prev, _max, i) when i > la, do: elem(prev, lb)
+
+  defp levenshtein_rows_bounded(a, b, la, lb, prev, max_distance, i) do
+    ai = elem(a, i - 1)
+    # levenshtein_cols_with_min tracks the row minimum as it builds, avoiding
+    # a separate O(lb) pass to find the min after the row is complete.
+    {curr_reversed, row_min} = levenshtein_cols_with_min(b, lb, prev, ai, {[i], i}, 1)
+    curr = List.to_tuple(:lists.reverse(curr_reversed))
+
+    if row_min > max_distance do
+      max_distance + 1
+    else
+      levenshtein_rows_bounded(a, b, la, lb, curr, max_distance, i + 1)
+    end
+  end
+
+  defp levenshtein_cols_with_min(_b, lb, _prev, _ai, acc_and_min, j) when j > lb,
+    do: acc_and_min
+
+  defp levenshtein_cols_with_min(b, lb, prev, ai, {[last_val | _] = acc, min_val}, j) do
+    cost = if ai == elem(b, j - 1), do: 0, else: 1
+    val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost))
+    levenshtein_cols_with_min(b, lb, prev, ai, {[val | acc], min(min_val, val)}, j + 1)
+  end
+
+  @doc "Classifies `ed / min_count` into bucket 0–8; nil above 50% or when `min_count` is 0."
+  @spec percent_bucket(non_neg_integer(), non_neg_integer()) :: 0..8 | nil
+  def percent_bucket(_ed, 0), do: nil
+  def percent_bucket(0, _min_count), do: 0
+
+  def percent_bucket(ed, min_count) do
+    ratio = ed / min_count
+
+    # Thresholds are listed in ascending order, so the first bucket whose limit
+    # admits the ratio is the tightest one. Bucket 0 is skipped here because a
+    # zero distance is answered by the clause above; no match means > 50% (nil).
+    Enum.find_value(@bucket_thresholds, fn {bucket, limit} ->
+      if bucket > 0 and ratio <= limit, do: bucket
+    end)
+  end
+end
diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex
new file mode 100644
index 00000000..7a15e749
--- /dev/null
+++ b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex
@@ -0,0 +1,39 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocksFile do
+  @moduledoc """
+  Counts near-duplicate and exact-duplicate natural code blocks within a single file.
+
+  Blocks are detected at blank-line boundaries with sub-block detection via bracket rules.
+  Distance is a percentage of the smaller block's token count, bucketed d0–d8.
+  Also reports block_count and sub_block_count as standalone metrics.
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  alias CodeQA.Metrics.File.NearDuplicateBlocks
+
+  @impl true
+  def name, do: "near_duplicate_blocks_file"
+
+  @impl true
+  def keys do
+    ["block_count", "sub_block_count"] ++ for(d <- 0..8, do: "near_dup_block_d#{d}")
+  end
+
+  @impl true
+  def analyze(%{blocks: nil}), do: Map.new(keys(), &{&1, 0})
+
+  def analyze(%{path: path, blocks: blocks}) when is_list(blocks) do
+    blocks
+    |> NearDuplicateBlocks.label_blocks(path || "unknown")
+    |> NearDuplicateBlocks.analyze_from_blocks([])
+    |> without_pair_lists()
+  end
+
+  def analyze(ctx) do
+    [{ctx.path || "unknown", ctx.content}]
+    |> NearDuplicateBlocks.analyze([])
+    |> without_pair_lists()
+  end
+
+  defp without_pair_lists(m), do: Map.reject(m, fn {k, _} -> String.ends_with?(k, "_pairs") end)
+end
diff --git a/lib/codeqa/metrics/ngram.ex b/lib/codeqa/metrics/file/ngram.ex
similarity index 71%
rename from lib/codeqa/metrics/ngram.ex
rename to lib/codeqa/metrics/file/ngram.ex
index fb2b44b5..b100513c 100644
--- a/lib/codeqa/metrics/ngram.ex
+++ b/lib/codeqa/metrics/file/ngram.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Ngram do
+defmodule CodeQA.Metrics.File.Ngram do
   @moduledoc """
   Computes bigram and trigram statistics over the token stream.
 
@@ -10,15 +10,30 @@ defmodule CodeQA.Metrics.Ngram do
   and [hapax legomenon](https://en.wikipedia.org/wiki/Hapax_legomenon).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "ngram"
 
+  @impl true
+  def keys,
+    do: [
+      "bigram_total",
+      "bigram_unique",
+      "bigram_repetition_rate",
+      "bigram_hapax_fraction",
+      "bigram_repeated_unique",
+      "trigram_total",
+      "trigram_unique",
+      "trigram_repetition_rate",
+      "trigram_hapax_fraction",
+      "trigram_repeated_unique"
+    ]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(ctx) do
-    tokens = Tuple.to_list(ctx.tokens)
+    tokens = Enum.map(ctx.tokens, & &1.content)
 
     bigram_stats = ngram_stats(tokens, 2) |> rename_keys("bigram")
     trigram_stats = ngram_stats(tokens, 3) |> rename_keys("trigram")
@@ -27,7 +42,13 @@ defmodule CodeQA.Metrics.Ngram do
   end
 
   defp ngram_stats(tokens, n) when length(tokens) < n do
-    %{"total" => 0, "unique" => 0, "repetition_rate" => 0.0, "hapax_fraction" => 0.0, "repeated_unique" => 0}
+    %{
+      "total" => 0,
+      "unique" => 0,
+      "repetition_rate" => 0.0,
+      "hapax_fraction" => 0.0,
+      "repeated_unique" => 0
+    }
   end
 
   defp ngram_stats(tokens, n) do
diff --git a/lib/codeqa/metrics/file/punctuation_density.ex b/lib/codeqa/metrics/file/punctuation_density.ex
new file mode 100644
index 00000000..8b42ee41
--- /dev/null
+++ b/lib/codeqa/metrics/file/punctuation_density.ex
@@ -0,0 +1,96 @@
+defmodule CodeQA.Metrics.File.PunctuationDensity do
+  @moduledoc """
+  Character-level punctuation and structural pattern metrics.
+
+  Captures signals that character-level metrics miss: naming conventions using
+  `?`/`!` suffixes, chained method calls (dots), non-standard bracket adjacency,
+  and numeric bracket pair patterns.
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @impl true
+  def name, do: "punctuation_density"
+
+  @impl true
+  def keys do
+    [
+      "question_mark_density",
+      "exclamation_density",
+      "dot_count",
+      "id_nonalpha_suffix_density",
+      "bracket_nonalpha_prefix_count",
+      "bracket_nonalpha_suffix_count",
+      "bracket_number_pair_count",
+      "arrow_density",
+      "colon_suffix_density"
+    ]
+  end
+
+  # identifier-like token (starts with letter/underscore) ending with non-alphanumeric non-whitespace
+  @id_nonalpha_suffix ~r/[a-zA-Z_]\w*[^\w\s]/
+  # opening bracket immediately preceded by non-alphanumeric non-whitespace (e.g. `?(`, `==[`)
+  @bracket_nonalpha_prefix ~r/[^\w\s\(\[\{][\(\[\{]/
+  # closing bracket immediately followed by non-alphanumeric non-whitespace (e.g. `}.`, `)?`)
+  @bracket_nonalpha_suffix ~r/[\)\]\}][^\w\s\)\]\}]/
+  # number (with optional underscores) wrapped in brackets: (42), [1_000], (3.14)
+  @bracket_number_pair ~r/[\(\[]\d[\d_]*(?:\.\d+)?[\)\]]/
+  # arrow operators: -> and =>
+  @arrow ~r/->|=>/
+  # identifier immediately followed by colon (keyword args, dict keys, labels)
+  @colon_suffix ~r/[a-zA-Z_]\w*:/
+
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{content: content, tokens: tokens}) do
+    token_count = length(tokens)
+
+    case String.length(content) do
+      0 ->
+        %{
+          "question_mark_density" => 0.0,
+          "exclamation_density" => 0.0,
+          "dot_count" => 0,
+          "id_nonalpha_suffix_density" => 0.0,
+          "bracket_nonalpha_prefix_count" => 0,
+          "bracket_nonalpha_suffix_count" => 0,
+          "bracket_number_pair_count" => 0,
+          "arrow_density" => 0.0,
+          "colon_suffix_density" => 0.0
+        }
+
+      char_count ->
+        # Token-relative densities divide by at least 1, even with an empty token list.
+        per_token = max(token_count, 1)
+        question_marks = count_char(content, "?")
+        exclamations = count_char(content, "!")
+        dots = count_char(content, ".")
+        id_suffixes = count_matches(content, @id_nonalpha_suffix)
+        open_prefixed = count_matches(content, @bracket_nonalpha_prefix)
+        close_suffixed = count_matches(content, @bracket_nonalpha_suffix)
+        numeric_pairs = count_matches(content, @bracket_number_pair)
+        arrows = count_matches(content, @arrow)
+        colon_suffixes = count_matches(content, @colon_suffix)
+
+        %{
+          "question_mark_density" => Float.round(question_marks / char_count, 6),
+          "exclamation_density" => Float.round(exclamations / char_count, 6),
+          "dot_count" => dots,
+          "id_nonalpha_suffix_density" => Float.round(id_suffixes / per_token, 4),
+          "bracket_nonalpha_prefix_count" => open_prefixed,
+          "bracket_nonalpha_suffix_count" => close_suffixed,
+          "bracket_number_pair_count" => numeric_pairs,
+          "arrow_density" => Float.round(arrows / per_token, 4),
+          "colon_suffix_density" => Float.round(colon_suffixes / per_token, 4)
+        }
+    end
+  end
+
+  defp count_char(content, char) do
+    content |> String.graphemes() |> Enum.count(&(&1 == char))
+  end
+
+  defp count_matches(content, regex) do
+    regex |> Regex.scan(content) |> length()
+  end
+end
diff --git a/lib/codeqa/metrics/readability.ex b/lib/codeqa/metrics/file/readability.ex
similarity index 89%
rename from lib/codeqa/metrics/readability.ex
rename to lib/codeqa/metrics/file/readability.ex
index 5ffa9e1f..3e1bd2c4 100644
--- a/lib/codeqa/metrics/readability.ex
+++ b/lib/codeqa/metrics/file/readability.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Readability do
+defmodule CodeQA.Metrics.File.Readability do
   @moduledoc """
   Computes adapted Flesch and Fog readability indices for source code.
 
@@ -10,17 +10,27 @@ defmodule CodeQA.Metrics.Readability do
   and [Gunning fog index](https://en.wikipedia.org/wiki/Gunning_fog_index).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "readability"
 
+  @impl true
+  def keys,
+    do: [
+      "avg_tokens_per_line",
+      "avg_line_length",
+      "avg_sub_words_per_id",
+      "flesch_adapted",
+      "fog_adapted",
+      "total_lines"
+    ]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(ctx) do
     lines =
       ctx.lines
-      |> Tuple.to_list()
       |> Enum.filter(fn line ->
         trimmed = String.trim(line)
         trimmed != "" and not String.starts_with?(trimmed, "#")
@@ -42,11 +52,11 @@ defmodule CodeQA.Metrics.Readability do
 
   defp compute_readability(ctx, lines) do
     total_lines = length(lines)
-    total_tokens = tuple_size(ctx.tokens)
+    total_tokens = length(ctx.tokens)
     avg_tokens = total_tokens / total_lines
     avg_line_length = lines |> Enum.map(&String.length/1) |> Enum.sum() |> Kernel./(total_lines)
 
-    words = Tuple.to_list(ctx.words)
+    words = ctx.words
 
     {avg_sub_words, complex_fraction} =
       if words != [] do
diff --git a/lib/codeqa/metrics/file/rfc.ex b/lib/codeqa/metrics/file/rfc.ex
new file mode 100644
index 00000000..5416c684
--- /dev/null
+++ b/lib/codeqa/metrics/file/rfc.ex
@@ -0,0 +1,81 @@
+defmodule CodeQA.Metrics.File.RFC do
+  @moduledoc """
+  Response For a Class (RFC) — a coupling metric from the Chidamber & Kemerer suite.
+
+  RFC ≈ number of distinct methods/functions reachable from this file, counting
+  both locally-defined functions and distinct external call targets.
+
+  Formula: `RFC = function_def_count + |distinct_call_targets|`
+
+  Computed from the token stream without requiring a real AST:
+  - Function definitions are detected by function-keyword tokens (`def`, `fn`, etc.)
+    followed by an `<ID>` token.
+  - Call targets are detected by `<ID>` tokens immediately followed by `(`.
+    Duplicates are collapsed to a set.
+
+  Higher RFC values indicate a module with more responsibility and more external
+  coupling, correlating empirically with higher fault density.
+
+  See [CK metrics suite](https://en.wikipedia.org/wiki/Programming_complexity#Chidamber_and_Kemerer_metrics).
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @func_keywords MapSet.new(~w[
+    def defp defmacro defmacrop defguard defdelegate
+    function func fun fn
+    sub proc method
+  ])
+
+  @impl true
+  def name, do: "rfc"
+
+  @impl true
+  def keys, do: ["rfc_count", "rfc_density", "function_def_count", "distinct_call_count"]
+
+  @impl true
+  def description,
+    do: "Response For a Class: function definitions + distinct call targets (CK suite)"
+
+  @spec analyze(CodeQA.Engine.FileContext.t()) :: map()
+  @impl true
+  def analyze(%{tokens: tokens, line_count: line_count}) do
+    {func_def_count, call_targets} = scan_tokens(tokens)
+
+    distinct_call_count = MapSet.size(call_targets)
+    rfc_count = func_def_count + distinct_call_count
+
+    density =
+      if line_count > 0,
+        do: Float.round(rfc_count / line_count, 4),
+        else: 0.0
+
+    %{
+      "rfc_count" => rfc_count,
+      "rfc_density" => density,
+      "function_def_count" => func_def_count,
+      "distinct_call_count" => distinct_call_count
+    }
+  end
+
+  # Walks adjacent token pairs once, returning {definition_count, call_target_set}.
+  # A pair is tested as a definition first, so it is never also counted as a call.
+  defp scan_tokens(tokens), do: scan_pairs(tokens, 0, MapSet.new())
+
+  defp scan_pairs([tok | [next | _] = rest], defs, calls) do
+    cond do
+      # keyword + identifier => function definition
+      MapSet.member?(@func_keywords, tok.content) and next.kind == "<ID>" ->
+        scan_pairs(rest, defs + 1, calls)
+
+      # identifier + "(" => call site; the set collapses repeated targets
+      tok.kind == "<ID>" and next.content == "(" ->
+        scan_pairs(rest, defs, MapSet.put(calls, tok.content))
+
+      true ->
+        scan_pairs(rest, defs, calls)
+    end
+  end
+
+  defp scan_pairs(_fewer_than_two, defs, calls), do: {defs, calls}
+end
diff --git a/lib/codeqa/metrics/file/separator_counts.ex b/lib/codeqa/metrics/file/separator_counts.ex
new file mode 100644
index 00000000..62586560
--- /dev/null
+++ b/lib/codeqa/metrics/file/separator_counts.ex
@@ -0,0 +1,44 @@
+defmodule CodeQA.Metrics.File.SeparatorCounts do
+  @moduledoc """
+  Counts dividing characters (`_`, `-`, `/`, `.`) in source code.
+
+  These separators appear in identifiers (snake_case, kebab-case),
+  paths, and dotted access. Their frequency can distinguish naming
+  conventions and structural patterns across languages.
+  """
+
+  @behaviour CodeQA.Metrics.File.FileMetric
+
+  @impl true
+  def name, do: "separator_counts"
+
+  @impl true
+  def keys, do: ["underscore_count", "hyphen_count", "slash_count", "dot_count"]
+
+  @spec analyze(map()) :: map()
+  @impl true
+  def analyze(%{content: content}) do
+    %{
+      "underscore_count" => count(content, "_"),
+      "hyphen_count" => count(content, "-"),
+      "slash_count" => count(content, "/"),
+      "dot_count" => count(content, ".")
+    }
+  end
+
+  @impl true
+  def analyze_loo(baseline, block_content) do
+    %{
+      "underscore_count" => baseline["underscore_count"] - count(block_content, "_"),
+      "hyphen_count" => baseline["hyphen_count"] - count(block_content, "-"),
+      "slash_count" => baseline["slash_count"] - count(block_content, "/"),
+      "dot_count" => baseline["dot_count"] - count(block_content, ".")
+    }
+  end
+
+  # Number of graphemes in `content` that are exactly `char`.
+  defp count(content, char) do
+    graphemes = String.graphemes(content)
+    Enum.count(graphemes, fn grapheme -> grapheme == char end)
+  end
+end
diff --git a/lib/codeqa/metrics/symbol_density.ex b/lib/codeqa/metrics/file/symbol_density.ex
similarity index 85%
rename from lib/codeqa/metrics/symbol_density.ex
rename to lib/codeqa/metrics/file/symbol_density.ex
index 67459a0c..3e71bf34 100644
--- a/lib/codeqa/metrics/symbol_density.ex
+++ b/lib/codeqa/metrics/file/symbol_density.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.SymbolDensity do
+defmodule CodeQA.Metrics.File.SymbolDensity do
   @moduledoc """
   Measures the density of non-word, non-whitespace symbols in source code.
 
@@ -9,11 +9,14 @@ defmodule CodeQA.Metrics.SymbolDensity do
   See [code readability](https://en.wikipedia.org/wiki/Computer_programming#Readability_of_source_code).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "symbol_density"
 
+  @impl true
+  def keys, do: ["density", "symbol_count", "distinct_symbol_types"]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{content: content}) do
diff --git a/lib/codeqa/metrics/vocabulary.ex b/lib/codeqa/metrics/file/vocabulary.ex
similarity index 91%
rename from lib/codeqa/metrics/vocabulary.ex
rename to lib/codeqa/metrics/file/vocabulary.ex
index d9ef6374..496cc68a 100644
--- a/lib/codeqa/metrics/vocabulary.ex
+++ b/lib/codeqa/metrics/file/vocabulary.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Vocabulary do
+defmodule CodeQA.Metrics.File.Vocabulary do
   @moduledoc """
   Analyzes vocabulary diversity using type-token ratio (TTR) and MATTR.
 
@@ -14,19 +14,22 @@ defmodule CodeQA.Metrics.Vocabulary do
   and [MATTR](https://doi.org/10.3758/BRM.42.2.381).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "vocabulary"
 
+  @impl true
+  def keys, do: ["raw_ttr", "mattr", "unique_identifiers", "total_identifiers", "vocabulary"]
+
   @window_size 100
 
   @spec analyze(map()) :: map()
   @impl true
   def analyze(ctx) do
-    identifiers = Tuple.to_list(ctx.identifiers)
+    identifiers = ctx.identifiers
     total = length(identifiers)
-    vocabulary = ctx.words |> Tuple.to_list() |> Enum.uniq() |> Enum.sort()
+    vocabulary = ctx.words |> Enum.uniq() |> Enum.sort()
 
     if total == 0 do
       %{
diff --git a/lib/codeqa/metrics/vowel_density.ex b/lib/codeqa/metrics/file/vowel_density.ex
similarity index 86%
rename from lib/codeqa/metrics/vowel_density.ex
rename to lib/codeqa/metrics/file/vowel_density.ex
index 84ea39e7..f3f53de5 100644
--- a/lib/codeqa/metrics/vowel_density.ex
+++ b/lib/codeqa/metrics/file/vowel_density.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.VowelDensity do
+defmodule CodeQA.Metrics.File.VowelDensity do
   @moduledoc """
   Measures the density of vowels in identifiers.
 
@@ -9,17 +9,20 @@ defmodule CodeQA.Metrics.VowelDensity do
   See [identifier naming](https://en.wikipedia.org/wiki/Identifier_(computer_languages)).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @vowels MapSet.new(~c"aeiouyAEIOUY")
 
   @impl true
   def name, do: "vowel_density"
 
+  @impl true
+  def keys, do: ["density", "vowel_count", "total_chars"]
+
   @spec analyze(map()) :: map()
   @impl true
   def analyze(%{identifiers: identifiers}) do
-    list = Tuple.to_list(identifiers)
+    list = identifiers
 
     if list == [] do
       %{"density" => 0.0, "vowel_count" => 0, "total_chars" => 0}
diff --git a/lib/codeqa/metrics/winnowing.ex b/lib/codeqa/metrics/file/winnowing.ex
similarity index 96%
rename from lib/codeqa/metrics/winnowing.ex
rename to lib/codeqa/metrics/file/winnowing.ex
index 9c8961ca..d725a388 100644
--- a/lib/codeqa/metrics/winnowing.ex
+++ b/lib/codeqa/metrics/file/winnowing.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Winnowing do
+defmodule CodeQA.Metrics.File.Winnowing do
   @moduledoc """
   Generates structural fingerprints using k-grams.
 
diff --git a/lib/codeqa/metrics/zipf.ex b/lib/codeqa/metrics/file/zipf.ex
similarity index 86%
rename from lib/codeqa/metrics/zipf.ex
rename to lib/codeqa/metrics/file/zipf.ex
index 4948c3d9..b03a07c5 100644
--- a/lib/codeqa/metrics/zipf.ex
+++ b/lib/codeqa/metrics/file/zipf.ex
@@ -1,4 +1,4 @@
-defmodule CodeQA.Metrics.Zipf do
+defmodule CodeQA.Metrics.File.Zipf do
   @moduledoc """
   Fits Zipf's law to the token frequency distribution.
 
@@ -9,21 +9,24 @@ defmodule CodeQA.Metrics.Zipf do
   See [Zipf's law](https://en.wikipedia.org/wiki/Zipf%27s_law).
   """
 
-  @behaviour CodeQA.Metrics.FileMetric
+  @behaviour CodeQA.Metrics.File.FileMetric
 
   @impl true
   def name, do: "zipf"
 
+  @impl true
+  def keys, do: ["exponent", "r_squared", "vocab_size", "total_tokens"]
+
   @spec analyze(map()) :: map()
   @impl true
-  def analyze(%{tokens: tokens, token_counts: _token_counts}) when tuple_size(tokens) == 0 do
+  def analyze(%{tokens: [], token_counts: _token_counts}) do
     %{"exponent" => 0.0, "r_squared" => 0.0, "vocab_size" => 0, "total_tokens" => 0}
   end
 
   def analyze(%{tokens: tokens, token_counts: token_counts}) do
     frequencies = token_counts |> Map.values() |> Enum.sort(:desc)
     vocab_size = length(frequencies)
-    total_tokens = tuple_size(tokens)
+    total_tokens = length(tokens)
 
     if vocab_size < 3 do
       %{
diff --git a/lib/codeqa/metrics/file_metric.ex b/lib/codeqa/metrics/file_metric.ex
deleted file mode 100644
index 75a6f61b..00000000
--- a/lib/codeqa/metrics/file_metric.ex
+++ /dev/null
@@ -1,37 +0,0 @@
-defmodule CodeQA.Metrics.FileMetric do
-  @moduledoc """
-  Behaviour for metrics that analyze a single source file.
-
-  Implementations receive a `CodeQA.Pipeline.FileContext` struct containing
-  pre-parsed data (tokens, identifiers, lines, etc.) and return a map of
-  metric key-value pairs. On error, return an empty map `%{}` rather than
-  raising.
-
-  ## Minimal implementation
-
-      defmodule MyMetric do
-        @behaviour CodeQA.Metrics.FileMetric
-
-        @impl true
-        def name, do: "my_metric"
-
-        @impl true
-        def analyze(ctx) do
-          %{"value" => compute(ctx)}
-        end
-      end
-
-  See [software metrics](https://en.wikipedia.org/wiki/Software_metric).
-  """
-
-  @callback name() :: String.t()
-  @callback analyze(CodeQA.Pipeline.FileContext.t()) :: map()
-
-  @doc "Human-readable description of what this metric measures."
-  @callback description() :: String.t()
-
-  @doc "Whether this metric is enabled. Defaults to true when not implemented."
-  @callback enabled?() :: boolean()
-
-  @optional_callbacks [description: 0, enabled?: 0]
-end
diff --git a/lib/codeqa/metrics/post_processing/menzerath.ex b/lib/codeqa/metrics/post_processing/menzerath.ex
new file mode 100644
index 00000000..4b5b10cf
--- /dev/null
+++ b/lib/codeqa/metrics/post_processing/menzerath.ex
@@ -0,0 +1,282 @@
+defmodule CodeQA.Metrics.PostProcessing.Menzerath do
+  @moduledoc """
+  Measures structural hierarchy conformance using Menzerath's law.
+
+  ## Block-level score
+
+  For each parsed block in a file, computes:
+
+      ratio = block.line_count / parent.line_count
+
+  Root blocks use the file's line count as parent. Ratio close to 1.0 means the block
+  dominates its parent (poor decomposition). Low ratio means the block is small relative
+  to its parent (good decomposition).
+
+  For internal nodes that have children, also computes `avg_child_ratio` — the mean ratio
+  of direct children. High `avg_child_ratio` means this node failed to decompose its
+  children into small enough pieces.
+
+  ## Codebase-level score
+
+  Collects `{function_count, avg_function_lines}` pairs from all files and computes:
+  - Pearson correlation (negative = law holds across the codebase)
+  - Power-law exponent `b` from `y = a · x^b` fit on log-log scale
+  - R² of the fit
+  """
+
+  @behaviour CodeQA.Metrics.PostProcessing.PostProcessingMetric
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Languages.Unknown
+
+  @violation_threshold 0.6
+
+  @impl true
+  def name, do: "menzerath"
+
+  @impl true
+  def analyze(pipeline_result, files_map, _opts) do
+    file_scores =
+      Map.new(files_map, fn {path, content} ->
+        {path, %{"menzerath" => score_file(content)}}
+      end)
+
+    codebase_score = compute_codebase_score(pipeline_result)
+
+    %{
+      "files" => file_scores,
+      "codebase" => %{"menzerath" => codebase_score}
+    }
+  end
+
+  # --- file-level scoring ---
+
+  defp score_file("") do
+    %{
+      "blocks" => [],
+      "mean_ratio" => 0.0,
+      "max_ratio" => 0.0,
+      "violation_count" => 0,
+      "insight" => "Empty file."
+    }
+  end
+
+  defp score_file(content) do
+    file_lines = content |> String.split("\n") |> length()
+    root_tokens = TokenNormalizer.normalize_structural(content)
+    top_nodes = Parser.detect_blocks(root_tokens, Unknown)
+
+    blocks = Enum.map(top_nodes, &score_node(&1, file_lines))
+    all_ratios = collect_ratios(blocks)
+    n = length(all_ratios)
+
+    mean_ratio = if(n == 0, do: 0.0, else: round4(Enum.sum(all_ratios) / n))
+    max_ratio = if(n == 0, do: 0.0, else: round4(Enum.max(all_ratios)))
+    violation_count = Enum.count(all_ratios, &(&1 >= @violation_threshold))
+
+    %{
+      "blocks" => blocks,
+      "mean_ratio" => mean_ratio,
+      "max_ratio" => max_ratio,
+      "violation_count" => violation_count,
+      "insight" => file_insight(mean_ratio, max_ratio, violation_count, length(top_nodes))
+    }
+  end
+
+  defp file_insight(_mean, _max, _violations, 0),
+    do: "No blocks detected."
+
+  defp file_insight(_mean, _max, 0, _block_count),
+    do: "Well decomposed — all blocks are small relative to their parents."
+
+  defp file_insight(_mean, max_ratio, violations, _block_count) when max_ratio >= 0.9,
+    do:
+      "#{violations} block(s) nearly span the entire file — the file is not decomposed into meaningful pieces."
+
+  defp file_insight(mean_ratio, _max, violations, _block_count) when mean_ratio >= 0.5,
+    do:
+      "#{violations} violation(s); blocks are large on average (mean ratio #{mean_ratio}) — the file likely needs to be split or its blocks extracted."
+
+  defp file_insight(_mean, _max, violations, _block_count),
+    do:
+      "#{violations} block(s) dominate their parent context — consider extracting those into separate functions or modules."
+
+  defp score_node(node, parent_lines) do
+    ratio = if parent_lines > 0, do: round4(node.line_count / parent_lines), else: 0.0
+
+    children = Enum.map(node.children, &score_node(&1, node.line_count))
+
+    base = %{
+      "start_line" => node.start_line,
+      "end_line" => node.end_line,
+      "line_count" => node.line_count,
+      "parent_lines" => parent_lines,
+      "ratio" => ratio,
+      "insight" => block_insight(ratio, []),
+      "children" => children
+    }
+
+    case children do
+      [] ->
+        base
+
+      kids ->
+        child_ratios = Enum.map(kids, & &1["ratio"])
+        avg = round4(Enum.sum(child_ratios) / length(child_ratios))
+
+        base
+        |> Map.put("avg_child_ratio", avg)
+        |> Map.put("insight", block_insight(ratio, avg_child_ratio: avg))
+    end
+  end
+
+  defp block_insight(ratio, opts) do
+    avg_child_ratio = Keyword.get(opts, :avg_child_ratio)
+    large_children? = is_number(avg_child_ratio) and avg_child_ratio >= @violation_threshold
+
+    cond do
+      ratio >= 0.9 ->
+        "Block spans nearly the entire parent — no meaningful decomposition at this level."
+
+      ratio >= @violation_threshold and large_children? ->
+        "Block is large relative to its parent and its own children are also large — nested decomposition failure."
+
+      ratio >= @violation_threshold ->
+        "Block is large relative to its parent — consider splitting or extracting."
+
+      large_children? ->
+        "Block is reasonably sized but its children are too large — this block should be broken down further."
+
+      true ->
+        nil
+    end
+  end
+
+  # Flattens the block tree into a pre-order list of "ratio" values.
+  defp collect_ratios(blocks) do
+    walk = fn block -> [block["ratio"] | collect_ratios(block["children"])] end
+    Enum.flat_map(blocks, walk)
+  end
+
+  # --- codebase-level scoring ---
+
+  defp compute_codebase_score(pipeline_result) do
+    pairs =
+      pipeline_result
+      |> Map.get("files", %{})
+      |> Enum.flat_map(fn {_path, file_data} ->
+        fm = get_in(file_data, ["metrics", "function_metrics"]) || %{}
+        count = fm["function_count"]
+        avg = fm["avg_function_lines"]
+
+        if is_number(count) and is_number(avg) and count > 0 do
+          [{count * 1.0, avg * 1.0}]
+        else
+          []
+        end
+      end)
+
+    n = length(pairs)
+
+    if n < 3 do
+      %{
+        "correlation" => nil,
+        "exponent" => nil,
+        "r_squared" => nil,
+        "sample_size" => n,
+        "insight" =>
+          "Not enough files with function data to compute Menzerath conformance (need ≥ 3, got #{n})."
+      }
+    else
+      xs = Enum.map(pairs, &elem(&1, 0))
+      ys = Enum.map(pairs, &elem(&1, 1))
+      correlation = round4(pearson(xs, ys))
+      {exponent, r_squared} = fit_power_law(xs, ys)
+
+      %{
+        "correlation" => correlation,
+        "exponent" => if(exponent, do: round4(exponent), else: nil),
+        "r_squared" => if(r_squared, do: round4(r_squared), else: nil),
+        "sample_size" => n,
+        "insight" => codebase_insight(correlation, r_squared)
+      }
+    end
+  end
+
+  defp codebase_insight(correlation, r_squared) do
+    fit_quality = if r_squared && r_squared >= 0.5, do: " (strong fit, R²=#{r_squared})", else: ""
+
+    cond do
+      correlation <= -0.3 ->
+        "Menzerath's law holds#{fit_quality} — larger files tend to have shorter functions, indicating healthy decomposition."
+
+      correlation >= 0.3 ->
+        "Menzerath's law violated#{fit_quality} — larger files have longer functions. Files are growing without being decomposed; consider splitting large files or extracting functions."
+
+      true ->
+        "Weak Menzerath signal (correlation #{correlation}) — no clear relationship between file size and function length. Decomposition patterns are inconsistent across the codebase."
+    end
+  end
+
+  # Pearson correlation of two equal-length numeric lists; 0.0 when either is constant.
+  defp pearson(xs, ys) do
+    n = length(xs)
+    sum_x = Enum.sum(xs)
+    sum_y = Enum.sum(ys)
+    sum_xy = Enum.zip(xs, ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end)
+    sum_x2 = Enum.reduce(xs, 0.0, fn x, acc -> acc + x * x end)
+    sum_y2 = Enum.reduce(ys, 0.0, fn y, acc -> acc + y * y end)
+    num = n * sum_xy - sum_x * sum_y
+    # Rounding can turn a zero variance term slightly negative; clamp so sqrt cannot raise.
+    den = :math.sqrt(max(n * sum_x2 - sum_x * sum_x, 0.0) * max(n * sum_y2 - sum_y * sum_y, 0.0))
+    if den == 0.0, do: 0.0, else: min(1.0, max(-1.0, num / den))
+  end
+
+  defp fit_power_law(xs, ys) do
+    # Linearize: log(y) = log(a) + b * log(x), fit via OLS on log-log scale
+    pairs =
+      Enum.zip(xs, ys)
+      |> Enum.filter(fn {x, y} -> x > 0 and y > 0 end)
+
+    if length(pairs) < 2 do
+      {nil, nil}
+    else
+      log_xs = Enum.map(pairs, fn {x, _} -> :math.log(x) end)
+      log_ys = Enum.map(pairs, fn {_, y} -> :math.log(y) end)
+
+      n = length(pairs)
+      sum_lx = Enum.sum(log_xs)
+      sum_ly = Enum.sum(log_ys)
+      sum_lx2 = Enum.reduce(log_xs, 0.0, fn x, acc -> acc + x * x end)
+      sum_lxly = Enum.zip(log_xs, log_ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end)
+
+      denom = n * sum_lx2 - sum_lx * sum_lx
+
+      if denom == 0.0 do
+        {nil, nil}
+      else
+        fit_power_law_coefficients(log_xs, log_ys, sum_lx, sum_ly, sum_lxly, n, denom)
+      end
+    end
+  end
+
+  # Ordinary least squares on the log-log points: returns {slope b, R²}.
+  defp fit_power_law_coefficients(log_xs, log_ys, sum_lx, sum_ly, sum_lxly, n, denom) do
+    slope = (n * sum_lxly - sum_lx * sum_ly) / denom
+    intercept = (sum_ly - slope * sum_lx) / n
+    mean_ly = sum_ly / n
+
+    {ss_res, ss_tot} =
+      log_xs
+      |> Enum.zip(log_ys)
+      |> Enum.reduce({0.0, 0.0}, fn {lx, ly}, {res, tot} ->
+        {res + (ly - (intercept + slope * lx)) ** 2, tot + (ly - mean_ly) ** 2}
+      end)
+
+    r_squared = if ss_tot == 0.0, do: 0.0, else: 1.0 - ss_res / ss_tot
+    {slope, r_squared}
+  end
+
+  defp round4(v), do: Float.round(v * 1.0, 4)
+end
diff --git a/lib/codeqa/metrics/post_processing/post_processing_metric.ex b/lib/codeqa/metrics/post_processing/post_processing_metric.ex
new file mode 100644
index 00000000..c4b7bc05
--- /dev/null
+++ b/lib/codeqa/metrics/post_processing/post_processing_metric.ex
@@ -0,0 +1,21 @@
+defmodule CodeQA.Metrics.PostProcessing.PostProcessingMetric do
+  @moduledoc """
+  Behaviour for post-processing metrics that derive values from the full pipeline result.
+
+  Post-processing metrics run after both file and codebase metrics complete. They receive
+  the full result tree, the raw files map and a keyword list of options, and return a
+  partial result map that is deep-merged into the pipeline result.
+  """
+
+  @doc "Unique name used as the key in the output."
+  @callback name() :: String.t()
+
+  @doc """
+  Analyze the pipeline result and return a partial result map to be deep-merged.
+
+  The returned map should use the same top-level structure as the pipeline result:
+  `%{"files" => %{path => additions}, "codebase" => additions}`.
+  Only keys present in the return value are merged; absent keys are left unchanged.
+  """
+  @callback analyze(pipeline_result :: map(), files_map :: map(), opts :: keyword()) :: map()
+end
diff --git a/lib/codeqa/metrics/token_normalizer.ex b/lib/codeqa/metrics/token_normalizer.ex
deleted file mode 100644
index 6967e6a4..00000000
--- a/lib/codeqa/metrics/token_normalizer.ex
+++ /dev/null
@@ -1,45 +0,0 @@
-defmodule CodeQA.Metrics.TokenNormalizer do
-  @moduledoc """
-  Abstracts raw source code into language-agnostic structural tokens.
-
-  See [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis).
-  """
-
-  # Note for future: This module can be extended with a second parameter
-  # normalize(code, language \\ :agnostic) to load specific regex dictionaries.
-
-  @doc """
-  Normalizes source code into a list of structural tokens.
-
-  Replaces string literals with `<STR>`, numeric literals with `<NUM>`,
-  and identifiers/keywords with `<ID>`. Remaining punctuation is split into
-  individual tokens, with common multi-character operators kept together.
-
-  ## Examples
-
-      iex> CodeQA.Metrics.TokenNormalizer.normalize("x = 42")
-      ["<ID>", "=", "<NUM>"]
-
-  """
-  @spec normalize(String.t()) :: [String.t()]
-  def normalize(code) do
-    code
-    # 1. Strings (single and double quotes, handling escaped quotes)
-    |> String.replace(~r/"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/, " <STR> ")
-    # 2. Numbers (integers and floats)
-    |> String.replace(~r/\b\d+(\.\d+)?\b/, " <NUM> ")
-    # 3. Identifiers/Keywords (negative lookbehind/ahead to avoid clobbering <STR>/<NUM>/<ID> tags)
-    |> String.replace(~r/(?<!<)\b[a-zA-Z_]\w*\b(?!>)/, " <ID> ")
-    # 4. Split by whitespace to extract the tokens and remaining structural punctuation
-    |> String.split(~r/\s+/, trim: true)
-    # 5. Further split punctuation, keeping common multi-char operators together
-    |> Enum.flat_map(&split_punctuation/1)
-  end
-
-  defp split_punctuation(token) when token in ["<STR>", "<NUM>", "<ID>"], do: [token]
-
-  defp split_punctuation(text) do
-    Regex.scan(~r/->|=>|<>|\|>|::|\.\.\.|<-|!=|==|<=|>=|\+\+|--|&&|\|\||[^\w\s]/, text)
-    |> List.flatten()
-  end
-end
diff --git a/lib/codeqa/pipeline.ex b/lib/codeqa/pipeline.ex
deleted file mode 100644
index bcd256cb..00000000
--- a/lib/codeqa/pipeline.ex
+++ /dev/null
@@ -1,109 +0,0 @@
-defmodule CodeQA.Pipeline do
-  @moduledoc "Pre-computed shared context for file-level metrics."
-
-  defmodule FileContext do
-    @moduledoc "Immutable pre-computed data shared across all file metrics."
-    @enforce_keys [
-      :content,
-      :tokens,
-      :token_counts,
-      :words,
-      :identifiers,
-      :lines,
-      :encoded,
-      :byte_count,
-      :line_count
-    ]
-    defstruct @enforce_keys
-
-    @type t :: %__MODULE__{
-      content: String.t(),
-      tokens: tuple(),
-      token_counts: map(),
-      words: tuple(),
-      identifiers: tuple(),
-      lines: tuple(),
-      encoded: String.t(),
-      byte_count: non_neg_integer(),
-      line_count: non_neg_integer()
-    }
-  end
-
-  @word_re ~r/\b[a-zA-Z_]\w*\b/u
-
-  # Reserved words and keywords for:
-  # Python, Ruby, JavaScript, Elixir, C#,
-  # Java, C++, Go, Rust, PHP, Swift, Shell, Kotlin
-  @keywords MapSet.new(~w[
-    if else elif elsif unless
-    for foreach while until do
-    return break continue yield pass
-    try except finally rescue ensure after catch throw raise begin end throws
-    case when switch cond match default fallthrough
-    with as and or not in is
-    import from require use using alias namespace package
-    class def defp defmodule defmacro defmacrop defprotocol defimpl defguard defdelegate
-    module interface struct enum delegate event protocol extension
-    function fn func fun new delete typeof instanceof void
-    var let val const static public private protected internal
-    sealed override virtual abstract final readonly open
-    async await receive suspend
-    self super this Self
-    extends implements
-    null undefined nil None nullptr
-    true false True False
-    bool int float double long short byte char boolean string decimal object dynamic
-    ref out params get set value inout
-    lambda del global nonlocal assert
-    type typealias
-    synchronized volatile transient native strictfp
-    auto register extern signed unsigned typedef sizeof union
-    template typename operator inline friend explicit mutable constexpr decltype noexcept
-    func chan go select defer range
-    mut impl trait pub mod crate dyn unsafe loop where move
-    echo print array list mixed never
-    actor init deinit lazy open some any rethrows willSet didSet
-    then fi done esac local export source unset declare
-    fun val object data companion reified infix vararg expect actual
-  ])
-
-  @spec build_file_context(String.t(), keyword()) :: FileContext.t()
-  def build_file_context(content, opts \\ []) when is_binary(content) do
-    stopwords = Keyword.get(opts, :word_stopwords, MapSet.new())
-
-    tokens = content |> String.split() |> List.to_tuple()
-    token_list = Tuple.to_list(tokens)
-    token_counts = Enum.frequencies(token_list)
-
-    words =
-      Regex.scan(@word_re, content)
-      |> List.flatten()
-      |> Enum.reject(&MapSet.member?(stopwords, &1))
-      |> List.to_tuple()
-
-    word_list = Tuple.to_list(words)
-    identifiers = word_list |> Enum.reject(&MapSet.member?(@keywords, &1)) |> List.to_tuple()
-    lines = content |> String.split("\n") |> trim_trailing_empty() |> List.to_tuple()
-    encoded = content
-
-    %FileContext{
-      content: content,
-      tokens: tokens,
-      token_counts: token_counts,
-      words: words,
-      identifiers: identifiers,
-      lines: lines,
-      encoded: encoded,
-      byte_count: byte_size(content),
-      line_count: tuple_size(lines)
-    }
-  end
-
-  defp trim_trailing_empty(lines) do
-    # Match Python's str.splitlines() behavior
-    case List.last(lines) do
-      "" -> List.delete_at(lines, -1)
-      _ -> lines
-    end
-  end
-end
diff --git a/lib/codeqa/stopwords.ex b/lib/codeqa/stopwords.ex
deleted file mode 100644
index bd33374f..00000000
--- a/lib/codeqa/stopwords.ex
+++ /dev/null
@@ -1,63 +0,0 @@
-defmodule CodeQA.Stopwords do
-  @moduledoc "Finds highly frequent items across a codebase to act as stopwords."
-
-  @doc """
-  Finds items that appear in more than the specified threshold of files.
-  `extractor` is a function that takes a file's content and returns an Enumerable of items.
-  """
-  def find_stopwords(files, extractor, opts \\ []) do
-    threshold_ratio = Keyword.get(opts, :stopwords_threshold, 0.15)
-    total_docs = map_size(files)
-    min_docs = max(1, round(total_docs * threshold_ratio))
-    workers = Keyword.get(opts, :workers, System.schedulers_online())
-    has_progress = Keyword.get(opts, :progress, false)
-    label = Keyword.get(opts, :progress_label, "")
-
-    counter = :counters.new(1, [:atomics])
-    start_time = System.monotonic_time(:millisecond)
-
-    files
-    |> Task.async_stream(
-      fn {_path, content} ->
-        res =
-          content
-          |> extractor.()
-          |> MapSet.new()
-
-        if has_progress do
-          :counters.add(counter, 1, 1)
-          completed = :counters.get(counter, 1)
-          print_progress(completed, total_docs, start_time, label)
-        end
-
-        res
-      end, max_concurrency: workers, timeout: :infinity)
-    |> Enum.reduce(%{}, fn {:ok, unique_items_in_file}, doc_freqs ->
-      Enum.reduce(unique_items_in_file, doc_freqs, fn item, acc ->
-        Map.update(acc, item, 1, &(&1 + 1))
-      end)
-    end)
-    |> Enum.filter(fn {_item, count} -> count >= min_docs end)
-    |> Enum.map(fn {item, _count} -> item end)
-    |> MapSet.new()
-  end
-
-  defp print_progress(completed, total, start_time, label) do
-    now = System.monotonic_time(:millisecond)
-    elapsed = max(now - start_time, 1)
-    avg_time = elapsed / completed
-    eta_ms = round((total - completed) * avg_time)
-
-    output =
-      CodeQA.CLI.UI.progress_bar(completed, total,
-        eta: CodeQA.CLI.UI.format_eta(eta_ms),
-        label: label
-      )
-
-    IO.write(:stderr, "\r" <> output)
-
-    if completed == total do
-      IO.puts(:stderr, "")
-    end
-  end
-end
diff --git a/lib/codeqa/summarizer.ex b/lib/codeqa/summarizer.ex
deleted file mode 100644
index d6d9c924..00000000
--- a/lib/codeqa/summarizer.ex
+++ /dev/null
@@ -1,126 +0,0 @@
-defmodule CodeQA.Summarizer do
-  @moduledoc false
-
-  @codebase_direction_metrics [
-    {"complexity", "halstead", "mean_volume"},
-    {"readability", "readability", "mean_flesch_adapted"},
-    {"entropy", "entropy", "mean_char_entropy"},
-    {"redundancy", "compression", "mean_redundancy"}
-  ]
-
-  @file_direction_metrics [
-    {"complexity", "halstead", "volume"},
-    {"readability", "readability", "flesch_adapted"},
-    {"entropy", "entropy", "char_entropy"},
-    {"redundancy", "compression", "redundancy"}
-  ]
-
-  @threshold_stable 0.05
-  @threshold_slight 0.20
-
-  def summarize_codebase(comparison) do
-    files = Map.get(comparison, "files", %{})
-    codebase = Map.get(comparison, "codebase", %{})
-
-    file_counts = count_statuses(files)
-    directions = compute_codebase_directions(codebase)
-    gist = build_codebase_gist(file_counts, directions)
-
-    %{"gist" => gist, "file_counts" => file_counts, "directions" => directions}
-  end
-
-  def summarize_file(_path, %{"status" => "added"} = data) do
-    lines = get_in(data, ["head", "lines"]) || 0
-    %{"gist" => "new file (#{lines} lines)", "status" => "added", "lines" => lines}
-  end
-
-  def summarize_file(_path, %{"status" => "deleted"} = data) do
-    lines = get_in(data, ["base", "lines"]) || 0
-    %{"gist" => "removed (#{lines} lines)", "status" => "deleted", "lines" => lines}
-  end
-
-  def summarize_file(_path, %{"status" => "modified"} = data) do
-    directions = compute_file_directions(data)
-    gist = build_file_gist(directions)
-    %{"gist" => gist, "status" => "modified", "directions" => directions}
-  end
-
-  defp count_statuses(files) do
-    Enum.reduce(files, %{"added" => 0, "modified" => 0, "deleted" => 0}, fn {_path, data}, acc ->
-      status = Map.get(data, "status", "modified")
-      Map.update!(acc, status, &(&1 + 1))
-    end)
-  end
-
-  defp compute_codebase_directions(codebase) do
-    base_agg = get_in(codebase, ["base", "aggregate"]) || %{}
-    delta_agg = get_in(codebase, ["delta", "aggregate"]) || %{}
-
-    Map.new(@codebase_direction_metrics, fn {dir_key, metric, agg_key} ->
-      base_val = get_in(base_agg, [metric, agg_key])
-      delta_val = get_in(delta_agg, [metric, agg_key])
-      {dir_key, classify_change(base_val, delta_val)}
-    end)
-  end
-
-  defp compute_file_directions(file_data) do
-    base_metrics = get_in(file_data, ["base", "metrics"]) || %{}
-    delta_metrics = get_in(file_data, ["delta", "metrics"]) || %{}
-
-    Map.new(@file_direction_metrics, fn {dir_key, metric, key} ->
-      base_val = get_in(base_metrics, [metric, key])
-      delta_val = get_in(delta_metrics, [metric, key])
-      {dir_key, classify_change(base_val, delta_val)}
-    end)
-  end
-
-  defp classify_change(nil, _), do: "stable"
-  defp classify_change(_, nil), do: "stable"
-  defp classify_change(0, _), do: "stable"
-  defp classify_change(+0.0, _), do: "stable"
-
-  defp classify_change(base_val, delta_val) do
-    ratio = abs(delta_val) / abs(base_val)
-
-    cond do
-      ratio < @threshold_stable -> "stable"
-      ratio < @threshold_slight and delta_val > 0 -> "increased slightly"
-      ratio < @threshold_slight -> "decreased slightly"
-      delta_val > 0 -> "increased"
-      true -> "decreased"
-    end
-  end
-
-  defp build_file_gist(directions) do
-    parts =
-      directions
-      |> Enum.reject(fn {_, d} -> d == "stable" end)
-      |> Enum.map(fn {k, d} -> "#{k} #{d}" end)
-
-    if parts == [], do: "all metrics stable", else: Enum.join(parts, ", ")
-  end
-
-  defp build_codebase_gist(file_counts, directions) do
-    file_parts =
-      [
-        {"added", file_counts["added"]},
-        {"modified", file_counts["modified"]},
-        {"deleted", file_counts["deleted"]}
-      ]
-      |> Enum.filter(fn {_, c} -> c > 0 end)
-      |> Enum.map(fn {s, c} -> "#{c} #{s}" end)
-
-    file_summary = if file_parts == [], do: "no changes", else: Enum.join(file_parts, ", ")
-
-    dir_parts =
-      directions
-      |> Enum.reject(fn {_, d} -> d == "stable" end)
-      |> Enum.map(fn {k, d} -> "#{k} #{d}" end)
-
-    if dir_parts == [] do
-      "#{file_summary} — all metrics stable"
-    else
-      "#{file_summary} — #{Enum.join(dir_parts, ", ")}"
-    end
-  end
-end
diff --git a/lib/codeqa/telemetry.ex b/lib/codeqa/telemetry.ex
deleted file mode 100644
index 3f5d22d0..00000000
--- a/lib/codeqa/telemetry.ex
+++ /dev/null
@@ -1,68 +0,0 @@
-defmodule CodeQA.Telemetry do
-  @moduledoc "Simple concurrent telemetry tracker using ETS."
-
-  def setup do
-    if :ets.info(:codeqa_telemetry) == :undefined do
-      :ets.new(:codeqa_telemetry, [:named_table, :public, :set, write_concurrency: true])
-    end
-
-    :ok
-  end
-
-  def time(metric_name, fun) do
-    if :ets.info(:codeqa_telemetry) != :undefined do
-      start_time = System.monotonic_time(:microsecond)
-      result = fun.()
-      end_time = System.monotonic_time(:microsecond)
-      duration = end_time - start_time
-
-      :ets.update_counter(:codeqa_telemetry, metric_name, {2, duration}, {metric_name, 0})
-
-      :ets.update_counter(
-        :codeqa_telemetry,
-        "#{metric_name}_count",
-        {2, 1},
-        {"#{metric_name}_count", 0}
-      )
-
-      result
-    else
-      fun.()
-    end
-  end
-
-  defp format_metric_line({name, total_time_us}) do
-    count =
-      case :ets.lookup(:codeqa_telemetry, "#{name}_count") do
-        [{_, c}] -> c
-        _ -> 1
-      end
-
-    total_ms = Float.round(total_time_us / 1000, 2)
-    avg_ms = Float.round(total_ms / count, 2)
-
-    String.pad_trailing(to_string(name), 30) <>
-      " | Total: #{String.pad_trailing(to_string(total_ms) <> "ms", 12)}" <>
-      " | Count: #{String.pad_trailing(to_string(count), 6)}" <>
-      " | Avg: #{avg_ms}ms"
-  end
-
-  def print_report do
-    if :ets.info(:codeqa_telemetry) != :undefined do
-      IO.puts(:stderr, "
---- Telemetry Report (Wall-clock times) ---")
-      metrics = :ets.tab2list(:codeqa_telemetry)
-
-      # Group totals and counts
-      totals =
-        Enum.filter(metrics, fn {k, _} -> not String.ends_with?(to_string(k), "_count") end)
-
-      totals
-      |> Enum.sort_by(fn {_, time} -> time end, :desc)
-      |> Enum.each(&IO.puts(:stderr, format_metric_line(&1)))
-
-      IO.puts(:stderr, "-------------------------------------------
-")
-    end
-  end
-end
diff --git a/lib/mix/tasks/codeqa/sample_report.ex b/lib/mix/tasks/codeqa/sample_report.ex
new file mode 100644
index 00000000..1bc5cf0d
--- /dev/null
+++ b/lib/mix/tasks/codeqa/sample_report.ex
@@ -0,0 +1,231 @@
+defmodule Mix.Tasks.Codeqa.SampleReport do
+  use Mix.Task
+
+  @shortdoc "Evaluates combined metric formulas against good/bad sample code"
+
+  @moduledoc """
+  Runs combined metric formulas against sample files and prints a separation table.
+
+      mix codeqa.sample_report
+      mix codeqa.sample_report --category variable_naming
+      mix codeqa.sample_report --verbose
+      mix codeqa.sample_report --output results.json
+      mix codeqa.sample_report --report metric_report.json
+      mix codeqa.sample_report --apply-scalars
+      mix codeqa.sample_report --apply-languages
+      mix codeqa.sample_report --apply-languages --category variable_naming
+      mix codeqa.sample_report --file path/to/file.ex
+      mix codeqa.sample_report --file path/to/dir --top 5
+
+  A ratio ≥ 2x means the formula meaningfully separates good from bad code.
+  A ratio < 1.5x is flagged as weak; < 1.0x is marked ✗ (wrong direction).
+
+  `--verbose` prints the per-metric detail rows beneath every behavior row.
+
+  `--output` writes the sample results to the given path as pretty-printed JSON.
+
+  `--report` writes the metric report to the given path as pretty-printed JSON.
+
+  `--apply-scalars` rewrites the YAML config files with suggested scalars derived
+  from the sample data. Metrics with ratio in the deadzone (0.995–1.005) are
+  excluded. All non-deadzoned metrics are written, including ones not previously
+  in the YAML.
+
+  `--apply-languages` applies language coverage to the YAML config files and prints
+  how many behaviors per category have language coverage.
+
+  `--file` analyzes a single file or directory and prints all combined metric
+  behavior scores, grouped by category, sorted worst-first. `--top` sets how many
+  likely issues are listed first (default: 15).
+  """
+
+  @switches [
+    category: :string,
+    verbose: :boolean,
+    output: :string,
+    report: :string,
+    apply_scalars: :boolean,
+    apply_languages: :boolean,
+    file: :string,
+    top: :integer
+  ]
+
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+
+  @impl Mix.Task
+  def run(args) do
+    Mix.Task.run("app.start")
+    {opts, _, _} = OptionParser.parse(args, switches: @switches)
+
+    results = SampleRunner.run(opts)
+
+    results
+    |> Enum.group_by(& &1.category)
+    |> Enum.each(&print_category(&1, opts))
+
+    if path = opts[:output] do
+      File.write!(path, Jason.encode!(results, pretty: true))
+      IO.puts("\nResults written to #{path}")
+    end
+
+    if path = opts[:report] do
+      report = SampleRunner.build_metric_report(opts)
+      File.write!(path, Jason.encode!(report, pretty: true))
+      IO.puts("\nMetric report written to #{path}")
+    end
+
+    if opts[:apply_scalars] do
+      stats = SampleRunner.apply_scalars(opts)
+      IO.puts("\nApplied scalars to YAML configs:")
+      Enum.each(stats, &print_scalar_stats/1)
+    end
+
+    if opts[:apply_languages] do
+      stats = SampleRunner.apply_languages(opts)
+      IO.puts("\nApplied language coverage to YAML configs:")
+
+      Enum.each(stats, fn %{category: cat, behaviors_with_languages: n} ->
+        IO.puts("  #{cat}: #{n} behaviors with language coverage")
+      end)
+    end
+
+    if path = opts[:file] do
+      print_file_scores(path, opts)
+    end
+  end
+
+  # Prints the table header for one category followed by one row per behavior.
+  defp print_category({category, results}, opts) do
+    IO.puts("\n#{category}")
+    IO.puts(String.duplicate("-", 75))
+
+    IO.puts(
+      "  " <>
+        pad("behavior", 35) <>
+        pad("bad", 9) <>
+        pad("good", 9) <>
+        pad("ratio", 13) <>
+        "ok?"
+    )
+
+    Enum.each(results, &print_row(&1, opts))
+  end
+
+  # Prints one behavior row; with --verbose the per-metric detail rows follow.
+  defp print_row(r, opts) do
+    # "(weak)" is only meaningful when the direction is right; ✗ rows get no suffix.
+    weak_suffix = if r.direction_ok and r.ratio < 1.5, do: " (weak)", else: ""
+    ok = if r.direction_ok, do: "✓", else: "✗"
+
+    IO.puts(
+      "  " <>
+        pad(r.behavior, 35) <>
+        pad(fmt(r.bad_score), 9) <>
+        pad(fmt(r.good_score), 9) <>
+        pad("#{r.ratio}x" <> weak_suffix, 13) <>
+        ok
+    )
+
+    if opts[:verbose] do
+      Enum.each(r.metric_detail, &print_metric_detail/1)
+    end
+  end
+
+  # Prints one metric's scalar (explicitly signed), bad/good values and ratio.
+  defp print_metric_detail(m) do
+    scalar_str = if m.scalar >= 0, do: "+#{m.scalar}", else: "#{m.scalar}"
+
+    IO.puts(
+      "      " <>
+        pad("#{m.group}.#{m.key}", 45) <>
+        pad(scalar_str, 7) <>
+        pad(fmt(m.bad), 8) <>
+        pad(fmt(m.good), 8) <>
+        "#{m.ratio}x"
+    )
+  end
+
+  # Resolves `path` to a files map and reports on it. A missing path is reported
+  # once as "Path not found" rather than also as "No supported files found".
+  defp print_file_scores(path, opts) do
+    expanded = Path.expand(path)
+
+    cond do
+      File.dir?(expanded) ->
+        expanded |> Collector.collect_files() |> print_scores_for(path, opts)
+
+      File.regular?(expanded) ->
+        print_scores_for(%{Path.basename(expanded) => File.read!(expanded)}, path, opts)
+
+      true ->
+        IO.puts("\nPath not found: #{path}")
+    end
+  end
+
+  # Nothing to analyze: the collected files map is empty.
+  defp print_scores_for(files, path, _opts) when map_size(files) == 0 do
+    IO.puts("\nNo supported files found at: #{path}")
+  end
+
+  # Analyzes the files as one codebase, then prints the likeliest issues and the
+  # full per-category score breakdown.
+  defp print_scores_for(files, path, opts) do
+    IO.puts("\nAnalyzing #{map_size(files)} file(s) at: #{path}")
+
+    aggregate =
+      files
+      |> Analyzer.analyze_codebase()
+      |> get_in(["codebase", "aggregate"])
+
+    top_n = opts[:top] || 15
+    issues = SampleRunner.diagnose_aggregate(aggregate, top: top_n)
+    IO.puts("\nTop #{top_n} likely issues (by cosine similarity):")
+    IO.puts(String.duplicate("-", 75))
+    IO.puts("  " <> pad("behavior", 38) <> pad("cosine", 9) <> "score")
+    Enum.each(issues, &print_issue_row/1)
+
+    IO.puts("\nFull breakdown by category:")
+    combined = SampleRunner.score_aggregate(aggregate)
+    IO.puts("")
+    Enum.each(combined, &print_combined_category/1)
+  end
+
+  # Prints one diagnosed issue followed by its contributing metrics.
+  defp print_issue_row(%{category: cat, behavior: b, cosine: cos, score: s, top_metrics: metrics}) do
+    IO.puts("  " <> pad("#{cat}.#{b}", 38) <> pad(fmt(cos), 9) <> fmt(s))
+
+    Enum.each(metrics, fn %{metric: m, contribution: c} ->
+      IO.puts("      " <> pad(m, 44) <> fmt(c))
+    end)
+  end
+
+  # Prints a category's behaviors lowest score first, flagging negative scores with ⚠.
+  defp print_combined_category(%{name: name, behaviors: behaviors}) do
+    IO.puts(name)
+    IO.puts(String.duplicate("-", 60))
+
+    IO.puts("  " <> pad("behavior", 40) <> "score")
+
+    behaviors
+    |> Enum.sort_by(& &1.score)
+    |> Enum.each(fn %{behavior: b, score: s} ->
+      flag = if s < 0.0, do: "  ⚠", else: ""
+      IO.puts("  " <> pad(b, 40) <> fmt(s) <> flag)
+    end)
+
+    IO.puts("")
+  end
+
+  # Prints the per-category counters returned by SampleRunner.apply_scalars/1.
+  defp print_scalar_stats(%{category: cat, updated: u, deadzoned: d, skipped: s}) do
+    IO.puts("  #{pad(cat, 30)}  #{u} written  #{d} deadzoned  #{s} skipped (no samples)")
+  end
+
+  # Formats a number with four decimals; `/ 1` turns integers into floats first.
+  defp fmt(f), do: :erlang.float_to_binary(f / 1, decimals: 4)
+
+  # Right-pads the string form of `s` to `n` characters.
+  defp pad(s, n), do: String.pad_trailing(to_string(s), n)
+end
diff --git a/lib/mix/tasks/codeqa/signal_debug.ex b/lib/mix/tasks/codeqa/signal_debug.ex
new file mode 100644
index 00000000..3852dec5
--- /dev/null
+++ b/lib/mix/tasks/codeqa/signal_debug.ex
@@ -0,0 +1,209 @@
+defmodule Mix.Tasks.Codeqa.SignalDebug do
+  use Mix.Task
+
+  @shortdoc "Shows structural signal emissions when splitting a file into blocks"
+
+  @moduledoc """
+  Runs each structural signal over a file and prints its emissions step by step.
+
+      mix codeqa.signal_debug path/to/file.ex
+      mix codeqa.signal_debug path/to/file.py --signal keyword
+      mix codeqa.signal_debug path/to/file.ex --show-tokens
+
+  Options:
+    --signal <name>    Only show signals whose module name contains <name>,
+                       case-insensitively (e.g. keyword, blank, bracket)
+    --show-tokens      Print the full token list before signal output
+
+  The path must name a file; a directory is rejected with an error.
+  """
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.Language
+
+  alias CodeQA.AST.Signals.Structural.{
+    AccessModifierSignal,
+    BlankLineSignal,
+    BracketSignal,
+    BranchSplitSignal,
+    ColonIndentSignal,
+    CommentDividerSignal,
+    KeywordSignal,
+    SQLBlockSignal,
+    TripleQuoteSignal
+  }
+
+  @switches [signal: :string, show_tokens: :boolean]
+
+  @all_signals [
+    %TripleQuoteSignal{},
+    %BlankLineSignal{},
+    %KeywordSignal{},
+    %BranchSplitSignal{},
+    %AccessModifierSignal{},
+    %CommentDividerSignal{},
+    %SQLBlockSignal{},
+    %BracketSignal{},
+    %ColonIndentSignal{}
+  ]
+
+  @impl Mix.Task
+  def run(args) do
+    {opts, positional, _} = OptionParser.parse(args, strict: @switches)
+
+    path =
+      case positional do
+        [p | _] -> p
+        [] -> Mix.raise("Usage: mix codeqa.signal_debug <file> [--signal <name>] [--show-tokens]")
+      end
+
+    # File.exists?/1 is also true for directories, which File.read!/1 would then
+    # reject with a raw File.Error; fail early with a readable message instead.
+    cond do
+      File.dir?(path) -> Mix.raise("Expected a file but got a directory: #{path}")
+      not File.exists?(path) -> Mix.raise("File not found: #{path}")
+      true -> :ok
+    end
+
+    content = File.read!(path)
+    lang_mod = Language.detect(path)
+    tokens = TokenNormalizer.normalize_structural(content)
+    lines = String.split(content, "\n")
+
+    Mix.shell().info("File: #{path}")
+    Mix.shell().info("Language: #{lang_mod.name()}")
+    Mix.shell().info("Tokens: #{length(tokens)}")
+    Mix.shell().info("Lines: #{length(lines)}")
+    Mix.shell().info("")
+
+    if opts[:show_tokens] do
+      print_tokens(tokens)
+    end
+
+    signals = filter_signals(@all_signals, opts[:signal])
+
+    emissions_per_signal =
+      SignalStream.run(tokens, signals, lang_mod)
+
+    Enum.zip(signals, emissions_per_signal)
+    |> Enum.each(fn {signal, emissions} ->
+      print_signal_section(signal, emissions, tokens, lines)
+    end)
+  end
+
+  # Without a --signal filter every signal is run.
+  defp filter_signals(signals, nil), do: signals
+
+  # Keeps the signals whose last module-name segment contains the filter,
+  # compared case-insensitively.
+  defp filter_signals(signals, name_filter) do
+    wanted = String.downcase(name_filter)
+
+    Enum.filter(signals, fn signal ->
+      signal.__struct__
+      |> Module.split()
+      |> List.last()
+      |> String.downcase()
+      |> String.contains?(wanted)
+    end)
+  end
+
+  # Prints every token with its index, position, kind and content.
+  defp print_tokens(tokens) do
+    Mix.shell().info("=== TOKEN LIST ===")
+
+    tokens
+    |> Enum.with_index()
+    |> Enum.each(fn {token, idx} ->
+      Mix.shell().info(
+        "  [#{idx}] line #{token.line} col #{token.col}  #{inspect(token.kind)}  #{inspect(token.content)}"
+      )
+    end)
+
+    Mix.shell().info("")
+  end
+
+  # Prints one signal's heading, its emission count and each emission.
+  defp print_signal_section(signal, emissions, tokens, lines) do
+    name = signal.__struct__ |> Module.split() |> List.last()
+    separator = String.duplicate("─", 60)
+
+    Mix.shell().info(separator)
+    Mix.shell().info("SIGNAL: #{name}")
+    Mix.shell().info("Emissions: #{length(emissions)}")
+    Mix.shell().info("")
+
+    if Enum.empty?(emissions) do
+      Mix.shell().info("  (no emissions)")
+    else
+      Enum.each(emissions, fn {_source, group, emission_name, value} ->
+        print_emission(group, emission_name, value, tokens, lines)
+      end)
+    end
+
+    Mix.shell().info("")
+  end
+
+  # A split points at a single token: show the source line it sits on and the token.
+  defp print_emission(:split, name, token_idx, tokens, lines) do
+    token = Enum.at(tokens, token_idx)
+    line_num = token && token.line
+
+    Mix.shell().info("  [SPLIT :#{name}]  token[#{token_idx}] → line #{line_num}")
+
+    if line_src = source_line(lines, line_num) do
+      Mix.shell().info("    #{line_src}")
+    end
+
+    if token do
+      Mix.shell().info("    ^ #{inspect(token.kind)} #{inspect(token.content)}")
+    end
+
+    Mix.shell().info("")
+  end
+
+  # An enclosure spans a token range: show the source lines that open and close it.
+  defp print_emission(:enclosure, name, {start_idx, end_idx}, tokens, lines) do
+    start_token = Enum.at(tokens, start_idx)
+    end_token = Enum.at(tokens, end_idx)
+
+    start_line = start_token && start_token.line
+    end_line = end_token && end_token.line
+
+    Mix.shell().info(
+      "  [ENCLOSURE :#{name}]  tokens[#{start_idx}..#{end_idx}]  lines #{start_line}–#{end_line}"
+    )
+
+    if open_src = source_line(lines, start_line) do
+      Mix.shell().info("    open:  #{inspect(open_src)}")
+    end
+
+    # The closing line is only shown when it differs from the opening one.
+    close_src = if end_line != start_line, do: source_line(lines, end_line)
+
+    if close_src do
+      Mix.shell().info("    close: #{inspect(close_src)}")
+    end
+
+    Mix.shell().info("")
+  end
+
+  # Any other emission group is printed verbatim.
+  defp print_emission(group, name, value, _tokens, _lines) do
+    Mix.shell().info("  [:#{group} :#{name}]  #{inspect(value)}")
+    Mix.shell().info("")
+  end
+
+  # Returns the source line for a 1-based line number with trailing whitespace
+  # removed, or nil when the number is missing or outside the file. Numbers below
+  # 1 are rejected explicitly because Enum.at/2 would count them from the end.
+  defp source_line(lines, line_num) when is_integer(line_num) and line_num >= 1 do
+    case Enum.at(lines, line_num - 1) do
+      nil -> nil
+      src -> String.trim_trailing(src)
+    end
+  end
+
+  defp source_line(_lines, _line_num), do: nil
+end
diff --git a/mix.exs b/mix.exs
index 2e55bfe8..cb2f4133 100644
--- a/mix.exs
+++ b/mix.exs
@@ -11,7 +11,13 @@ defmodule CodeQA.MixProject do
       escript: [main_module: CodeQA.CLI],
       elixirc_paths: elixirc_paths(Mix.env()),
       preferred_envs: [precommit: :test],
-      aliases: aliases()
+      aliases: aliases(),
+      dialyzer: [
+        ignore_warnings: ".dialyzer_ignore.exs",
+        plt_local_path: "priv/plts",
+        plt_core_path: "priv/plts"
+      ],
+      consolidate_protocols: Mix.env() != :test
     ]
   end
 
@@ -30,6 +36,12 @@ defmodule CodeQA.MixProject do
         "compile --warnings-as-errors",
         "deps.unlock --unused",
         "format"
+      ],
+      health: [
+        "run -e 'CodeQA.CLI.main([\"health-report\", \".\", \"--ignore-paths\", \"test/**\"])'"
+      ],
+      "health.progress": [
+        "run -e 'CodeQA.CLI.main([\"health-report\", \".\", \"--ignore-paths\", \"test/**\", \"--progress\"])'"
       ]
     ]
   end
diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml
new file mode 100644
index 00000000..cd4e559b
--- /dev/null
+++ b/priv/combined_metrics/code_smells.yml
@@ -0,0 +1,592 @@
+consistent_string_quote_style:
+  _doc: "Files should use a single, consistent string quoting style throughout."
+  _languages: [elixir]
+  _log_baseline: -18.9887
+  branching:
+    mean_branching_density: 0.0243
+    mean_non_blank_count: -0.0248
+  brevity:
+    mean_sample_size: -0.0656
+  casing_entropy:
+    mean_entropy: -0.0405
+    mean_pascal_case_count: -0.1743
+    mean_snake_case_count: -0.0505
+  compression:
+    mean_raw_bytes: -0.0672
+    mean_redundancy: 0.0207
+    mean_unique_line_ratio: -0.0338
+    mean_zlib_bytes: -0.1085
+    mean_zlib_ratio: 0.0413
+  entropy:
+    mean_char_max_entropy: -0.0077
+    mean_token_entropy: -0.0178
+    mean_token_max_entropy: -0.0141
+    mean_total_tokens: -0.0783
+    mean_vocab_size: -0.0656
+  function_metrics:
+    mean_avg_function_lines: -0.0226
+    mean_max_function_lines: -0.0644
+  halstead:
+    mean_N1_total_operators: -0.1087
+    mean_N2_total_operands: -0.2297
+    mean_difficulty: -0.1017
+    mean_effort: -0.2949
+    mean_estimated_bugs: -0.1934
+    mean_length: -0.1541
+    mean_n1_unique_operators: -0.0790
+    mean_n2_unique_operands: -0.2071
+    mean_time_to_implement_seconds: -0.2949
+    mean_vocabulary: -0.1721
+    mean_volume: -0.1933
+  heaps:
+    mean_k: -0.0368
+    mean_r_squared: -0.0080
+  identifier_length_variance:
+    mean_mean: 0.0059
+    mean_std_dev: 0.0235
+    mean_variance: 0.0470
+  indentation:
+    mean_blank_line_ratio: 0.0205
+    mean_variance: 0.0246
+  line_patterns:
+    mean_blank_line_ratio: 0.0205
+    mean_string_literal_ratio: -0.1616
+    mean_unique_line_ratio: -0.0365
+  magic_number_density:
+    mean_string_literal_ratio: 0.3018
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d2: 0.9542
+    mean_near_dup_block_d3: -0.6021
+    mean_near_dup_block_d4: -0.9542
+    mean_near_dup_block_d5: 0.9542
+    mean_near_dup_block_d6: -0.6021
+    mean_sub_block_count: -0.1804
+  ngram:
+    mean_bigram_hapax_fraction: -0.0101
+    mean_bigram_repeated_unique: -0.0915
+    mean_bigram_repetition_rate: 0.0277
+    mean_bigram_total: -0.0785
+    mean_bigram_unique: -0.1146
+    mean_trigram_repeated_unique: -0.1104
+    mean_trigram_repetition_rate: 0.0499
+    mean_trigram_total: -0.0787
+    mean_trigram_unique: -0.1182
+  punctuation_density:
+    mean_arrow_density: 0.0674
+    mean_bracket_nonalpha_prefix_count: 1.0103
+    mean_bracket_nonalpha_suffix_count: 2.0000
+    mean_colon_suffix_density: 0.0458
+    mean_dot_count: -0.1743
+    mean_exclamation_density: 0.0424
+    mean_id_nonalpha_suffix_density: 0.0783
+  readability:
+    mean_avg_line_length: -0.0444
+    mean_avg_tokens_per_line: -0.0535
+    mean_flesch_adapted: 0.0046
+    mean_fog_adapted: -0.0301
+    mean_total_lines: -0.0248
+  separator_counts:
+    mean_dot_count: -0.1743
+    mean_underscore_count: -0.0644
+  symbol_density:
+    mean_density: -0.0325
+    mean_distinct_symbol_types: -0.0966
+    mean_symbol_count: -0.0999
+  vocabulary:
+    mean_mattr: -0.0187
+    mean_raw_ttr: -0.0050
+    mean_total_identifiers: -0.0666
+    mean_unique_identifiers: -0.0714
+  vowel_density:
+    mean_total_chars: -0.0607
+  zipf:
+    mean_total_tokens: -0.0783
+    mean_vocab_size: -0.0656
+
+no_dead_code_after_return:
+  _doc: "There should be no unreachable statements after a return or early exit."
+  _languages: [elixir]
+  _log_baseline: -62.7495
+  branching:
+    mean_branch_count: -2.0000
+    mean_branching_density: -1.4201
+    mean_non_blank_count: -0.5815
+  brevity:
+    mean_sample_size: -0.2610
+  casing_entropy:
+    mean_entropy: -0.2430
+    mean_other_count: -0.8708
+    mean_pascal_case_count: -0.5752
+    mean_snake_case_count: -0.3559
+  compression:
+    mean_raw_bytes: -0.4531
+    mean_redundancy: -0.0467
+    mean_zlib_bytes: -0.3558
+    mean_zlib_ratio: -0.0974
+  entropy:
+    mean_char_entropy: 0.0250
+    mean_char_max_entropy: -0.0205
+    mean_char_normalized: 0.0455
+    mean_token_entropy: -0.0475
+    mean_token_max_entropy: -0.0575
+    mean_token_normalized: 0.0099
+    mean_total_tokens: -0.3093
+    mean_vocab_size: -0.2610
+  function_metrics:
+    mean_avg_function_lines: -0.4255
+    mean_avg_param_count: 0.1143
+    mean_function_count: -0.1143
+    mean_max_function_lines: -0.5062
+  halstead:
+    mean_N1_total_operators: -0.2185
+    mean_N2_total_operands: -0.4051
+    mean_difficulty: -0.1769
+    mean_effort: -0.5126
+    mean_estimated_bugs: -0.3357
+    mean_length: -0.2795
+    mean_n1_unique_operators: -0.0857
+    mean_n2_unique_operands: -0.3139
+    mean_time_to_implement_seconds: -0.5126
+    mean_vocabulary: -0.2525
+    mean_volume: -0.3357
+  heaps:
+    mean_k: -0.1169
+  identifier_length_variance:
+    mean_max: -0.4367
+    mean_mean: -0.0159
+    mean_std_dev: -0.2804
+    mean_variance: -0.5607
+  indentation:
+    mean_blank_line_ratio: 0.2883
+    mean_mean_depth: -0.4448
+    mean_variance: -0.6173
+  line_patterns:
+    mean_blank_line_ratio: 0.2883
+    mean_string_literal_ratio: -0.8289
+    mean_unique_line_ratio: -0.0289
+  magic_number_density:
+    mean_density: 0.2821
+    mean_string_literal_ratio: -0.8289
+  near_duplicate_blocks_file:
+    mean_block_count: -0.1083
+    mean_near_dup_block_d0: 1.1292
+    mean_near_dup_block_d5: 1.1292
+    mean_near_dup_block_d7: -0.7124
+    mean_near_dup_block_d8: 1.1292
+    mean_sub_block_count: -0.3612
+  ngram:
+    mean_bigram_hapax_fraction: 0.0142
+    mean_bigram_repeated_unique: -0.3335
+    mean_bigram_repetition_rate: -0.0114
+    mean_bigram_total: -0.3100
+    mean_bigram_unique: -0.3022
+    mean_trigram_hapax_fraction: -0.0576
+    mean_trigram_repeated_unique: -0.0894
+    mean_trigram_repetition_rate: 0.0890
+    mean_trigram_total: -0.3107
+    mean_trigram_unique: -0.3313
+  punctuation_density:
+    mean_arrow_density: -1.1156
+    mean_bracket_nonalpha_prefix_count: 1.0397
+    mean_bracket_nonalpha_suffix_count: -0.4541
+    mean_colon_suffix_density: 0.3588
+    mean_dot_count: -1.0081
+    mean_id_nonalpha_suffix_density: 0.0111
+  readability:
+    mean_avg_line_length: 0.1309
+    mean_avg_sub_words_per_id: -0.0415
+    mean_avg_tokens_per_line: 0.2722
+    mean_flesch_adapted: 0.0243
+    mean_fog_adapted: -0.3299
+    mean_total_lines: -0.5815
+  separator_counts:
+    mean_dot_count: -1.0081
+    mean_hyphen_count: -1.1292
+    mean_underscore_count: -0.6750
+  symbol_density:
+    mean_density: 0.2141
+    mean_symbol_count: -0.2386
+  vocabulary:
+    mean_mattr: -0.0424
+    mean_raw_ttr: 0.0435
+    mean_total_identifiers: -0.4061
+    mean_unique_identifiers: -0.3626
+  vowel_density:
+    mean_total_chars: -0.4220
+  zipf:
+    mean_exponent: -0.0067
+    mean_total_tokens: -0.3093
+    mean_vocab_size: -0.2610
+
+no_debug_print_statements:
+  _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code."
+  _languages: [elixir, go, javascript, python, ruby]
+  _log_baseline: -88.2885
+  branching:
+    mean_branch_count: 0.2378
+    mean_branching_density: 0.7072
+    mean_max_nesting_depth: 0.1175
+    mean_non_blank_count: -0.3222
+  brevity:
+    mean_sample_size: -0.0776
+  casing_entropy:
+    mean_camel_case_count: -0.4777
+    mean_entropy: 0.2378
+    mean_other_count: 0.0409
+    mean_pascal_case_count: -0.5178
+    mean_snake_case_count: -0.7116
+  compression:
+    mean_raw_bytes: -0.3758
+    mean_redundancy: -0.0974
+    mean_unique_line_ratio: -0.0654
+    mean_zlib_bytes: -0.2114
+    mean_zlib_ratio: -0.1430
+  entropy:
+    mean_char_entropy: 0.0136
+    mean_char_max_entropy: 0.0206
+    mean_token_entropy: 0.0326
+    mean_token_max_entropy: -0.0104
+    mean_token_normalized: 0.0414
+    mean_total_tokens: -0.4317
+    mean_vocab_size: -0.0776
+  function_metrics:
+    mean_avg_function_lines: -0.5945
+    mean_avg_param_count: -0.0572
+    mean_function_count: 0.0513
+    mean_max_function_lines: -0.5630
+  halstead:
+    mean_N1_total_operators: -0.2976
+    mean_N2_total_operands: -0.3928
+    mean_difficulty: -0.1881
+    mean_effort: -0.6575
+    mean_estimated_bugs: -0.3863
+    mean_length: -0.3347
+    mean_n1_unique_operators: -0.0361
+    mean_n2_unique_operands: -0.2661
+    mean_time_to_implement_seconds: -0.6575
+    mean_vocabulary: -0.1934
+    mean_volume: -0.3862
+  heaps:
+    mean_beta: 0.0567
+    mean_k: 0.0771
+    mean_r_squared: -0.0186
+  identifier_length_variance:
+    mean_max: -0.0146
+    mean_mean: 0.0926
+    mean_std_dev: -0.0267
+    mean_variance: -0.0755
+  indentation:
+    mean_blank_line_ratio: 0.1672
+    mean_max_depth: -0.1656
+    mean_mean_depth: -0.2127
+    mean_variance: 0.1646
+  line_patterns:
+    mean_blank_line_ratio: 0.1672
+    mean_max_nesting_depth: 0.1175
+    mean_string_literal_ratio: -0.6400
+    mean_unique_line_ratio: -0.0422
+  magic_number_density:
+    mean_density: 0.8678
+    mean_magic_number_count: 0.3203
+    mean_string_literal_ratio: -0.6501
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d0: -0.6126
+    mean_near_dup_block_d5: 1.2615
+    mean_near_dup_block_d6: 0.5236
+    mean_near_dup_block_d7: 0.1585
+    mean_near_dup_block_d8: 1.0702
+    mean_sub_block_count: 0.1879
+  ngram:
+    mean_bigram_repeated_unique: -0.1860
+    mean_bigram_repetition_rate: -0.1042
+    mean_bigram_total: -0.4331
+    mean_bigram_unique: -0.2201
+    mean_trigram_hapax_fraction: -0.0213
+    mean_trigram_repeated_unique: -0.1930
+    mean_trigram_repetition_rate: -0.0683
+    mean_trigram_total: -0.4345
+    mean_trigram_unique: -0.3080
+  punctuation_density:
+    mean_arrow_density: 0.8097
+    mean_bracket_nonalpha_prefix_count: -1.6350
+    mean_bracket_nonalpha_suffix_count: -1.0342
+    mean_colon_suffix_density: -0.4483
+    mean_dot_count: -0.4489
+    mean_exclamation_density: 1.6684
+    mean_id_nonalpha_suffix_density: -0.1102
+  readability:
+    mean_avg_line_length: -0.0110
+    mean_avg_sub_words_per_id: 0.0409
+    mean_avg_tokens_per_line: -0.0639
+    mean_flesch_adapted: -0.0331
+    mean_fog_adapted: -0.0304
+    mean_total_lines: -0.3222
+  separator_counts:
+    mean_dot_count: -0.4489
+    mean_hyphen_count: 0.1350
+    mean_slash_count: 2.0000
+    mean_underscore_count: 0.1201
+  symbol_density:
+    mean_density: -0.0960
+    mean_distinct_symbol_types: 0.0354
+    mean_symbol_count: -0.4722
+  vocabulary:
+    mean_mattr: 0.3241
+    mean_raw_ttr: 0.3641
+    mean_total_identifiers: -0.5984
+    mean_unique_identifiers: -0.1231
+  vowel_density:
+    mean_total_chars: -0.4638
+  zipf:
+    mean_exponent: -0.1608
+    mean_total_tokens: -0.4317
+    mean_vocab_size: -0.0776
+
+no_fixme_comments:
+  _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging."
+  _languages: [elixir, go, javascript, python, ruby]
+  _log_baseline: 11.3113
+  branching:
+    mean_branch_count: 0.1713
+    mean_branching_density: 0.1042
+    mean_max_nesting_depth: 0.0518
+    mean_non_blank_count: 0.0570
+  brevity:
+    mean_sample_size: -0.1049
+  casing_entropy:
+    mean_camel_case_count: 0.1803
+    mean_entropy: 0.0464
+    mean_macro_case_count: 0.2871
+    mean_other_count: 0.0237
+    mean_pascal_case_count: 0.0230
+    mean_screaming_snake_density: -2.0000
+    mean_snake_case_count: -0.0374
+  comment_structure:
+    mean_comment_line_count: -0.6230
+    mean_comment_line_ratio: -0.8044
+    mean_todo_fixme_count: -1.0293
+  compression:
+    mean_raw_bytes: 0.0311
+    mean_redundancy: 0.0289
+    mean_zlib_bytes: -0.0094
+    mean_zlib_ratio: 0.0402
+  entropy:
+    mean_char_entropy: 0.0042
+    mean_char_normalized: 0.0044
+    mean_token_entropy: -0.0313
+    mean_token_max_entropy: -0.0222
+    mean_token_normalized: -0.0092
+    mean_total_tokens: 0.0768
+    mean_vocab_size: -0.1049
+  function_metrics:
+    mean_avg_function_lines: 0.0522
+    mean_avg_param_count: 0.0093
+    mean_function_count: 0.0138
+    mean_max_function_lines: 0.1258
+  halstead:
+    mean_N1_total_operators: 0.1260
+    mean_N2_total_operands: 0.0296
+    mean_difficulty: 0.1341
+    mean_effort: 0.2123
+    mean_estimated_bugs: 0.0709
+    mean_length: 0.0866
+    mean_n2_unique_operands: -0.1071
+    mean_time_to_implement_seconds: 0.2123
+    mean_vocabulary: -0.0827
+    mean_volume: 0.0709
+  heaps:
+    mean_beta: -0.0733
+    mean_k: 0.1138
+  identifier_length_variance:
+    mean_mean: 0.0175
+    mean_std_dev: 0.0355
+    mean_variance: 0.0707
+  indentation:
+    mean_blank_line_ratio: 0.0568
+    mean_max_depth: 0.0525
+    mean_mean_depth: 0.0511
+    mean_variance: 0.1576
+  line_patterns:
+    mean_blank_line_ratio: 0.0568
+    mean_max_nesting_depth: 0.0518
+    mean_string_literal_ratio: 0.1690
+    mean_unique_line_ratio: 0.0069
+  magic_number_density:
+    mean_density: -0.0712
+    mean_string_literal_ratio: 0.1213
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0522
+    mean_near_dup_block_d0: 0.6667
+    mean_near_dup_block_d2: -0.3795
+    mean_near_dup_block_d4: -0.2116
+    mean_near_dup_block_d5: 0.2871
+    mean_near_dup_block_d6: -0.6667
+    mean_near_dup_block_d8: -0.0553
+    mean_sub_block_count: 0.1304
+  ngram:
+    mean_bigram_hapax_fraction: -0.0337
+    mean_bigram_repeated_unique: 0.1067
+    mean_bigram_repetition_rate: 0.0765
+    mean_bigram_total: 0.0770
+    mean_bigram_unique: 0.0192
+    mean_trigram_hapax_fraction: -0.0128
+    mean_trigram_repeated_unique: 0.1226
+    mean_trigram_repetition_rate: 0.0851
+    mean_trigram_total: 0.0771
+    mean_trigram_unique: 0.0513
+  punctuation_density:
+    mean_arrow_density: 0.0655
+    mean_bracket_nonalpha_prefix_count: 0.4987
+    mean_bracket_nonalpha_suffix_count: 0.2338
+    mean_colon_suffix_density: -0.2241
+    mean_dot_count: 0.3139
+    mean_exclamation_density: 0.3359
+    mean_id_nonalpha_suffix_density: 0.0318
+    mean_question_mark_density: 0.6230
+  readability:
+    mean_avg_line_length: 0.0230
+    mean_avg_sub_words_per_id: 0.0184
+    mean_avg_tokens_per_line: -0.0239
+    mean_flesch_adapted: -0.0152
+    mean_fog_adapted: -0.0037
+    mean_total_lines: 0.0927
+  separator_counts:
+    mean_dot_count: 0.3139
+    mean_hyphen_count: -0.0251
+    mean_slash_count: -0.1931
+    mean_underscore_count: 0.0971
+  symbol_density:
+    mean_density: 0.1198
+    mean_distinct_symbol_types: 0.0283
+    mean_symbol_count: 0.1467
+  vocabulary:
+    mean_mattr: -0.1170
+    mean_raw_ttr: -0.1141
+    mean_total_identifiers: -0.0122
+    mean_unique_identifiers: -0.1331
+  zipf:
+    mean_exponent: 0.0693
+    mean_r_squared: 0.0061
+    mean_total_tokens: 0.0768
+    mean_vocab_size: -0.1049
+
+no_nested_ternary:
+  _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else."
+  _languages: [elixir]
+  _log_baseline: 8.0040
+  branching:
+    mean_branch_count: -0.5662
+    mean_branching_density: -0.3441
+    mean_max_nesting_depth: 0.1824
+    mean_non_blank_count: -0.2221
+  brevity:
+    mean_sample_size: 0.0486
+  casing_entropy:
+    mean_entropy: 0.2311
+    mean_other_count: 0.7455
+    mean_pascal_case_count: 0.1237
+    mean_snake_case_count: 0.1138
+  compression:
+    mean_raw_bytes: -0.0141
+    mean_redundancy: -0.0117
+    mean_unique_line_ratio: 0.1154
+    mean_zlib_bytes: 0.0170
+    mean_zlib_ratio: -0.0312
+  entropy:
+    mean_char_entropy: 0.0689
+    mean_char_max_entropy: 0.0024
+    mean_char_normalized: 0.0665
+    mean_token_entropy: -0.0014
+    mean_token_max_entropy: 0.0110
+    mean_token_normalized: -0.0124
+    mean_total_tokens: 0.1324
+    mean_vocab_size: 0.0486
+  function_metrics:
+    mean_avg_function_lines: -0.7403
+    mean_avg_param_count: -0.0277
+    mean_function_count: 0.5579
+    mean_max_function_lines: -0.4954
+  halstead:
+    mean_N1_total_operators: 0.1382
+    mean_N2_total_operands: 0.1252
+    mean_difficulty: 0.0773
+    mean_effort: 0.2218
+    mean_estimated_bugs: 0.1445
+    mean_length: 0.1335
+    mean_n1_unique_operators: 0.0128
+    mean_n2_unique_operands: 0.0608
+    mean_time_to_implement_seconds: 0.2218
+    mean_vocabulary: 0.0480
+    mean_volume: 0.1445
+  heaps:
+    mean_beta: -0.0334
+    mean_k: 0.0563
+  identifier_length_variance:
+    mean_max: 0.0170
+    mean_mean: -0.0112
+    mean_std_dev: -0.0060
+    mean_variance: -0.0120
+  indentation:
+    mean_blank_line_ratio: 0.3825
+    mean_max_depth: -0.2891
+    mean_mean_depth: -0.2922
+    mean_variance: -0.5254
+  line_patterns:
+    mean_blank_line_ratio: 0.3825
+    mean_max_nesting_depth: 0.1824
+    mean_string_literal_ratio: 0.0146
+    mean_unique_line_ratio: 0.1591
+  magic_number_density:
+    mean_density: -0.1634
+    mean_magic_number_count: -0.0310
+    mean_string_literal_ratio: 0.0146
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0885
+    mean_near_dup_block_d7: -0.1824
+    mean_sub_block_count: 0.5472
+  ngram:
+    mean_bigram_hapax_fraction: -0.0464
+    mean_bigram_repeated_unique: 0.1405
+    mean_bigram_repetition_rate: 0.0564
+    mean_bigram_total: 0.1327
+    mean_bigram_unique: 0.0600
+    mean_trigram_hapax_fraction: -0.0321
+    mean_trigram_repeated_unique: 0.1699
+    mean_trigram_repetition_rate: 0.1003
+    mean_trigram_total: 0.1331
+    mean_trigram_unique: 0.0704
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.5781
+    mean_bracket_nonalpha_suffix_count: 0.7295
+    mean_colon_suffix_density: -0.6851
+    mean_dot_count: -0.1824
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: 0.2589
+  readability:
+    mean_avg_line_length: 0.2148
+    mean_avg_sub_words_per_id: 0.0173
+    mean_avg_tokens_per_line: 0.3545
+    mean_flesch_adapted: -0.0367
+    mean_fog_adapted: 0.3545
+    mean_total_lines: -0.2221
+  separator_counts:
+    mean_dot_count: -0.1824
+    mean_hyphen_count: -0.1067
+    mean_underscore_count: 0.3101
+  symbol_density:
+    mean_density: 0.2615
+    mean_distinct_symbol_types: 0.0377
+    mean_symbol_count: 0.2475
+  vocabulary:
+    mean_mattr: -0.0587
+    mean_raw_ttr: -0.0515
+    mean_total_identifiers: 0.1551
+    mean_unique_identifiers: 0.1036
+  vowel_density:
+    mean_total_chars: 0.1439
+  zipf:
+    mean_exponent: 0.0240
+    mean_r_squared: 0.0111
+    mean_total_tokens: 0.1324
+    mean_vocab_size: 0.0486
+
diff --git a/priv/combined_metrics/consistency.yml b/priv/combined_metrics/consistency.yml
new file mode 100644
index 00000000..902817d2
--- /dev/null
+++ b/priv/combined_metrics/consistency.yml
@@ -0,0 +1,334 @@
+consistent_casing_within_file:
+  _doc: "A file should use one naming convention throughout — no mixing of camelCase and snake_case for the same kind of identifier."
+  _log_baseline: -0.6750
+  brevity:
+    mean_sample_size: -0.0471
+  casing_entropy:
+    mean_camel_case_count: -2.0000
+    mean_entropy: -0.4254
+    mean_snake_case_count: 0.2663
+  compression:
+    mean_raw_bytes: 0.0213
+    mean_redundancy: 0.0219
+    mean_zlib_bytes: -0.0194
+    mean_zlib_ratio: 0.0407
+  entropy:
+    mean_char_entropy: -0.0126
+    mean_char_max_entropy: -0.0170
+    mean_char_normalized: 0.0044
+    mean_token_entropy: -0.0090
+    mean_token_max_entropy: -0.0101
+    mean_vocab_size: -0.0471
+  halstead:
+    mean_difficulty: 0.0629
+    mean_effort: 0.0530
+    mean_estimated_bugs: -0.0099
+    mean_n2_unique_operands: -0.0629
+    mean_time_to_implement_seconds: 0.0530
+    mean_vocabulary: -0.0456
+    mean_volume: -0.0099
+  heaps:
+    mean_beta: -0.0232
+    mean_k: 0.0253
+  identifier_length_variance:
+    mean_mean: 0.0337
+    mean_std_dev: 0.0139
+    mean_variance: 0.0278
+  ngram:
+    mean_bigram_hapax_fraction: -0.0071
+    mean_bigram_repetition_rate: 0.0267
+    mean_bigram_unique: -0.0197
+    mean_trigram_hapax_fraction: -0.0122
+    mean_trigram_repeated_unique: 0.0698
+    mean_trigram_repetition_rate: 0.0874
+    mean_trigram_unique: -0.0172
+  readability:
+    mean_avg_line_length: 0.0221
+  separator_counts:
+    mean_underscore_count: 0.4311
+  symbol_density:
+    mean_density: -0.0214
+  vocabulary:
+    mean_mattr: -0.0680
+    mean_raw_ttr: -0.0735
+    mean_unique_identifiers: -0.0735
+  vowel_density:
+    mean_total_chars: 0.0337
+  zipf:
+    mean_exponent: 0.0265
+    mean_vocab_size: -0.0471
+
+consistent_error_return_shape:
+  _doc: "All functions in a module should return errors in the same shape — mixed `nil`, `false`, and `{:error, _}` returns are confusing."
+  _log_baseline: -2.6048
+  branching:
+    mean_branch_count: -0.2800
+    mean_branching_density: 0.5563
+    mean_max_nesting_depth: -0.4366
+    mean_non_blank_count: -0.3834
+  brevity:
+    mean_sample_size: -0.0588
+  casing_entropy:
+    mean_entropy: 0.0596
+    mean_pascal_case_count: 0.2800
+    mean_snake_case_count: -0.0268
+  compression:
+    mean_raw_bytes: -0.1755
+    mean_redundancy: -0.0215
+    mean_unique_line_ratio: 0.0369
+    mean_zlib_bytes: -0.2211
+    mean_zlib_ratio: -0.0103
+  entropy:
+    mean_char_entropy: 0.0605
+    mean_char_normalized: 0.0641
+    mean_token_entropy: -0.0194
+    mean_token_max_entropy: -0.0167
+    mean_total_tokens: -0.0672
+    mean_vocab_size: -0.0588
+  function_metrics:
+    mean_avg_function_lines: -0.5108
+    mean_function_count: -0.0833
+    mean_max_function_lines: -0.4535
+  halstead:
+    mean_N1_total_operators: 0.0624
+    mean_N2_total_operands: -0.0870
+    mean_difficulty: 0.1365
+    mean_effort: 0.2447
+    mean_estimated_bugs: 0.0150
+    mean_length: 0.0128
+    mean_n1_unique_operators: 0.0412
+    mean_n2_unique_operands: -0.2129
+    mean_time_to_implement_seconds: 0.2447
+    mean_vocabulary: -0.1263
+    mean_volume: 0.0150
+  heaps:
+    mean_beta: 0.0694
+    mean_k: -0.0226
+  identifier_length_variance:
+    mean_mean: -0.0575
+    mean_std_dev: -0.0511
+    mean_variance: -0.0858
+  indentation:
+    mean_blank_line_ratio: 0.5151
+    mean_max_depth: -0.4917
+    mean_mean_depth: -0.4787
+    mean_variance: -0.8229
+  line_patterns:
+    mean_blank_line_ratio: 0.5151
+    mean_max_nesting_depth: -0.4366
+    mean_string_literal_ratio: -0.5234
+    mean_unique_line_ratio: 0.0441
+  magic_number_density:
+    mean_density: -0.2062
+    mean_string_literal_ratio: -0.5234
+  near_duplicate_blocks_file:
+    mean_block_count: -0.1566
+    mean_near_dup_block_d0: -1.3562
+    mean_near_dup_block_d4: 1.3562
+    mean_near_dup_block_d5: 1.3562
+    mean_near_dup_block_d6: 1.3562
+    mean_near_dup_block_d7: 1.3562
+    mean_near_dup_block_d8: -0.7933
+    mean_sub_block_count: 0.0308
+  ngram:
+    mean_bigram_hapax_fraction: -0.4505
+    mean_bigram_repeated_unique: 0.3514
+    mean_bigram_repetition_rate: 0.3102
+    mean_bigram_total: -0.0673
+    mean_bigram_unique: -0.2362
+    mean_trigram_hapax_fraction: -0.3293
+    mean_trigram_repeated_unique: 0.3390
+    mean_trigram_repetition_rate: 0.5429
+    mean_trigram_total: -0.0675
+    mean_trigram_unique: -0.2662
+  punctuation_density:
+    mean_arrow_density: -2.0000
+    mean_bracket_nonalpha_prefix_count: 0.1865
+    mean_bracket_nonalpha_suffix_count: 0.3180
+    mean_colon_suffix_density: 0.7587
+    mean_dot_count: 0.6069
+    mean_id_nonalpha_suffix_density: 0.3082
+  readability:
+    mean_avg_line_length: 0.3857
+    mean_avg_tokens_per_line: 0.5377
+    mean_flesch_adapted: -0.0433
+    mean_fog_adapted: 0.1454
+    mean_total_lines: -0.3834
+  separator_counts:
+    mean_dot_count: 0.6069
+    mean_hyphen_count: -1.1258
+    mean_underscore_count: 0.0416
+  symbol_density:
+    mean_density: 0.1508
+    mean_distinct_symbol_types: -0.0567
+  vocabulary:
+    mean_mattr: -0.1541
+    mean_raw_ttr: -0.1719
+    mean_unique_identifiers: -0.0425
+  zipf:
+    mean_exponent: -0.0773
+    mean_r_squared: 0.0157
+    mean_total_tokens: -0.0672
+    mean_vocab_size: -0.0588
+
+consistent_function_style:
+  _doc: "A module should not mix one-liner and multi-clause function definitions for the same concern."
+  _log_baseline: -0.1780
+  branching:
+    mean_branch_count: -0.1610
+    mean_branching_density: -0.3349
+    mean_max_nesting_depth: -0.1610
+    mean_non_blank_count: 0.1738
+  brevity:
+    mean_sample_size: 0.0028
+  casing_entropy:
+    mean_entropy: -0.0534
+    mean_other_count: -0.2753
+    mean_pascal_case_count: -0.0379
+    mean_snake_case_count: 0.0199
+  compression:
+    mean_raw_bytes: 0.0313
+    mean_redundancy: 0.0188
+    mean_unique_line_ratio: -0.0440
+    mean_zlib_bytes: 0.0037
+    mean_zlib_ratio: 0.0276
+  entropy:
+    mean_char_entropy: -0.0072
+    mean_char_normalized: -0.0071
+    mean_token_entropy: 0.0058
+    mean_token_normalized: 0.0052
+    mean_vocab_size: 0.0028
+  function_metrics:
+    mean_avg_function_lines: 0.0608
+    mean_avg_param_count: -0.0099
+    mean_function_count: 0.0939
+    mean_max_function_lines: -0.0797
+  halstead:
+    mean_N2_total_operands: 0.0471
+    mean_difficulty: 0.0186
+    mean_effort: 0.0362
+    mean_estimated_bugs: 0.0176
+    mean_length: 0.0157
+    mean_n1_unique_operators: -0.0122
+    mean_n2_unique_operands: 0.0162
+    mean_time_to_implement_seconds: 0.0362
+    mean_vocabulary: 0.0091
+    mean_volume: 0.0176
+  heaps:
+    mean_beta: 0.0024
+  identifier_length_variance:
+    mean_mean: 0.0076
+    mean_variance: 0.0038
+  indentation:
+    mean_blank_line_ratio: -0.0991
+    mean_max_depth: -0.1143
+    mean_mean_depth: -0.0203
+    mean_variance: -0.1730
+  line_patterns:
+    mean_blank_line_ratio: -0.0991
+    mean_max_nesting_depth: -0.1610
+    mean_unique_line_ratio: -0.0456
+  near_duplicate_blocks_file:
+    mean_block_count: 0.2753
+    mean_near_dup_block_d0: 0.9145
+    mean_near_dup_block_d7: 0.1610
+    mean_near_dup_block_d8: 0.5506
+    mean_sub_block_count: 0.0594
+  ngram:
+    mean_bigram_hapax_fraction: 0.0037
+    mean_bigram_repeated_unique: -0.0041
+    mean_bigram_repetition_rate: -0.0091
+    mean_bigram_unique: 0.0065
+    mean_trigram_repeated_unique: -0.0058
+  punctuation_density:
+    mean_arrow_density: 2.0000
+    mean_bracket_nonalpha_suffix_count: -0.0781
+    mean_colon_suffix_density: -0.1318
+    mean_dot_count: -0.0204
+    mean_id_nonalpha_suffix_density: -0.0132
+  readability:
+    mean_avg_line_length: -0.1471
+    mean_avg_sub_words_per_id: 0.0030
+    mean_avg_tokens_per_line: -0.1751
+    mean_flesch_adapted: 0.0147
+    mean_fog_adapted: -0.1412
+    mean_total_lines: 0.1738
+  separator_counts:
+    mean_dot_count: -0.0204
+    mean_hyphen_count: -0.1460
+    mean_underscore_count: 0.0287
+  symbol_density:
+    mean_density: -0.0473
+    mean_symbol_count: -0.0159
+  vocabulary:
+    mean_mattr: -0.0025
+    mean_raw_ttr: -0.0051
+    mean_total_identifiers: 0.0098
+    mean_unique_identifiers: 0.0047
+  vowel_density:
+    mean_total_chars: 0.0175
+  zipf:
+    mean_exponent: -0.0054
+    mean_vocab_size: 0.0028
+
+same_concept_same_name:
+  _doc: "The same domain concept should use the same name throughout a file — mixing `user`, `usr`, and `u` for the same thing harms readability."
+  _log_baseline: -15.1568
+  brevity:
+    mean_sample_size: -1.3457
+  compression:
+    mean_raw_bytes: 0.1773
+    mean_redundancy: 0.3935
+    mean_unique_line_ratio: -0.3251
+    mean_zlib_bytes: -0.8263
+    mean_zlib_ratio: 1.0033
+  entropy:
+    mean_char_entropy: -0.1808
+    mean_char_normalized: -0.1800
+    mean_token_entropy: -0.3546
+    mean_token_max_entropy: -0.2899
+    mean_vocab_size: -1.3457
+  halstead:
+    mean_difficulty: 1.8665
+    mean_effort: 1.5662
+    mean_estimated_bugs: -0.2997
+    mean_n2_unique_operands: -1.8665
+    mean_time_to_implement_seconds: 1.5662
+    mean_vocabulary: -1.3857
+    mean_volume: -0.3003
+  heaps:
+    mean_beta: -0.5870
+    mean_k: 0.5102
+  identifier_length_variance:
+    mean_mean: 0.3431
+    mean_std_dev: -0.4791
+    mean_variance: -0.9580
+  line_patterns:
+    mean_unique_line_ratio: -0.6939
+  ngram:
+    mean_bigram_hapax_fraction: -0.6466
+    mean_bigram_repeated_unique: -0.2091
+    mean_bigram_repetition_rate: 0.6530
+    mean_bigram_unique: -1.1746
+    mean_trigram_hapax_fraction: -0.6625
+    mean_trigram_repeated_unique: 1.2887
+    mean_trigram_repetition_rate: 1.6149
+    mean_trigram_unique: -0.9875
+  readability:
+    mean_avg_line_length: 0.1837
+    mean_avg_sub_words_per_id: -0.1771
+    mean_flesch_adapted: 0.2348
+  separator_counts:
+    mean_underscore_count: -0.5711
+  symbol_density:
+    mean_density: -0.1807
+  vocabulary:
+    mean_mattr: -1.8899
+    mean_raw_ttr: -1.9969
+    mean_unique_identifiers: -2.0000
+  vowel_density:
+    mean_total_chars: 0.3432
+  zipf:
+    mean_exponent: 0.7698
+    mean_vocab_size: -1.3457
+
diff --git a/priv/combined_metrics/dependencies.yml b/priv/combined_metrics/dependencies.yml
new file mode 100644
index 00000000..93c0d128
--- /dev/null
+++ b/priv/combined_metrics/dependencies.yml
@@ -0,0 +1,326 @@
+import_count_under_10:
+  _doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling."
+  _languages: [elixir]
+  _log_baseline: 7.1916
+  branching:
+    mean_branch_count: 0.2110
+    mean_branching_density: -1.0683
+    mean_max_nesting_depth: 0.1234
+    mean_non_blank_count: -0.0219
+  brevity:
+    mean_sample_size: 0.0119
+  casing_entropy:
+    mean_entropy: -0.0389
+    mean_pascal_case_count: -0.1657
+    mean_snake_case_count: -0.0025
+  comment_structure:
+    mean_comment_line_ratio: -1.2802
+  compression:
+    mean_raw_bytes: -0.0133
+    mean_redundancy: -0.0135
+    mean_unique_line_ratio: -0.0046
+    mean_zlib_bytes: 0.0144
+    mean_zlib_ratio: -0.0277
+  entropy:
+    mean_char_entropy: 0.0035
+    mean_char_max_entropy: 0.0088
+    mean_char_normalized: -0.0053
+    mean_token_entropy: -0.0040
+    mean_token_max_entropy: 0.0026
+    mean_token_normalized: -0.0066
+    mean_total_tokens: -0.0251
+    mean_vocab_size: 0.0119
+  function_metrics:
+    mean_avg_function_lines: -0.0688
+    mean_avg_param_count: -0.0555
+    mean_function_count: 0.1234
+    mean_max_function_lines: 0.0944
+    mean_max_param_count: -0.1234
+  halstead:
+    mean_N1_total_operators: -0.0138
+    mean_N2_total_operands: -0.0464
+    mean_difficulty: -0.0353
+    mean_effort: -0.0606
+    mean_estimated_bugs: -0.0253
+    mean_length: -0.0260
+    mean_n1_unique_operators: 0.0111
+    mean_time_to_implement_seconds: -0.0606
+    mean_vocabulary: 0.0032
+    mean_volume: -0.0253
+  heaps:
+    mean_beta: -0.0893
+    mean_k: 0.3293
+    mean_r_squared: 0.0101
+  identifier_length_variance:
+    mean_max: 0.0679
+    mean_mean: 0.0648
+    mean_std_dev: 0.0688
+    mean_variance: 0.1375
+  indentation:
+    mean_blank_line_ratio: 0.1478
+    mean_max_depth: -0.0876
+    mean_mean_depth: -0.0397
+    mean_variance: -0.2328
+  line_patterns:
+    mean_blank_line_ratio: 0.1478
+    mean_max_nesting_depth: 0.1234
+    mean_string_literal_ratio: 0.0265
+    mean_unique_line_ratio: -0.0050
+  magic_number_density:
+    mean_density: 0.5219
+    mean_magic_number_count: 0.4898
+    mean_string_literal_ratio: 0.0265
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0765
+    mean_sub_block_count: 0.1110
+  ngram:
+    mean_bigram_repeated_unique: 0.0034
+    mean_bigram_repetition_rate: -0.0129
+    mean_bigram_total: -0.0252
+    mean_bigram_unique: 0.0024
+    mean_trigram_hapax_fraction: -0.0051
+    mean_trigram_repeated_unique: 0.0257
+    mean_trigram_repetition_rate: -0.0296
+    mean_trigram_total: -0.0252
+    mean_trigram_unique: 0.0062
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.5922
+    mean_bracket_nonalpha_suffix_count: 0.1086
+    mean_colon_suffix_density: -0.1389
+    mean_dot_count: -0.1234
+    mean_id_nonalpha_suffix_density: -0.0141
+    mean_question_mark_density: -2.0000
+  readability:
+    mean_avg_line_length: 0.0110
+    mean_avg_sub_words_per_id: 0.0173
+    mean_flesch_adapted: -0.0204
+    mean_fog_adapted: 0.2028
+    mean_total_lines: -0.0265
+  symbol_density:
+    mean_density: 0.0223
+    mean_distinct_symbol_types: 0.0643
+    mean_symbol_count: 0.0087
+  vocabulary:
+    mean_mattr: -0.0031
+    mean_raw_ttr: 0.0573
+    mean_total_identifiers: -0.0573
+  vowel_density:
+    mean_total_chars: 0.0075
+  zipf:
+    mean_exponent: -0.0152
+    mean_r_squared: 0.0050
+    mean_total_tokens: -0.0251
+    mean_vocab_size: 0.0119
+
+low_coupling:
+  _doc: "Modules should depend on few external symbols — a low unique-operand count relative to the total operand count is a proxy for loose coupling."
+  _languages: [elixir]
+  _log_baseline: -38.2335
+  branching:
+    mean_branch_count: 0.0745
+    mean_branching_density: 0.2097
+    mean_max_nesting_depth: -0.1353
+    mean_non_blank_count: -0.1353
+  brevity:
+    mean_sample_size: -0.1276
+  casing_entropy:
+    mean_entropy: -0.0870
+    mean_pascal_case_count: -0.3722
+    mean_snake_case_count: -0.1302
+  compression:
+    mean_raw_bytes: -0.1657
+    mean_redundancy: 0.0126
+    mean_unique_line_ratio: -0.0296
+    mean_zlib_bytes: -0.1918
+    mean_zlib_ratio: 0.0262
+  entropy:
+    mean_char_entropy: -0.0044
+    mean_char_max_entropy: -0.0152
+    mean_char_normalized: 0.0108
+    mean_token_entropy: -0.0215
+    mean_token_max_entropy: -0.0285
+    mean_token_normalized: 0.0070
+    mean_total_tokens: -0.1602
+    mean_vocab_size: -0.1276
+  function_metrics:
+    mean_avg_function_lines: -0.3103
+    mean_function_count: 0.1353
+    mean_max_function_lines: -0.3573
+  halstead:
+    mean_N1_total_operators: -0.1645
+    mean_N2_total_operands: -0.1785
+    mean_difficulty: -0.1429
+    mean_effort: -0.3500
+    mean_estimated_bugs: -0.2072
+    mean_length: -0.1700
+    mean_n1_unique_operators: -0.1406
+    mean_n2_unique_operands: -0.1763
+    mean_time_to_implement_seconds: -0.3500
+    mean_vocabulary: -0.1655
+    mean_volume: -0.2072
+  heaps:
+    mean_beta: -0.0557
+    mean_k: 0.1362
+    mean_r_squared: -0.0234
+  identifier_length_variance:
+    mean_max: -0.0427
+    mean_mean: 0.0133
+    mean_std_dev: -0.0321
+    mean_variance: -0.0642
+  indentation:
+    mean_blank_line_ratio: -0.0752
+    mean_max_depth: -0.0352
+    mean_mean_depth: -0.1381
+    mean_variance: -0.2519
+  line_patterns:
+    mean_blank_line_ratio: -0.0752
+    mean_max_nesting_depth: -0.1353
+    mean_string_literal_ratio: 0.1282
+    mean_unique_line_ratio: -0.0312
+  magic_number_density:
+    mean_density: -2.0000
+    mean_string_literal_ratio: 0.1282
+  near_duplicate_blocks_file:
+    mean_block_count: 0.1123
+    mean_near_dup_block_d4: 0.2314
+    mean_near_dup_block_d8: 0.2314
+    mean_sub_block_count: -0.0902
+  ngram:
+    mean_bigram_hapax_fraction: -0.0247
+    mean_bigram_repeated_unique: -0.1792
+    mean_bigram_repetition_rate: 0.0301
+    mean_bigram_total: -0.1605
+    mean_bigram_unique: -0.2135
+    mean_trigram_hapax_fraction: -0.0265
+    mean_trigram_repeated_unique: -0.1784
+    mean_trigram_repetition_rate: 0.0750
+    mean_trigram_total: -0.1608
+    mean_trigram_unique: -0.2352
+  punctuation_density:
+    mean_arrow_density: -0.0373
+    mean_bracket_nonalpha_prefix_count: -0.4412
+    mean_bracket_nonalpha_suffix_count: 0.2314
+    mean_colon_suffix_density: -0.0705
+    mean_dot_count: -0.2609
+    mean_exclamation_density: 1.8877
+    mean_id_nonalpha_suffix_density: -0.0113
+  readability:
+    mean_avg_line_length: -0.0307
+    mean_avg_sub_words_per_id: 0.0032
+    mean_avg_tokens_per_line: -0.0248
+    mean_fog_adapted: 0.0082
+    mean_total_lines: -0.1353
+  symbol_density:
+    mean_density: -0.0137
+    mean_distinct_symbol_types: -0.0960
+    mean_symbol_count: -0.1794
+  vocabulary:
+    mean_mattr: -0.0823
+    mean_raw_ttr: 0.0349
+    mean_total_identifiers: -0.1801
+    mean_unique_identifiers: -0.1453
+  vowel_density:
+    mean_total_chars: -0.1669
+  zipf:
+    mean_exponent: 0.0065
+    mean_r_squared: -0.0205
+    mean_total_tokens: -0.1602
+    mean_vocab_size: -0.1276
+
+no_wildcard_imports:
+  _doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies."
+  _languages: [elixir]
+  _log_baseline: -8.9685
+  branching:
+    mean_branching_density: 0.0249
+    mean_non_blank_count: -0.0268
+  brevity:
+    mean_sample_size: -0.0077
+  casing_entropy:
+    mean_entropy: -0.0054
+    mean_snake_case_count: 0.0163
+  compression:
+    mean_raw_bytes: 0.0310
+    mean_unique_line_ratio: -0.0046
+    mean_zlib_bytes: 0.0331
+  entropy:
+    mean_total_tokens: 0.0131
+    mean_vocab_size: -0.0077
+  function_metrics:
+    mean_avg_function_lines: -0.0263
+  halstead:
+    mean_N1_total_operators: 0.0202
+    mean_N2_total_operands: 0.0271
+    mean_difficulty: 0.0600
+    mean_effort: 0.0778
+    mean_estimated_bugs: 0.0179
+    mean_length: 0.0228
+    mean_n2_unique_operands: -0.0329
+    mean_time_to_implement_seconds: 0.0778
+    mean_vocabulary: -0.0230
+    mean_volume: 0.0178
+  heaps:
+    mean_beta: -0.0537
+    mean_k: 0.1998
+    mean_r_squared: -0.0155
+  identifier_length_variance:
+    mean_mean: 0.0438
+    mean_std_dev: 0.0473
+    mean_variance: 0.0945
+  indentation:
+    mean_blank_line_ratio: 0.0763
+    mean_mean_depth: -0.0117
+    mean_variance: 0.0042
+  line_patterns:
+    mean_blank_line_ratio: 0.0763
+    mean_string_literal_ratio: -0.3463
+    mean_unique_line_ratio: -0.0053
+  magic_number_density:
+    mean_density: 1.1035
+    mean_magic_number_count: 1.1312
+    mean_string_literal_ratio: -0.3463
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d6: -0.3309
+    mean_near_dup_block_d7: 0.3309
+    mean_near_dup_block_d8: 0.3309
+    mean_sub_block_count: 0.0355
+  ngram:
+    mean_bigram_hapax_fraction: 0.0182
+    mean_bigram_repeated_unique: -0.0089
+    mean_bigram_repetition_rate: -0.0149
+    mean_bigram_total: 0.0131
+    mean_bigram_unique: 0.0308
+    mean_trigram_hapax_fraction: 0.0094
+    mean_trigram_repeated_unique: -0.0263
+    mean_trigram_repetition_rate: -0.0255
+    mean_trigram_total: 0.0132
+    mean_trigram_unique: 0.0274
+  punctuation_density:
+    mean_arrow_density: -0.0139
+    mean_bracket_nonalpha_prefix_count: -0.5656
+    mean_bracket_nonalpha_suffix_count: -0.0908
+    mean_colon_suffix_density: 2.0000
+    mean_dot_count: -0.0137
+    mean_id_nonalpha_suffix_density: 0.0143
+  readability:
+    mean_avg_line_length: 0.0591
+    mean_avg_sub_words_per_id: 0.0084
+    mean_avg_tokens_per_line: 0.0399
+    mean_flesch_adapted: -0.0142
+    mean_fog_adapted: 0.0290
+    mean_total_lines: -0.0268
+  symbol_density:
+    mean_density: -0.0266
+    mean_distinct_symbol_types: -0.0817
+    mean_symbol_count: 0.0042
+  vocabulary:
+    mean_mattr: 0.0259
+    mean_raw_ttr: -0.0117
+    mean_total_identifiers: 0.0116
+  vowel_density:
+    mean_total_chars: 0.0554
+  zipf:
+    mean_exponent: -0.0270
+    mean_total_tokens: 0.0131
+    mean_vocab_size: -0.0077
+
diff --git a/priv/combined_metrics/documentation.yml b/priv/combined_metrics/documentation.yml
new file mode 100644
index 00000000..fba47a32
--- /dev/null
+++ b/priv/combined_metrics/documentation.yml
@@ -0,0 +1,637 @@
+docstring_is_nonempty:
+  _doc: "Docstrings must contain meaningful content, not just a placeholder or empty string."
+  _languages: [elixir]
+  _log_baseline: 28.4942
+  branching:
+    mean_branch_count: 0.3089
+    mean_branching_density: 0.2652
+    mean_non_blank_count: 0.0437
+  brevity:
+    mean_sample_size: 0.1931
+  casing_entropy:
+    mean_entropy: 0.0676
+    mean_other_count: 0.0709
+    mean_pascal_case_count: 0.3089
+    mean_snake_case_count: 0.1382
+  compression:
+    mean_raw_bytes: 0.1245
+    mean_redundancy: -0.0198
+    mean_unique_line_ratio: 0.0053
+    mean_zlib_bytes: 0.1557
+    mean_zlib_ratio: -0.0312
+  entropy:
+    mean_char_entropy: 0.0065
+    mean_char_max_entropy: 0.0102
+    mean_char_normalized: -0.0036
+    mean_token_entropy: 0.0408
+    mean_token_max_entropy: 0.0400
+    mean_total_tokens: 0.1038
+    mean_vocab_size: 0.1931
+  function_metrics:
+    mean_avg_function_lines: 0.0357
+    mean_avg_param_count: 0.0131
+    mean_function_count: -0.0290
+    mean_max_function_lines: 0.0329
+  halstead:
+    mean_N1_total_operators: 0.0456
+    mean_N2_total_operands: -0.0027
+    mean_difficulty: 0.0706
+    mean_effort: 0.1098
+    mean_estimated_bugs: 0.0392
+    mean_length: 0.0289
+    mean_n1_unique_operators: 0.0913
+    mean_n2_unique_operands: 0.0179
+    mean_time_to_implement_seconds: 0.1098
+    mean_vocabulary: 0.0465
+    mean_volume: 0.0392
+  heaps:
+    mean_beta: 0.0242
+    mean_k: 0.0556
+  identifier_length_variance:
+    mean_mean: 0.0042
+    mean_std_dev: -0.0168
+    mean_variance: -0.0336
+  indentation:
+    mean_blank_line_ratio: 0.0413
+    mean_mean_depth: -0.0330
+    mean_variance: -0.0309
+  line_patterns:
+    mean_blank_line_ratio: 0.0413
+    mean_string_literal_ratio: 0.1078
+    mean_unique_line_ratio: 0.0072
+  magic_number_density:
+    mean_density: 0.0693
+    mean_magic_number_count: 0.1709
+    mean_string_literal_ratio: 0.1078
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0907
+    mean_near_dup_block_d5: -0.2709
+    mean_near_dup_block_d8: 0.1000
+    mean_sub_block_count: -0.0061
+  ngram:
+    mean_bigram_hapax_fraction: 0.0378
+    mean_bigram_repeated_unique: 0.0767
+    mean_bigram_repetition_rate: -0.0528
+    mean_bigram_total: 0.1039
+    mean_bigram_unique: 0.1635
+    mean_trigram_hapax_fraction: 0.0158
+    mean_trigram_repeated_unique: 0.0692
+    mean_trigram_repetition_rate: -0.0615
+    mean_trigram_total: 0.1041
+    mean_trigram_unique: 0.1386
+  punctuation_density:
+    mean_arrow_density: -0.0651
+    mean_bracket_nonalpha_prefix_count: 0.0450
+    mean_bracket_nonalpha_suffix_count: 0.1000
+    mean_colon_suffix_density: -0.0260
+    mean_dot_count: 0.1435
+    mean_exclamation_density: -2.0000
+    mean_id_nonalpha_suffix_density: -0.0474
+  readability:
+    mean_avg_line_length: 0.0834
+    mean_avg_sub_words_per_id: -0.0071
+    mean_avg_tokens_per_line: 0.0601
+    mean_fog_adapted: 0.0452
+    mean_total_lines: 0.0437
+  symbol_density:
+    mean_density: -0.0578
+    mean_distinct_symbol_types: 0.0505
+    mean_symbol_count: 0.0664
+  vocabulary:
+    mean_mattr: 0.1382
+    mean_raw_ttr: 0.0976
+    mean_total_identifiers: 0.1534
+    mean_unique_identifiers: 0.2510
+  vowel_density:
+    mean_total_chars: 0.1576
+  zipf:
+    mean_exponent: -0.0353
+    mean_r_squared: 0.0037
+    mean_total_tokens: 0.1038
+    mean_vocab_size: 0.1931
+
+file_has_license_header:
+  _doc: "Source files should begin with a license or copyright header."
+  _languages: [elixir]
+  _log_baseline: 5.8777
+  branching:
+    mean_branching_density: -0.0081
+    mean_non_blank_count: 0.0080
+  brevity:
+    mean_sample_size: 0.0263
+  casing_entropy:
+    mean_entropy: 0.0296
+    mean_pascal_case_count: 0.0957
+    mean_snake_case_count: 0.0039
+  comment_structure:
+    mean_comment_line_ratio: -2.0000
+  compression:
+    mean_raw_bytes: 0.0104
+    mean_redundancy: -0.0059
+    mean_zlib_bytes: 0.0200
+    mean_zlib_ratio: -0.0095
+  entropy:
+    mean_char_entropy: 0.0028
+    mean_char_max_entropy: 0.0052
+    mean_token_entropy: 0.0042
+    mean_token_max_entropy: 0.0054
+    mean_total_tokens: 0.0091
+    mean_vocab_size: 0.0263
+  halstead:
+    mean_N1_total_operators: 0.0051
+    mean_N2_total_operands: 0.0185
+    mean_difficulty: -0.0273
+    mean_effort: -0.0113
+    mean_estimated_bugs: 0.0159
+    mean_length: 0.0095
+    mean_n2_unique_operands: 0.0458
+    mean_time_to_implement_seconds: -0.0113
+    mean_vocabulary: 0.0306
+    mean_volume: 0.0160
+  heaps:
+    mean_beta: -0.0113
+    mean_k: 0.0614
+  identifier_length_variance:
+    mean_mean: -0.0048
+  indentation:
+    mean_blank_line_ratio: 0.0206
+    mean_mean_depth: -0.0080
+    mean_variance: 0.0154
+  line_patterns:
+    mean_blank_line_ratio: 0.0206
+    mean_string_literal_ratio: -0.0104
+  magic_number_density:
+    mean_density: 0.1920
+    mean_magic_number_count: 0.1973
+    mean_string_literal_ratio: -0.0104
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0650
+    mean_sub_block_count: 0.0089
+  ngram:
+    mean_bigram_hapax_fraction: 0.0086
+    mean_bigram_repetition_rate: -0.0091
+    mean_bigram_total: 0.0091
+    mean_bigram_unique: 0.0182
+    mean_trigram_hapax_fraction: 0.0031
+    mean_trigram_repetition_rate: -0.0091
+    mean_trigram_total: 0.0091
+    mean_trigram_unique: 0.0133
+  punctuation_density:
+    mean_arrow_density: -0.0105
+    mean_colon_suffix_density: -0.0104
+    mean_dot_count: 0.0423
+  readability:
+    mean_avg_tokens_per_line: 0.0091
+    mean_fog_adapted: 0.0060
+  symbol_density:
+    mean_density: -0.0042
+    mean_symbol_count: 0.0065
+  vocabulary:
+    mean_mattr: 0.0108
+    mean_raw_ttr: 0.0207
+    mean_total_identifiers: 0.0187
+    mean_unique_identifiers: 0.0395
+  vowel_density:
+    mean_total_chars: 0.0139
+  zipf:
+    mean_exponent: -0.0055
+    mean_total_tokens: 0.0091
+    mean_vocab_size: 0.0263
+
+file_has_module_docstring:
+  _doc: "Files should have a module-level docstring explaining purpose and usage."
+  _languages: [elixir]
+  _log_baseline: 24.1681
+  branching:
+    mean_branch_count: 0.3854
+    mean_branching_density: -2.0000
+    mean_non_blank_count: 0.0908
+  brevity:
+    mean_sample_size: 0.2219
+  casing_entropy:
+    mean_entropy: -0.0210
+    mean_pascal_case_count: 0.0929
+    mean_snake_case_count: 0.1544
+  compression:
+    mean_raw_bytes: 0.1161
+    mean_redundancy: -0.0256
+    mean_unique_line_ratio: 0.0122
+    mean_zlib_bytes: 0.1676
+    mean_zlib_ratio: -0.0514
+  entropy:
+    mean_char_max_entropy: 0.0126
+    mean_char_normalized: -0.0120
+    mean_token_entropy: 0.0441
+    mean_token_max_entropy: 0.0457
+    mean_total_tokens: 0.0837
+    mean_vocab_size: 0.2219
+  function_metrics:
+    mean_avg_function_lines: 0.0166
+    mean_max_function_lines: 0.1014
+  halstead:
+    mean_N1_total_operators: 0.0448
+    mean_N2_total_operands: 0.0268
+    mean_difficulty: 0.0971
+    mean_effort: 0.1486
+    mean_estimated_bugs: 0.0515
+    mean_length: 0.0387
+    mean_n1_unique_operators: 0.1116
+    mean_n2_unique_operands: 0.0412
+    mean_time_to_implement_seconds: 0.1486
+    mean_vocabulary: 0.0602
+    mean_volume: 0.0515
+  heaps:
+    mean_beta: -0.0925
+    mean_k: 0.5760
+    mean_r_squared: -0.0049
+  identifier_length_variance:
+    mean_mean: -0.0101
+    mean_std_dev: -0.0477
+    mean_variance: -0.0954
+  indentation:
+    mean_blank_line_ratio: 0.0686
+    mean_mean_depth: -0.0240
+    mean_variance: -0.0634
+  line_patterns:
+    mean_blank_line_ratio: 0.0686
+    mean_string_literal_ratio: 0.1425
+    mean_unique_line_ratio: 0.0141
+  magic_number_density:
+    mean_density: 0.0812
+    mean_magic_number_count: 0.1599
+    mean_string_literal_ratio: 0.1425
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0586
+    mean_sub_block_count: 0.0098
+  ngram:
+    mean_bigram_hapax_fraction: 0.0500
+    mean_bigram_repeated_unique: 0.0539
+    mean_bigram_repetition_rate: -0.0497
+    mean_bigram_total: 0.0838
+    mean_bigram_unique: 0.1493
+    mean_trigram_hapax_fraction: 0.0283
+    mean_trigram_repeated_unique: 0.0225
+    mean_trigram_repetition_rate: -0.0657
+    mean_trigram_total: 0.0839
+    mean_trigram_unique: 0.1235
+  punctuation_density:
+    mean_colon_suffix_density: 0.0341
+    mean_dot_count: 0.0777
+    mean_exclamation_density: -0.1014
+    mean_id_nonalpha_suffix_density: -0.0339
+  readability:
+    mean_avg_line_length: 0.0257
+    mean_avg_sub_words_per_id: -0.0181
+    mean_avg_tokens_per_line: -0.0071
+    mean_flesch_adapted: 0.0205
+    mean_fog_adapted: -0.0266
+    mean_total_lines: 0.0908
+  symbol_density:
+    mean_density: -0.0727
+    mean_distinct_symbol_types: 0.0618
+    mean_symbol_count: 0.0433
+  vocabulary:
+    mean_mattr: 0.0532
+    mean_raw_ttr: 0.1353
+    mean_total_identifiers: 0.1326
+    mean_unique_identifiers: 0.2679
+  vowel_density:
+    mean_total_chars: 0.1226
+  zipf:
+    mean_exponent: -0.0467
+    mean_total_tokens: 0.0837
+    mean_vocab_size: 0.2219
+
+file_has_no_commented_out_code:
+  _doc: "Files should not contain commented-out code blocks left from development."
+  _languages: [elixir]
+  _log_baseline: -8.5677
+  branching:
+    mean_branching_density: 0.0368
+    mean_non_blank_count: -0.0367
+  brevity:
+    mean_sample_size: -0.0046
+  casing_entropy:
+    mean_entropy: -0.0091
+    mean_pascal_case_count: -0.0597
+    mean_snake_case_count: -0.0126
+  comment_structure:
+    mean_comment_line_count: -0.9901
+    mean_comment_line_ratio: 0.3578
+  compression:
+    mean_raw_bytes: -0.0068
+    mean_redundancy: 0.0077
+    mean_zlib_bytes: -0.0179
+    mean_zlib_ratio: 0.0111
+  entropy:
+    mean_char_entropy: -0.0026
+    mean_char_max_entropy: -0.0061
+    mean_char_normalized: 0.0035
+    mean_total_tokens: -0.0158
+    mean_vocab_size: -0.0046
+  function_metrics:
+    mean_avg_function_lines: -0.0992
+    mean_function_count: 0.0686
+    mean_max_function_lines: -0.1247
+  halstead:
+    mean_N1_total_operators: -0.0058
+    mean_N2_total_operands: -0.0546
+    mean_difficulty: 0.0608
+    mean_effort: 0.0253
+    mean_estimated_bugs: -0.0355
+    mean_length: -0.0224
+    mean_n1_unique_operators: 0.0171
+    mean_n2_unique_operands: -0.0984
+    mean_time_to_implement_seconds: 0.0253
+    mean_vocabulary: -0.0628
+    mean_volume: -0.0356
+  heaps:
+    mean_beta: -0.0499
+    mean_k: 0.1958
+    mean_r_squared: -0.0200
+  identifier_length_variance:
+    mean_mean: 0.0169
+    mean_std_dev: 0.0264
+    mean_variance: 0.0527
+  indentation:
+    mean_blank_line_ratio: 0.0551
+    mean_max_depth: 0.0324
+    mean_mean_depth: 0.0564
+    mean_variance: 0.0552
+  line_patterns:
+    mean_blank_line_ratio: 0.0551
+    mean_string_literal_ratio: -0.0818
+    mean_unique_line_ratio: -0.0077
+  magic_number_density:
+    mean_density: 2.0000
+    mean_string_literal_ratio: -0.0818
+  near_duplicate_blocks_file:
+    mean_block_count: -0.0474
+    mean_sub_block_count: -0.0454
+  ngram:
+    mean_bigram_hapax_fraction: 0.0101
+    mean_bigram_repeated_unique: -0.0414
+    mean_bigram_repetition_rate: -0.0019
+    mean_bigram_total: -0.0158
+    mean_bigram_unique: -0.0223
+    mean_trigram_hapax_fraction: -0.0019
+    mean_trigram_repeated_unique: -0.0273
+    mean_trigram_repetition_rate: 0.0258
+    mean_trigram_total: -0.0159
+    mean_trigram_unique: -0.0338
+  punctuation_density:
+    mean_arrow_density: 0.1869
+    mean_bracket_nonalpha_prefix_count: -0.1247
+    mean_bracket_nonalpha_suffix_count: -0.0885
+    mean_colon_suffix_density: -0.1285
+    mean_dot_count: -0.0411
+    mean_exclamation_density: -0.1956
+    mean_id_nonalpha_suffix_density: 0.0028
+    mean_question_mark_density: -0.2494
+  readability:
+    mean_avg_line_length: 0.0371
+    mean_avg_sub_words_per_id: -0.0018
+    mean_avg_tokens_per_line: -0.0943
+    mean_flesch_adapted: 0.0114
+    mean_fog_adapted: -0.0779
+    mean_total_lines: 0.0785
+  symbol_density:
+    mean_density: -0.0172
+    mean_symbol_count: -0.0237
+  vocabulary:
+    mean_mattr: -0.0327
+    mean_raw_ttr: 0.0060
+    mean_total_identifiers: -0.0246
+    mean_unique_identifiers: -0.0186
+  vowel_density:
+    mean_total_chars: -0.0077
+  zipf:
+    mean_exponent: -0.0043
+    mean_total_tokens: -0.0158
+    mean_vocab_size: -0.0046
+
+function_has_docstring:
+  _doc: "Public functions should have a docstring describing behaviour, params, and return value."
+  _languages: [elixir]
+  _log_baseline: 41.6283
+  branching:
+    mean_branch_count: 0.5279
+    mean_branching_density: 0.3832
+    mean_non_blank_count: 0.1446
+  brevity:
+    mean_sample_size: 0.2608
+  casing_entropy:
+    mean_entropy: -0.0026
+    mean_other_count: 0.3105
+    mean_pascal_case_count: 0.1852
+    mean_snake_case_count: 0.2708
+  comment_structure:
+    mean_comment_line_ratio: -2.0000
+  compression:
+    mean_raw_bytes: 0.2251
+    mean_redundancy: -0.0242
+    mean_unique_line_ratio: -0.0264
+    mean_zlib_bytes: 0.2718
+    mean_zlib_ratio: -0.0468
+  entropy:
+    mean_char_entropy: 0.0081
+    mean_char_max_entropy: 0.0163
+    mean_char_normalized: -0.0082
+    mean_token_entropy: 0.0517
+    mean_token_max_entropy: 0.0557
+    mean_token_normalized: -0.0040
+    mean_total_tokens: 0.2284
+    mean_vocab_size: 0.2608
+  function_metrics:
+    mean_avg_function_lines: 0.0289
+    mean_avg_param_count: 0.0202
+    mean_function_count: 0.0999
+    mean_max_function_lines: 0.1368
+  halstead:
+    mean_N1_total_operators: 0.1175
+    mean_N2_total_operands: 0.0799
+    mean_difficulty: 0.0232
+    mean_effort: 0.1555
+    mean_estimated_bugs: 0.1324
+    mean_length: 0.1035
+    mean_n1_unique_operators: 0.0939
+    mean_n2_unique_operands: 0.1507
+    mean_time_to_implement_seconds: 0.1555
+    mean_vocabulary: 0.1288
+    mean_volume: 0.1324
+  heaps:
+    mean_beta: 0.0660
+    mean_k: -0.0612
+    mean_r_squared: -0.0041
+  identifier_length_variance:
+    mean_mean: -0.0191
+    mean_std_dev: -0.0493
+    mean_variance: -0.0985
+  indentation:
+    mean_blank_line_ratio: 0.1003
+    mean_max_depth: -0.1288
+    mean_mean_depth: -0.0904
+    mean_variance: -0.2118
+  line_patterns:
+    mean_blank_line_ratio: 0.1003
+    mean_string_literal_ratio: 0.5931
+    mean_unique_line_ratio: -0.0135
+  magic_number_density:
+    mean_density: 0.1744
+    mean_magic_number_count: 0.4104
+    mean_string_literal_ratio: 0.5931
+  near_duplicate_blocks_file:
+    mean_block_count: 0.2288
+    mean_near_dup_block_d6: -0.3105
+    mean_near_dup_block_d7: 0.3105
+    mean_near_dup_block_d8: -0.1816
+    mean_sub_block_count: 0.0349
+  ngram:
+    mean_bigram_hapax_fraction: 0.0560
+    mean_bigram_repeated_unique: 0.1917
+    mean_bigram_repetition_rate: -0.0476
+    mean_bigram_total: 0.2288
+    mean_bigram_unique: 0.2856
+    mean_trigram_hapax_fraction: 0.0480
+    mean_trigram_repeated_unique: 0.1175
+    mean_trigram_repetition_rate: -0.1025
+    mean_trigram_total: 0.2292
+    mean_trigram_unique: 0.2807
+  punctuation_density:
+    mean_arrow_density: -0.3619
+    mean_bracket_nonalpha_prefix_count: 0.0999
+    mean_bracket_nonalpha_suffix_count: 0.2024
+    mean_colon_suffix_density: -0.0297
+    mean_dot_count: 0.1816
+    mean_exclamation_density: -0.3105
+    mean_id_nonalpha_suffix_density: -0.1019
+    mean_question_mark_density: -0.2377
+  readability:
+    mean_avg_line_length: 0.0861
+    mean_avg_sub_words_per_id: -0.0113
+    mean_avg_tokens_per_line: 0.0890
+    mean_flesch_adapted: 0.0026
+    mean_fog_adapted: 0.0948
+    mean_total_lines: 0.1394
+  symbol_density:
+    mean_density: -0.0353
+    mean_distinct_symbol_types: 0.0427
+    mean_symbol_count: 0.1896
+  vocabulary:
+    mean_mattr: 0.1769
+    mean_raw_ttr: 0.0666
+    mean_total_identifiers: 0.2541
+    mean_unique_identifiers: 0.3207
+  vowel_density:
+    mean_total_chars: 0.2350
+  zipf:
+    mean_exponent: 0.0025
+    mean_total_tokens: 0.2284
+    mean_vocab_size: 0.2608
+
+function_todo_comment_in_body:
+  _doc: "Functions should not contain TODO/FIXME comments indicating unfinished work."
+  _languages: [elixir]
+  _log_baseline: 7.2394
+  branching:
+    mean_branch_count: -0.0287
+    mean_branching_density: -0.0435
+    mean_non_blank_count: 0.0147
+  brevity:
+    mean_sample_size: -0.0084
+  casing_entropy:
+    mean_entropy: 0.0157
+    mean_pascal_case_count: 0.0410
+    mean_snake_case_count: -0.0125
+  comment_structure:
+    mean_comment_line_count: -0.5392
+    mean_comment_line_ratio: 0.7796
+    mean_todo_fixme_count: -0.5392
+  compression:
+    mean_raw_bytes: 0.0082
+    mean_unique_line_ratio: 0.0028
+    mean_zlib_bytes: 0.0074
+  entropy:
+    mean_char_entropy: 0.0026
+    mean_char_normalized: 0.0026
+    mean_token_max_entropy: -0.0017
+    mean_token_normalized: 0.0020
+    mean_total_tokens: 0.0157
+    mean_vocab_size: -0.0084
+  function_metrics:
+    mean_avg_function_lines: -0.0250
+    mean_avg_param_count: -0.0354
+    mean_function_count: 0.0354
+    mean_max_function_lines: -0.0182
+  halstead:
+    mean_N1_total_operators: 0.0224
+    mean_N2_total_operands: -0.0309
+    mean_difficulty: 0.0451
+    mean_effort: 0.0375
+    mean_estimated_bugs: -0.0076
+    mean_length: 0.0035
+    mean_n2_unique_operands: -0.0761
+    mean_time_to_implement_seconds: 0.0375
+    mean_vocabulary: -0.0540
+    mean_volume: -0.0076
+  heaps:
+    mean_beta: -0.0498
+    mean_k: 0.1608
+    mean_r_squared: -0.0095
+  identifier_length_variance:
+    mean_mean: 0.0061
+    mean_std_dev: 0.0128
+    mean_variance: 0.0257
+  indentation:
+    mean_blank_line_ratio: 0.0593
+    mean_mean_depth: -0.0184
+    mean_variance: -0.0277
+  line_patterns:
+    mean_blank_line_ratio: 0.0593
+    mean_string_literal_ratio: -0.0151
+    mean_unique_line_ratio: 0.0033
+  magic_number_density:
+    mean_density: -2.0000
+    mean_string_literal_ratio: -0.0151
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0317
+    mean_sub_block_count: 0.0281
+  ngram:
+    mean_bigram_hapax_fraction: -0.0187
+    mean_bigram_repeated_unique: 0.0464
+    mean_bigram_repetition_rate: 0.0098
+    mean_bigram_total: 0.0157
+    mean_bigram_unique: 0.0136
+    mean_trigram_hapax_fraction: -0.0109
+    mean_trigram_repeated_unique: 0.0479
+    mean_trigram_repetition_rate: 0.0123
+    mean_trigram_total: 0.0157
+    mean_trigram_unique: 0.0149
+  punctuation_density:
+    mean_arrow_density: -0.0161
+    mean_bracket_nonalpha_prefix_count: -0.0287
+    mean_colon_suffix_density: -0.0293
+    mean_dot_count: 0.0485
+    mean_id_nonalpha_suffix_density: 0.0062
+    mean_question_mark_density: -0.0287
+  readability:
+    mean_avg_line_length: 0.0123
+    mean_avg_sub_words_per_id: 0.0073
+    mean_avg_tokens_per_line: -0.0224
+    mean_flesch_adapted: -0.0053
+    mean_fog_adapted: -0.0109
+    mean_total_lines: 0.0381
+  symbol_density:
+    mean_density: 0.0116
+    mean_distinct_symbol_types: -0.0140
+    mean_symbol_count: 0.0200
+  vocabulary:
+    mean_mattr: -0.0525
+    mean_raw_ttr: -0.0250
+    mean_unique_identifiers: -0.0236
+  vowel_density:
+    mean_total_chars: 0.0076
+  zipf:
+    mean_total_tokens: 0.0157
+    mean_vocab_size: -0.0084
+
diff --git a/priv/combined_metrics/error_handling.yml b/priv/combined_metrics/error_handling.yml
new file mode 100644
index 00000000..b09f542e
--- /dev/null
+++ b/priv/combined_metrics/error_handling.yml
@@ -0,0 +1,335 @@
+does_not_swallow_errors:
+  _doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures."
+  _languages: [elixir]
+  _log_baseline: 86.0584
+  branching:
+    mean_branch_count: -0.1041
+    mean_branching_density: -0.2095
+    mean_max_nesting_depth: 0.5405
+    mean_non_blank_count: 0.1054
+  brevity:
+    mean_sample_size: 0.2830
+  casing_entropy:
+    mean_entropy: -0.1412
+    mean_other_count: -1.6214
+    mean_pascal_case_count: 0.8391
+    mean_snake_case_count: 0.4785
+  compression:
+    mean_raw_bytes: 0.3818
+    mean_redundancy: 0.0202
+    mean_unique_line_ratio: 0.1028
+    mean_zlib_bytes: 0.3399
+    mean_zlib_ratio: 0.0419
+  entropy:
+    mean_char_entropy: 0.0445
+    mean_char_max_entropy: 0.0347
+    mean_char_normalized: 0.0098
+    mean_token_entropy: 0.0223
+    mean_token_max_entropy: 0.0620
+    mean_token_normalized: -0.0397
+    mean_total_tokens: 0.4926
+    mean_vocab_size: 0.2830
+  function_metrics:
+    mean_avg_function_lines: 0.1005
+    mean_max_function_lines: 0.2243
+  halstead:
+    mean_N1_total_operators: 0.4699
+    mean_N2_total_operands: 0.2900
+    mean_difficulty: 0.2395
+    mean_effort: 0.6960
+    mean_estimated_bugs: 0.4564
+    mean_length: 0.4072
+    mean_n1_unique_operators: 0.1859
+    mean_n2_unique_operands: 0.2364
+    mean_time_to_implement_seconds: 0.6960
+    mean_vocabulary: 0.2190
+    mean_volume: 0.4565
+  heaps:
+    mean_beta: -0.0869
+    mean_k: 0.2466
+  identifier_length_variance:
+    mean_std_dev: -0.1168
+    mean_variance: -0.2335
+  indentation:
+    mean_blank_line_ratio: 0.0451
+    mean_max_depth: 0.1740
+    mean_mean_depth: 0.1043
+    mean_variance: 0.3416
+  line_patterns:
+    mean_blank_line_ratio: 0.0451
+    mean_max_nesting_depth: 0.5405
+    mean_string_literal_ratio: 0.2524
+    mean_unique_line_ratio: 0.1413
+  magic_number_density:
+    mean_string_literal_ratio: 0.2524
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d0: -0.5405
+    mean_near_dup_block_d7: -0.3162
+    mean_near_dup_block_d8: 0.8566
+    mean_sub_block_count: 0.3065
+  ngram:
+    mean_bigram_hapax_fraction: -0.0373
+    mean_bigram_repeated_unique: 0.4011
+    mean_bigram_repetition_rate: 0.0995
+    mean_bigram_total: 0.4937
+    mean_bigram_unique: 0.3266
+    mean_trigram_hapax_fraction: -0.0651
+    mean_trigram_repeated_unique: 0.5672
+    mean_trigram_repetition_rate: 0.2299
+    mean_trigram_total: 0.4949
+    mean_trigram_unique: 0.3376
+  punctuation_density:
+    mean_arrow_density: -0.3177
+    mean_bracket_nonalpha_prefix_count: 0.7888
+    mean_bracket_nonalpha_suffix_count: 2.0000
+    mean_colon_suffix_density: 0.2150
+    mean_dot_count: 0.5172
+    mean_exclamation_density: -0.5217
+    mean_id_nonalpha_suffix_density: 0.0181
+    mean_question_mark_density: -0.4364
+  readability:
+    mean_avg_line_length: 0.2905
+    mean_avg_sub_words_per_id: 0.0219
+    mean_avg_tokens_per_line: 0.3872
+    mean_flesch_adapted: -0.0373
+    mean_fog_adapted: 0.3019
+    mean_total_lines: 0.1054
+  symbol_density:
+    mean_density: 0.2563
+    mean_distinct_symbol_types: 0.0400
+    mean_symbol_count: 0.6378
+  vocabulary:
+    mean_mattr: 0.0350
+    mean_raw_ttr: -0.0769
+    mean_total_identifiers: 0.4896
+    mean_unique_identifiers: 0.4127
+  vowel_density:
+    mean_total_chars: 0.4927
+  zipf:
+    mean_exponent: 0.0933
+    mean_total_tokens: 0.4926
+    mean_vocab_size: 0.2830
+
+error_message_is_descriptive:
+  _doc: "Error values should carry a meaningful message, not just a bare atom or empty string."
+  _languages: [elixir]
+  _log_baseline: 52.7053
+  branching:
+    mean_branch_count: 0.0664
+    mean_branching_density: -0.0540
+    mean_max_nesting_depth: 0.3900
+    mean_non_blank_count: 0.1204
+  brevity:
+    mean_sample_size: 0.3136
+  casing_entropy:
+    mean_entropy: 0.1147
+    mean_pascal_case_count: 2.0000
+    mean_snake_case_count: 0.5117
+  compression:
+    mean_raw_bytes: 0.3028
+    mean_redundancy: 0.0104
+    mean_unique_line_ratio: -0.0126
+    mean_zlib_bytes: 0.2771
+    mean_zlib_ratio: 0.0257
+  entropy:
+    mean_char_entropy: 0.0161
+    mean_char_max_entropy: 0.0487
+    mean_char_normalized: -0.0326
+    mean_token_entropy: 0.0596
+    mean_token_max_entropy: 0.0685
+    mean_token_normalized: -0.0089
+    mean_total_tokens: 0.3002
+    mean_vocab_size: 0.3136
+  function_metrics:
+    mean_avg_function_lines: 0.1160
+    mean_max_function_lines: 0.0713
+  halstead:
+    mean_N1_total_operators: 0.1787
+    mean_N2_total_operands: 0.0463
+    mean_difficulty: 0.0136
+    mean_effort: 0.1822
+    mean_estimated_bugs: 0.1686
+    mean_length: 0.1370
+    mean_n1_unique_operators: 0.1179
+    mean_n2_unique_operands: 0.1506
+    mean_time_to_implement_seconds: 0.1822
+    mean_vocabulary: 0.1415
+    mean_volume: 0.1686
+  heaps:
+    mean_beta: 0.0120
+    mean_k: 0.1259
+    mean_r_squared: 0.0073
+  identifier_length_variance:
+    mean_mean: -0.0908
+    mean_std_dev: -0.0799
+    mean_variance: -0.1597
+  indentation:
+    mean_blank_line_ratio: -0.1098
+    mean_max_depth: 0.1754
+    mean_mean_depth: 0.1108
+    mean_variance: 0.1967
+  line_patterns:
+    mean_blank_line_ratio: -0.1098
+    mean_max_nesting_depth: 0.3900
+    mean_string_literal_ratio: 0.3673
+    mean_unique_line_ratio: 0.0304
+  magic_number_density:
+    mean_string_literal_ratio: 0.3673
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d6: -1.0566
+    mean_near_dup_block_d8: -0.6667
+    mean_sub_block_count: 0.0621
+  ngram:
+    mean_bigram_hapax_fraction: -0.0059
+    mean_bigram_repeated_unique: 0.3150
+    mean_bigram_total: 0.3008
+    mean_bigram_unique: 0.3055
+    mean_trigram_hapax_fraction: -0.0298
+    mean_trigram_repeated_unique: 0.4104
+    mean_trigram_repetition_rate: 0.0227
+    mean_trigram_total: 0.3014
+    mean_trigram_unique: 0.3075
+  punctuation_density:
+    mean_arrow_density: -0.0591
+    mean_bracket_nonalpha_prefix_count: 1.4250
+    mean_bracket_nonalpha_suffix_count: 0.3350
+    mean_colon_suffix_density: -0.0854
+    mean_id_nonalpha_suffix_density: -0.0694
+  readability:
+    mean_avg_line_length: 0.1895
+    mean_avg_tokens_per_line: 0.1798
+    mean_flesch_adapted: -0.0175
+    mean_fog_adapted: 0.1420
+    mean_total_lines: 0.1204
+  symbol_density:
+    mean_distinct_symbol_types: 0.0664
+    mean_symbol_count: 0.3056
+  vocabulary:
+    mean_mattr: -0.0179
+    mean_raw_ttr: -0.1153
+    mean_total_identifiers: 0.5114
+    mean_unique_identifiers: 0.3962
+  vowel_density:
+    mean_total_chars: 0.4207
+  zipf:
+    mean_r_squared: 0.0056
+    mean_total_tokens: 0.3002
+    mean_vocab_size: 0.3136
+
+returns_typed_error:
+  _doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`."
+  _languages: [elixir]
+  _log_baseline: 120.8554
+  branching:
+    mean_branch_count: -0.1286
+    mean_branching_density: -0.1895
+    mean_max_nesting_depth: 1.1292
+    mean_non_blank_count: 0.0608
+  brevity:
+    mean_sample_size: 0.2322
+  casing_entropy:
+    mean_entropy: -0.3072
+    mean_other_count: -0.2697
+    mean_pascal_case_count: 0.7124
+    mean_snake_case_count: 0.6125
+  compression:
+    mean_raw_bytes: 0.4375
+    mean_redundancy: 0.0334
+    mean_unique_line_ratio: 0.1471
+    mean_zlib_bytes: 0.3486
+    mean_zlib_ratio: 0.0889
+  entropy:
+    mean_char_entropy: 0.0854
+    mean_char_max_entropy: 0.0427
+    mean_char_normalized: 0.0426
+    mean_token_entropy: -0.0120
+    mean_token_max_entropy: 0.0531
+    mean_token_normalized: -0.0651
+    mean_total_tokens: 0.6727
+    mean_vocab_size: 0.2322
+  function_metrics:
+    mean_avg_function_lines: 0.0904
+    mean_avg_param_count: 0.0054
+    mean_function_count: -0.0556
+    mean_max_function_lines: 0.0823
+  halstead:
+    mean_N1_total_operators: 0.7914
+    mean_N2_total_operands: 0.5495
+    mean_difficulty: 0.4516
+    mean_effort: 1.2300
+    mean_estimated_bugs: 0.7784
+    mean_length: 0.7139
+    mean_n1_unique_operators: 0.2105
+    mean_n2_unique_operands: 0.3084
+    mean_time_to_implement_seconds: 1.2300
+    mean_vocabulary: 0.2747
+    mean_volume: 0.7785
+  heaps:
+    mean_beta: -0.2332
+    mean_k: 0.4822
+    mean_r_squared: 0.0110
+  identifier_length_variance:
+    mean_max: 0.1996
+    mean_mean: 0.1313
+    mean_std_dev: 0.2519
+    mean_variance: 0.5039
+  indentation:
+    mean_blank_line_ratio: -0.1515
+    mean_mean_depth: -0.0287
+    mean_variance: 0.0372
+  line_patterns:
+    mean_blank_line_ratio: -0.1515
+    mean_max_nesting_depth: 1.1292
+    mean_string_literal_ratio: -0.6750
+    mean_unique_line_ratio: 0.1454
+  magic_number_density:
+    mean_string_literal_ratio: -0.6750
+  near_duplicate_blocks_file:
+    mean_block_count: -0.0980
+    mean_near_dup_block_d0: -1.4248
+    mean_near_dup_block_d6: 0.7124
+    mean_near_dup_block_d7: -1.0081
+    mean_near_dup_block_d8: -2.0000
+    mean_sub_block_count: 0.7384
+  ngram:
+    mean_bigram_hapax_fraction: -0.1410
+    mean_bigram_repeated_unique: 0.4891
+    mean_bigram_repetition_rate: 0.1302
+    mean_bigram_total: 0.6740
+    mean_bigram_unique: 0.3101
+    mean_trigram_hapax_fraction: -0.0547
+    mean_trigram_repeated_unique: 0.5003
+    mean_trigram_repetition_rate: 0.2370
+    mean_trigram_total: 0.6753
+    mean_trigram_unique: 0.3580
+  punctuation_density:
+    mean_arrow_density: -0.8033
+    mean_bracket_nonalpha_prefix_count: -0.1874
+    mean_colon_suffix_density: -0.8583
+    mean_dot_count: 1.1292
+    mean_id_nonalpha_suffix_density: 0.0810
+    mean_question_mark_density: -0.6568
+  readability:
+    mean_avg_line_length: 0.3955
+    mean_avg_sub_words_per_id: 0.0948
+    mean_avg_tokens_per_line: 0.6118
+    mean_flesch_adapted: -0.1272
+    mean_fog_adapted: 0.6637
+    mean_total_lines: 0.0608
+  symbol_density:
+    mean_density: 0.5813
+    mean_distinct_symbol_types: 0.2134
+    mean_symbol_count: 1.0187
+  vocabulary:
+    mean_mattr: -0.2229
+    mean_raw_ttr: -0.2020
+    mean_total_identifiers: 0.4979
+    mean_unique_identifiers: 0.2957
+  vowel_density:
+    mean_total_chars: 0.6292
+  zipf:
+    mean_exponent: 0.1047
+    mean_r_squared: 0.0253
+    mean_total_tokens: 0.6727
+    mean_vocab_size: 0.2322
+
diff --git a/priv/combined_metrics/file_structure.yml b/priv/combined_metrics/file_structure.yml
new file mode 100644
index 00000000..10ae0b2e
--- /dev/null
+++ b/priv/combined_metrics/file_structure.yml
@@ -0,0 +1,574 @@
+has_consistent_indentation:
+  _doc: "Files should use a single, consistent indentation style with no mixed tabs and spaces."
+  _log_baseline: -8.2745
+  branching:
+    mean_branching_density: 0.0144
+    mean_non_blank_count: -0.0302
+  brevity:
+    mean_sample_size: -0.0053
+  casing_entropy:
+    mean_entropy: 0.0021
+    mean_snake_case_count: -0.0030
+  compression:
+    mean_raw_bytes: -0.0218
+    mean_redundancy: -0.0026
+    mean_unique_line_ratio: -0.0381
+    mean_zlib_bytes: -0.0128
+    mean_zlib_ratio: -0.0065
+  entropy:
+    mean_char_entropy: 0.0079
+    mean_char_normalized: 0.0096
+    mean_token_entropy: -0.0018
+    mean_total_tokens: -0.0033
+    mean_vocab_size: -0.0053
+  function_metrics:
+    mean_avg_function_lines: -0.0128
+    mean_max_function_lines: -0.0176
+  halstead:
+    mean_N1_total_operators: -0.0035
+    mean_difficulty: -0.0061
+    mean_effort: -0.0038
+    mean_estimated_bugs: -0.0025
+    mean_length: -0.0023
+    mean_n1_unique_operators: -0.0081
+    mean_time_to_implement_seconds: -0.0038
+    mean_vocabulary: -0.0030
+    mean_volume: -0.0026
+  identifier_length_variance:
+    mean_mean: 0.0089
+    mean_std_dev: -0.0025
+    mean_variance: -0.0054
+  indentation:
+    mean_blank_line_ratio: 0.0250
+    mean_max_depth: -0.2075
+    mean_mean_depth: -0.0900
+    mean_variance: -0.3941
+  line_patterns:
+    mean_blank_line_ratio: 0.0250
+    mean_string_literal_ratio: 0.0093
+    mean_unique_line_ratio: -0.0077
+  magic_number_density:
+    mean_density: 2.0000
+    mean_magic_number_count: 0.2373
+    mean_string_literal_ratio: 0.0093
+  ngram:
+    mean_bigram_hapax_fraction: -0.0042
+    mean_bigram_repeated_unique: -0.0026
+    mean_bigram_repetition_rate: 0.0023
+    mean_bigram_total: -0.0033
+    mean_bigram_unique: -0.0051
+    mean_trigram_hapax_fraction: -0.0055
+    mean_trigram_repetition_rate: 0.0058
+    mean_trigram_total: -0.0033
+    mean_trigram_unique: -0.0048
+  punctuation_density:
+    mean_colon_suffix_density: 0.0078
+  readability:
+    mean_avg_tokens_per_line: 0.0087
+    mean_fog_adapted: 0.0083
+    mean_total_lines: -0.0302
+  separator_counts:
+    mean_hyphen_count: -0.0221
+  symbol_density:
+    mean_density: 0.0145
+    mean_symbol_count: -0.0026
+  vocabulary:
+    mean_mattr: 0.0043
+    mean_raw_ttr: 0.0043
+    mean_total_identifiers: -0.0025
+  vowel_density:
+    mean_total_chars: 0.0031
+  zipf:
+    mean_total_tokens: -0.0033
+    mean_vocab_size: -0.0053
+
+line_count_under_300:
+  _doc: "Files should be under 300 lines; longer files typically violate single responsibility."
+  _log_baseline: -48.1609
+  branching:
+    mean_branch_count: -0.4508
+    mean_branching_density: -0.2446
+    mean_non_blank_count: -0.2063
+  brevity:
+    mean_sample_size: -0.2062
+  casing_entropy:
+    mean_entropy: 0.0413
+    mean_other_count: -0.6011
+    mean_pascal_case_count: 0.1036
+    mean_snake_case_count: -0.2080
+  compression:
+    mean_raw_bytes: -0.2263
+    mean_redundancy: -0.0026
+    mean_unique_line_ratio: 0.0519
+    mean_zlib_bytes: -0.2194
+    mean_zlib_ratio: -0.0069
+  entropy:
+    mean_char_entropy: -0.0072
+    mean_char_max_entropy: -0.0245
+    mean_char_normalized: 0.0173
+    mean_token_entropy: -0.0264
+    mean_token_max_entropy: -0.0433
+    mean_token_normalized: 0.0169
+    mean_total_tokens: -0.1807
+    mean_vocab_size: -0.2062
+  function_metrics:
+    mean_avg_function_lines: 0.1338
+    mean_avg_param_count: -0.0931
+    mean_function_count: -0.3274
+    mean_max_function_lines: 0.0222
+    mean_max_param_count: -0.1036
+  halstead:
+    mean_N1_total_operators: -0.1746
+    mean_N2_total_operands: -0.1868
+    mean_difficulty: 0.0070
+    mean_effort: -0.2194
+    mean_estimated_bugs: -0.2264
+    mean_length: -0.1785
+    mean_n1_unique_operators: -0.0814
+    mean_n2_unique_operands: -0.2752
+    mean_time_to_implement_seconds: -0.2194
+    mean_vocabulary: -0.2238
+    mean_volume: -0.2264
+  heaps:
+    mean_beta: -0.0687
+    mean_k: 0.0978
+    mean_r_squared: -0.0094
+  identifier_length_variance:
+    mean_max: -0.0671
+    mean_mean: -0.0614
+    mean_std_dev: 0.0205
+    mean_variance: 0.0411
+  indentation:
+    mean_blank_line_ratio: -0.4899
+    mean_max_depth: 0.0301
+    mean_mean_depth: 0.0114
+    mean_variance: 0.1685
+  line_patterns:
+    mean_blank_line_ratio: -0.4899
+    mean_string_literal_ratio: 0.0039
+    mean_unique_line_ratio: 0.0561
+  magic_number_density:
+    mean_density: 1.4051
+    mean_magic_number_count: -0.4114
+    mean_string_literal_ratio: 0.0039
+  near_duplicate_blocks_file:
+    mean_block_count: 0.5617
+    mean_near_dup_block_d7: 0.1772
+    mean_near_dup_block_d8: 0.1772
+    mean_sub_block_count: 1.0591
+  ngram:
+    mean_bigram_hapax_fraction: -0.0655
+    mean_bigram_repeated_unique: -0.1356
+    mean_bigram_repetition_rate: 0.0296
+    mean_bigram_total: -0.1809
+    mean_bigram_unique: -0.2260
+    mean_trigram_hapax_fraction: -0.0366
+    mean_trigram_repeated_unique: -0.1208
+    mean_trigram_repetition_rate: 0.0506
+    mean_trigram_total: -0.1812
+    mean_trigram_unique: -0.2220
+  punctuation_density:
+    mean_arrow_density: -0.2511
+    mean_bracket_nonalpha_prefix_count: -0.2342
+    mean_bracket_nonalpha_suffix_count: -0.3472
+    mean_bracket_number_pair_count: -0.1772
+    mean_colon_suffix_density: -0.2045
+    mean_dot_count: -0.0341
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: -0.0125
+  readability:
+    mean_avg_line_length: -0.0217
+    mean_avg_sub_words_per_id: -0.0148
+    mean_avg_tokens_per_line: 0.0256
+    mean_flesch_adapted: 0.0146
+    mean_fog_adapted: 0.0323
+    mean_total_lines: -0.2063
+  separator_counts:
+    mean_dot_count: -0.0341
+    mean_hyphen_count: -0.2532
+    mean_underscore_count: -0.3087
+  symbol_density:
+    mean_density: 0.0758
+    mean_distinct_symbol_types: -0.0604
+    mean_symbol_count: -0.1504
+  vocabulary:
+    mean_mattr: -0.1396
+    mean_raw_ttr: -0.0669
+    mean_total_identifiers: -0.1838
+    mean_unique_identifiers: -0.2507
+  vowel_density:
+    mean_total_chars: -0.2452
+  zipf:
+    mean_exponent: 0.0102
+    mean_r_squared: -0.0067
+    mean_total_tokens: -0.1807
+    mean_vocab_size: -0.2062
+
+line_length_under_120:
+  _doc: "Lines should be under 120 characters to avoid horizontal scrolling."
+  _log_baseline: -6.3790
+  branching:
+    mean_branching_density: -0.1942
+    mean_non_blank_count: 0.1944
+  brevity:
+    mean_sample_size: -0.0200
+  casing_entropy:
+    mean_entropy: -0.0047
+    mean_snake_case_count: 0.0074
+  compression:
+    mean_raw_bytes: 0.0170
+    mean_redundancy: 0.0140
+    mean_unique_line_ratio: 0.0133
+    mean_zlib_bytes: -0.0077
+    mean_zlib_ratio: 0.0247
+  entropy:
+    mean_char_entropy: -0.0087
+    mean_char_normalized: -0.0076
+    mean_token_entropy: -0.0022
+    mean_token_max_entropy: -0.0041
+    mean_token_normalized: 0.0019
+    mean_total_tokens: -0.0030
+    mean_vocab_size: -0.0200
+  function_metrics:
+    mean_avg_function_lines: 0.2084
+    mean_avg_param_count: -0.0276
+    mean_max_function_lines: 0.2570
+    mean_max_param_count: -0.0944
+  halstead:
+    mean_N1_total_operators: -0.0033
+    mean_N2_total_operands: 0.0022
+    mean_difficulty: 0.0219
+    mean_effort: 0.0160
+    mean_estimated_bugs: -0.0059
+    mean_n1_unique_operators: -0.0081
+    mean_n2_unique_operands: -0.0278
+    mean_time_to_implement_seconds: 0.0160
+    mean_vocabulary: -0.0228
+    mean_volume: -0.0059
+  heaps:
+    mean_beta: -0.0068
+    mean_k: 0.0086
+  identifier_length_variance:
+    mean_mean: -0.0207
+    mean_std_dev: -0.0480
+    mean_variance: -0.0960
+  indentation:
+    mean_blank_line_ratio: -0.0420
+    mean_max_depth: 0.1137
+    mean_mean_depth: 0.1254
+    mean_variance: 0.2595
+  line_patterns:
+    mean_blank_line_ratio: -0.0420
+    mean_string_literal_ratio: -0.0264
+    mean_unique_line_ratio: 0.0181
+  magic_number_density:
+    mean_density: 0.0052
+    mean_string_literal_ratio: -0.0264
+  near_duplicate_blocks_file:
+    mean_sub_block_count: 0.0477
+  ngram:
+    mean_bigram_hapax_fraction: -0.0141
+    mean_bigram_repeated_unique: 0.0257
+    mean_bigram_repetition_rate: 0.0141
+    mean_bigram_total: -0.0030
+    mean_bigram_unique: -0.0113
+    mean_trigram_hapax_fraction: 0.0017
+    mean_trigram_repeated_unique: -0.0134
+    mean_trigram_total: -0.0030
+    mean_trigram_unique: -0.0043
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: -0.0807
+    mean_bracket_nonalpha_suffix_count: -0.1362
+    mean_colon_suffix_density: 0.0705
+    mean_dot_count: -0.0069
+    mean_id_nonalpha_suffix_density: 0.0093
+    mean_question_mark_density: 2.0000
+  readability:
+    mean_avg_line_length: -0.1816
+    mean_avg_sub_words_per_id: -0.0066
+    mean_avg_tokens_per_line: -0.1974
+    mean_flesch_adapted: 0.0402
+    mean_fog_adapted: -0.2009
+    mean_total_lines: 0.1944
+  separator_counts:
+    mean_dot_count: -0.0069
+    mean_underscore_count: -0.0349
+  symbol_density:
+    mean_density: -0.0247
+    mean_distinct_symbol_types: -0.0130
+    mean_symbol_count: -0.0078
+  vocabulary:
+    mean_mattr: -0.0231
+    mean_raw_ttr: -0.0300
+    mean_total_identifiers: 0.0067
+    mean_unique_identifiers: -0.0232
+  vowel_density:
+    mean_total_chars: -0.0140
+  zipf:
+    mean_exponent: 0.0039
+    mean_total_tokens: -0.0030
+    mean_vocab_size: -0.0200
+
+no_magic_numbers:
+  _doc: "Numeric literals should be extracted to named constants rather than used inline."
+  _log_baseline: 111.4823
+  branching:
+    mean_branch_count: -0.4352
+    mean_branching_density: -0.9103
+    mean_non_blank_count: 0.4762
+  brevity:
+    mean_sample_size: 0.3955
+  casing_entropy:
+    mean_entropy: -0.5234
+    mean_snake_case_count: 0.9072
+  compression:
+    mean_raw_bytes: 0.7713
+    mean_redundancy: 0.1328
+    mean_unique_line_ratio: 0.1073
+    mean_zlib_bytes: 0.5072
+    mean_zlib_ratio: 0.2642
+  entropy:
+    mean_char_entropy: 0.0481
+    mean_char_normalized: 0.0481
+    mean_token_entropy: 0.0769
+    mean_token_max_entropy: 0.0825
+    mean_total_tokens: 0.4877
+    mean_vocab_size: 0.3955
+  function_metrics:
+    mean_avg_function_lines: -0.5888
+    mean_avg_param_count: -0.1339
+    mean_function_count: 0.5327
+    mean_max_function_lines: -0.2655
+  halstead:
+    mean_N1_total_operators: 0.1749
+    mean_N2_total_operands: 0.4966
+    mean_difficulty: -0.0338
+    mean_effort: 0.3387
+    mean_estimated_bugs: 0.3723
+    mean_length: 0.3056
+    mean_n1_unique_operators: -0.0901
+    mean_n2_unique_operands: 0.4402
+    mean_time_to_implement_seconds: 0.3387
+    mean_vocabulary: 0.3159
+    mean_volume: 0.3724
+  heaps:
+    mean_beta: -0.1294
+    mean_k: 0.7952
+    mean_r_squared: -0.0645
+  identifier_length_variance:
+    mean_max: 0.2172
+    mean_mean: 0.4886
+    mean_std_dev: 0.4918
+    mean_variance: 0.9835
+  indentation:
+    mean_blank_line_ratio: 0.3137
+    mean_mean_depth: -0.4612
+    mean_variance: -0.5503
+  line_patterns:
+    mean_blank_line_ratio: 0.3137
+    mean_string_literal_ratio: -0.5060
+    mean_unique_line_ratio: 0.1502
+  magic_number_density:
+    mean_density: -1.2903
+    mean_magic_number_count: -0.8032
+    mean_string_literal_ratio: -0.5060
+  near_duplicate_blocks_file:
+    mean_block_count: -0.1911
+    mean_near_dup_block_d0: -1.6546
+    mean_near_dup_block_d7: -1.0789
+    mean_sub_block_count: 0.3466
+  ngram:
+    mean_bigram_hapax_fraction: -0.1520
+    mean_bigram_repeated_unique: 0.7630
+    mean_bigram_repetition_rate: 0.1469
+    mean_bigram_total: 0.4887
+    mean_bigram_unique: 0.4248
+    mean_trigram_hapax_fraction: 0.0849
+    mean_trigram_repeated_unique: 0.0415
+    mean_trigram_repetition_rate: -0.2233
+    mean_trigram_total: 0.4896
+    mean_trigram_unique: 0.5215
+  punctuation_density:
+    mean_arrow_density: -1.4573
+    mean_bracket_nonalpha_suffix_count: 0.5999
+    mean_colon_suffix_density: 0.5811
+    mean_id_nonalpha_suffix_density: -0.1238
+    mean_question_mark_density: -0.8032
+  readability:
+    mean_avg_line_length: 0.3048
+    mean_avg_sub_words_per_id: 0.3883
+    mean_flesch_adapted: -0.7069
+    mean_fog_adapted: 2.0000
+    mean_total_lines: 0.4762
+  separator_counts:
+    mean_hyphen_count: -0.8032
+    mean_underscore_count: 1.7114
+  symbol_density:
+    mean_density: -0.3071
+    mean_symbol_count: 0.4654
+  vocabulary:
+    mean_mattr: 0.3553
+    mean_raw_ttr: -0.0669
+    mean_total_identifiers: 0.7640
+    mean_unique_identifiers: 0.6968
+  vowel_density:
+    mean_total_chars: 1.2526
+  zipf:
+    mean_exponent: -0.1353
+    mean_r_squared: -0.0320
+    mean_total_tokens: 0.4877
+    mean_vocab_size: 0.3955
+
+single_responsibility:
+  _doc: "Each file should have one primary concern — low complexity spread across few, focused functions."
+  _log_baseline: -38.1040
+  branching:
+    mean_branch_count: -0.0678
+    mean_branching_density: 0.1364
+    mean_max_nesting_depth: -0.1093
+    mean_non_blank_count: -0.2043
+  brevity:
+    mean_sample_size: -0.0864
+  casing_entropy:
+    mean_entropy: -0.0206
+    mean_other_count: -0.7475
+    mean_pascal_case_count: 0.0470
+    mean_snake_case_count: -0.1543
+  compression:
+    mean_raw_bytes: -0.1908
+    mean_redundancy: -0.0351
+    mean_unique_line_ratio: 0.0316
+    mean_zlib_bytes: -0.1293
+    mean_zlib_ratio: -0.0616
+  entropy:
+    mean_char_entropy: 0.0078
+    mean_char_max_entropy: -0.0021
+    mean_char_normalized: 0.0099
+    mean_token_entropy: 0.0014
+    mean_token_max_entropy: -0.0182
+    mean_token_normalized: 0.0196
+    mean_total_tokens: -0.1489
+    mean_vocab_size: -0.0864
+  function_metrics:
+    mean_avg_function_lines: 0.1696
+    mean_avg_param_count: -0.0805
+    mean_function_count: -0.4114
+    mean_max_param_count: -0.2962
+  halstead:
+    mean_N1_total_operators: -0.1395
+    mean_N2_total_operands: -0.1701
+    mean_difficulty: 0.0527
+    mean_effort: -0.1183
+    mean_estimated_bugs: -0.1710
+    mean_length: -0.1498
+    mean_n1_unique_operators: 0.0541
+    mean_n2_unique_operands: -0.1687
+    mean_time_to_implement_seconds: -0.1183
+    mean_vocabulary: -0.0965
+    mean_volume: -0.1710
+  heaps:
+    mean_beta: -0.0154
+    mean_k: 0.0801
+    mean_r_squared: -0.0163
+  identifier_length_variance:
+    mean_max: -0.0836
+    mean_mean: -0.0508
+    mean_std_dev: -0.0865
+    mean_variance: -0.1729
+  indentation:
+    mean_blank_line_ratio: 0.0458
+    mean_mean_depth: -0.0476
+    mean_variance: -0.0931
+  line_patterns:
+    mean_blank_line_ratio: 0.0458
+    mean_max_nesting_depth: -0.1093
+    mean_string_literal_ratio: -0.1759
+    mean_unique_line_ratio: 0.0324
+  magic_number_density:
+    mean_density: 0.1469
+    mean_string_literal_ratio: -0.1759
+  near_duplicate_blocks_file:
+    mean_block_count: -0.2284
+    mean_near_dup_block_d0: -0.2962
+    mean_near_dup_block_d7: -0.3737
+    mean_sub_block_count: -0.1348
+  ngram:
+    mean_bigram_hapax_fraction: 0.0075
+    mean_bigram_repeated_unique: -0.1303
+    mean_bigram_repetition_rate: -0.0207
+    mean_bigram_total: -0.1492
+    mean_bigram_unique: -0.1162
+    mean_trigram_hapax_fraction: 0.0132
+    mean_trigram_repeated_unique: -0.1793
+    mean_trigram_repetition_rate: -0.0466
+    mean_trigram_total: -0.1495
+    mean_trigram_unique: -0.1273
+  punctuation_density:
+    mean_arrow_density: -0.1462
+    mean_bracket_nonalpha_prefix_count: -0.0859
+    mean_bracket_nonalpha_suffix_count: -0.4201
+    mean_colon_suffix_density: -0.4720
+    mean_dot_count: -0.0630
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: -0.0499
+    mean_question_mark_density: 0.4596
+  readability:
+    mean_avg_line_length: 0.0137
+    mean_avg_sub_words_per_id: -0.0377
+    mean_avg_tokens_per_line: 0.0553
+    mean_flesch_adapted: 0.0348
+    mean_fog_adapted: -0.0587
+    mean_total_lines: -0.2043
+  separator_counts:
+    mean_dot_count: -0.0630
+    mean_hyphen_count: -0.1453
+    mean_slash_count: 0.3737
+    mean_underscore_count: -0.4685
+  symbol_density:
+    mean_density: 0.0683
+    mean_distinct_symbol_types: 0.0284
+    mean_symbol_count: -0.1225
+  vocabulary:
+    mean_mattr: -0.0285
+    mean_raw_ttr: 0.0110
+    mean_total_identifiers: -0.1419
+    mean_unique_identifiers: -0.1309
+  vowel_density:
+    mean_total_chars: -0.1927
+  zipf:
+    mean_exponent: -0.0209
+    mean_r_squared: -0.0043
+    mean_total_tokens: -0.1489
+    mean_vocab_size: -0.0864
+
+uses_standard_indentation_width:
+  _doc: "Indentation should use consistent multiples of 2 or 4 spaces throughout the file."
+  _log_baseline: -17.9172
+  compression:
+    mean_raw_bytes: -0.2512
+    mean_redundancy: -0.0906
+    mean_zlib_bytes: -0.0351
+    mean_zlib_ratio: -0.2161
+  entropy:
+    mean_char_entropy: 0.1510
+    mean_char_normalized: 0.1510
+  function_metrics:
+    mean_avg_function_lines: 0.0361
+  indentation:
+    mean_blank_line_ratio: 0.2077
+    mean_max_depth: -1.0000
+    mean_mean_depth: -1.0000
+    mean_variance: -2.0000
+  line_patterns:
+    mean_blank_line_ratio: 0.2077
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d3: -1.0000
+    mean_near_dup_block_d4: 1.0000
+  punctuation_density:
+    mean_exclamation_density: 0.2630
+    mean_question_mark_density: 0.2630
+  readability:
+    mean_avg_line_length: -0.2644
+  symbol_density:
+    mean_density: 0.2512
+
diff --git a/priv/combined_metrics/function_design.yml b/priv/combined_metrics/function_design.yml
new file mode 100644
index 00000000..e34ba2ab
--- /dev/null
+++ b/priv/combined_metrics/function_design.yml
@@ -0,0 +1,862 @@
+boolean_function_has_question_mark:
+  _doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)."
+  _log_baseline: -6.4663
+  brevity:
+    mean_sample_size: 0.0127
+  casing_entropy:
+    mean_camel_case_count: 0.3410
+    mean_entropy: 0.0137
+    mean_snake_case_count: -0.0205
+  compression:
+    mean_raw_bytes: -0.0088
+    mean_zlib_bytes: -0.0060
+  entropy:
+    mean_token_max_entropy: 0.0036
+    mean_token_normalized: -0.0045
+    mean_total_tokens: 0.0278
+    mean_vocab_size: 0.0127
+  halstead:
+    mean_difficulty: -0.0101
+    mean_effort: -0.0070
+    mean_n2_unique_operands: 0.0049
+    mean_time_to_implement_seconds: -0.0070
+  heaps:
+    mean_beta: -0.0136
+    mean_k: 0.0266
+  identifier_length_variance:
+    mean_max: 0.0167
+    mean_mean: -0.0315
+    mean_std_dev: -0.0588
+    mean_variance: -0.0896
+  line_patterns:
+    mean_string_literal_ratio: -0.0125
+  magic_number_density:
+    mean_string_literal_ratio: -0.0125
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d5: 0.2707
+    mean_near_dup_block_d6: 1.0745
+    mean_near_dup_block_d8: 0.4628
+  ngram:
+    mean_bigram_repeated_unique: 0.0212
+    mean_bigram_repetition_rate: 0.0188
+    mean_bigram_total: 0.0281
+    mean_bigram_unique: 0.0114
+    mean_trigram_repeated_unique: 0.0334
+    mean_trigram_repetition_rate: 0.0258
+    mean_trigram_total: 0.0283
+    mean_trigram_unique: 0.0227
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 2.0000
+    mean_colon_suffix_density: -0.0269
+    mean_exclamation_density: 0.0234
+    mean_id_nonalpha_suffix_density: 0.0246
+    mean_question_mark_density: 1.1524
+  readability:
+    mean_avg_line_length: -0.0092
+    mean_avg_sub_words_per_id: -0.0097
+    mean_avg_tokens_per_line: 0.0248
+    mean_flesch_adapted: 0.0078
+    mean_fog_adapted: 0.0564
+  symbol_density:
+    mean_density: 0.0691
+    mean_distinct_symbol_types: 0.0138
+    mean_symbol_count: 0.0600
+  vocabulary:
+    mean_mattr: 0.0250
+    mean_raw_ttr: 0.0250
+    mean_unique_identifiers: 0.0175
+  vowel_density:
+    mean_total_chars: -0.0370
+  zipf:
+    mean_exponent: 0.0154
+    mean_r_squared: -0.0050
+    mean_total_tokens: 0.0278
+    mean_vocab_size: 0.0127
+
+cyclomatic_complexity_under_10:
+  _doc: "Functions should have a cyclomatic complexity under 10."
+  _log_baseline: -1.4896
+  branching:
+    mean_branch_count: -0.2373
+    mean_branching_density: -0.1952
+    mean_non_blank_count: -0.0421
+  casing_entropy:
+    mean_entropy: 0.0964
+    mean_other_count: 0.3306
+    mean_snake_case_count: 0.0321
+  compression:
+    mean_raw_bytes: -0.0162
+    mean_redundancy: -0.0172
+    mean_unique_line_ratio: -0.0305
+    mean_zlib_bytes: 0.0186
+    mean_zlib_ratio: -0.0347
+  entropy:
+    mean_char_entropy: 0.0349
+    mean_char_max_entropy: 0.0016
+    mean_char_normalized: 0.0333
+    mean_token_entropy: -0.0050
+    mean_token_normalized: -0.0050
+    mean_total_tokens: 0.0437
+  function_metrics:
+    mean_avg_function_lines: -0.4757
+    mean_function_count: 0.4636
+    mean_max_function_lines: -0.5038
+  halstead:
+    mean_N1_total_operators: 0.0708
+    mean_N2_total_operands: 0.0358
+    mean_difficulty: 0.0472
+    mean_effort: 0.1087
+    mean_estimated_bugs: 0.0615
+    mean_length: 0.0605
+    mean_n1_unique_operators: 0.0114
+    mean_time_to_implement_seconds: 0.1087
+    mean_vocabulary: 0.0043
+    mean_volume: 0.0615
+  heaps:
+    mean_beta: -0.0367
+    mean_k: 0.0672
+    mean_r_squared: 0.0049
+  identifier_length_variance:
+    mean_mean: 0.0130
+    mean_std_dev: 0.0120
+    mean_variance: 0.0240
+  indentation:
+    mean_blank_line_ratio: 0.1655
+    mean_max_depth: -0.2086
+    mean_mean_depth: -0.2901
+    mean_variance: -0.4637
+  line_patterns:
+    mean_blank_line_ratio: 0.1655
+    mean_string_literal_ratio: -0.0439
+    mean_unique_line_ratio: 0.0055
+  magic_number_density:
+    mean_density: -0.0329
+    mean_string_literal_ratio: -0.0439
+  near_duplicate_blocks_file:
+    mean_block_count: 0.1013
+    mean_near_dup_block_d8: -0.2086
+    mean_sub_block_count: 0.0994
+  ngram:
+    mean_bigram_hapax_fraction: -0.0068
+    mean_bigram_repeated_unique: 0.0301
+    mean_bigram_repetition_rate: 0.0115
+    mean_bigram_total: 0.0438
+    mean_bigram_unique: 0.0192
+    mean_trigram_hapax_fraction: -0.0027
+    mean_trigram_repeated_unique: 0.0456
+    mean_trigram_repetition_rate: 0.0055
+    mean_trigram_total: 0.0440
+    mean_trigram_unique: 0.0388
+  punctuation_density:
+    mean_arrow_density: -0.4960
+    mean_bracket_nonalpha_prefix_count: 0.2488
+    mean_bracket_nonalpha_suffix_count: 0.3306
+    mean_colon_suffix_density: 0.2760
+    mean_dot_count: -0.3005
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: 0.0616
+  readability:
+    mean_avg_line_length: 0.0257
+    mean_avg_sub_words_per_id: 0.0067
+    mean_avg_tokens_per_line: 0.0858
+    mean_flesch_adapted: -0.0140
+    mean_fog_adapted: 0.1035
+    mean_total_lines: -0.0421
+  symbol_density:
+    mean_density: 0.0552
+    mean_distinct_symbol_types: -0.0172
+    mean_symbol_count: 0.0391
+  vocabulary:
+    mean_mattr: -0.0361
+    mean_raw_ttr: -0.0361
+    mean_total_identifiers: 0.0441
+    mean_unique_identifiers: 0.0080
+  vowel_density:
+    mean_total_chars: 0.0572
+  zipf:
+    mean_exponent: 0.0120
+    mean_r_squared: 0.0057
+    mean_total_tokens: 0.0437
+
+has_verb_in_name:
+  _doc: "Function names should contain a verb describing the action performed."
+  _log_baseline: 14.8350
+  compression:
+    mean_raw_bytes: 0.0816
+    mean_redundancy: -0.0390
+    mean_zlib_bytes: 0.2011
+    mean_zlib_ratio: -0.1195
+  identifier_length_variance:
+    mean_max: 0.7747
+    mean_mean: 0.2058
+    mean_std_dev: 1.0000
+    mean_variance: 2.0000
+  punctuation_density:
+    mean_exclamation_density: -0.1076
+  readability:
+    mean_avg_line_length: 0.0846
+    mean_avg_sub_words_per_id: 0.1330
+    mean_flesch_adapted: -0.1324
+    mean_fog_adapted: 1.3261
+  symbol_density:
+    mean_density: -0.0828
+  vowel_density:
+    mean_total_chars: 0.2058
+
+is_less_than_20_lines:
+  _doc: "Functions should be 20 lines or fewer."
+  _log_baseline: 23.9658
+  branching:
+    mean_branch_count: -0.0820
+    mean_branching_density: -0.1010
+    mean_max_nesting_depth: -0.1156
+    mean_non_blank_count: 0.0188
+  brevity:
+    mean_sample_size: 0.0165
+  casing_entropy:
+    mean_entropy: 0.0577
+    mean_other_count: 0.6266
+    mean_pascal_case_count: 0.0440
+    mean_snake_case_count: 0.0910
+  compression:
+    mean_raw_bytes: 0.0746
+    mean_redundancy: 0.0227
+    mean_unique_line_ratio: -0.0334
+    mean_zlib_bytes: 0.0366
+    mean_zlib_ratio: 0.0379
+  entropy:
+    mean_char_entropy: 0.0020
+    mean_token_entropy: -0.0041
+    mean_token_max_entropy: 0.0035
+    mean_token_normalized: -0.0076
+    mean_total_tokens: 0.0759
+    mean_vocab_size: 0.0165
+  function_metrics:
+    mean_avg_function_lines: -0.3598
+    mean_avg_param_count: 0.1156
+    mean_function_count: 0.3705
+    mean_max_function_lines: -0.4532
+    mean_max_param_count: 0.0820
+  halstead:
+    mean_N1_total_operators: 0.0857
+    mean_N2_total_operands: 0.0965
+    mean_difficulty: 0.0624
+    mean_effort: 0.1550
+    mean_estimated_bugs: 0.0926
+    mean_length: 0.0895
+    mean_n1_unique_operators: -0.0097
+    mean_n2_unique_operands: 0.0245
+    mean_time_to_implement_seconds: 0.1550
+    mean_vocabulary: 0.0143
+    mean_volume: 0.0926
+  heaps:
+    mean_k: -0.0254
+  identifier_length_variance:
+    mean_mean: 0.0122
+    mean_std_dev: 0.0297
+    mean_variance: 0.0593
+  indentation:
+    mean_blank_line_ratio: -0.0440
+    mean_mean_depth: -0.0962
+    mean_variance: -0.1115
+  line_patterns:
+    mean_blank_line_ratio: -0.0440
+    mean_max_nesting_depth: -0.1156
+    mean_string_literal_ratio: -0.0774
+    mean_unique_line_ratio: -0.0188
+  magic_number_density:
+    mean_density: 0.0389
+    mean_magic_number_count: 0.1156
+    mean_string_literal_ratio: -0.0774
+  near_duplicate_blocks_file:
+    mean_block_count: 0.2797
+    mean_near_dup_block_d8: 0.3133
+    mean_sub_block_count: 0.1886
+  ngram:
+    mean_bigram_hapax_fraction: -0.0508
+    mean_bigram_repeated_unique: 0.1067
+    mean_bigram_repetition_rate: 0.0562
+    mean_bigram_total: 0.0760
+    mean_bigram_unique: 0.0228
+    mean_trigram_hapax_fraction: -0.0300
+    mean_trigram_repeated_unique: 0.1516
+    mean_trigram_repetition_rate: 0.1014
+    mean_trigram_total: 0.0761
+    mean_trigram_unique: 0.0386
+  punctuation_density:
+    mean_arrow_density: -0.3892
+    mean_bracket_nonalpha_prefix_count: 0.0418
+    mean_bracket_nonalpha_suffix_count: 0.0476
+    mean_colon_suffix_density: 0.0941
+    mean_dot_count: 0.0717
+    mean_exclamation_density: -0.0820
+    mean_id_nonalpha_suffix_density: 0.0518
+    mean_question_mark_density: -2.0000
+  readability:
+    mean_avg_line_length: 0.0576
+    mean_avg_sub_words_per_id: -0.0034
+    mean_avg_tokens_per_line: 0.0570
+    mean_flesch_adapted: -0.0039
+    mean_fog_adapted: 0.0868
+    mean_total_lines: 0.0188
+  symbol_density:
+    mean_density: -0.0084
+    mean_distinct_symbol_types: 0.0127
+    mean_symbol_count: 0.0662
+  vocabulary:
+    mean_mattr: -0.0390
+    mean_raw_ttr: -0.0717
+    mean_total_identifiers: 0.0965
+    mean_unique_identifiers: 0.0248
+  vowel_density:
+    mean_total_chars: 0.1087
+  zipf:
+    mean_exponent: 0.0225
+    mean_r_squared: 0.0030
+    mean_total_tokens: 0.0759
+    mean_vocab_size: 0.0165
+
+nesting_depth_under_4:
+  _doc: "Code should not nest deeper than 4 levels."
+  _log_baseline: 1.0611
+  branching:
+    mean_branch_count: -0.3267
+    mean_branching_density: -0.2061
+    mean_max_nesting_depth: 0.2061
+    mean_non_blank_count: -0.1206
+  brevity:
+    mean_sample_size: 0.0178
+  casing_entropy:
+    mean_entropy: -0.0207
+    mean_other_count: 0.2917
+    mean_pascal_case_count: -0.2725
+    mean_snake_case_count: 0.0787
+  compression:
+    mean_raw_bytes: -0.0069
+    mean_redundancy: -0.0076
+    mean_unique_line_ratio: -0.0028
+    mean_zlib_bytes: 0.0117
+    mean_zlib_ratio: -0.0186
+  entropy:
+    mean_char_entropy: 0.0715
+    mean_char_max_entropy: -0.0017
+    mean_char_normalized: 0.0732
+    mean_token_entropy: -0.0118
+    mean_token_max_entropy: 0.0042
+    mean_token_normalized: -0.0161
+    mean_total_tokens: 0.1047
+    mean_vocab_size: 0.0178
+  function_metrics:
+    mean_avg_function_lines: -0.6349
+    mean_function_count: 0.5787
+    mean_max_function_lines: -0.3375
+  halstead:
+    mean_N1_total_operators: 0.1525
+    mean_N2_total_operands: 0.0950
+    mean_difficulty: 0.0991
+    mean_effort: 0.2385
+    mean_estimated_bugs: 0.1394
+    mean_length: 0.1347
+    mean_n1_unique_operators: 0.0229
+    mean_n2_unique_operands: 0.0188
+    mean_time_to_implement_seconds: 0.2385
+    mean_vocabulary: 0.0202
+    mean_volume: 0.1394
+  heaps:
+    mean_beta: -0.0464
+    mean_k: 0.0845
+  identifier_length_variance:
+    mean_mean: 0.0770
+    mean_std_dev: 0.1858
+    mean_variance: 0.3716
+  indentation:
+    mean_blank_line_ratio: 0.5622
+    mean_max_depth: -0.3155
+    mean_mean_depth: -0.3651
+    mean_variance: -0.6050
+  line_patterns:
+    mean_blank_line_ratio: 0.5622
+    mean_max_nesting_depth: 0.2061
+    mean_string_literal_ratio: -0.1046
+    mean_unique_line_ratio: 0.0786
+  magic_number_density:
+    mean_string_literal_ratio: -0.1046
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0856
+    mean_sub_block_count: 0.1999
+  ngram:
+    mean_bigram_hapax_fraction: -0.0645
+    mean_bigram_repeated_unique: 0.1420
+    mean_bigram_repetition_rate: 0.0534
+    mean_bigram_total: 0.1049
+    mean_bigram_unique: 0.0292
+    mean_trigram_hapax_fraction: -0.0170
+    mean_trigram_repeated_unique: 0.1274
+    mean_trigram_repetition_rate: 0.0457
+    mean_trigram_total: 0.1052
+    mean_trigram_unique: 0.0686
+  punctuation_density:
+    mean_arrow_density: 0.9701
+    mean_bracket_nonalpha_prefix_count: 0.1748
+    mean_bracket_nonalpha_suffix_count: 0.9451
+    mean_colon_suffix_density: 0.8804
+    mean_dot_count: -0.2520
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: 0.0946
+    mean_question_mark_density: -0.1977
+  readability:
+    mean_avg_line_length: 0.1152
+    mean_avg_sub_words_per_id: 0.0220
+    mean_avg_tokens_per_line: 0.2252
+    mean_flesch_adapted: -0.0374
+    mean_fog_adapted: 0.2252
+    mean_total_lines: -0.1206
+  symbol_density:
+    mean_density: 0.1426
+    mean_symbol_count: 0.1355
+  vocabulary:
+    mean_mattr: -0.0269
+    mean_raw_ttr: -0.0269
+    mean_total_identifiers: 0.0774
+    mean_unique_identifiers: 0.0505
+  vowel_density:
+    mean_total_chars: 0.1544
+  zipf:
+    mean_exponent: 0.0250
+    mean_r_squared: 0.0156
+    mean_total_tokens: 0.1047
+    mean_vocab_size: 0.0178
+
+no_boolean_parameter:
+  _doc: "Functions should not take boolean parameters — a flag usually means the function does two things."
+  _log_baseline: 13.6290
+  branching:
+    mean_branch_count: -2.0000
+    mean_branching_density: 1.0271
+    mean_max_nesting_depth: -0.3263
+    mean_non_blank_count: -0.0383
+  brevity:
+    mean_sample_size: -0.0253
+  casing_entropy:
+    mean_entropy: 0.0049
+    mean_pascal_case_count: 0.1180
+    mean_snake_case_count: 0.0931
+  compression:
+    mean_raw_bytes: 0.0435
+    mean_redundancy: 0.0777
+    mean_unique_line_ratio: -0.0656
+    mean_zlib_bytes: -0.1055
+    mean_zlib_ratio: 0.1490
+  entropy:
+    mean_char_entropy: 0.0152
+    mean_char_normalized: 0.0153
+    mean_token_entropy: -0.0129
+    mean_token_max_entropy: -0.0055
+    mean_token_normalized: -0.0073
+    mean_total_tokens: 0.0692
+    mean_vocab_size: -0.0253
+  function_metrics:
+    mean_avg_function_lines: -0.3850
+    mean_avg_param_count: -0.2935
+    mean_function_count: 0.4338
+    mean_max_function_lines: -0.5579
+  halstead:
+    mean_N1_total_operators: 0.0393
+    mean_N2_total_operands: 0.0832
+    mean_difficulty: 0.0207
+    mean_effort: 0.0660
+    mean_estimated_bugs: 0.0453
+    mean_length: 0.0543
+    mean_n1_unique_operators: -0.0806
+    mean_n2_unique_operands: -0.0181
+    mean_time_to_implement_seconds: 0.0660
+    mean_vocabulary: -0.0374
+    mean_volume: 0.0453
+  heaps:
+    mean_beta: -0.0314
+    mean_k: 0.0620
+  identifier_length_variance:
+    mean_mean: 0.0125
+    mean_std_dev: 0.1858
+    mean_variance: 0.3715
+  indentation:
+    mean_blank_line_ratio: 0.4402
+    mean_max_depth: -0.5579
+    mean_mean_depth: -0.2880
+    mean_variance: -0.8414
+  line_patterns:
+    mean_blank_line_ratio: 0.4402
+    mean_max_nesting_depth: -0.3263
+    mean_string_literal_ratio: 0.0206
+    mean_unique_line_ratio: 0.0101
+  magic_number_density:
+    mean_string_literal_ratio: 0.0206
+  near_duplicate_blocks_file:
+    mean_block_count: 0.4338
+    mean_near_dup_block_d0: 1.7685
+    mean_near_dup_block_d2: 1.1158
+    mean_near_dup_block_d4: 1.6737
+    mean_near_dup_block_d5: 1.6737
+    mean_near_dup_block_d6: 1.7685
+    mean_near_dup_block_d7: -0.8842
+    mean_near_dup_block_d8: 0.5579
+    mean_sub_block_count: 0.2775
+  ngram:
+    mean_bigram_hapax_fraction: -0.1940
+    mean_bigram_repeated_unique: 0.1467
+    mean_bigram_repetition_rate: 0.1504
+    mean_bigram_total: 0.0694
+    mean_bigram_unique: -0.1127
+    mean_trigram_hapax_fraction: -0.2208
+    mean_trigram_repeated_unique: 0.3783
+    mean_trigram_repetition_rate: 0.3150
+    mean_trigram_total: 0.0695
+    mean_trigram_unique: -0.1019
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.1152
+    mean_bracket_nonalpha_suffix_count: 0.1075
+    mean_colon_suffix_density: -0.2793
+    mean_dot_count: 0.1538
+    mean_exclamation_density: -0.0435
+    mean_id_nonalpha_suffix_density: 0.0357
+  readability:
+    mean_avg_line_length: 0.0817
+    mean_avg_sub_words_per_id: 0.0160
+    mean_avg_tokens_per_line: 0.1075
+    mean_flesch_adapted: -0.0254
+    mean_fog_adapted: 0.2928
+    mean_total_lines: -0.0383
+  symbol_density:
+    mean_density: 0.0479
+    mean_symbol_count: 0.0916
+  vocabulary:
+    mean_mattr: -0.0916
+    mean_raw_ttr: -0.1091
+    mean_total_identifiers: 0.0962
+    mean_unique_identifiers: -0.0129
+  vowel_density:
+    mean_total_chars: 0.1087
+  zipf:
+    mean_exponent: 0.0374
+    mean_total_tokens: 0.0692
+    mean_vocab_size: -0.0253
+
+no_magic_numbers:
+  _doc: "Numeric literals should be named constants, not inline magic numbers."
+  _log_baseline: 45.8808
+  branching:
+    mean_branch_count: -0.2708
+    mean_branching_density: -0.1682
+    mean_non_blank_count: -0.1029
+  brevity:
+    mean_sample_size: 0.1527
+  casing_entropy:
+    mean_entropy: -0.2908
+    mean_snake_case_count: 0.4279
+  compression:
+    mean_raw_bytes: 0.3823
+    mean_redundancy: 0.0584
+    mean_unique_line_ratio: 0.2269
+    mean_zlib_bytes: 0.2473
+    mean_zlib_ratio: 0.1350
+  entropy:
+    mean_char_entropy: 0.0661
+    mean_char_normalized: 0.0624
+    mean_token_entropy: 0.0148
+    mean_token_max_entropy: 0.0355
+    mean_token_normalized: -0.0207
+    mean_total_tokens: 0.2834
+    mean_vocab_size: 0.1527
+  function_metrics:
+    mean_avg_function_lines: -0.8758
+    mean_function_count: 0.4111
+  halstead:
+    mean_N1_total_operators: 0.1953
+    mean_N2_total_operands: 0.2960
+    mean_difficulty: 0.0408
+    mean_effort: 0.3105
+    mean_estimated_bugs: 0.2698
+    mean_length: 0.2359
+    mean_n1_unique_operators: -0.0413
+    mean_n2_unique_operands: 0.2139
+    mean_time_to_implement_seconds: 0.3105
+    mean_vocabulary: 0.1447
+    mean_volume: 0.2697
+  heaps:
+    mean_beta: -0.1129
+    mean_k: 0.5236
+    mean_r_squared: -0.0256
+  identifier_length_variance:
+    mean_max: 0.0987
+    mean_mean: 0.3721
+    mean_std_dev: 0.3878
+    mean_variance: 0.7757
+  indentation:
+    mean_blank_line_ratio: 0.2374
+    mean_mean_depth: -0.3518
+    mean_variance: -0.4760
+  line_patterns:
+    mean_blank_line_ratio: 0.2374
+    mean_string_literal_ratio: -0.2880
+    mean_unique_line_ratio: 0.2337
+  magic_number_density:
+    mean_density: -0.2831
+    mean_string_literal_ratio: -0.2880
+  near_duplicate_blocks_file:
+    mean_block_count: -0.7894
+    mean_near_dup_block_d0: -1.1158
+    mean_near_dup_block_d7: -1.1158
+    mean_sub_block_count: 0.2708
+  ngram:
+    mean_bigram_hapax_fraction: -0.1437
+    mean_bigram_repeated_unique: 0.4787
+    mean_bigram_repetition_rate: 0.1545
+    mean_bigram_total: 0.2844
+    mean_bigram_unique: 0.1437
+    mean_trigram_hapax_fraction: -0.0207
+    mean_trigram_repeated_unique: 0.2787
+    mean_trigram_repetition_rate: 0.1465
+    mean_trigram_total: 0.2854
+    mean_trigram_unique: 0.1843
+  punctuation_density:
+    mean_arrow_density: -1.1699
+    mean_bracket_nonalpha_suffix_count: 2.0000
+    mean_colon_suffix_density: 1.9476
+    mean_id_nonalpha_suffix_density: 0.3448
+  readability:
+    mean_avg_line_length: 0.5035
+    mean_avg_sub_words_per_id: 0.2699
+    mean_avg_tokens_per_line: 0.3863
+    mean_flesch_adapted: -0.3819
+    mean_fog_adapted: 1.0656
+    mean_total_lines: -0.1029
+  symbol_density:
+    mean_density: -0.0314
+    mean_distinct_symbol_types: 0.0644
+    mean_symbol_count: 0.3512
+  vocabulary:
+    mean_mattr: 0.0058
+    mean_raw_ttr: -0.0081
+    mean_total_identifiers: 0.3908
+    mean_unique_identifiers: 0.3826
+  vowel_density:
+    mean_total_chars: 0.7629
+  zipf:
+    mean_exponent: 0.0164
+    mean_r_squared: 0.0321
+    mean_total_tokens: 0.2834
+    mean_vocab_size: 0.1527
+
+parameter_count_under_4:
+  _doc: "Functions should take fewer than 4 parameters."
+  _log_baseline: 1.9637
+  branching:
+    mean_non_blank_count: 0.0967
+  brevity:
+    mean_sample_size: 0.0261
+  casing_entropy:
+    mean_entropy: 0.5731
+    mean_other_count: 0.5408
+    mean_pascal_case_count: 0.2329
+    mean_snake_case_count: -0.0351
+  compression:
+    mean_raw_bytes: -0.0343
+    mean_redundancy: -0.0308
+    mean_unique_line_ratio: -0.0166
+    mean_zlib_bytes: 0.0291
+    mean_zlib_ratio: -0.0634
+  entropy:
+    mean_char_entropy: 0.0082
+    mean_char_max_entropy: 0.0175
+    mean_char_normalized: -0.0093
+    mean_token_entropy: 0.0206
+    mean_token_max_entropy: 0.0063
+    mean_token_normalized: 0.0144
+    mean_total_tokens: -0.0335
+    mean_vocab_size: 0.0261
+  function_metrics:
+    mean_avg_function_lines: 0.1262
+    mean_avg_param_count: -0.3179
+    mean_function_count: -0.0320
+    mean_max_function_lines: 0.2037
+    mean_max_param_count: -0.2847
+  halstead:
+    mean_N1_total_operators: -0.0264
+    mean_N2_total_operands: -0.0507
+    mean_difficulty: 0.0180
+    mean_effort: -0.0147
+    mean_estimated_bugs: -0.0326
+    mean_length: -0.0353
+    mean_n1_unique_operators: 0.0613
+    mean_n2_unique_operands: -0.0074
+    mean_time_to_implement_seconds: -0.0147
+    mean_vocabulary: 0.0108
+    mean_volume: -0.0327
+  heaps:
+    mean_beta: 0.0179
+    mean_k: -0.0082
+    mean_r_squared: -0.0062
+  identifier_length_variance:
+    mean_mean: -0.0239
+    mean_std_dev: -0.0185
+    mean_variance: -0.0371
+  indentation:
+    mean_blank_line_ratio: 0.0518
+    mean_max_depth: 0.1362
+    mean_mean_depth: 0.0506
+    mean_variance: 0.1451
+  line_patterns:
+    mean_blank_line_ratio: 0.0518
+    mean_string_literal_ratio: 0.1674
+    mean_unique_line_ratio: -0.0137
+  magic_number_density:
+    mean_density: -2.0000
+    mean_string_literal_ratio: 0.1674
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0967
+    mean_near_dup_block_d7: -0.4658
+  ngram:
+    mean_bigram_hapax_fraction: 0.0479
+    mean_bigram_repeated_unique: -0.0222
+    mean_bigram_repetition_rate: -0.0480
+    mean_bigram_total: -0.0336
+    mean_bigram_unique: 0.0376
+    mean_trigram_hapax_fraction: 0.0610
+    mean_trigram_repeated_unique: -0.1263
+    mean_trigram_repetition_rate: -0.1619
+    mean_trigram_total: -0.0337
+    mean_trigram_unique: 0.0524
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: -0.0967
+    mean_bracket_nonalpha_suffix_count: -0.1131
+    mean_colon_suffix_density: 0.0056
+    mean_dot_count: 0.9099
+    mean_id_nonalpha_suffix_density: -0.0434
+  readability:
+    mean_avg_line_length: -0.1345
+    mean_avg_sub_words_per_id: -0.0093
+    mean_avg_tokens_per_line: -0.1302
+    mean_flesch_adapted: 0.0271
+    mean_fog_adapted: -0.1290
+    mean_total_lines: 0.0967
+  symbol_density:
+    mean_density: 0.0124
+    mean_distinct_symbol_types: 0.1042
+    mean_symbol_count: -0.0218
+  vocabulary:
+    mean_mattr: 0.0150
+    mean_raw_ttr: 0.0153
+    mean_total_identifiers: -0.0153
+  vowel_density:
+    mean_total_chars: -0.0393
+  zipf:
+    mean_exponent: 0.0101
+    mean_r_squared: -0.0074
+    mean_total_tokens: -0.0335
+    mean_vocab_size: 0.0261
+
+uses_ternary_expression:
+  _doc: "Simple conditional assignments should use inline expressions rather than full if-blocks."
+  _log_baseline: -4.5289
+  branching:
+    mean_branch_count: -0.4160
+    mean_branching_density: 0.1134
+    mean_non_blank_count: -0.5296
+  brevity:
+    mean_sample_size: 0.0095
+  casing_entropy:
+    mean_entropy: 0.0068
+    mean_snake_case_count: -0.0141
+  compression:
+    mean_raw_bytes: -0.0819
+    mean_redundancy: -0.0629
+    mean_unique_line_ratio: 0.1604
+    mean_zlib_bytes: 0.0148
+    mean_zlib_ratio: -0.0967
+  entropy:
+    mean_char_entropy: 0.0664
+    mean_char_normalized: 0.0636
+    mean_token_entropy: -0.0078
+    mean_token_normalized: -0.0101
+    mean_total_tokens: 0.0859
+    mean_vocab_size: 0.0095
+  function_metrics:
+    mean_avg_function_lines: -0.6785
+    mean_function_count: 0.2434
+    mean_max_function_lines: -0.4160
+  halstead:
+    mean_N1_total_operators: 0.1567
+    mean_N2_total_operands: 0.0551
+    mean_difficulty: 0.0844
+    mean_effort: 0.2135
+    mean_estimated_bugs: 0.1291
+    mean_length: 0.1267
+    mean_n1_unique_operators: 0.0293
+    mean_time_to_implement_seconds: 0.2135
+    mean_vocabulary: 0.0101
+    mean_volume: 0.1291
+  heaps:
+    mean_beta: -0.0301
+    mean_k: 0.0594
+  identifier_length_variance:
+    mean_mean: 0.0749
+    mean_std_dev: 0.0535
+    mean_variance: 0.1070
+  indentation:
+    mean_blank_line_ratio: 0.5054
+    mean_max_depth: -0.2434
+    mean_mean_depth: -0.3243
+    mean_variance: -0.5454
+  line_patterns:
+    mean_blank_line_ratio: 0.5054
+    mean_string_literal_ratio: -0.0855
+    mean_unique_line_ratio: 0.1630
+  magic_number_density:
+    mean_density: -0.0859
+    mean_string_literal_ratio: -0.0855
+  near_duplicate_blocks_file:
+    mean_block_count: -0.2821
+    mean_near_dup_block_d0: -2.0000
+    mean_sub_block_count: 0.2434
+  ngram:
+    mean_bigram_hapax_fraction: -0.0546
+    mean_bigram_repeated_unique: 0.2141
+    mean_bigram_repetition_rate: 0.0785
+    mean_bigram_total: 0.0863
+    mean_bigram_unique: 0.0432
+    mean_trigram_hapax_fraction: -0.0165
+    mean_trigram_repeated_unique: 0.1339
+    mean_trigram_repetition_rate: 0.1178
+    mean_trigram_total: 0.0866
+    mean_trigram_unique: 0.0400
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.2713
+    mean_bracket_nonalpha_suffix_count: 0.5255
+    mean_bracket_number_pair_count: 0.4160
+    mean_colon_suffix_density: 1.7729
+    mean_dot_count: -1.1679
+    mean_id_nonalpha_suffix_density: 0.1908
+  readability:
+    mean_avg_line_length: 0.4657
+    mean_avg_sub_words_per_id: 0.0312
+    mean_avg_tokens_per_line: 0.6155
+    mean_flesch_adapted: -0.0682
+    mean_fog_adapted: 0.5360
+    mean_total_lines: -0.5296
+  symbol_density:
+    mean_density: 0.3167
+    mean_distinct_symbol_types: 0.0364
+    mean_symbol_count: 0.2350
+  vocabulary:
+    mean_mattr: -0.0068
+    mean_raw_ttr: -0.0068
+    mean_total_identifiers: -0.0117
+    mean_unique_identifiers: -0.0185
+  vowel_density:
+    mean_total_chars: 0.0632
+  zipf:
+    mean_exponent: 0.0320
+    mean_r_squared: 0.0133
+    mean_total_tokens: 0.0859
+    mean_vocab_size: 0.0095
+
diff --git a/priv/combined_metrics/naming_conventions.yml b/priv/combined_metrics/naming_conventions.yml
new file mode 100644
index 00000000..83ce0781
--- /dev/null
+++ b/priv/combined_metrics/naming_conventions.yml
@@ -0,0 +1,268 @@
+class_name_is_noun:
+  _doc: "Class and module names should be nouns describing what they represent, not verbs or gerunds."
+  _languages: [elixir]
+  _log_baseline: 2.9861
+  brevity:
+    mean_sample_size: 0.7106
+  compression:
+    mean_raw_bytes: 0.1346
+    mean_redundancy: -0.0605
+    mean_zlib_bytes: 0.2139
+    mean_zlib_ratio: -0.0794
+  entropy:
+    mean_token_entropy: 0.1236
+    mean_token_max_entropy: 0.1716
+    mean_token_normalized: -0.0484
+    mean_vocab_size: 0.7106
+  halstead:
+    mean_difficulty: -1.1493
+    mean_effort: -0.9669
+    mean_estimated_bugs: 0.1818
+    mean_n2_unique_operands: 1.1492
+    mean_time_to_implement_seconds: -0.9669
+    mean_vocabulary: 0.7462
+    mean_volume: 0.1823
+  heaps:
+    mean_beta: 0.4086
+    mean_k: -0.6266
+  identifier_length_variance:
+    mean_max: -0.4031
+    mean_mean: 0.3287
+    mean_std_dev: -0.8347
+    mean_variance: -1.6695
+  ngram:
+    mean_bigram_hapax_fraction: 0.2542
+    mean_bigram_repeated_unique: -0.5967
+    mean_bigram_repetition_rate: -0.9599
+    mean_bigram_unique: 0.6173
+    mean_trigram_hapax_fraction: 0.2449
+    mean_trigram_repeated_unique: -2.0000
+    mean_trigram_repetition_rate: -1.9547
+    mean_trigram_unique: 0.6002
+  punctuation_density:
+    mean_exclamation_density: -0.3314
+  readability:
+    mean_avg_line_length: 0.1418
+  symbol_density:
+    mean_density: -0.1381
+  vocabulary:
+    mean_mattr: 1.4020
+    mean_raw_ttr: 1.4020
+    mean_unique_identifiers: 1.4020
+  vowel_density:
+    mean_total_chars: 0.3287
+  zipf:
+    mean_exponent: -0.2180
+    mean_vocab_size: 0.7106
+
+file_name_matches_primary_export:
+  _doc: "The file name should match the primary class or module it exports (e.g. `user.js` exports `User`)."
+  _fix_hint: "Rename the file to match the primary module it defines"
+  _languages: [elixir]
+  _log_baseline: 0.0000
+  casing_entropy:
+    mean_pascal_case_count: 0.0000
+  vocabulary:
+    mean_unique_identifiers: 0.0000
+
+function_name_is_not_single_word:
+  _doc: "Single-word function names like `run`, `process`, or `handle` are too vague to convey intent."
+  _languages: [elixir]
+  _log_baseline: 17.8470
+  compression:
+    mean_raw_bytes: 0.2434
+    mean_redundancy: 0.0776
+    mean_zlib_bytes: 0.1029
+    mean_zlib_ratio: 0.1405
+  entropy:
+    mean_char_entropy: 0.0241
+    mean_char_normalized: 0.0241
+  identifier_length_variance:
+    mean_max: 0.7685
+    mean_mean: 0.5825
+    mean_std_dev: 1.0000
+    mean_variance: 2.0000
+  readability:
+    mean_avg_line_length: 0.2559
+    mean_avg_sub_words_per_id: 0.3083
+    mean_flesch_adapted: -0.3181
+    mean_fog_adapted: 1.3258
+  symbol_density:
+    mean_density: -0.2431
+  vowel_density:
+    mean_total_chars: 0.5825
+
+function_name_matches_return_type:
+  _doc: "Functions prefixed with `get_`, `fetch_`, or `find_` should return the thing they name."
+  _languages: [elixir]
+  _log_baseline: 7.5638
+  branching:
+    mean_max_nesting_depth: 0.1335
+  brevity:
+    mean_sample_size: 0.0257
+  casing_entropy:
+    mean_entropy: 0.0310
+    mean_other_count: 0.0347
+    mean_snake_case_count: -0.0296
+  compression:
+    mean_raw_bytes: -0.0190
+    mean_redundancy: -0.0180
+    mean_unique_line_ratio: -0.0104
+    mean_zlib_bytes: 0.0143
+    mean_zlib_ratio: -0.0332
+  entropy:
+    mean_char_entropy: 0.0079
+    mean_char_max_entropy: 0.0071
+    mean_token_max_entropy: 0.0059
+    mean_token_normalized: -0.0045
+    mean_total_tokens: 0.0030
+    mean_vocab_size: 0.0257
+  halstead:
+    mean_N1_total_operators: 0.0392
+    mean_N2_total_operands: -0.0539
+    mean_difficulty: 0.0029
+    mean_effort: 0.0080
+    mean_estimated_bugs: 0.0050
+    mean_n1_unique_operators: 0.0629
+    mean_n2_unique_operands: 0.0060
+    mean_time_to_implement_seconds: 0.0080
+    mean_vocabulary: 0.0218
+    mean_volume: 0.0050
+  heaps:
+    mean_beta: 0.0291
+    mean_k: -0.0519
+    mean_r_squared: 0.0038
+  identifier_length_variance:
+    mean_max: 0.1082
+    mean_std_dev: 0.0326
+    mean_variance: 0.0653
+  line_patterns:
+    mean_max_nesting_depth: 0.1335
+    mean_string_literal_ratio: -0.0027
+    mean_unique_line_ratio: -0.0108
+  magic_number_density:
+    mean_density: -0.0108
+    mean_string_literal_ratio: -0.0027
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d0: -0.5899
+    mean_near_dup_block_d5: -0.2282
+    mean_near_dup_block_d7: 0.2282
+    mean_sub_block_count: 0.0314
+  ngram:
+    mean_bigram_hapax_fraction: 0.0106
+    mean_bigram_repeated_unique: 0.0095
+    mean_bigram_repetition_rate: -0.0167
+    mean_bigram_total: 0.0030
+    mean_bigram_unique: 0.0261
+    mean_trigram_hapax_fraction: 0.0174
+    mean_trigram_repeated_unique: -0.0297
+    mean_trigram_repetition_rate: -0.0444
+    mean_trigram_total: 0.0030
+    mean_trigram_unique: 0.0245
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.1048
+    mean_colon_suffix_density: -0.0027
+    mean_dot_count: 0.1335
+    mean_id_nonalpha_suffix_density: 0.0266
+    mean_question_mark_density: -2.0000
+  readability:
+    mean_avg_line_length: 0.0175
+    mean_avg_sub_words_per_id: 0.0087
+    mean_avg_tokens_per_line: 0.0030
+    mean_flesch_adapted: -0.0107
+    mean_fog_adapted: 0.0058
+  symbol_density:
+    mean_density: 0.0633
+    mean_distinct_symbol_types: 0.0639
+    mean_symbol_count: 0.0442
+  vocabulary:
+    mean_mattr: 0.0350
+    mean_raw_ttr: 0.0299
+    mean_total_identifiers: -0.0225
+    mean_unique_identifiers: 0.0074
+  vowel_density:
+    mean_total_chars: -0.0235
+  zipf:
+    mean_exponent: -0.0047
+    mean_r_squared: 0.0105
+    mean_total_tokens: 0.0030
+    mean_vocab_size: 0.0257
+
+test_name_starts_with_verb:
+  _doc: "Test descriptions should start with a verb: `creates`, `raises`, `returns`, not a noun phrase."
+  _languages: [elixir]
+  _log_baseline: 7.8915
+  branching:
+    mean_branch_count: 1.9977
+    mean_branching_density: 2.0000
+  brevity:
+    mean_sample_size: 0.0694
+  casing_entropy:
+    mean_entropy: -0.0711
+    mean_snake_case_count: 0.1381
+  compression:
+    mean_raw_bytes: 0.0914
+    mean_redundancy: 0.0182
+    mean_zlib_bytes: 0.0482
+    mean_zlib_ratio: 0.0431
+  entropy:
+    mean_char_max_entropy: 0.0064
+    mean_char_normalized: -0.0121
+    mean_token_entropy: 0.0259
+    mean_token_max_entropy: 0.0155
+    mean_token_normalized: 0.0104
+    mean_total_tokens: 0.0600
+    mean_vocab_size: 0.0694
+  halstead:
+    mean_N1_total_operators: 0.0411
+    mean_difficulty: 0.0577
+    mean_effort: 0.0855
+    mean_estimated_bugs: 0.0277
+    mean_length: 0.0240
+    mean_n1_unique_operators: 0.0577
+    mean_time_to_implement_seconds: 0.0855
+    mean_vocabulary: 0.0164
+    mean_volume: 0.0278
+  heaps:
+    mean_beta: -0.0149
+    mean_k: 0.0795
+    mean_r_squared: -0.0081
+  identifier_length_variance:
+    mean_std_dev: -0.0192
+    mean_variance: -0.0384
+  line_patterns:
+    mean_string_literal_ratio: -0.0611
+  magic_number_density:
+    mean_string_literal_ratio: -0.0611
+  ngram:
+    mean_bigram_hapax_fraction: -0.0506
+    mean_bigram_repeated_unique: 0.1209
+    mean_bigram_repetition_rate: 0.0150
+    mean_bigram_total: 0.0602
+    mean_bigram_unique: 0.0621
+    mean_trigram_hapax_fraction: -0.0206
+    mean_trigram_repeated_unique: 0.0961
+    mean_trigram_repetition_rate: 0.0117
+    mean_trigram_total: 0.0603
+    mean_trigram_unique: 0.0596
+  punctuation_density:
+    mean_arrow_density: -0.1129
+    mean_colon_suffix_density: -0.0591
+    mean_id_nonalpha_suffix_density: -0.0602
+  readability:
+    mean_avg_line_length: 0.0943
+    mean_avg_tokens_per_line: 0.0600
+    mean_fog_adapted: 0.0600
+  symbol_density:
+    mean_density: -0.0912
+  vocabulary:
+    mean_mattr: 0.0463
+    mean_total_identifiers: 0.1129
+    mean_unique_identifiers: 0.1161
+  vowel_density:
+    mean_total_chars: 0.1122
+  zipf:
+    mean_exponent: -0.0239
+    mean_total_tokens: 0.0600
+    mean_vocab_size: 0.0694
+
diff --git a/priv/combined_metrics/scope_and_assignment.yml b/priv/combined_metrics/scope_and_assignment.yml
new file mode 100644
index 00000000..c33ac845
--- /dev/null
+++ b/priv/combined_metrics/scope_and_assignment.yml
@@ -0,0 +1,674 @@
+declared_close_to_use:
+  _doc: "Variables should be declared near their first use, not hoisted to the top of the function."
+  _log_baseline: -44.7729
+  branching:
+    mean_branch_count: -0.3390
+    mean_branching_density: -0.1842
+    mean_non_blank_count: -0.1592
+  brevity:
+    mean_sample_size: -0.1806
+  casing_entropy:
+    mean_camel_case_count: -0.2981
+    mean_entropy: 0.0653
+    mean_other_count: -0.0412
+    mean_pascal_case_count: -0.0760
+    mean_snake_case_count: -0.3047
+  comment_structure:
+    mean_comment_line_count: -2.0000
+    mean_comment_line_ratio: -1.8774
+  compression:
+    mean_raw_bytes: -0.2184
+    mean_redundancy: -0.0281
+    mean_unique_line_ratio: 0.0060
+    mean_zlib_bytes: -0.1781
+    mean_zlib_ratio: -0.0415
+  entropy:
+    mean_char_entropy: 0.0068
+    mean_char_max_entropy: -0.0078
+    mean_char_normalized: 0.0146
+    mean_token_entropy: -0.0400
+    mean_token_max_entropy: -0.0376
+    mean_total_tokens: -0.1836
+    mean_vocab_size: -0.1806
+  function_metrics:
+    mean_avg_function_lines: -0.1689
+    mean_max_function_lines: -0.1306
+  halstead:
+    mean_N1_total_operators: -0.1260
+    mean_N2_total_operands: -0.2537
+    mean_difficulty: -0.1345
+    mean_effort: -0.3512
+    mean_estimated_bugs: -0.2227
+    mean_length: -0.1843
+    mean_n1_unique_operators: -0.0976
+    mean_n2_unique_operands: -0.2209
+    mean_time_to_implement_seconds: -0.3512
+    mean_vocabulary: -0.1908
+    mean_volume: -0.2227
+  heaps:
+    mean_beta: -0.0243
+  identifier_length_variance:
+    mean_mean: 0.0266
+    mean_std_dev: 0.0070
+    mean_variance: 0.0144
+  indentation:
+    mean_blank_line_ratio: 0.0709
+    mean_mean_depth: -0.0132
+    mean_variance: 0.0652
+  line_patterns:
+    mean_blank_line_ratio: 0.0709
+    mean_string_literal_ratio: 0.1211
+  magic_number_density:
+    mean_density: -0.2075
+    mean_magic_number_count: -0.3961
+    mean_string_literal_ratio: 0.1211
+  near_duplicate_blocks_file:
+    mean_block_count: -0.0380
+    mean_near_dup_block_d0: -0.7925
+    mean_near_dup_block_d8: 1.1610
+    mean_sub_block_count: 0.0116
+  ngram:
+    mean_bigram_repeated_unique: -0.1810
+    mean_bigram_repetition_rate: 0.0038
+    mean_bigram_total: -0.1839
+    mean_bigram_unique: -0.1871
+    mean_trigram_hapax_fraction: -0.0077
+    mean_trigram_repeated_unique: -0.1259
+    mean_trigram_repetition_rate: 0.0540
+    mean_trigram_total: -0.1843
+    mean_trigram_unique: -0.1904
+  punctuation_density:
+    mean_arrow_density: 0.1987
+    mean_bracket_nonalpha_prefix_count: -0.1993
+    mean_bracket_nonalpha_suffix_count: -0.0283
+    mean_colon_suffix_density: 0.1708
+    mean_id_nonalpha_suffix_density: 0.0916
+    mean_question_mark_density: 0.5000
+  readability:
+    mean_avg_line_length: -0.0057
+    mean_avg_sub_words_per_id: 0.0055
+    mean_avg_tokens_per_line: -0.0774
+    mean_fog_adapted: -0.0368
+    mean_total_lines: -0.1115
+  separator_counts:
+    mean_slash_count: -1.4037
+    mean_underscore_count: -0.1887
+  symbol_density:
+    mean_density: 0.0980
+    mean_distinct_symbol_types: -0.0329
+    mean_symbol_count: -0.1211
+  vocabulary:
+    mean_mattr: -0.0686
+    mean_raw_ttr: 0.0736
+    mean_total_identifiers: -0.2738
+    mean_unique_identifiers: -0.2075
+  vowel_density:
+    mean_total_chars: -0.2512
+  zipf:
+    mean_exponent: -0.0102
+    mean_total_tokens: -0.1836
+    mean_vocab_size: -0.1806
+
+mutated_after_initial_assignment:
+  _doc: "Variables should not be reassigned after their initial value — prefer introducing a new name."
+  _log_baseline: 6.2569
+  branching:
+    mean_branch_count: 0.1519
+    mean_branching_density: 0.2073
+    mean_max_nesting_depth: 0.0856
+    mean_non_blank_count: -0.0553
+  brevity:
+    mean_sample_size: 0.0068
+  casing_entropy:
+    mean_entropy: -0.0947
+    mean_pascal_case_count: -0.2061
+    mean_snake_case_count: -0.0436
+  compression:
+    mean_raw_bytes: -0.0496
+    mean_redundancy: -0.0291
+    mean_unique_line_ratio: -0.0110
+    mean_zlib_bytes: 0.0022
+    mean_zlib_ratio: -0.0518
+  entropy:
+    mean_char_entropy: -0.0034
+    mean_char_max_entropy: 0.0039
+    mean_char_normalized: -0.0074
+    mean_token_entropy: 0.0082
+    mean_token_max_entropy: 0.0015
+    mean_token_normalized: 0.0067
+    mean_total_tokens: -0.0392
+    mean_vocab_size: 0.0068
+  function_metrics:
+    mean_avg_function_lines: -0.1001
+    mean_max_function_lines: -0.0511
+  halstead:
+    mean_N1_total_operators: -0.0218
+    mean_N2_total_operands: -0.0623
+    mean_difficulty: -0.0341
+    mean_effort: -0.0690
+    mean_estimated_bugs: -0.0348
+    mean_length: -0.0382
+    mean_n1_unique_operators: 0.0337
+    mean_n2_unique_operands: 0.0056
+    mean_time_to_implement_seconds: -0.0690
+    mean_vocabulary: 0.0149
+    mean_volume: -0.0348
+  heaps:
+    mean_beta: 0.0300
+    mean_k: -0.0775
+    mean_r_squared: 0.0063
+  identifier_length_variance:
+    mean_mean: -0.0249
+    mean_std_dev: 0.0286
+    mean_variance: 0.0571
+  indentation:
+    mean_blank_line_ratio: -0.1139
+    mean_max_depth: 0.2725
+    mean_mean_depth: 0.0979
+    mean_variance: 0.5878
+  line_patterns:
+    mean_blank_line_ratio: -0.1139
+    mean_max_nesting_depth: 0.0856
+    mean_string_literal_ratio: 0.0397
+    mean_unique_line_ratio: -0.0203
+  magic_number_density:
+    mean_density: -0.6790
+    mean_magic_number_count: -0.7131
+    mean_string_literal_ratio: 0.0397
+  near_duplicate_blocks_file:
+    mean_sub_block_count: -0.1967
+  ngram:
+    mean_bigram_hapax_fraction: 0.0087
+    mean_bigram_repeated_unique: -0.0100
+    mean_bigram_repetition_rate: -0.0293
+    mean_bigram_total: -0.0393
+    mean_bigram_unique: 0.0068
+    mean_trigram_hapax_fraction: 0.0021
+    mean_trigram_repeated_unique: -0.0086
+    mean_trigram_repetition_rate: -0.0546
+    mean_trigram_total: -0.0394
+  punctuation_density:
+    mean_arrow_density: -1.5022
+    mean_bracket_nonalpha_prefix_count: 0.0497
+    mean_bracket_nonalpha_suffix_count: 0.4473
+    mean_colon_suffix_density: 0.3529
+    mean_dot_count: -0.1332
+    mean_exclamation_density: -2.0000
+    mean_id_nonalpha_suffix_density: 0.0542
+  readability:
+    mean_avg_line_length: 0.0073
+    mean_avg_sub_words_per_id: -0.0018
+    mean_avg_tokens_per_line: 0.0161
+    mean_fog_adapted: 0.0169
+    mean_total_lines: -0.0553
+  separator_counts:
+    mean_dot_count: -0.1332
+    mean_hyphen_count: 0.3267
+    mean_underscore_count: -0.0617
+  symbol_density:
+    mean_density: 0.0095
+    mean_distinct_symbol_types: 0.0436
+    mean_symbol_count: -0.0402
+  vocabulary:
+    mean_mattr: 0.0885
+    mean_raw_ttr: 0.0564
+    mean_total_identifiers: -0.0623
+    mean_unique_identifiers: -0.0059
+  vowel_density:
+    mean_total_chars: -0.0872
+  zipf:
+    mean_exponent: -0.0305
+    mean_r_squared: 0.0040
+    mean_total_tokens: -0.0392
+    mean_vocab_size: 0.0068
+
+reassigned_multiple_times:
+  _doc: "A variable reassigned many times is a sign the name is too generic or the function does too much."
+  _log_baseline: -6.7462
+  branching:
+    mean_max_nesting_depth: 0.0680
+    mean_non_blank_count: 0.0226
+  brevity:
+    mean_sample_size: 0.0344
+  casing_entropy:
+    mean_entropy: 0.0328
+    mean_other_count: 0.1073
+    mean_pascal_case_count: -0.1015
+    mean_screaming_snake_density: -2.0000
+    mean_snake_case_count: -0.1201
+  compression:
+    mean_raw_bytes: -0.0613
+    mean_redundancy: -0.0497
+    mean_unique_line_ratio: -0.0222
+    mean_zlib_bytes: 0.0248
+    mean_zlib_ratio: -0.0851
+  entropy:
+    mean_char_entropy: 0.0021
+    mean_char_max_entropy: 0.0066
+    mean_char_normalized: -0.0041
+    mean_token_entropy: 0.0105
+    mean_token_max_entropy: 0.0072
+    mean_token_normalized: 0.0033
+    mean_total_tokens: -0.0838
+    mean_vocab_size: 0.0344
+  function_metrics:
+    mean_avg_function_lines: 0.5612
+    mean_function_count: 0.0833
+    mean_max_function_lines: 0.5399
+  halstead:
+    mean_N1_total_operators: -0.0755
+    mean_N2_total_operands: -0.1513
+    mean_difficulty: -0.1428
+    mean_effort: -0.2522
+    mean_estimated_bugs: -0.1029
+    mean_length: -0.1057
+    mean_n1_unique_operators: 0.0270
+    mean_n2_unique_operands: 0.0094
+    mean_time_to_implement_seconds: -0.2522
+    mean_vocabulary: 0.0139
+    mean_volume: -0.1029
+  heaps:
+    mean_beta: 0.0677
+    mean_k: -0.1329
+    mean_r_squared: 0.0059
+  identifier_length_variance:
+    mean_max: 0.0594
+    mean_mean: 0.0509
+    mean_std_dev: 0.1441
+    mean_variance: 0.2867
+  indentation:
+    mean_blank_line_ratio: 0.0585
+    mean_max_depth: 0.1513
+    mean_mean_depth: 0.0393
+    mean_variance: 0.2588
+  line_patterns:
+    mean_blank_line_ratio: 0.0585
+    mean_max_nesting_depth: 0.0680
+    mean_string_literal_ratio: 0.1161
+    mean_unique_line_ratio: -0.0221
+  magic_number_density:
+    mean_density: -0.1020
+    mean_magic_number_count: -0.1906
+    mean_string_literal_ratio: 0.1161
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0393
+    mean_near_dup_block_d1: -0.2586
+    mean_near_dup_block_d8: -0.2586
+    mean_sub_block_count: -0.0637
+  ngram:
+    mean_bigram_hapax_fraction: 0.0798
+    mean_bigram_repeated_unique: -0.1513
+    mean_bigram_repetition_rate: -0.0805
+    mean_bigram_total: -0.0839
+    mean_bigram_unique: 0.0180
+    mean_trigram_hapax_fraction: 0.0653
+    mean_trigram_repeated_unique: -0.2098
+    mean_trigram_repetition_rate: -0.1158
+    mean_trigram_total: -0.0841
+    mean_trigram_unique: -0.0052
+  punctuation_density:
+    mean_arrow_density: 0.2665
+    mean_bracket_nonalpha_prefix_count: -0.0325
+    mean_bracket_nonalpha_suffix_count: 0.1513
+    mean_bracket_number_pair_count: -0.1906
+    mean_colon_suffix_density: 0.0229
+    mean_dot_count: -0.1300
+    mean_id_nonalpha_suffix_density: -0.0584
+  readability:
+    mean_avg_line_length: -0.0872
+    mean_avg_sub_words_per_id: 0.0183
+    mean_avg_tokens_per_line: -0.1102
+    mean_flesch_adapted: -0.0030
+    mean_fog_adapted: -0.1102
+    mean_total_lines: 0.0226
+  separator_counts:
+    mean_dot_count: -0.1300
+    mean_hyphen_count: 0.2586
+    mean_slash_count: -0.2586
+    mean_underscore_count: 0.2460
+  symbol_density:
+    mean_density: -0.0051
+    mean_distinct_symbol_types: 0.0118
+    mean_symbol_count: -0.0622
+  vocabulary:
+    mean_mattr: 0.1928
+    mean_raw_ttr: 0.1782
+    mean_total_identifiers: -0.1113
+    mean_unique_identifiers: 0.0580
+  vowel_density:
+    mean_total_chars: -0.0602
+  zipf:
+    mean_exponent: -0.0522
+    mean_r_squared: 0.0034
+    mean_total_tokens: -0.0838
+    mean_vocab_size: 0.0344
+
+scope_is_minimal:
+  _doc: "Variables should be scoped as narrowly as possible — not declared at a wider scope than needed."
+  _log_baseline: -7.6942
+  branching:
+    mean_branch_count: -0.1072
+    mean_branching_density: -0.0452
+    mean_non_blank_count: -0.0619
+  brevity:
+    mean_sample_size: -0.0368
+  casing_entropy:
+    mean_entropy: -0.0341
+    mean_other_count: -0.1823
+    mean_snake_case_count: -0.0471
+  comment_structure:
+    mean_comment_line_count: -0.4075
+    mean_comment_line_ratio: 2.0000
+  compression:
+    mean_raw_bytes: -0.1109
+    mean_redundancy: -0.0367
+    mean_unique_line_ratio: -0.1005
+    mean_zlib_bytes: -0.0343
+    mean_zlib_ratio: -0.0767
+  entropy:
+    mean_char_entropy: 0.0481
+    mean_char_normalized: 0.0481
+    mean_token_entropy: 0.0037
+    mean_token_max_entropy: -0.0081
+    mean_token_normalized: 0.0118
+    mean_total_tokens: 0.0143
+    mean_vocab_size: -0.0368
+  function_metrics:
+    mean_avg_function_lines: -0.2553
+    mean_avg_param_count: -0.1037
+    mean_function_count: 0.1691
+    mean_max_function_lines: -0.8149
+    mean_max_param_count: -0.4075
+  halstead:
+    mean_N1_total_operators: 0.0334
+    mean_N2_total_operands: -0.0363
+    mean_difficulty: 0.1281
+    mean_effort: 0.1258
+    mean_length: 0.0079
+    mean_n1_unique_operators: 0.0751
+    mean_n2_unique_operands: -0.0892
+    mean_time_to_implement_seconds: 0.1258
+    mean_vocabulary: -0.0460
+  heaps:
+    mean_beta: -0.0099
+    mean_k: -0.0341
+    mean_r_squared: 0.0103
+  identifier_length_variance:
+    mean_mean: -0.0320
+    mean_std_dev: 0.0864
+    mean_variance: 0.1729
+  indentation:
+    mean_blank_line_ratio: -0.1076
+    mean_max_depth: -0.4694
+    mean_mean_depth: -0.4430
+    mean_variance: -1.1640
+  line_patterns:
+    mean_blank_line_ratio: -0.1076
+    mean_string_literal_ratio: -0.0144
+    mean_unique_line_ratio: -0.0307
+  magic_number_density:
+    mean_string_literal_ratio: -0.0144
+  near_duplicate_blocks_file:
+    mean_block_count: -0.1691
+    mean_sub_block_count: 0.0447
+  ngram:
+    mean_bigram_hapax_fraction: -0.0078
+    mean_bigram_repeated_unique: 0.0406
+    mean_bigram_total: 0.0144
+    mean_bigram_unique: 0.0265
+    mean_trigram_hapax_fraction: 0.0113
+    mean_trigram_repeated_unique: 0.0074
+    mean_trigram_repetition_rate: -0.0382
+    mean_trigram_total: 0.0144
+    mean_trigram_unique: 0.0419
+  punctuation_density:
+    mean_arrow_density: -0.2974
+    mean_bracket_nonalpha_prefix_count: 0.7364
+    mean_bracket_nonalpha_suffix_count: 0.1691
+    mean_colon_suffix_density: -0.1217
+    mean_dot_count: 0.3290
+    mean_id_nonalpha_suffix_density: 0.0100
+  readability:
+    mean_avg_line_length: -0.0336
+    mean_avg_sub_words_per_id: -0.0359
+    mean_avg_tokens_per_line: 0.0563
+    mean_flesch_adapted: 0.0319
+    mean_fog_adapted: 0.0564
+    mean_total_lines: -0.0420
+  separator_counts:
+    mean_dot_count: 0.3290
+    mean_underscore_count: -0.1941
+  symbol_density:
+    mean_density: 0.1427
+    mean_distinct_symbol_types: 0.0336
+    mean_symbol_count: 0.0316
+  vocabulary:
+    mean_raw_ttr: -0.0313
+    mean_total_identifiers: -0.0577
+    mean_unique_identifiers: -0.0891
+  vowel_density:
+    mean_total_chars: -0.0897
+  zipf:
+    mean_exponent: 0.0070
+    mean_total_tokens: 0.0143
+    mean_vocab_size: -0.0368
+
+shadowed_by_inner_scope:
+  _doc: "Inner-scope names that shadow outer-scope names cause confusion about which value is in play."
+  _log_baseline: -32.0799
+  branching:
+    mean_branching_density: 2.0000
+    mean_max_nesting_depth: -0.1450
+    mean_non_blank_count: -0.1418
+  brevity:
+    mean_sample_size: -0.0786
+  casing_entropy:
+    mean_entropy: 0.1132
+    mean_pascal_case_count: -0.0306
+    mean_snake_case_count: -0.2452
+  comment_structure:
+    mean_comment_line_count: -1.1073
+    mean_comment_line_ratio: 0.8936
+  compression:
+    mean_raw_bytes: -0.1107
+    mean_unique_line_ratio: -0.0068
+    mean_zlib_bytes: -0.1099
+  entropy:
+    mean_char_entropy: 0.0202
+    mean_char_max_entropy: -0.0068
+    mean_char_normalized: 0.0270
+    mean_token_entropy: -0.0147
+    mean_token_max_entropy: -0.0178
+    mean_token_normalized: 0.0030
+    mean_total_tokens: -0.1540
+    mean_vocab_size: -0.0786
+  function_metrics:
+    mean_avg_function_lines: -0.1314
+    mean_max_function_lines: -0.2043
+  halstead:
+    mean_N1_total_operators: -0.0341
+    mean_N2_total_operands: -0.2093
+    mean_difficulty: -0.1558
+    mean_effort: -0.2907
+    mean_estimated_bugs: -0.1349
+    mean_length: -0.1141
+    mean_n1_unique_operators: -0.0504
+    mean_n2_unique_operands: -0.1040
+    mean_time_to_implement_seconds: -0.2907
+    mean_vocabulary: -0.0894
+    mean_volume: -0.1349
+  heaps:
+    mean_beta: 0.0296
+    mean_k: -0.0817
+    mean_r_squared: -0.0030
+  identifier_length_variance:
+    mean_mean: 0.1622
+    mean_std_dev: 0.2441
+    mean_variance: 0.4883
+  indentation:
+    mean_blank_line_ratio: 0.0798
+    mean_mean_depth: -0.0595
+    mean_variance: -0.0634
+  line_patterns:
+    mean_blank_line_ratio: 0.0798
+    mean_max_nesting_depth: -0.1450
+    mean_string_literal_ratio: 0.1539
+    mean_unique_line_ratio: -0.0187
+  magic_number_density:
+    mean_density: 0.1539
+    mean_string_literal_ratio: 0.1539
+  ngram:
+    mean_bigram_hapax_fraction: 0.0443
+    mean_bigram_repeated_unique: -0.2754
+    mean_bigram_repetition_rate: -0.0922
+    mean_bigram_total: -0.1545
+    mean_bigram_unique: -0.1141
+    mean_trigram_hapax_fraction: 0.0321
+    mean_trigram_repeated_unique: -0.4412
+    mean_trigram_repetition_rate: -0.2046
+    mean_trigram_total: -0.1550
+    mean_trigram_unique: -0.1252
+  punctuation_density:
+    mean_arrow_density: 0.1539
+    mean_colon_suffix_density: 0.1539
+    mean_dot_count: -0.0206
+    mean_id_nonalpha_suffix_density: 0.0780
+  readability:
+    mean_avg_line_length: 0.0827
+    mean_avg_sub_words_per_id: 0.0859
+    mean_avg_tokens_per_line: -0.1060
+    mean_flesch_adapted: -0.0728
+    mean_fog_adapted: -0.1060
+    mean_total_lines: -0.0480
+  separator_counts:
+    mean_dot_count: -0.0206
+    mean_underscore_count: 0.6826
+  symbol_density:
+    mean_density: -0.0169
+    mean_distinct_symbol_types: -0.0561
+    mean_symbol_count: -0.1275
+  vocabulary:
+    mean_mattr: 0.1103
+    mean_raw_ttr: 0.1343
+    mean_total_identifiers: -0.2135
+    mean_unique_identifiers: -0.0792
+  vowel_density:
+    mean_total_chars: -0.0513
+  zipf:
+    mean_exponent: -0.0364
+    mean_r_squared: 0.0058
+    mean_total_tokens: -0.1540
+    mean_vocab_size: -0.0786
+
+used_only_once:
+  _doc: "A variable used only once is a candidate for inlining — it rarely adds clarity over a direct expression."
+  _log_baseline: -37.0606
+  branching:
+    mean_branch_count: -0.2490
+    mean_branching_density: -0.4526
+    mean_max_nesting_depth: 0.3756
+    mean_non_blank_count: -0.1648
+  brevity:
+    mean_sample_size: -0.1367
+  casing_entropy:
+    mean_camel_case_count: -0.6401
+    mean_entropy: 0.1031
+    mean_snake_case_count: -0.3410
+  comment_structure:
+    mean_comment_line_ratio: 0.2167
+  compression:
+    mean_raw_bytes: -0.2646
+    mean_redundancy: -0.0574
+    mean_unique_line_ratio: -0.0557
+    mean_zlib_bytes: -0.1898
+    mean_zlib_ratio: -0.0716
+  entropy:
+    mean_token_entropy: -0.0310
+    mean_token_max_entropy: -0.0279
+    mean_total_tokens: -0.1912
+    mean_vocab_size: -0.1367
+  function_metrics:
+    mean_avg_function_lines: -0.0842
+    mean_max_function_lines: -0.0181
+  halstead:
+    mean_N1_total_operators: -0.1122
+    mean_N2_total_operands: -0.3657
+    mean_difficulty: -0.0847
+    mean_effort: -0.2955
+    mean_estimated_bugs: -0.2562
+    mean_length: -0.2119
+    mean_n1_unique_operators: 0.0156
+    mean_n2_unique_operands: -0.2778
+    mean_time_to_implement_seconds: -0.2955
+    mean_vocabulary: -0.2073
+    mean_volume: -0.2563
+  heaps:
+    mean_beta: 0.0237
+    mean_k: -0.0587
+    mean_r_squared: -0.0061
+  identifier_length_variance:
+    mean_mean: -0.0354
+    mean_std_dev: 0.0671
+    mean_variance: 0.1339
+  indentation:
+    mean_blank_line_ratio: 0.1012
+    mean_max_depth: 0.5085
+    mean_mean_depth: 0.1325
+    mean_variance: 0.8783
+  line_patterns:
+    mean_blank_line_ratio: 0.1012
+    mean_max_nesting_depth: 0.3756
+    mean_string_literal_ratio: 0.1006
+    mean_unique_line_ratio: -0.0692
+  magic_number_density:
+    mean_density: -0.6247
+    mean_magic_number_count: -0.7735
+    mean_string_literal_ratio: 0.1006
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d0: 0.4526
+    mean_near_dup_block_d1: 2.0000
+    mean_near_dup_block_d7: 0.6562
+    mean_sub_block_count: -0.3167
+  ngram:
+    mean_bigram_hapax_fraction: -0.0797
+    mean_bigram_repeated_unique: 0.0377
+    mean_bigram_repetition_rate: 0.1844
+    mean_bigram_total: -0.1916
+    mean_bigram_unique: -0.2779
+    mean_trigram_hapax_fraction: -0.0334
+    mean_trigram_repeated_unique: 0.1364
+    mean_trigram_repetition_rate: 0.2670
+    mean_trigram_total: -0.1921
+    mean_trigram_unique: -0.2343
+  punctuation_density:
+    mean_arrow_density: 0.2929
+    mean_bracket_nonalpha_prefix_count: -0.1228
+    mean_bracket_nonalpha_suffix_count: 0.1242
+    mean_colon_suffix_density: 0.1986
+    mean_id_nonalpha_suffix_density: 0.0506
+    mean_question_mark_density: 0.2491
+  readability:
+    mean_avg_line_length: -0.1265
+    mean_avg_tokens_per_line: -0.0534
+    mean_fog_adapted: 0.0120
+    mean_total_lines: -0.1653
+  separator_counts:
+    mean_hyphen_count: 0.3212
+    mean_underscore_count: -0.1733
+  symbol_density:
+    mean_density: 0.1222
+    mean_distinct_symbol_types: -0.0098
+    mean_symbol_count: -0.1384
+  vocabulary:
+    mean_mattr: 0.0895
+    mean_raw_ttr: 0.1161
+    mean_total_identifiers: -0.2782
+    mean_unique_identifiers: -0.2003
+  vowel_density:
+    mean_total_chars: -0.3381
+  zipf:
+    mean_exponent: 0.0324
+    mean_r_squared: -0.0076
+    mean_total_tokens: -0.1912
+    mean_vocab_size: -0.1367
+
diff --git a/priv/combined_metrics/testing.yml b/priv/combined_metrics/testing.yml
new file mode 100644
index 00000000..1d036f93
--- /dev/null
+++ b/priv/combined_metrics/testing.yml
@@ -0,0 +1,441 @@
+reasonable_test_to_code_ratio:
+  _doc: "There should be an adequate number of test cases relative to the code being tested."
+  _languages: [elixir]
+  _log_baseline: 11.2157
+  branching:
+    mean_branch_count: 0.1869
+    mean_branching_density: 0.0352
+    mean_non_blank_count: 0.1517
+  brevity:
+    mean_sample_size: 0.0290
+  casing_entropy:
+    mean_entropy: 0.0656
+    mean_pascal_case_count: 0.2097
+    mean_snake_case_count: 0.0455
+  comment_structure:
+    mean_comment_line_count: -0.5246
+    mean_comment_line_ratio: 0.5016
+  compression:
+    mean_raw_bytes: 0.1290
+    mean_redundancy: 0.0346
+    mean_unique_line_ratio: -0.0556
+    mean_zlib_bytes: 0.0559
+    mean_zlib_ratio: 0.0732
+  entropy:
+    mean_char_entropy: -0.0074
+    mean_char_normalized: -0.0064
+    mean_token_entropy: 0.0033
+    mean_token_max_entropy: 0.0063
+    mean_token_normalized: -0.0029
+    mean_total_tokens: 0.1093
+    mean_vocab_size: 0.0290
+  function_metrics:
+    mean_avg_function_lines: -0.0273
+    mean_function_count: -0.0257
+    mean_max_function_lines: -0.0776
+  halstead:
+    mean_N1_total_operators: 0.1006
+    mean_N2_total_operands: 0.1082
+    mean_difficulty: 0.0314
+    mean_effort: 0.1446
+    mean_estimated_bugs: 0.1132
+    mean_length: 0.1034
+    mean_n1_unique_operators: -0.0102
+    mean_n2_unique_operands: 0.0667
+    mean_time_to_implement_seconds: 0.1446
+    mean_vocabulary: 0.0443
+    mean_volume: 0.1132
+  heaps:
+    mean_beta: -0.0095
+    mean_k: 0.0107
+    mean_r_squared: 0.0171
+  identifier_length_variance:
+    mean_mean: 0.0278
+    mean_std_dev: 0.0092
+    mean_variance: 0.0185
+  indentation:
+    mean_blank_line_ratio: -0.0175
+    mean_max_depth: 0.1093
+    mean_mean_depth: 0.1424
+    mean_variance: 0.3988
+  line_patterns:
+    mean_blank_line_ratio: -0.0175
+    mean_string_literal_ratio: 0.2536
+    mean_unique_line_ratio: -0.0582
+  magic_number_density:
+    mean_density: 0.3159
+    mean_magic_number_count: 0.4248
+    mean_string_literal_ratio: 0.2536
+  near_duplicate_blocks_file:
+    mean_block_count: 0.3136
+    mean_near_dup_block_d0: 0.6699
+    mean_near_dup_block_d3: 0.1869
+    mean_near_dup_block_d4: 0.5246
+    mean_near_dup_block_d5: 0.1869
+    mean_near_dup_block_d7: 0.7475
+    mean_near_dup_block_d8: 0.1869
+    mean_sub_block_count: 0.0723
+  ngram:
+    mean_bigram_hapax_fraction: -0.0173
+    mean_bigram_repeated_unique: 0.0681
+    mean_bigram_repetition_rate: 0.0346
+    mean_bigram_total: 0.1095
+    mean_bigram_unique: 0.0370
+    mean_trigram_hapax_fraction: -0.0158
+    mean_trigram_repeated_unique: 0.1118
+    mean_trigram_repetition_rate: 0.0487
+    mean_trigram_total: 0.1097
+    mean_trigram_unique: 0.0623
+  punctuation_density:
+    mean_bracket_number_pair_count: 0.1869
+    mean_colon_suffix_density: -0.1100
+    mean_dot_count: 0.2776
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: -0.0296
+  readability:
+    mean_avg_line_length: -0.0303
+    mean_avg_tokens_per_line: -0.0880
+    mean_flesch_adapted: 0.0106
+    mean_fog_adapted: -0.0829
+    mean_total_lines: 0.1973
+  symbol_density:
+    mean_density: -0.0353
+    mean_distinct_symbol_types: -0.0284
+    mean_symbol_count: 0.0960
+  vocabulary:
+    mean_mattr: -0.0271
+    mean_raw_ttr: -0.0488
+    mean_total_identifiers: 0.0704
+    mean_unique_identifiers: 0.0216
+  vowel_density:
+    mean_total_chars: 0.0982
+  zipf:
+    mean_exponent: 0.0408
+    mean_r_squared: -0.0086
+    mean_total_tokens: 0.1093
+    mean_vocab_size: 0.0290
+
+test_has_assertion:
+  _doc: "Every test body must contain at least one assertion — a test without assertions proves nothing."
+  _languages: [elixir]
+  _log_baseline: -10.8081
+  branching:
+    mean_branch_count: 0.0918
+    mean_branching_density: 0.1642
+    mean_non_blank_count: -0.0727
+  brevity:
+    mean_sample_size: -0.0555
+  casing_entropy:
+    mean_entropy: -0.0026
+    mean_other_count: -0.1294
+    mean_pascal_case_count: -0.0450
+    mean_snake_case_count: -0.0568
+  comment_structure:
+    mean_comment_line_count: -0.6211
+    mean_comment_line_ratio: 0.6522
+  compression:
+    mean_raw_bytes: -0.0140
+    mean_redundancy: 0.0137
+    mean_unique_line_ratio: -0.0037
+    mean_zlib_bytes: -0.0369
+    mean_zlib_ratio: 0.0229
+  entropy:
+    mean_char_entropy: 0.0022
+    mean_char_max_entropy: -0.0025
+    mean_char_normalized: 0.0048
+    mean_token_entropy: -0.0124
+    mean_token_max_entropy: -0.0118
+    mean_vocab_size: -0.0555
+  halstead:
+    mean_N1_total_operators: 0.0184
+    mean_N2_total_operands: -0.0481
+    mean_difficulty: 0.0979
+    mean_effort: 0.0682
+    mean_estimated_bugs: -0.0297
+    mean_length: -0.0087
+    mean_n1_unique_operators: 0.0130
+    mean_n2_unique_operands: -0.1329
+    mean_time_to_implement_seconds: 0.0682
+    mean_vocabulary: -0.0965
+    mean_volume: -0.0297
+  heaps:
+    mean_beta: -0.0415
+    mean_k: 0.0917
+    mean_r_squared: -0.0091
+  identifier_length_variance:
+    mean_mean: 0.0215
+    mean_std_dev: 0.0214
+    mean_variance: 0.0429
+  indentation:
+    mean_blank_line_ratio: 0.0261
+    mean_max_depth: -0.1294
+    mean_mean_depth: -0.0289
+    mean_variance: -0.0253
+  line_patterns:
+    mean_blank_line_ratio: 0.0261
+    mean_string_literal_ratio: -0.0139
+    mean_unique_line_ratio: -0.0056
+  magic_number_density:
+    mean_string_literal_ratio: -0.0139
+  near_duplicate_blocks_file:
+    mean_block_count: -0.0376
+    mean_near_dup_block_d0: -0.3507
+    mean_near_dup_block_d5: 0.2212
+    mean_near_dup_block_d6: -0.4425
+    mean_near_dup_block_d7: 0.2212
+    mean_near_dup_block_d8: -0.5719
+    mean_sub_block_count: 0.0228
+  ngram:
+    mean_bigram_hapax_fraction: -0.0509
+    mean_bigram_repeated_unique: 0.0678
+    mean_bigram_repetition_rate: 0.0505
+    mean_bigram_unique: -0.0434
+    mean_trigram_hapax_fraction: -0.0376
+    mean_trigram_repeated_unique: 0.1149
+    mean_trigram_repetition_rate: 0.0832
+    mean_trigram_unique: -0.0336
+  punctuation_density:
+    mean_arrow_density: 2.0000
+    mean_bracket_nonalpha_prefix_count: -0.0712
+    mean_bracket_nonalpha_suffix_count: -0.0492
+    mean_colon_suffix_density: -0.0460
+    mean_dot_count: 0.0108
+    mean_id_nonalpha_suffix_density: 0.0127
+  readability:
+    mean_avg_line_length: 0.0691
+    mean_avg_sub_words_per_id: 0.0042
+    mean_avg_tokens_per_line: 0.0284
+    mean_flesch_adapted: -0.0065
+    mean_fog_adapted: 0.0284
+    mean_total_lines: -0.0284
+  symbol_density:
+    mean_density: 0.0263
+    mean_distinct_symbol_types: -0.0194
+    mean_symbol_count: 0.0126
+  vocabulary:
+    mean_mattr: -0.0607
+    mean_raw_ttr: -0.0243
+    mean_total_identifiers: -0.0553
+    mean_unique_identifiers: -0.0796
+  vowel_density:
+    mean_total_chars: -0.0338
+  zipf:
+    mean_exponent: 0.0248
+    mean_r_squared: -0.0049
+    mean_vocab_size: -0.0555
+
+test_name_describes_behavior:
+  _doc: "Test names should describe the expected behaviour, not just the method under test."
+  _languages: [elixir]
+  _log_baseline: 57.2080
+  branching:
+    mean_branch_count: 2.0000
+    mean_branching_density: -1.5965
+    mean_non_blank_count: 0.2388
+  brevity:
+    mean_sample_size: 0.1814
+  casing_entropy:
+    mean_entropy: -0.1610
+    mean_pascal_case_count: 0.0729
+    mean_snake_case_count: 0.4125
+  compression:
+    mean_raw_bytes: 0.3524
+    mean_redundancy: 0.0412
+    mean_unique_line_ratio: -0.0357
+    mean_zlib_bytes: 0.2483
+    mean_zlib_ratio: 0.1041
+  entropy:
+    mean_char_entropy: -0.0171
+    mean_char_max_entropy: 0.0138
+    mean_char_normalized: -0.0308
+    mean_token_entropy: 0.0449
+    mean_token_max_entropy: 0.0420
+    mean_total_tokens: 0.2704
+    mean_vocab_size: 0.1814
+  halstead:
+    mean_N1_total_operators: 0.2985
+    mean_N2_total_operands: 0.1414
+    mean_difficulty: 0.2542
+    mean_effort: 0.5528
+    mean_estimated_bugs: 0.2986
+    mean_length: 0.2432
+    mean_n1_unique_operators: 0.3063
+    mean_n2_unique_operands: 0.1935
+    mean_time_to_implement_seconds: 0.5528
+    mean_vocabulary: 0.2268
+    mean_volume: 0.2986
+  heaps:
+    mean_beta: 0.0106
+    mean_k: -0.0084
+  identifier_length_variance:
+    mean_mean: 0.0422
+    mean_std_dev: -0.0249
+    mean_variance: -0.0498
+  indentation:
+    mean_blank_line_ratio: -0.1184
+    mean_max_depth: 0.3691
+    mean_mean_depth: 0.3712
+    mean_variance: 0.8827
+  line_patterns:
+    mean_blank_line_ratio: -0.1184
+    mean_string_literal_ratio: -0.1419
+    mean_unique_line_ratio: -0.0535
+  magic_number_density:
+    mean_string_literal_ratio: -0.1419
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0868
+    mean_near_dup_block_d0: 0.6309
+    mean_near_dup_block_d7: -0.2619
+    mean_sub_block_count: 0.0868
+  ngram:
+    mean_bigram_hapax_fraction: 0.0841
+    mean_bigram_repeated_unique: 0.1923
+    mean_bigram_repetition_rate: -0.0199
+    mean_bigram_total: 0.2709
+    mean_bigram_unique: 0.2850
+    mean_trigram_hapax_fraction: 0.0812
+    mean_trigram_repeated_unique: 0.1582
+    mean_trigram_repetition_rate: -0.0339
+    mean_trigram_total: 0.2715
+    mean_trigram_unique: 0.2823
+  punctuation_density:
+    mean_colon_suffix_density: -0.2725
+    mean_dot_count: 0.0701
+    mean_id_nonalpha_suffix_density: -0.0527
+  readability:
+    mean_avg_line_length: 0.1184
+    mean_avg_tokens_per_line: 0.0316
+    mean_total_lines: 0.2388
+  symbol_density:
+    mean_density: -0.1391
+    mean_distinct_symbol_types: 0.0729
+    mean_symbol_count: 0.2136
+  vocabulary:
+    mean_mattr: -0.0698
+    mean_raw_ttr: -0.1544
+    mean_total_identifiers: 0.3273
+    mean_unique_identifiers: 0.1730
+  vowel_density:
+    mean_total_chars: 0.3695
+  zipf:
+    mean_r_squared: 0.0095
+    mean_total_tokens: 0.2704
+    mean_vocab_size: 0.1814
+
+test_single_concept:
+  _doc: "Each test should verify a single concept — tests covering multiple things are harder to diagnose when they fail."
+  _languages: [elixir]
+  _log_baseline: 37.2588
+  branching:
+    mean_branch_count: 0.3696
+    mean_branching_density: -2.0000
+    mean_max_nesting_depth: 0.1534
+    mean_non_blank_count: 0.2620
+  brevity:
+    mean_sample_size: 0.0495
+  casing_entropy:
+    mean_entropy: -0.0830
+    mean_other_count: 0.3696
+    mean_pascal_case_count: -0.0146
+    mean_snake_case_count: 0.1912
+  comment_structure:
+    mean_comment_line_count: -1.0376
+    mean_comment_line_ratio: 1.0694
+  compression:
+    mean_raw_bytes: 0.1970
+    mean_redundancy: 0.0534
+    mean_unique_line_ratio: -0.1814
+    mean_zlib_bytes: 0.0851
+    mean_zlib_ratio: 0.1119
+  entropy:
+    mean_char_entropy: -0.0169
+    mean_char_normalized: -0.0187
+    mean_token_entropy: 0.0065
+    mean_token_max_entropy: 0.0104
+    mean_token_normalized: -0.0039
+    mean_total_tokens: 0.1633
+    mean_vocab_size: 0.0495
+  function_metrics:
+    mean_avg_function_lines: 0.8129
+    mean_avg_param_count: -0.0628
+    mean_function_count: 0.0628
+    mean_max_function_lines: 1.3538
+  halstead:
+    mean_N1_total_operators: 0.1507
+    mean_N2_total_operands: 0.0958
+    mean_difficulty: 0.1387
+    mean_effort: 0.2736
+    mean_estimated_bugs: 0.1348
+    mean_length: 0.1284
+    mean_n1_unique_operators: 0.0628
+    mean_n2_unique_operands: 0.0199
+    mean_time_to_implement_seconds: 0.2736
+    mean_vocabulary: 0.0302
+    mean_volume: 0.1349
+  heaps:
+    mean_beta: -0.0648
+    mean_k: 0.1502
+    mean_r_squared: 0.0046
+  identifier_length_variance:
+    mean_mean: 0.0255
+    mean_std_dev: 0.0733
+    mean_variance: 0.1466
+  indentation:
+    mean_blank_line_ratio: -0.0702
+    mean_max_depth: 0.2162
+    mean_mean_depth: 0.1207
+    mean_variance: 0.3787
+  line_patterns:
+    mean_blank_line_ratio: -0.0702
+    mean_max_nesting_depth: 0.1534
+    mean_string_literal_ratio: 0.3581
+    mean_unique_line_ratio: -0.1894
+  magic_number_density:
+    mean_density: -0.0115
+    mean_magic_number_count: 0.1534
+    mean_string_literal_ratio: 0.3581
+  near_duplicate_blocks_file:
+    mean_block_count: 0.5858
+    mean_near_dup_block_d0: 0.3696
+    mean_near_dup_block_d4: 0.3696
+    mean_sub_block_count: 0.1857
+  ngram:
+    mean_bigram_hapax_fraction: -0.0471
+    mean_bigram_repeated_unique: 0.1345
+    mean_bigram_repetition_rate: 0.0547
+    mean_bigram_total: 0.1635
+    mean_bigram_unique: 0.0656
+    mean_trigram_hapax_fraction: -0.0703
+    mean_trigram_repeated_unique: 0.2632
+    mean_trigram_repetition_rate: 0.1415
+    mean_trigram_total: 0.1638
+    mean_trigram_unique: 0.0672
+  punctuation_density:
+    mean_arrow_density: -0.1592
+    mean_bracket_nonalpha_prefix_count: 0.0712
+    mean_colon_suffix_density: -0.0922
+    mean_dot_count: 0.0317
+    mean_id_nonalpha_suffix_density: -0.0343
+  readability:
+    mean_avg_line_length: -0.1064
+    mean_avg_sub_words_per_id: 0.0125
+    mean_avg_tokens_per_line: -0.1758
+    mean_flesch_adapted: 0.0098
+    mean_fog_adapted: -0.1758
+    mean_total_lines: 0.3391
+  symbol_density:
+    mean_density: -0.0634
+    mean_symbol_count: 0.1338
+  vocabulary:
+    mean_mattr: -0.0701
+    mean_raw_ttr: -0.1129
+    mean_total_identifiers: 0.1594
+    mean_unique_identifiers: 0.0464
+  vowel_density:
+    mean_total_chars: 0.1849
+  zipf:
+    mean_exponent: 0.0281
+    mean_r_squared: -0.0039
+    mean_total_tokens: 0.1633
+    mean_vocab_size: 0.0495
+
diff --git a/priv/combined_metrics/type_and_value.yml b/priv/combined_metrics/type_and_value.yml
new file mode 100644
index 00000000..b9737213
--- /dev/null
+++ b/priv/combined_metrics/type_and_value.yml
@@ -0,0 +1,563 @@
+boolean_assigned_from_comparison:
+  _doc: "Boolean variables should be assigned directly from comparisons or predicate calls, not set via conditionals."
+  _log_baseline: 2.8516
+  branching:
+    mean_branch_count: -0.8402
+    mean_branching_density: 0.3349
+    mean_max_nesting_depth: 0.1944
+    mean_non_blank_count: -0.2723
+  brevity:
+    mean_sample_size: 0.0454
+  casing_entropy:
+    mean_entropy: -0.1192
+    mean_other_count: -0.1944
+    mean_pascal_case_count: 0.1137
+    mean_snake_case_count: 0.1080
+  compression:
+    mean_raw_bytes: -0.0537
+    mean_redundancy: -0.0549
+    mean_unique_line_ratio: 0.0903
+    mean_zlib_bytes: 0.0350
+    mean_zlib_ratio: -0.0887
+  entropy:
+    mean_char_entropy: 0.0469
+    mean_char_max_entropy: 0.0084
+    mean_char_normalized: 0.0385
+    mean_token_entropy: 0.0094
+    mean_token_max_entropy: 0.0103
+    mean_total_tokens: 0.0172
+    mean_vocab_size: 0.0454
+  function_metrics:
+    mean_avg_function_lines: -0.3955
+    mean_function_count: 0.1137
+    mean_max_function_lines: -0.4184
+  halstead:
+    mean_N1_total_operators: 0.0223
+    mean_N2_total_operands: 0.0441
+    mean_difficulty: -0.1054
+    mean_effort: -0.0588
+    mean_estimated_bugs: 0.0466
+    mean_length: 0.0317
+    mean_n1_unique_operators: -0.0359
+    mean_n2_unique_operands: 0.1137
+    mean_time_to_implement_seconds: -0.0588
+    mean_vocabulary: 0.0635
+    mean_volume: 0.0466
+  heaps:
+    mean_beta: 0.0212
+    mean_k: -0.0531
+    mean_r_squared: 0.0046
+  identifier_length_variance:
+    mean_max: 0.0208
+    mean_mean: 0.0168
+    mean_std_dev: -0.0019
+    mean_variance: -0.0038
+  indentation:
+    mean_blank_line_ratio: 0.1081
+    mean_max_depth: -0.2570
+    mean_mean_depth: -0.2106
+    mean_variance: -0.3879
+  line_patterns:
+    mean_blank_line_ratio: 0.1081
+    mean_max_nesting_depth: 0.1944
+    mean_string_literal_ratio: -0.0182
+    mean_unique_line_ratio: 0.1190
+  magic_number_density:
+    mean_density: 0.1772
+    mean_magic_number_count: 0.1944
+    mean_string_literal_ratio: -0.0182
+  near_duplicate_blocks_file:
+    mean_block_count: -0.2570
+    mean_near_dup_block_d0: -0.3081
+    mean_sub_block_count: 0.2455
+  ngram:
+    mean_bigram_hapax_fraction: 0.0123
+    mean_bigram_repeated_unique: -0.0200
+    mean_bigram_repetition_rate: -0.0701
+    mean_bigram_total: 0.0173
+    mean_bigram_unique: 0.0466
+    mean_trigram_hapax_fraction: 0.0153
+    mean_trigram_repeated_unique: -0.1345
+    mean_trigram_repetition_rate: -0.1804
+    mean_trigram_total: 0.0174
+    mean_trigram_unique: 0.0434
+  punctuation_density:
+    mean_arrow_density: 0.9500
+    mean_bracket_nonalpha_prefix_count: 0.1137
+    mean_colon_suffix_density: 0.0338
+    mean_dot_count: 0.1570
+    mean_exclamation_density: -2.0000
+    mean_id_nonalpha_suffix_density: 0.1365
+  readability:
+    mean_avg_line_length: 0.2263
+    mean_avg_sub_words_per_id: 0.0384
+    mean_avg_tokens_per_line: 0.2895
+    mean_flesch_adapted: -0.0583
+    mean_fog_adapted: 0.2895
+    mean_total_lines: -0.2723
+  separator_counts:
+    mean_dot_count: 0.1570
+    mean_hyphen_count: -0.5832
+    mean_underscore_count: 0.1351
+  symbol_density:
+    mean_density: 0.1390
+    mean_distinct_symbol_types: 0.0351
+    mean_symbol_count: 0.0855
+  vocabulary:
+    mean_raw_ttr: -0.0051
+    mean_total_identifiers: 0.1004
+    mean_unique_identifiers: 0.0953
+  vowel_density:
+    mean_total_chars: 0.1172
+  zipf:
+    mean_exponent: -0.0211
+    mean_r_squared: 0.0120
+    mean_total_tokens: 0.0172
+    mean_vocab_size: 0.0454
+
+hardcoded_url_or_path:
+  _doc: "URLs, file paths, and host names should be configuration values, not inline string literals."
+  _log_baseline: 57.6828
+  branching:
+    mean_max_nesting_depth: 0.4526
+  brevity:
+    mean_sample_size: 0.1491
+  casing_entropy:
+    mean_entropy: -0.0622
+    mean_other_count: -0.9458
+    mean_pascal_case_count: 0.4526
+    mean_snake_case_count: 0.0807
+  compression:
+    mean_raw_bytes: 0.3137
+    mean_redundancy: 0.0589
+    mean_unique_line_ratio: 0.0620
+    mean_zlib_bytes: 0.2242
+    mean_zlib_ratio: 0.0896
+  entropy:
+    mean_char_entropy: 0.0090
+    mean_char_normalized: 0.0133
+    mean_token_entropy: 0.0319
+    mean_token_max_entropy: 0.0321
+    mean_total_tokens: 0.2263
+    mean_vocab_size: 0.1491
+  function_metrics:
+    mean_avg_function_lines: -0.4231
+    mean_avg_param_count: 0.4526
+    mean_function_count: 0.3756
+    mean_max_param_count: 0.4526
+  halstead:
+    mean_N1_total_operators: 0.2692
+    mean_N2_total_operands: 0.3555
+    mean_difficulty: 0.0776
+    mean_effort: 0.4552
+    mean_estimated_bugs: 0.3775
+    mean_length: 0.2955
+    mean_n1_unique_operators: 0.1560
+    mean_n2_unique_operands: 0.4338
+    mean_time_to_implement_seconds: 0.4552
+    mean_vocabulary: 0.3487
+    mean_volume: 0.3775
+  heaps:
+    mean_beta: 0.0218
+    mean_k: -0.0742
+  identifier_length_variance:
+    mean_mean: 0.3229
+    mean_std_dev: 0.2786
+    mean_variance: 0.5571
+  indentation:
+    mean_blank_line_ratio: 0.1827
+    mean_mean_depth: -0.1314
+    mean_variance: -0.1681
+  line_patterns:
+    mean_blank_line_ratio: 0.1827
+    mean_max_nesting_depth: 0.4526
+    mean_string_literal_ratio: -0.0095
+    mean_unique_line_ratio: 0.0639
+  magic_number_density:
+    mean_density: -0.1946
+    mean_string_literal_ratio: -0.0095
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d0: -0.7737
+    mean_near_dup_block_d7: -2.0000
+    mean_sub_block_count: 0.4147
+  ngram:
+    mean_bigram_hapax_fraction: -0.0739
+    mean_bigram_repeated_unique: 0.4206
+    mean_bigram_repetition_rate: 0.0333
+    mean_bigram_total: 0.2268
+    mean_bigram_unique: 0.2470
+    mean_trigram_repeated_unique: 0.3267
+    mean_trigram_repetition_rate: -0.1301
+    mean_trigram_total: 0.2273
+    mean_trigram_unique: 0.3319
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: 0.5246
+    mean_bracket_nonalpha_suffix_count: 0.9458
+    mean_colon_suffix_density: -0.2240
+    mean_dot_count: 0.0519
+    mean_exclamation_density: 0.1397
+    mean_id_nonalpha_suffix_density: -0.0717
+    mean_question_mark_density: -0.3462
+  readability:
+    mean_avg_line_length: 0.3239
+    mean_avg_sub_words_per_id: 0.2015
+    mean_avg_tokens_per_line: 0.2263
+    mean_flesch_adapted: -0.2352
+    mean_fog_adapted: 0.4805
+  separator_counts:
+    mean_dot_count: 0.0519
+    mean_slash_count: -0.5872
+    mean_underscore_count: 1.5120
+  symbol_density:
+    mean_density: -0.0101
+    mean_symbol_count: 0.3038
+  vocabulary:
+    mean_mattr: 0.1813
+    mean_raw_ttr: 0.0726
+    mean_total_identifiers: 0.0850
+    mean_unique_identifiers: 0.1575
+  vowel_density:
+    mean_total_chars: 0.4079
+  zipf:
+    mean_r_squared: 0.0163
+    mean_total_tokens: 0.2263
+    mean_vocab_size: 0.1491
+
+no_empty_string_initial:
+  _doc: "Initialising a variable to an empty string and reassigning later signals missing structure."
+  _log_baseline: -5.2140
+  branching:
+    mean_branch_count: -0.1509
+    mean_branching_density: -0.0146
+    mean_max_nesting_depth: 0.0360
+    mean_non_blank_count: -0.0973
+  brevity:
+    mean_sample_size: 0.0215
+  casing_entropy:
+    mean_entropy: 0.0015
+    mean_other_count: -0.1377
+    mean_pascal_case_count: 0.0650
+    mean_screaming_snake_density: -1.4394
+    mean_snake_case_count: -0.0016
+  compression:
+    mean_raw_bytes: -0.0402
+    mean_redundancy: -0.0283
+    mean_unique_line_ratio: 0.0029
+    mean_zlib_bytes: -0.0041
+    mean_zlib_ratio: -0.0348
+  entropy:
+    mean_char_entropy: 0.0151
+    mean_char_max_entropy: 0.0021
+    mean_char_normalized: 0.0130
+    mean_token_entropy: 0.0194
+    mean_token_max_entropy: 0.0055
+    mean_token_normalized: 0.0140
+    mean_total_tokens: -0.0095
+    mean_vocab_size: 0.0215
+  function_metrics:
+    mean_avg_function_lines: 0.0337
+    mean_function_count: 0.1377
+    mean_max_function_lines: 0.0057
+  halstead:
+    mean_N1_total_operators: 0.0288
+    mean_N2_total_operands: -0.0392
+    mean_difficulty: -0.0209
+    mean_effort: -0.0057
+    mean_estimated_bugs: 0.0052
+    mean_length: 0.0035
+    mean_n1_unique_operators: 0.0154
+    mean_n2_unique_operands: -0.0068
+    mean_time_to_implement_seconds: -0.0057
+    mean_vocabulary: 0.0014
+    mean_volume: 0.0052
+  heaps:
+    mean_beta: 0.0191
+    mean_k: -0.0225
+  identifier_length_variance:
+    mean_max: 0.0398
+    mean_mean: -0.0269
+    mean_std_dev: -0.0088
+    mean_variance: -0.0270
+  indentation:
+    mean_blank_line_ratio: -0.0374
+    mean_max_depth: -0.0257
+    mean_mean_depth: -0.0491
+    mean_variance: -0.0047
+  line_patterns:
+    mean_blank_line_ratio: -0.0374
+    mean_max_nesting_depth: 0.0360
+    mean_string_literal_ratio: -0.1459
+    mean_unique_line_ratio: -0.0016
+  magic_number_density:
+    mean_density: 0.1538
+    mean_magic_number_count: 0.1869
+    mean_string_literal_ratio: -0.1459
+  near_duplicate_blocks_file:
+    mean_block_count: -0.0962
+    mean_near_dup_block_d0: -0.5606
+    mean_near_dup_block_d4: 0.2962
+    mean_near_dup_block_d8: -0.2962
+    mean_sub_block_count: 0.1045
+  ngram:
+    mean_bigram_repeated_unique: 0.0065
+    mean_bigram_repetition_rate: -0.0091
+    mean_bigram_total: -0.0095
+    mean_trigram_hapax_fraction: -0.0027
+    mean_trigram_repeated_unique: 0.0056
+    mean_trigram_repetition_rate: 0.0124
+    mean_trigram_total: -0.0095
+    mean_trigram_unique: -0.0170
+  punctuation_density:
+    mean_arrow_density: -0.0278
+    mean_bracket_nonalpha_prefix_count: -0.0345
+    mean_bracket_nonalpha_suffix_count: 0.0463
+    mean_colon_suffix_density: 0.1503
+    mean_dot_count: 0.0512
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: 0.0806
+  readability:
+    mean_avg_line_length: 0.0536
+    mean_avg_sub_words_per_id: -0.0116
+    mean_avg_tokens_per_line: 0.0704
+    mean_flesch_adapted: 0.0051
+    mean_fog_adapted: 0.0674
+    mean_total_lines: -0.0973
+  separator_counts:
+    mean_dot_count: 0.0512
+    mean_hyphen_count: 0.1377
+    mean_slash_count: 0.3738
+    mean_underscore_count: -0.1093
+  symbol_density:
+    mean_density: 0.0072
+    mean_distinct_symbol_types: 0.0051
+    mean_symbol_count: -0.0197
+  vocabulary:
+    mean_mattr: 0.0067
+    mean_raw_ttr: 0.0068
+    mean_total_identifiers: 0.0059
+    mean_unique_identifiers: 0.0122
+  vowel_density:
+    mean_total_chars: -0.0176
+  zipf:
+    mean_exponent: -0.0117
+    mean_r_squared: -0.0027
+    mean_total_tokens: -0.0095
+    mean_vocab_size: 0.0215
+
+no_implicit_null_initial:
+  _doc: "Initialising a variable to `nil`/`null` and assigning it later in a branch signals missing structure."
+  _log_baseline: -3.2593
+  branching:
+    mean_branch_count: 0.0293
+    mean_branching_density: 0.0871
+    mean_non_blank_count: -0.0578
+  brevity:
+    mean_sample_size: 0.0132
+  casing_entropy:
+    mean_entropy: 0.0660
+    mean_other_count: 0.1247
+    mean_screaming_snake_density: 0.0448
+    mean_snake_case_count: -0.0534
+  compression:
+    mean_raw_bytes: -0.0246
+    mean_redundancy: -0.0100
+    mean_unique_line_ratio: -0.0159
+    mean_zlib_bytes: -0.0071
+    mean_zlib_ratio: -0.0174
+  entropy:
+    mean_char_entropy: 0.0071
+    mean_char_max_entropy: 0.0063
+    mean_token_entropy: -0.0025
+    mean_token_max_entropy: 0.0029
+    mean_token_normalized: -0.0055
+    mean_vocab_size: 0.0132
+  function_metrics:
+    mean_avg_function_lines: -0.1325
+    mean_avg_param_count: -0.0029
+    mean_function_count: 0.0440
+    mean_max_function_lines: -0.1618
+  halstead:
+    mean_N1_total_operators: 0.0268
+    mean_N2_total_operands: -0.0416
+    mean_difficulty: 0.0085
+    mean_effort: 0.0109
+    mean_estimated_bugs: 0.0024
+    mean_n1_unique_operators: 0.0393
+    mean_n2_unique_operands: -0.0108
+    mean_time_to_implement_seconds: 0.0109
+    mean_vocabulary: 0.0038
+    mean_volume: 0.0024
+  heaps:
+    mean_beta: -0.0079
+    mean_k: 0.0418
+    mean_r_squared: -0.0034
+  identifier_length_variance:
+    mean_mean: 0.0185
+    mean_std_dev: 0.0177
+    mean_variance: 0.0354
+  indentation:
+    mean_blank_line_ratio: -0.1146
+    mean_mean_depth: 0.0089
+    mean_variance: 0.1759
+  line_patterns:
+    mean_blank_line_ratio: -0.1146
+    mean_string_literal_ratio: -0.0022
+    mean_unique_line_ratio: -0.0135
+  magic_number_density:
+    mean_density: -0.0066
+    mean_string_literal_ratio: -0.0022
+  near_duplicate_blocks_file:
+    mean_block_count: -0.1493
+    mean_sub_block_count: 0.0422
+  ngram:
+    mean_bigram_hapax_fraction: -0.0181
+    mean_bigram_repeated_unique: 0.0178
+    mean_bigram_repetition_rate: 0.0335
+    mean_bigram_unique: -0.0287
+    mean_trigram_hapax_fraction: -0.0209
+    mean_trigram_repeated_unique: 0.0959
+    mean_trigram_repetition_rate: 0.0860
+    mean_trigram_unique: -0.0245
+  punctuation_density:
+    mean_arrow_density: -1.4388
+    mean_bracket_nonalpha_prefix_count: 0.0807
+    mean_bracket_nonalpha_suffix_count: 0.1035
+    mean_colon_suffix_density: 0.1166
+    mean_exclamation_density: 2.0000
+    mean_id_nonalpha_suffix_density: 0.0685
+  readability:
+    mean_avg_line_length: 0.0358
+    mean_avg_sub_words_per_id: 0.0035
+    mean_avg_tokens_per_line: 0.0586
+    mean_flesch_adapted: -0.0082
+    mean_fog_adapted: 0.0549
+    mean_total_lines: -0.0578
+  separator_counts:
+    mean_hyphen_count: 0.4264
+    mean_slash_count: 0.0885
+    mean_underscore_count: -0.0054
+  symbol_density:
+    mean_density: 0.0549
+    mean_distinct_symbol_types: 0.0342
+    mean_symbol_count: 0.0303
+  vocabulary:
+    mean_mattr: 0.0540
+    mean_raw_ttr: 0.0466
+    mean_total_identifiers: -0.0466
+  vowel_density:
+    mean_total_chars: -0.0281
+  zipf:
+    mean_exponent: -0.0074
+    mean_r_squared: 0.0047
+    mean_vocab_size: 0.0132
+
+no_magic_value_assigned:
+  _doc: "Literal strings and numbers assigned to variables should be named constants, not inline values."
+  _log_baseline: -9.5635
+  branching:
+    mean_branch_count: -0.2035
+    mean_branching_density: -0.1140
+    mean_non_blank_count: -0.0893
+  brevity:
+    mean_sample_size: -0.0122
+  casing_entropy:
+    mean_entropy: -0.0411
+    mean_other_count: -0.3211
+    mean_snake_case_count: -0.0502
+  compression:
+    mean_raw_bytes: -0.1800
+    mean_redundancy: -0.0342
+    mean_unique_line_ratio: 0.0140
+    mean_zlib_bytes: -0.1207
+    mean_zlib_ratio: -0.0593
+  entropy:
+    mean_char_max_entropy: -0.0144
+    mean_char_normalized: 0.0154
+    mean_token_entropy: 0.0837
+    mean_token_normalized: 0.0863
+    mean_total_tokens: -0.1270
+    mean_vocab_size: -0.0122
+  function_metrics:
+    mean_avg_function_lines: -1.1104
+    mean_avg_param_count: -0.3099
+    mean_function_count: 0.8801
+    mean_max_function_lines: -0.5872
+  halstead:
+    mean_N1_total_operators: 0.2796
+    mean_N2_total_operands: -0.0340
+    mean_difficulty: -0.1408
+    mean_effort: 0.0099
+    mean_estimated_bugs: 0.1506
+    mean_length: 0.1539
+    mean_n1_unique_operators: -0.0859
+    mean_n2_unique_operands: 0.0209
+    mean_time_to_implement_seconds: 0.0099
+    mean_vocabulary: -0.0140
+    mean_volume: 0.1507
+  heaps:
+    mean_beta: 0.0889
+    mean_k: -0.2042
+    mean_r_squared: 0.0230
+  identifier_length_variance:
+    mean_max: -0.0438
+    mean_mean: -0.1501
+    mean_std_dev: -0.2041
+    mean_variance: -0.4083
+  indentation:
+    mean_blank_line_ratio: -0.1918
+    mean_mean_depth: -0.2808
+    mean_variance: -0.1572
+  line_patterns:
+    mean_blank_line_ratio: -0.1918
+    mean_string_literal_ratio: -1.7484
+    mean_unique_line_ratio: 0.0086
+  magic_number_density:
+    mean_string_literal_ratio: -1.7484
+  near_duplicate_blocks_file:
+    mean_block_count: 0.1491
+    mean_near_dup_block_d0: 0.7737
+    mean_sub_block_count: 0.2805
+  ngram:
+    mean_bigram_hapax_fraction: 0.0998
+    mean_bigram_repeated_unique: -0.1265
+    mean_bigram_repetition_rate: -0.2414
+    mean_bigram_total: -0.1274
+    mean_bigram_unique: 0.1208
+    mean_trigram_hapax_fraction: 0.1034
+    mean_trigram_repeated_unique: -0.9134
+    mean_trigram_repetition_rate: -0.8787
+    mean_trigram_total: -0.1278
+    mean_trigram_unique: 0.1009
+  punctuation_density:
+    mean_arrow_density: -0.3964
+    mean_bracket_nonalpha_prefix_count: -0.7737
+    mean_bracket_nonalpha_suffix_count: 2.0000
+    mean_colon_suffix_density: -0.2477
+    mean_id_nonalpha_suffix_density: -0.0427
+    mean_question_mark_density: 0.9772
+  readability:
+    mean_avg_line_length: -0.0927
+    mean_avg_sub_words_per_id: -0.1514
+    mean_avg_tokens_per_line: -0.0377
+    mean_flesch_adapted: 0.2297
+    mean_fog_adapted: -0.5835
+    mean_total_lines: -0.0893
+  separator_counts:
+    mean_hyphen_count: -0.5246
+    mean_underscore_count: -0.4596
+  symbol_density:
+    mean_density: -0.0945
+    mean_symbol_count: -0.2742
+  vocabulary:
+    mean_mattr: 0.0346
+    mean_raw_ttr: 0.0360
+    mean_total_identifiers: -0.0550
+    mean_unique_identifiers: -0.0191
+  vowel_density:
+    mean_total_chars: -0.2051
+  zipf:
+    mean_exponent: -0.0525
+    mean_r_squared: 0.0208
+    mean_total_tokens: -0.1270
+    mean_vocab_size: -0.0122
+
diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml
new file mode 100644
index 00000000..f07e7891
--- /dev/null
+++ b/priv/combined_metrics/variable_naming.yml
@@ -0,0 +1,1289 @@
+boolean_has_is_has_prefix:
+  _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 22.4319
+  brevity:
+    mean_sample_size: 0.0752
+  casing_entropy:
+    mean_camel_case_count: 2.0000
+    mean_entropy: 0.4870
+    mean_snake_case_count: -0.2309
+  compression:
+    mean_raw_bytes: 0.1698
+    mean_redundancy: 0.0581
+    mean_zlib_bytes: 0.0387
+    mean_zlib_ratio: 0.1318
+  entropy:
+    mean_char_entropy: 0.0136
+    mean_token_entropy: 0.0198
+    mean_token_max_entropy: 0.0173
+    mean_vocab_size: 0.0752
+  halstead:
+    mean_difficulty: -0.0725
+    mean_effort: -0.0594
+    mean_estimated_bugs: 0.0109
+    mean_n2_unique_operands: 0.0694
+    mean_time_to_implement_seconds: -0.0594
+    mean_vocabulary: 0.0494
+    mean_volume: 0.0110
+  heaps:
+    mean_k: 0.1146
+  identifier_length_variance:
+    mean_max: 0.3229
+    mean_mean: 0.3109
+    mean_std_dev: 0.3325
+    mean_variance: 0.6646
+  ngram:
+    mean_bigram_hapax_fraction: 0.0164
+    mean_bigram_repeated_unique: -0.0209
+    mean_bigram_repetition_rate: -0.0199
+    mean_bigram_unique: 0.0182
+    mean_trigram_repeated_unique: -0.0226
+    mean_trigram_repetition_rate: -0.0134
+  punctuation_density:
+    mean_exclamation_density: -0.1826
+    mean_question_mark_density: -0.1720
+  readability:
+    mean_avg_line_length: 0.1754
+    mean_avg_sub_words_per_id: 0.3932
+    mean_flesch_adapted: -0.4857
+    mean_fog_adapted: 0.5482
+  separator_counts:
+    mean_underscore_count: 1.8116
+  symbol_density:
+    mean_density: -0.1660
+  vocabulary:
+    mean_mattr: 0.1186
+    mean_raw_ttr: 0.1173
+    mean_unique_identifiers: 0.1175
+  vowel_density:
+    mean_total_chars: 0.3117
+  zipf:
+    mean_exponent: -0.0403
+    mean_r_squared: 0.0110
+    mean_vocab_size: 0.0752
+
+collection_name_is_plural:
+  _doc: "Variables holding a collection should use a plural name."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 24.0478
+  brevity:
+    mean_sample_size: -0.5320
+  casing_entropy:
+    mean_camel_case_count: 0.4724
+    mean_entropy: 0.1726
+    mean_snake_case_count: -0.2009
+  compression:
+    mean_raw_bytes: 0.8299
+    mean_redundancy: 0.3207
+    mean_zlib_bytes: 0.1239
+    mean_zlib_ratio: 0.7180
+  entropy:
+    mean_token_entropy: -0.1027
+    mean_token_max_entropy: -0.1240
+    mean_vocab_size: -0.5320
+  halstead:
+    mean_N2_total_operands: -0.0506
+    mean_difficulty: 0.7075
+    mean_effort: 0.5207
+    mean_estimated_bugs: -0.1558
+    mean_n2_unique_operands: -0.7698
+    mean_time_to_implement_seconds: 0.5207
+    mean_vocabulary: -0.5251
+    mean_volume: -0.1559
+  heaps:
+    mean_k: -0.7238
+  identifier_length_variance:
+    mean_mean: 1.6364
+    mean_std_dev: -0.9858
+    mean_variance: -2.0000
+  ngram:
+    mean_bigram_hapax_fraction: -0.1392
+    mean_bigram_repetition_rate: 0.1490
+    mean_bigram_unique: -0.1850
+    mean_trigram_repeated_unique: -0.1677
+  punctuation_density:
+    mean_arrow_density: 0.0702
+    mean_colon_suffix_density: -0.7988
+    mean_question_mark_density: -0.5639
+  readability:
+    mean_avg_line_length: 0.8649
+    mean_avg_sub_words_per_id: 0.1285
+    mean_flesch_adapted: -0.1311
+    mean_fog_adapted: 0.8035
+  separator_counts:
+    mean_underscore_count: 0.6811
+  symbol_density:
+    mean_density: -0.8598
+  vocabulary:
+    mean_mattr: -0.6972
+    mean_raw_ttr: -0.7582
+    mean_total_identifiers: -0.1337
+    mean_unique_identifiers: -0.8807
+  vowel_density:
+    mean_total_chars: 1.4857
+  zipf:
+    mean_exponent: 0.1576
+    mean_r_squared: -0.0933
+    mean_vocab_size: -0.5320
+
+loop_var_is_single_letter:
+  _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -32.9785
+  brevity:
+    mean_sample_size: -0.1049
+  casing_entropy:
+    mean_camel_case_count: -2.0000
+    mean_entropy: -0.3919
+    mean_snake_case_count: 0.2033
+  comment_structure:
+    mean_comment_line_ratio: 0.0080
+  compression:
+    mean_raw_bytes: -0.4302
+    mean_redundancy: -0.1176
+    mean_unique_line_ratio: -0.0195
+    mean_zlib_bytes: -0.1067
+    mean_zlib_ratio: -0.3218
+  entropy:
+    mean_char_normalized: -0.0122
+    mean_token_entropy: -0.0181
+    mean_token_max_entropy: -0.0244
+    mean_vocab_size: -0.1049
+  function_metrics:
+    mean_max_function_lines: -0.0254
+  halstead:
+    mean_N1_total_operators: 0.0117
+    mean_difficulty: 0.1678
+    mean_effort: 0.1521
+    mean_estimated_bugs: -0.0119
+    mean_n1_unique_operators: 0.0342
+    mean_n2_unique_operands: -0.1437
+    mean_time_to_implement_seconds: 0.1521
+    mean_vocabulary: -0.0770
+    mean_volume: -0.0118
+  heaps:
+    mean_beta: -0.0911
+    mean_k: 0.1233
+    mean_r_squared: -0.0216
+  identifier_length_variance:
+    mean_max: -0.5833
+    mean_mean: -0.8498
+    mean_std_dev: -0.9251
+    mean_variance: -1.8576
+  indentation:
+    mean_max_depth: -0.0956
+    mean_mean_depth: -0.0126
+    mean_variance: -0.0698
+  line_patterns:
+    mean_unique_line_ratio: -0.0229
+  ngram:
+    mean_bigram_repeated_unique: -0.0377
+    mean_bigram_repetition_rate: 0.0174
+    mean_bigram_unique: -0.0494
+    mean_trigram_hapax_fraction: -0.0166
+    mean_trigram_repetition_rate: 0.0370
+    mean_trigram_unique: -0.0433
+  punctuation_density:
+    mean_id_nonalpha_suffix_density: 0.0170
+  readability:
+    mean_avg_line_length: -0.4449
+    mean_avg_sub_words_per_id: -0.3465
+    mean_avg_tokens_per_line: 0.0100
+    mean_flesch_adapted: 0.4102
+    mean_fog_adapted: -1.3612
+  separator_counts:
+    mean_hyphen_count: 0.1363
+    mean_underscore_count: -1.4177
+  symbol_density:
+    mean_density: 0.4330
+    mean_distinct_symbol_types: 0.0533
+    mean_symbol_count: 0.0087
+  vocabulary:
+    mean_mattr: -0.1052
+    mean_raw_ttr: -0.1626
+    mean_unique_identifiers: -0.1618
+  vowel_density:
+    mean_total_chars: -0.8389
+  zipf:
+    mean_exponent: 0.0112
+    mean_r_squared: -0.0134
+    mean_vocab_size: -0.1049
+
+name_contains_and:
+  _doc: "Variable names containing `and` signal a variable that holds two concerns."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -2.9877
+  branching:
+    mean_branch_count: -0.3666
+    mean_branching_density: -0.3925
+    mean_non_blank_count: 0.0242
+  brevity:
+    mean_sample_size: 0.0107
+  casing_entropy:
+    mean_camel_case_count: -0.2172
+    mean_entropy: 0.0678
+    mean_other_count: 0.6301
+    mean_pascal_case_count: 0.0894
+    mean_snake_case_count: 0.1042
+  comment_structure:
+    mean_comment_line_ratio: -0.0282
+  compression:
+    mean_raw_bytes: -0.0626
+    mean_redundancy: -0.0240
+    mean_unique_line_ratio: 0.0672
+    mean_zlib_bytes: -0.0158
+    mean_zlib_ratio: -0.0478
+  entropy:
+    mean_char_max_entropy: 0.0084
+    mean_char_normalized: -0.0068
+    mean_token_normalized: -0.0068
+    mean_total_tokens: 0.0583
+    mean_vocab_size: 0.0107
+  function_metrics:
+    mean_avg_function_lines: -0.3218
+    mean_avg_param_count: -0.0939
+    mean_function_count: 0.3368
+    mean_max_function_lines: 0.0231
+  halstead:
+    mean_N1_total_operators: 0.0512
+    mean_N2_total_operands: 0.0805
+    mean_difficulty: 0.0870
+    mean_effort: 0.1544
+    mean_estimated_bugs: 0.0665
+    mean_length: 0.0626
+    mean_n1_unique_operators: 0.0264
+    mean_n2_unique_operands: 0.0189
+    mean_time_to_implement_seconds: 0.1544
+    mean_vocabulary: 0.0205
+    mean_volume: 0.0665
+  heaps:
+    mean_beta: -0.0639
+    mean_k: 0.2017
+  identifier_length_variance:
+    mean_max: -0.3666
+    mean_mean: -0.2347
+    mean_std_dev: -0.4600
+    mean_variance: -0.9236
+  indentation:
+    mean_blank_line_ratio: -0.0595
+    mean_max_depth: -0.1211
+    mean_mean_depth: -0.1378
+    mean_variance: -0.2812
+  line_patterns:
+    mean_blank_line_ratio: -0.0595
+    mean_string_literal_ratio: -0.3480
+    mean_unique_line_ratio: 0.0808
+  magic_number_density:
+    mean_density: 0.3971
+    mean_magic_number_count: 0.4541
+    mean_string_literal_ratio: -0.0605
+  near_duplicate_blocks_file:
+    mean_block_count: 0.1874
+    mean_near_dup_block_d0: 1.2114
+    mean_near_dup_block_d3: 1.3353
+    mean_near_dup_block_d4: 1.7204
+    mean_near_dup_block_d5: 2.0000
+    mean_near_dup_block_d6: 0.3458
+    mean_near_dup_block_d7: -0.2294
+    mean_near_dup_block_d8: 0.5102
+    mean_sub_block_count: 0.2831
+  ngram:
+    mean_bigram_hapax_fraction: 0.0107
+    mean_bigram_repeated_unique: 0.0144
+    mean_bigram_total: 0.0584
+    mean_bigram_unique: 0.0410
+    mean_trigram_hapax_fraction: -0.0161
+    mean_trigram_repeated_unique: 0.1073
+    mean_trigram_repetition_rate: 0.0834
+    mean_trigram_total: 0.0585
+    mean_trigram_unique: 0.0248
+  punctuation_density:
+    mean_arrow_density: -0.0894
+    mean_bracket_nonalpha_suffix_count: -0.1211
+    mean_colon_suffix_density: -0.4936
+    mean_dot_count: -0.2504
+    mean_exclamation_density: 0.7124
+    mean_id_nonalpha_suffix_density: -0.0280
+    mean_question_mark_density: 0.7124
+  readability:
+    mean_avg_line_length: -0.0925
+    mean_avg_sub_words_per_id: -0.2424
+    mean_avg_tokens_per_line: 0.0317
+    mean_flesch_adapted: 0.3817
+    mean_fog_adapted: -0.9412
+    mean_total_lines: 0.0244
+  separator_counts:
+    mean_dot_count: -0.2504
+    mean_underscore_count: -0.6180
+  symbol_density:
+    mean_density: 0.0832
+    mean_distinct_symbol_types: 0.0748
+    mean_symbol_count: 0.0212
+  vocabulary:
+    mean_mattr: -0.0887
+    mean_raw_ttr: -0.0633
+    mean_total_identifiers: 0.0782
+    mean_unique_identifiers: 0.0162
+  vowel_density:
+    mean_total_chars: -0.1561
+  zipf:
+    mean_exponent: 0.0059
+    mean_total_tokens: 0.0583
+    mean_vocab_size: 0.0107
+
+name_contains_type_suffix:
+  _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -33.1356
+  branching:
+    mean_branch_count: -0.4150
+    mean_branching_density: -0.4125
+  brevity:
+    mean_sample_size: -0.1936
+  casing_entropy:
+    mean_camel_case_count: -1.4300
+    mean_entropy: -0.3631
+    mean_other_count: -2.0000
+    mean_pascal_case_count: -0.1660
+    mean_snake_case_count: 0.1449
+  compression:
+    mean_raw_bytes: -0.2768
+    mean_redundancy: -0.1061
+    mean_zlib_bytes: -0.1005
+    mean_zlib_ratio: -0.1770
+  entropy:
+    mean_char_entropy: -0.0082
+    mean_token_entropy: -0.0294
+    mean_token_max_entropy: -0.0396
+    mean_token_normalized: 0.0102
+    mean_total_tokens: -0.0142
+    mean_vocab_size: -0.1936
+  halstead:
+    mean_N1_total_operators: -0.0138
+    mean_N2_total_operands: -0.0198
+    mean_difficulty: 0.2824
+    mean_effort: 0.2335
+    mean_estimated_bugs: -0.0503
+    mean_length: -0.0161
+    mean_n1_unique_operators: 0.0679
+    mean_n2_unique_operands: -0.2321
+    mean_time_to_implement_seconds: 0.2335
+    mean_vocabulary: -0.1655
+    mean_volume: -0.0503
+  heaps:
+    mean_beta: -0.1004
+    mean_k: 0.1792
+  identifier_length_variance:
+    mean_max: -0.3735
+    mean_mean: -0.4788
+    mean_std_dev: -0.5916
+    mean_variance: -1.1882
+  line_patterns:
+    mean_string_literal_ratio: 0.0109
+  ngram:
+    mean_bigram_hapax_fraction: -0.0337
+    mean_bigram_repeated_unique: 0.0355
+    mean_bigram_repetition_rate: 0.0884
+    mean_bigram_total: -0.0143
+    mean_bigram_unique: -0.0700
+    mean_trigram_hapax_fraction: -0.0406
+    mean_trigram_repeated_unique: 0.2397
+    mean_trigram_repetition_rate: 0.2387
+    mean_trigram_total: -0.0143
+    mean_trigram_unique: -0.0591
+  punctuation_density:
+    mean_colon_suffix_density: -0.1444
+    mean_id_nonalpha_suffix_density: -0.0281
+    mean_question_mark_density: 0.5850
+  readability:
+    mean_avg_line_length: -0.2896
+    mean_avg_sub_words_per_id: -0.3400
+    mean_avg_tokens_per_line: -0.0147
+    mean_flesch_adapted: 0.4136
+    mean_fog_adapted: -1.0490
+  separator_counts:
+    mean_underscore_count: -1.7225
+  symbol_density:
+    mean_density: 0.2546
+    mean_symbol_count: -0.0213
+  vocabulary:
+    mean_mattr: -0.2373
+    mean_raw_ttr: -0.2260
+    mean_total_identifiers: -0.0765
+    mean_unique_identifiers: -0.3022
+  vowel_density:
+    mean_total_chars: -0.5536
+  zipf:
+    mean_exponent: 0.0794
+    mean_r_squared: 0.0102
+    mean_total_tokens: -0.0142
+    mean_vocab_size: -0.1936
+
+name_is_abbreviation:
+  _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 9.2985
+  brevity:
+    mean_sample_size: -0.1542
+  casing_entropy:
+    mean_camel_case_count: 0.3184
+    mean_entropy: 0.2713
+    mean_snake_case_count: -0.4803
+  compression:
+    mean_raw_bytes: 0.5303
+    mean_redundancy: 0.1964
+    mean_unique_line_ratio: 0.1217
+    mean_zlib_bytes: 0.0699
+    mean_zlib_ratio: 0.4576
+  entropy:
+    mean_char_entropy: -0.0398
+    mean_char_normalized: -0.0573
+    mean_token_entropy: -0.0375
+    mean_token_max_entropy: -0.0330
+    mean_total_tokens: -0.1093
+    mean_vocab_size: -0.1542
+  halstead:
+    mean_N1_total_operators: -0.1081
+    mean_N2_total_operands: -0.1080
+    mean_difficulty: 0.2026
+    mean_effort: 0.0309
+    mean_estimated_bugs: -0.1545
+    mean_length: -0.1081
+    mean_n2_unique_operands: -0.2963
+    mean_time_to_implement_seconds: 0.0309
+    mean_vocabulary: -0.2193
+    mean_volume: -0.1547
+  heaps:
+    mean_beta: -0.1056
+    mean_k: 0.2303
+    mean_r_squared: -0.0265
+  identifier_length_variance:
+    mean_max: 1.2862
+    mean_mean: 1.3727
+    mean_variance: 0.0294
+  line_patterns:
+    mean_string_literal_ratio: -0.0949
+    mean_unique_line_ratio: 0.1274
+  magic_number_density:
+    mean_density: 0.3656
+    mean_string_literal_ratio: -0.2174
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d8: -2.0000
+  ngram:
+    mean_bigram_repeated_unique: -0.0607
+    mean_bigram_total: -0.1094
+    mean_bigram_unique: -0.0339
+    mean_trigram_hapax_fraction: 0.0915
+    mean_trigram_repeated_unique: -0.2019
+    mean_trigram_repetition_rate: -0.0842
+    mean_trigram_total: -0.1095
+    mean_trigram_unique: -0.0336
+  punctuation_density:
+    mean_arrow_density: 0.2295
+    mean_bracket_nonalpha_prefix_count: -0.3052
+    mean_bracket_nonalpha_suffix_count: -0.1134
+    mean_colon_suffix_density: -0.4215
+    mean_dot_count: -0.1179
+    mean_exclamation_density: -0.4702
+    mean_id_nonalpha_suffix_density: -0.0410
+    mean_question_mark_density: -0.5810
+  readability:
+    mean_avg_line_length: 0.5519
+    mean_avg_tokens_per_line: -0.1093
+    mean_fog_adapted: -0.0959
+  separator_counts:
+    mean_dot_count: -0.1179
+    mean_slash_count: -0.5591
+    mean_underscore_count: 0.0690
+  symbol_density:
+    mean_density: -0.6485
+    mean_symbol_count: -0.1218
+  vocabulary:
+    mean_mattr: -0.1900
+    mean_raw_ttr: 0.1813
+    mean_total_identifiers: -0.3611
+    mean_unique_identifiers: -0.2161
+  vowel_density:
+    mean_total_chars: 1.0156
+  zipf:
+    mean_exponent: 0.0603
+    mean_total_tokens: -0.1093
+    mean_vocab_size: -0.1542
+
+name_is_generic:
+  _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 43.6270
+  branching:
+    mean_branch_count: 0.5193
+    mean_branching_density: 0.3889
+    mean_max_nesting_depth: -0.0599
+    mean_non_blank_count: 0.0756
+  brevity:
+    mean_sample_size: 0.2053
+  casing_entropy:
+    mean_camel_case_count: 2.0000
+    mean_entropy: 0.3582
+    mean_snake_case_count: 0.0915
+  compression:
+    mean_raw_bytes: 0.3477
+    mean_redundancy: 0.0524
+    mean_zlib_bytes: 0.2391
+    mean_zlib_ratio: 0.1093
+  entropy:
+    mean_char_entropy: 0.0197
+    mean_char_max_entropy: 0.0088
+    mean_char_normalized: 0.0110
+    mean_token_entropy: 0.0372
+    mean_token_max_entropy: 0.0450
+    mean_token_normalized: -0.0077
+    mean_total_tokens: 0.0784
+    mean_vocab_size: 0.2053
+  function_metrics:
+    mean_avg_function_lines: -0.1306
+    mean_avg_param_count: -0.0443
+    mean_function_count: 0.2694
+    mean_max_function_lines: -0.2279
+  halstead:
+    mean_N1_total_operators: 0.0995
+    mean_N2_total_operands: 0.0454
+    mean_difficulty: -0.2194
+    mean_effort: -0.0846
+    mean_estimated_bugs: 0.1286
+    mean_length: 0.0789
+    mean_n1_unique_operators: 0.0328
+    mean_n2_unique_operands: 0.2960
+    mean_time_to_implement_seconds: -0.0846
+    mean_vocabulary: 0.2199
+    mean_volume: 0.1286
+  heaps:
+    mean_beta: 0.1174
+    mean_k: -0.2339
+    mean_r_squared: 0.0145
+  identifier_length_variance:
+    mean_max: 0.4477
+    mean_mean: 0.5582
+    mean_std_dev: 0.6755
+    mean_variance: 1.3586
+  indentation:
+    mean_blank_line_ratio: 0.0556
+    mean_max_depth: -0.1451
+    mean_mean_depth: -0.0760
+    mean_variance: -0.2765
+  line_patterns:
+    mean_blank_line_ratio: 0.0556
+    mean_max_nesting_depth: -0.0599
+    mean_string_literal_ratio: -0.0386
+    mean_unique_line_ratio: 0.0182
+  magic_number_density:
+    mean_density: -0.0624
+    mean_string_literal_ratio: -0.1451
+  near_duplicate_blocks_file:
+    mean_block_count: 0.1243
+    mean_near_dup_block_d0: 0.9543
+    mean_near_dup_block_d6: -0.3521
+    mean_near_dup_block_d7: 0.6021
+    mean_near_dup_block_d8: 0.0644
+    mean_sub_block_count: 0.1831
+  ngram:
+    mean_bigram_hapax_fraction: 0.1528
+    mean_bigram_repeated_unique: -0.1344
+    mean_bigram_repetition_rate: -0.1251
+    mean_bigram_total: 0.0786
+    mean_bigram_unique: 0.1718
+    mean_trigram_hapax_fraction: 0.1086
+    mean_trigram_repeated_unique: -0.2389
+    mean_trigram_repetition_rate: -0.2091
+    mean_trigram_total: 0.0787
+    mean_trigram_unique: 0.1550
+  punctuation_density:
+    mean_arrow_density: -0.1087
+    mean_bracket_nonalpha_prefix_count: 0.2766
+    mean_bracket_number_pair_count: 0.2499
+    mean_colon_suffix_density: -0.0752
+    mean_exclamation_density: -0.2923
+    mean_id_nonalpha_suffix_density: -0.0340
+    mean_question_mark_density: 0.1584
+  readability:
+    mean_avg_line_length: 0.2897
+    mean_avg_sub_words_per_id: 0.2590
+    mean_flesch_adapted: -0.2843
+    mean_fog_adapted: 0.5030
+    mean_total_lines: 0.0756
+  separator_counts:
+    mean_slash_count: 0.9542
+    mean_underscore_count: 1.9344
+  symbol_density:
+    mean_density: -0.2642
+    mean_distinct_symbol_types: 0.0252
+    mean_symbol_count: 0.0858
+  vocabulary:
+    mean_mattr: 0.1932
+    mean_raw_ttr: 0.1681
+    mean_total_identifiers: 0.2205
+    mean_unique_identifiers: 0.3862
+  vowel_density:
+    mean_total_chars: 0.7766
+  zipf:
+    mean_exponent: -0.0977
+    mean_r_squared: 0.0316
+    mean_total_tokens: 0.0784
+    mean_vocab_size: 0.2053
+
+name_is_number_like:
+  _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 4.1505
+  brevity:
+    mean_sample_size: -0.0262
+  casing_entropy:
+    mean_camel_case_count: 0.6902
+    mean_entropy: -0.4687
+    mean_other_count: -2.0000
+    mean_snake_case_count: 0.1969
+  compression:
+    mean_raw_bytes: 0.1098
+    mean_redundancy: 0.0379
+    mean_zlib_bytes: 0.0415
+    mean_zlib_ratio: 0.0696
+  entropy:
+    mean_char_entropy: -0.0074
+    mean_char_max_entropy: -0.0070
+    mean_token_entropy: -0.0056
+    mean_token_max_entropy: -0.0054
+    mean_total_tokens: -0.0075
+    mean_vocab_size: -0.0262
+  halstead:
+    mean_N1_total_operators: -0.0036
+    mean_N2_total_operands: -0.0096
+    mean_difficulty: -0.0139
+    mean_effort: -0.0244
+    mean_estimated_bugs: -0.0103
+    mean_length: -0.0059
+    mean_n1_unique_operators: -0.0253
+    mean_n2_unique_operands: -0.0217
+    mean_time_to_implement_seconds: -0.0244
+    mean_vocabulary: -0.0224
+    mean_volume: -0.0102
+  heaps:
+    mean_beta: 0.0225
+    mean_k: -0.1085
+    mean_r_squared: 0.0046
+  identifier_length_variance:
+    mean_max: 0.0623
+    mean_mean: 0.2335
+    mean_std_dev: 0.2269
+    mean_variance: 0.4543
+  line_patterns:
+    mean_string_literal_ratio: 0.0201
+  ngram:
+    mean_bigram_repeated_unique: -0.0072
+    mean_bigram_repetition_rate: 0.0064
+    mean_bigram_total: -0.0075
+    mean_bigram_unique: -0.0124
+    mean_trigram_repeated_unique: -0.0135
+    mean_trigram_repetition_rate: 0.0035
+    mean_trigram_total: -0.0075
+    mean_trigram_unique: -0.0093
+  punctuation_density:
+    mean_colon_suffix_density: 0.0087
+    mean_question_mark_density: -0.1007
+  readability:
+    mean_avg_line_length: 0.1147
+    mean_avg_sub_words_per_id: 0.1104
+    mean_avg_tokens_per_line: -0.0075
+    mean_flesch_adapted: -0.1154
+    mean_fog_adapted: 0.0448
+  separator_counts:
+    mean_hyphen_count: -0.5988
+    mean_underscore_count: 0.6819
+  symbol_density:
+    mean_density: -0.1135
+    mean_distinct_symbol_types: -0.0272
+    mean_symbol_count: -0.0042
+  vocabulary:
+    mean_mattr: -0.0033
+    mean_total_identifiers: -0.0235
+    mean_unique_identifiers: -0.0258
+  vowel_density:
+    mean_total_chars: 0.2085
+  zipf:
+    mean_exponent: 0.0060
+    mean_total_tokens: -0.0075
+    mean_vocab_size: -0.0262
+
+name_is_single_letter:
+  _doc: "Single-letter names outside loop indices are too opaque."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: 30.8986
+  branching:
+    mean_branching_density: -0.0445
+    mean_non_blank_count: 0.0426
+  brevity:
+    mean_sample_size: 0.2360
+  casing_entropy:
+    mean_camel_case_count: 1.9409
+    mean_entropy: 0.3197
+    mean_snake_case_count: -0.1073
+  comment_structure:
+    mean_comment_line_ratio: -0.0655
+  compression:
+    mean_raw_bytes: 0.6122
+    mean_redundancy: 0.1536
+    mean_unique_line_ratio: 0.0952
+    mean_zlib_bytes: 0.3372
+    mean_zlib_ratio: 0.2802
+  entropy:
+    mean_char_entropy: 0.0250
+    mean_char_max_entropy: 0.0106
+    mean_char_normalized: 0.0143
+    mean_token_entropy: 0.0808
+    mean_token_max_entropy: 0.0529
+    mean_token_normalized: 0.0277
+    mean_total_tokens: -0.0290
+    mean_vocab_size: 0.2360
+  function_metrics:
+    mean_avg_function_lines: 0.0364
+  halstead:
+    mean_N1_total_operators: -0.0299
+    mean_N2_total_operands: -0.0195
+    mean_difficulty: -0.5303
+    mean_effort: -0.4968
+    mean_estimated_bugs: 0.0311
+    mean_length: -0.0257
+    mean_n1_unique_operators: -0.0930
+    mean_n2_unique_operands: 0.4254
+    mean_time_to_implement_seconds: -0.4968
+    mean_vocabulary: 0.2528
+    mean_volume: 0.0311
+  heaps:
+    mean_beta: 0.2456
+    mean_k: -0.4826
+    mean_r_squared: 0.0606
+  identifier_length_variance:
+    mean_mean: 1.2618
+    mean_variance: 0.0143
+  indentation:
+    mean_blank_line_ratio: -0.0401
+    mean_mean_depth: 0.0273
+    mean_variance: 0.0329
+  line_patterns:
+    mean_blank_line_ratio: -0.0401
+    mean_unique_line_ratio: 0.0990
+  magic_number_density:
+    mean_density: 0.0296
+  ngram:
+    mean_bigram_hapax_fraction: 0.2403
+    mean_bigram_repeated_unique: -0.2404
+    mean_bigram_repetition_rate: -0.2658
+    mean_bigram_total: -0.0291
+    mean_bigram_unique: 0.2611
+    mean_trigram_hapax_fraction: 0.1247
+    mean_trigram_repeated_unique: -0.4172
+    mean_trigram_repetition_rate: -0.3206
+    mean_trigram_total: -0.0291
+    mean_trigram_unique: 0.1202
+  punctuation_density:
+    mean_colon_suffix_density: 0.0303
+    mean_id_nonalpha_suffix_density: -0.0315
+    mean_question_mark_density: -0.8899
+  readability:
+    mean_avg_line_length: 0.6061
+    mean_avg_sub_words_per_id: 0.2456
+    mean_avg_tokens_per_line: -0.0753
+    mean_flesch_adapted: -0.2349
+    mean_fog_adapted: -0.0666
+    mean_total_lines: 0.0431
+  separator_counts:
+    mean_hyphen_count: -0.1345
+    mean_underscore_count: 2.0000
+  symbol_density:
+    mean_density: -0.6407
+    mean_symbol_count: -0.0254
+  vocabulary:
+    mean_mattr: 0.4875
+    mean_raw_ttr: 0.4010
+    mean_total_identifiers: 0.0481
+    mean_unique_identifiers: 0.4452
+  vowel_density:
+    mean_total_chars: 1.2917
+  zipf:
+    mean_exponent: -0.2098
+    mean_r_squared: 0.1010
+    mean_total_tokens: -0.0290
+    mean_vocab_size: 0.2360
+
+name_is_too_long:
+  _doc: "Names longer than ~30 characters harm readability."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -10.5110
+  branching:
+    mean_branch_count: 0.0340
+    mean_branching_density: 0.0916
+    mean_max_nesting_depth: 0.0484
+    mean_non_blank_count: -0.0724
+  brevity:
+    mean_sample_size: -0.0167
+  casing_entropy:
+    mean_camel_case_count: -0.1082
+    mean_entropy: 0.0194
+    mean_other_count: 0.0922
+    mean_pascal_case_count: 0.0340
+    mean_snake_case_count: 0.1095
+  comment_structure:
+    mean_comment_line_ratio: 0.1321
+  compression:
+    mean_raw_bytes: -0.2235
+    mean_redundancy: -0.0299
+    mean_unique_line_ratio: -0.0154
+    mean_zlib_bytes: -0.1618
+    mean_zlib_ratio: -0.0643
+  entropy:
+    mean_token_entropy: -0.0111
+    mean_token_max_entropy: -0.0038
+    mean_token_normalized: -0.0072
+    mean_total_tokens: 0.0756
+    mean_vocab_size: -0.0167
+  function_metrics:
+    mean_avg_function_lines: -0.1811
+    mean_avg_param_count: -0.0267
+    mean_function_count: 0.1054
+    mean_max_function_lines: -0.1862
+  halstead:
+    mean_N1_total_operators: 0.0815
+    mean_N2_total_operands: 0.0706
+    mean_difficulty: 0.1104
+    mean_effort: 0.1960
+    mean_estimated_bugs: 0.0734
+    mean_length: 0.0774
+    mean_n1_unique_operators: 0.0062
+    mean_n2_unique_operands: -0.0320
+    mean_time_to_implement_seconds: 0.1960
+    mean_vocabulary: -0.0217
+    mean_volume: 0.0734
+  heaps:
+    mean_beta: -0.0480
+    mean_k: 0.1004
+    mean_r_squared: -0.0095
+  identifier_length_variance:
+    mean_max: -0.4664
+    mean_mean: -0.4056
+    mean_std_dev: -0.5951
+    mean_variance: -1.1923
+  indentation:
+    mean_blank_line_ratio: 0.0206
+    mean_max_depth: -0.2280
+    mean_mean_depth: -0.1355
+    mean_variance: -0.2997
+  line_patterns:
+    mean_blank_line_ratio: 0.0206
+    mean_max_nesting_depth: 0.0484
+    mean_string_literal_ratio: -0.0763
+    mean_unique_line_ratio: -0.0145
+  magic_number_density:
+    mean_density: -0.0708
+    mean_string_literal_ratio: -0.1025
+  near_duplicate_blocks_file:
+    mean_block_count: 0.0593
+    mean_near_dup_block_d0: 0.3891
+    mean_near_dup_block_d4: 0.6367
+    mean_near_dup_block_d6: 0.5046
+    mean_near_dup_block_d7: -0.5046
+    mean_near_dup_block_d8: 0.9550
+    mean_sub_block_count: 0.1005
+  ngram:
+    mean_bigram_hapax_fraction: -0.0613
+    mean_bigram_repeated_unique: 0.1532
+    mean_bigram_repetition_rate: 0.0826
+    mean_bigram_total: 0.0758
+    mean_bigram_unique: 0.0140
+    mean_trigram_hapax_fraction: -0.0412
+    mean_trigram_repeated_unique: 0.2154
+    mean_trigram_repetition_rate: 0.1235
+    mean_trigram_total: 0.0759
+    mean_trigram_unique: 0.0351
+  punctuation_density:
+    mean_arrow_density: 0.1321
+    mean_bracket_nonalpha_prefix_count: 0.0708
+    mean_bracket_nonalpha_suffix_count: 0.0511
+    mean_colon_suffix_density: -0.0806
+    mean_dot_count: 0.0613
+    mean_exclamation_density: 0.3183
+    mean_id_nonalpha_suffix_density: 0.0149
+    mean_question_mark_density: 0.1862
+  readability:
+    mean_avg_line_length: -0.1589
+    mean_avg_sub_words_per_id: -0.3392
+    mean_avg_tokens_per_line: 0.1430
+    mean_flesch_adapted: 2.0000
+    mean_fog_adapted: -0.3969
+    mean_total_lines: -0.0733
+  separator_counts:
+    mean_dot_count: 0.0613
+    mean_hyphen_count: 0.1862
+    mean_slash_count: 0.1025
+    mean_underscore_count: -0.5989
+  symbol_density:
+    mean_density: 0.2971
+    mean_distinct_symbol_types: 0.0446
+    mean_symbol_count: 0.0831
+  vocabulary:
+    mean_mattr: -0.0939
+    mean_raw_ttr: -0.1041
+    mean_total_identifiers: 0.0610
+    mean_unique_identifiers: -0.0445
+  vowel_density:
+    mean_total_chars: -0.3409
+  zipf:
+    mean_exponent: 0.0555
+    mean_total_tokens: 0.0756
+    mean_vocab_size: -0.0167
+
+name_is_too_short:
+  _doc: "Names shorter than 3 characters (outside loops) are too opaque."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -3.8620
+  branching:
+    mean_branch_count: -0.2327
+    mean_branching_density: -0.2381
+  brevity:
+    mean_sample_size: -0.1256
+  casing_entropy:
+    mean_camel_case_count: -0.0450
+    mean_entropy: -0.4018
+    mean_other_count: -2.0000
+    mean_snake_case_count: -0.1480
+  comment_structure:
+    mean_comment_line_ratio: -0.0092
+  compression:
+    mean_raw_bytes: 0.2713
+    mean_redundancy: 0.1160
+    mean_zlib_bytes: 0.0526
+    mean_zlib_ratio: 0.2197
+  entropy:
+    mean_char_entropy: -0.0115
+    mean_char_max_entropy: 0.0191
+    mean_char_normalized: -0.0304
+    mean_token_entropy: -0.0467
+    mean_token_max_entropy: -0.0267
+    mean_token_normalized: -0.0201
+    mean_total_tokens: -0.0256
+    mean_vocab_size: -0.1256
+  halstead:
+    mean_N1_total_operators: -0.0225
+    mean_N2_total_operands: -0.0374
+    mean_difficulty: 0.0242
+    mean_effort: -0.0350
+    mean_estimated_bugs: -0.0584
+    mean_length: -0.0283
+    mean_n1_unique_operators: -0.0974
+    mean_n2_unique_operands: -0.1584
+    mean_time_to_implement_seconds: -0.0350
+    mean_vocabulary: -0.1401
+    mean_volume: -0.0584
+  heaps:
+    mean_k: -0.1166
+    mean_r_squared: 0.0306
+  identifier_length_variance:
+    mean_mean: 0.6923
+    mean_std_dev: -0.2499
+    mean_variance: -0.5009
+  indentation:
+    mean_variance: 0.0168
+  line_patterns:
+    mean_string_literal_ratio: 0.0229
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d6: -1.2621
+  ngram:
+    mean_bigram_hapax_fraction: 0.0137
+    mean_bigram_repeated_unique: -0.0944
+    mean_bigram_repetition_rate: 0.0110
+    mean_bigram_total: -0.0256
+    mean_bigram_unique: -0.0691
+    mean_trigram_repeated_unique: -0.0446
+    mean_trigram_total: -0.0256
+    mean_trigram_unique: -0.0318
+  punctuation_density:
+    mean_colon_suffix_density: -0.2391
+    mean_id_nonalpha_suffix_density: -0.0101
+    mean_question_mark_density: -0.2722
+  readability:
+    mean_avg_line_length: 0.2778
+    mean_avg_tokens_per_line: -0.0329
+    mean_fog_adapted: -0.0263
+  separator_counts:
+    mean_hyphen_count: -0.5797
+    mean_underscore_count: -0.1641
+  symbol_density:
+    mean_density: -0.2806
+    mean_distinct_symbol_types: 0.0241
+    mean_symbol_count: -0.0137
+  vocabulary:
+    mean_mattr: -0.2556
+    mean_raw_ttr: -0.1188
+    mean_total_identifiers: -0.1971
+    mean_unique_identifiers: -0.3128
+  vowel_density:
+    mean_total_chars: 0.4916
+  zipf:
+    mean_exponent: 0.0521
+    mean_total_tokens: -0.0256
+    mean_vocab_size: -0.1256
+
+negated_boolean_name:
+  _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -6.4001
+  brevity:
+    mean_sample_size: -0.0998
+  casing_entropy:
+    mean_camel_case_count: -0.1117
+  compression:
+    mean_raw_bytes: -0.0414
+    mean_zlib_bytes: -0.0643
+    mean_zlib_ratio: 0.0231
+  entropy:
+    mean_token_max_entropy: -0.0196
+    mean_vocab_size: -0.0998
+  halstead:
+    mean_difficulty: 0.0956
+    mean_effort: 0.0772
+    mean_estimated_bugs: -0.0162
+    mean_n2_unique_operands: -0.1082
+    mean_time_to_implement_seconds: 0.0772
+    mean_vocabulary: -0.0818
+    mean_volume: -0.0162
+  heaps:
+    mean_beta: 0.0357
+    mean_k: -0.2055
+  identifier_length_variance:
+    mean_max: -0.0454
+    mean_mean: -0.1116
+    mean_std_dev: -0.2685
+    mean_variance: -0.5427
+  line_patterns:
+    mean_string_literal_ratio: 0.0321
+  magic_number_density:
+    mean_string_literal_ratio: 0.0648
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d4: -2.0000
+    mean_near_dup_block_d5: 2.0000
+    mean_sub_block_count: -0.0546
+  ngram:
+    mean_bigram_hapax_fraction: -0.0303
+    mean_bigram_repeated_unique: 0.0479
+    mean_bigram_repetition_rate: 0.0366
+    mean_bigram_unique: -0.0275
+    mean_trigram_hapax_fraction: -0.0202
+    mean_trigram_repeated_unique: 0.0439
+    mean_trigram_repetition_rate: 0.0732
+    mean_trigram_unique: -0.0366
+  punctuation_density:
+    mean_bracket_nonalpha_prefix_count: -0.2405
+    mean_bracket_nonalpha_suffix_count: -0.0403
+    mean_colon_suffix_density: -0.1292
+    mean_exclamation_density: -0.4070
+    mean_id_nonalpha_suffix_density: -0.0280
+  readability:
+    mean_avg_line_length: -0.0419
+    mean_avg_sub_words_per_id: -0.1284
+    mean_flesch_adapted: 0.1760
+    mean_fog_adapted: -0.1206
+  separator_counts:
+    mean_underscore_count: -0.4507
+  symbol_density:
+    mean_density: 0.0375
+  vocabulary:
+    mean_mattr: -0.0261
+    mean_raw_ttr: -0.1352
+    mean_unique_identifiers: -0.1462
+  vowel_density:
+    mean_total_chars: -0.1238
+  zipf:
+    mean_exponent: 0.0151
+    mean_vocab_size: -0.0998
+
+no_hungarian_notation:
+  _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -8.4371
+  brevity:
+    mean_sample_size: -0.0295
+  casing_entropy:
+    mean_camel_case_count: -1.8340
+    mean_entropy: -0.2028
+    mean_pascal_case_count: 0.0217
+    mean_screaming_snake_density: 0.0473
+    mean_snake_case_count: 0.2648
+  compression:
+    mean_raw_bytes: -0.1404
+    mean_redundancy: -0.0554
+    mean_zlib_bytes: -0.0494
+    mean_zlib_ratio: -0.0857
+  entropy:
+    mean_char_entropy: -0.0226
+    mean_char_normalized: -0.0276
+    mean_total_tokens: 0.0139
+    mean_vocab_size: -0.0295
+  function_metrics:
+    mean_avg_function_lines: 1.1030
+    mean_function_count: 0.0489
+    mean_max_function_lines: 0.6027
+  halstead:
+    mean_difficulty: 0.0972
+    mean_effort: 0.0546
+    mean_estimated_bugs: -0.0179
+    mean_n2_unique_operands: -0.1112
+    mean_time_to_implement_seconds: 0.0546
+    mean_vocabulary: -0.0811
+    mean_volume: -0.0178
+  heaps:
+    mean_k: 0.0266
+  identifier_length_variance:
+    mean_max: -0.0632
+    mean_mean: -0.2501
+    mean_std_dev: 0.0436
+    mean_variance: 0.0825
+  indentation:
+    mean_blank_line_ratio: -0.0083
+  line_patterns:
+    mean_blank_line_ratio: -0.0083
+    mean_string_literal_ratio: -0.2464
+  magic_number_density:
+    mean_density: -0.0242
+    mean_string_literal_ratio: -0.3014
+  near_duplicate_blocks_file:
+    mean_near_dup_block_d4: -2.0000
+    mean_near_dup_block_d7: -0.4150
+    mean_sub_block_count: -0.0133
+  ngram:
+    mean_bigram_hapax_fraction: 0.0082
+    mean_bigram_total: 0.0140
+    mean_trigram_hapax_fraction: 0.0091
+    mean_trigram_repeated_unique: -0.0105
+    mean_trigram_repetition_rate: -0.0222
+    mean_trigram_total: 0.0140
+    mean_trigram_unique: 0.0176
+  punctuation_density:
+    mean_arrow_density: -0.0515
+    mean_bracket_nonalpha_suffix_count: -0.0310
+    mean_colon_suffix_density: -0.1246
+    mean_dot_count: 0.0112
+    mean_question_mark_density: 0.1699
+  readability:
+    mean_avg_line_length: -0.1457
+    mean_avg_sub_words_per_id: -0.3755
+    mean_avg_tokens_per_line: 0.0141
+    mean_flesch_adapted: 0.4848
+    mean_fog_adapted: -0.5445
+  separator_counts:
+    mean_dot_count: 0.0112
+    mean_underscore_count: -1.6136
+  symbol_density:
+    mean_density: 0.1326
+    mean_distinct_symbol_types: 0.0704
+    mean_symbol_count: 0.0074
+  vocabulary:
+    mean_mattr: -0.0445
+    mean_raw_ttr: -0.0346
+    mean_total_identifiers: -0.0810
+    mean_unique_identifiers: -0.1508
+  vowel_density:
+    mean_total_chars: -0.3322
+  zipf:
+    mean_exponent: 0.0415
+    mean_total_tokens: 0.0139
+    mean_vocab_size: -0.0295
+
+screaming_snake_for_constants:
+  _doc: "Module-level constants should use SCREAMING_SNAKE_CASE."
+  _languages: [elixir, javascript, ruby]
+  _log_baseline: -4.4685
+  branching:
+    mean_branching_density: 0.0176
+    mean_non_blank_count: -0.0180
+  brevity:
+    mean_sample_size: -0.0136
+  casing_entropy:
+    mean_camel_case_count: 0.0302
+    mean_entropy: 0.0261
+    mean_macro_case_count: 1.9913
+    mean_pascal_case_count: -0.1674
+    mean_screaming_snake_density: 2.0000
+  comment_structure:
+    mean_comment_line_ratio: 0.0267
+  compression:
+    mean_raw_bytes: -0.0086
+    mean_redundancy: -0.0090
+    mean_unique_line_ratio: -0.0048
+    mean_zlib_bytes: 0.0081
+    mean_zlib_ratio: -0.0168
+  entropy:
+    mean_char_entropy: 0.0305
+    mean_char_max_entropy: 0.0092
+    mean_char_normalized: 0.0213
+    mean_total_tokens: -0.0066
+    mean_vocab_size: -0.0136
+  halstead:
+    mean_N1_total_operators: -0.0036
+    mean_N2_total_operands: -0.0117
+    mean_difficulty: 0.0034
+    mean_effort: -0.0052
+    mean_estimated_bugs: -0.0088
+    mean_length: -0.0067
+    mean_n2_unique_operands: -0.0163
+    mean_time_to_implement_seconds: -0.0052
+    mean_vocabulary: -0.0115
+    mean_volume: -0.0088
+  identifier_length_variance:
+    mean_mean: 0.0048
+    mean_std_dev: 0.0244
+    mean_variance: 0.0482
+  indentation:
+    mean_blank_line_ratio: 0.0078
+    mean_mean_depth: 0.0069
+    mean_variance: 0.0048
+  line_patterns:
+    mean_blank_line_ratio: 0.0078
+    mean_string_literal_ratio: 0.0089
+    mean_unique_line_ratio: -0.0059
+  magic_number_density:
+    mean_density: 0.0057
+    mean_string_literal_ratio: 0.0090
+  ngram:
+    mean_bigram_hapax_fraction: -0.0085
+    mean_bigram_repetition_rate: 0.0069
+    mean_bigram_total: -0.0066
+    mean_bigram_unique: -0.0131
+    mean_trigram_hapax_fraction: -0.0045
+    mean_trigram_repetition_rate: 0.0072
+    mean_trigram_total: -0.0066
+    mean_trigram_unique: -0.0101
+  punctuation_density:
+    mean_arrow_density: 0.0071
+    mean_colon_suffix_density: 0.0103
+    mean_id_nonalpha_suffix_density: 0.0078
+  readability:
+    mean_avg_line_length: 0.0087
+    mean_avg_sub_words_per_id: -0.0090
+    mean_avg_tokens_per_line: 0.0102
+    mean_flesch_adapted: 0.0095
+    mean_fog_adapted: -0.0082
+    mean_total_lines: -0.0182
+  separator_counts:
+    mean_underscore_count: 0.3971
+  symbol_density:
+    mean_symbol_count: -0.0036
+  vocabulary:
+    mean_mattr: -0.0037
+    mean_raw_ttr: -0.0055
+    mean_total_identifiers: -0.0157
+    mean_unique_identifiers: -0.0212
+  vowel_density:
+    mean_total_chars: -0.0111
+  zipf:
+    mean_exponent: 0.0038
+    mean_total_tokens: -0.0066
+    mean_vocab_size: -0.0136
+
diff --git a/scripts/run.sh b/scripts/run.sh
index 98042053..4cc0918f 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -47,17 +47,20 @@ esac
 # --- Build CLI arguments ---
 ARGS=("$INPUT_COMMAND" "$INPUT_PATH")
 CAPTURE_STDOUT=false
+COMMENT_MODE=false
 
 case "$INPUT_COMMAND" in
   health-report)
-    ARGS+=("--output" "$OUTPUT_FILE")
     ARGS+=("--detail" "$INPUT_DETAIL")
     ARGS+=("--top" "$INPUT_TOP")
     if [[ -n "$INPUT_CONFIG" ]]; then
       ARGS+=("--config" "$INPUT_CONFIG")
     fi
     if [[ "${INPUT_COMMENT:-false}" == "true" ]]; then
-      ARGS+=("--format" "github")
+      ARGS+=("--comment")
+      COMMENT_MODE=true
+    else
+      ARGS+=("--output" "$OUTPUT_FILE")
     fi
     ;;
   compare)
@@ -117,6 +120,123 @@ else
   "$CODEQA" "${ARGS[@]}"
 fi
 
+# --- Post multi-part PR comments (health-report with comment mode) ---
+# Posts each codeqa-part-<i>.md found in $TMPDIR (presumably written by the CLI when run
+# with --comment) as its own PR comment. A per-part sentinel string is searched for in the
+# PR's existing comments so a matching comment is updated in place instead of duplicated.
+if [[ "$COMMENT_MODE" == "true" ]]; then
+  TMPDIR="${TMPDIR:-/tmp}"
+  PART_COUNT_FILE="${TMPDIR}/codeqa-part-count.txt"
+
+  if [[ ! -f "$PART_COUNT_FILE" ]]; then
+    echo "::error::Part count file not found at ${PART_COUNT_FILE}"
+    exit 1
+  fi
+
+  # Strip whitespace and insist on a plain integer before handing the value to seq.
+  PART_COUNT=$(tr -d '[:space:]' < "$PART_COUNT_FILE")
+  if [[ ! "$PART_COUNT" =~ ^[0-9]+$ ]]; then
+    echo "::error::Invalid part count '${PART_COUNT}' in ${PART_COUNT_FILE}"
+    exit 1
+  fi
+  PART_COUNT=$((10#$PART_COUNT)) # force base 10 so a leading zero is not read as octal
+  echo "Posting ${PART_COUNT} comment parts..."
+
+  # GitHub API settings
+  API_URL="${GITHUB_API_URL:-https://api.github.com}"
+  REPO="${GITHUB_REPOSITORY}"
+  PR_NUMBER="${PR_NUMBER:-}"
+
+  if [[ -z "$PR_NUMBER" ]]; then
+    echo "::error::PR_NUMBER not set. Cannot post PR comments."
+    exit 1
+  fi
+
+  if [[ -z "${GITHUB_TOKEN:-}" ]]; then
+    echo "::error::GITHUB_TOKEN not set. Cannot post PR comments."
+    exit 1
+  fi
+
+  # List the PR's existing comments once, following pagination so a sentinel is still
+  # found when the PR has more than 100 comments. Only id and body are kept.
+  COMMENTS_JSON="[]"
+  PAGE=1
+  while true; do
+    PAGE_ITEMS=""
+    if PAGE_JSON=$(curl -fsSL \
+      -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+      -H "Accept: application/vnd.github+json" \
+      "${API_URL}/repos/${REPO}/issues/${PR_NUMBER}/comments?per_page=100&page=${PAGE}"); then
+      PAGE_ITEMS=$(jq -c '[.[] | {id, body}]' <<<"$PAGE_JSON" 2>/dev/null) || PAGE_ITEMS=""
+    fi
+
+    # Still best-effort: a failed listing falls back to creating comments, but no longer silently.
+    if [[ -z "$PAGE_ITEMS" ]]; then
+      echo "::warning::Could not list existing PR comments (page ${PAGE}); duplicate comments may be created"
+      break
+    fi
+
+    COMMENTS_JSON=$(jq -c -s 'add' <<<"${COMMENTS_JSON}"$'\n'"${PAGE_ITEMS}")
+    # A page shorter than per_page is the last one.
+    if [[ $(jq 'length' <<<"$PAGE_ITEMS") -lt 100 ]]; then
+      break
+    fi
+    PAGE=$((PAGE + 1))
+  done
+
+  POSTED=0
+  for i in $(seq 1 "$PART_COUNT"); do
+    PART_FILE="${TMPDIR}/codeqa-part-${i}.md"
+    SENTINEL="<!-- codeqa-health-report-${i} -->"
+
+    if [[ ! -f "$PART_FILE" ]]; then
+      echo "::warning::Part file ${PART_FILE} not found, skipping"
+      continue
+    fi
+
+    BODY=$(cat "$PART_FILE")
+
+    # Find the first existing comment whose body contains this part's sentinel.
+    # `.body // ""` keeps jq from erroring on a comment with a null body.
+    echo "Searching for existing comment with sentinel: ${SENTINEL}"
+    COMMENT_ID=$(jq -r --arg sentinel "$SENTINEL" \
+      '[.[] | select((.body // "") | contains($sentinel)) | .id][0] // empty' <<<"$COMMENTS_JSON")
+
+    # Prepare JSON payload
+    PAYLOAD=$(jq -n --arg body "$BODY" '{"body": $body}')
+
+    if [[ -n "$COMMENT_ID" && "$COMMENT_ID" != "null" ]]; then
+      echo "Updating existing comment ${COMMENT_ID} for part ${i}..."
+      METHOD="PATCH"
+      COMMENT_URL="${API_URL}/repos/${REPO}/issues/comments/${COMMENT_ID}"
+    else
+      echo "Creating new comment for part ${i}..."
+      METHOD="POST"
+      COMMENT_URL="${API_URL}/repos/${REPO}/issues/${PR_NUMBER}/comments"
+    fi
+
+    # Fail loudly rather than reporting success after a rejected request.
+    if ! curl -fsSL -X "$METHOD" \
+      -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+      -H "Accept: application/vnd.github+json" \
+      "$COMMENT_URL" \
+      -d "$PAYLOAD" > /dev/null; then
+      echo "::error::${METHOD} of comment for part ${i} failed"
+      exit 1
+    fi
+    POSTED=$((POSTED + 1))
+  done
+
+  if [[ "$POSTED" -eq "$PART_COUNT" ]]; then
+    echo "All ${PART_COUNT} comment parts posted successfully"
+  else
+    echo "::warning::Posted ${POSTED} of ${PART_COUNT} comment parts"
+  fi
+
+  # Use part 1 as the main output file for grade extraction
+  OUTPUT_FILE="${TMPDIR}/codeqa-part-1.md"
+fi
+
 # --- Extract grade (health-report only) ---
 GRADE=""
 if [[ "$INPUT_COMMAND" == "health-report" && -f "$OUTPUT_FILE" ]]; then
diff --git a/test/codeqa/analysis/behavior_config_server_test.exs b/test/codeqa/analysis/behavior_config_server_test.exs
new file mode 100644
index 00000000..ebcc31bb
--- /dev/null
+++ b/test/codeqa/analysis/behavior_config_server_test.exs
@@ -0,0 +1,75 @@
+defmodule CodeQA.Analysis.BehaviorConfigServerTest do
+  # Checks BehaviorConfigServer's reads against the YAML files in priv/combined_metrics.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Analysis.BehaviorConfigServer
+
+  setup do
+    {:ok, pid} = BehaviorConfigServer.start_link()
+    {:ok, pid: pid}
+  end
+
+  test "get_all_behaviors/1 returns a non-empty map of categories", %{pid: pid} do
+    behaviors = BehaviorConfigServer.get_all_behaviors(pid)
+    assert is_map(behaviors)
+    assert map_size(behaviors) > 0
+
+    Enum.each(behaviors, fn {category, list} ->
+      assert is_binary(category)
+      assert [_ | _] = list
+      Enum.each(list, fn {behavior, data} ->
+        assert is_binary(behavior)
+        assert is_map(data)
+      end)
+    end)
+  end
+
+  test "get_all_behaviors/1 matches YamlElixir direct reads", %{pid: pid} do
+    behaviors = BehaviorConfigServer.get_all_behaviors(pid)
+    yaml_dir = "priv/combined_metrics"
+
+    yaml_dir
+    |> File.ls!()
+    |> Enum.filter(&String.ends_with?(&1, ".yml"))
+    |> Enum.each(fn yml_file ->
+      # Path.basename/2 strips ".yml" once; trim_trailing/2 would also eat repeated suffixes.
+      category = Path.basename(yml_file, ".yml")
+      {:ok, data} = YamlElixir.read_from_file(Path.join(yaml_dir, yml_file))
+
+      expected = for {name, value} <- data, is_map(value), do: name
+      actual = behaviors |> Map.get(category, []) |> Enum.map(&elem(&1, 0))
+      assert Enum.sort(expected) == Enum.sort(actual)
+    end)
+  end
+
+  test "get_scalars/3 returns a map of {group, key} => scalar", %{pid: pid} do
+    {category, behavior} = first_behavior(pid)
+    scalars = BehaviorConfigServer.get_scalars(pid, category, behavior)
+    assert is_map(scalars)
+
+    Enum.each(scalars, fn {{group, key}, scalar} ->
+      assert is_binary(group)
+      assert is_binary(key)
+      assert is_float(scalar)
+    end)
+  end
+
+  test "get_scalars/3 returns empty map for unknown behavior", %{pid: pid} do
+    assert BehaviorConfigServer.get_scalars(pid, "nonexistent", "also_nonexistent") == %{}
+  end
+
+  test "get_log_baseline/3 returns a float", %{pid: pid} do
+    {category, behavior} = first_behavior(pid)
+    assert is_float(BehaviorConfigServer.get_log_baseline(pid, category, behavior))
+  end
+
+  test "get_log_baseline/3 returns 0.0 for unknown behavior", %{pid: pid} do
+    assert BehaviorConfigServer.get_log_baseline(pid, "nonexistent", "also_nonexistent") == 0.0
+  end
+
+  # Returns {category, behavior} for the first behavior the server reports.
+  defp first_behavior(pid) do
+    {category, [{behavior, _data} | _]} = Enum.at(BehaviorConfigServer.get_all_behaviors(pid), 0)
+    {category, behavior}
+  end
+end
diff --git a/test/codeqa/analysis/file_context_server_test.exs b/test/codeqa/analysis/file_context_server_test.exs
new file mode 100644
index 00000000..660bd9a3
--- /dev/null
+++ b/test/codeqa/analysis/file_context_server_test.exs
@@ -0,0 +1,38 @@
+defmodule CodeQA.Analysis.FileContextServerTest do
+  # Covers FileContextServer.get/2: the returned struct, stability across repeated calls,
+  # and agreement with Pipeline.build_file_context/1.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Analysis.FileContextServer
+  alias CodeQA.Engine.{FileContext, Pipeline}
+
+  @foo_source "defmodule Foo do\n  def bar, do: :ok\nend\n"
+
+  setup do
+    {:ok, server} = FileContextServer.start_link()
+    %{pid: server}
+  end
+
+  test "get/2 returns a Pipeline.FileContext", %{pid: pid} do
+    result = FileContextServer.get(pid, @foo_source)
+    assert %FileContext{} = result
+    assert is_binary(result.content)
+  end
+
+  test "get/2 returns identical struct on second call without rebuilding", %{pid: pid} do
+    [first, second] = for _ <- 1..2, do: FileContextServer.get(pid, @foo_source)
+    assert first == second
+  end
+
+  test "get/2 with different content returns different results", %{pid: pid} do
+    small = FileContextServer.get(pid, "defmodule A do\nend\n")
+    larger = FileContextServer.get(pid, "defmodule B do\n  def foo, do: 1\nend\n")
+    refute small == larger
+  end
+
+  test "get/2 matches Pipeline.build_file_context/1 directly", %{pid: pid} do
+    source = "x = 1\ny = 2\n"
+    built_directly = Pipeline.build_file_context(source)
+    assert FileContextServer.get(pid, source) == built_directly
+  end
+end
diff --git a/test/codeqa/analysis/file_metrics_server_test.exs b/test/codeqa/analysis/file_metrics_server_test.exs
new file mode 100644
index 00000000..b68f4b37
--- /dev/null
+++ b/test/codeqa/analysis/file_metrics_server_test.exs
@@ -0,0 +1,76 @@
+defmodule CodeQA.Analysis.FileMetricsServerTest do
+  # Covers the two lookup paths of FileMetricsServer: by file path after
+  # populate/3, and by content through get_for_content/3. Every test talks to
+  # its own freshly started server.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Analysis.FileMetricsServer
+  alias CodeQA.Engine.Analyzer
+
+  @path "lib/a.ex"
+  @source "defmodule A do\n  def foo, do: 1\nend\n"
+
+  # Metrics map carrying a single halstead token count.
+  defp halstead(tokens), do: %{"halstead" => %{"tokens" => tokens}}
+
+  # Pipeline-result shape passed to populate/3: one entry for @path whose
+  # metrics are halstead(tokens).
+  defp pipeline_result(tokens) do
+    %{"files" => %{@path => %{"metrics" => halstead(tokens)}}}
+  end
+
+  setup do
+    {:ok, server} = FileMetricsServer.start_link()
+    %{pid: server}
+  end
+
+  describe "populate/3 and get_by_path/2" do
+    test "returns pre-populated baseline metrics for a path", %{pid: pid} do
+      :ok = FileMetricsServer.populate(pid, pipeline_result(5.0), %{@path => @source})
+
+      assert FileMetricsServer.get_by_path(pid, @path) == halstead(5.0)
+    end
+
+    test "returns nil for unknown path", %{pid: pid} do
+      :ok = FileMetricsServer.populate(pid, %{"files" => %{}}, %{})
+
+      assert is_nil(FileMetricsServer.get_by_path(pid, "nonexistent.ex"))
+    end
+  end
+
+  describe "get_for_content/3" do
+    test "computes and caches metrics on first call", %{pid: pid} do
+      metrics = FileMetricsServer.get_for_content(pid, Analyzer.build_registry(), @source)
+
+      assert is_map(metrics)
+      assert map_size(metrics) > 0
+    end
+
+    test "returns identical result on second call (cache hit)", %{pid: pid} do
+      registry = Analyzer.build_registry()
+
+      first = FileMetricsServer.get_for_content(pid, registry, @source)
+      second = FileMetricsServer.get_for_content(pid, registry, @source)
+
+      assert first == second
+    end
+
+    test "different content returns different metrics", %{pid: pid} do
+      registry = Analyzer.build_registry()
+      repeated_source = String.duplicate("def foo(a, b), do: a + b\n", 20)
+
+      short_metrics = FileMetricsServer.get_for_content(pid, registry, "x = 1\n")
+      long_metrics = FileMetricsServer.get_for_content(pid, registry, repeated_source)
+
+      refute short_metrics == long_metrics
+    end
+
+    test "populate cross-indexes hash so get_for_content hits cache", %{pid: pid} do
+      registry = Analyzer.build_registry()
+      :ok = FileMetricsServer.populate(pid, pipeline_result(99.0), %{@path => @source})
+
+      # Should hit the hash-keyed cache entry seeded from pipeline_result
+      assert FileMetricsServer.get_for_content(pid, registry, @source) == halstead(99.0)
+    end
+  end
+end
diff --git a/test/codeqa/ast/classification/node_classifier_test.exs b/test/codeqa/ast/classification/node_classifier_test.exs
new file mode 100644
index 00000000..990a35d1
--- /dev/null
+++ b/test/codeqa/ast/classification/node_classifier_test.exs
@@ -0,0 +1,317 @@
+defmodule CodeQA.AST.NodeClassifierTest do
+  # Tests for NodeClassifier.classify/2 and classify/3: which typed node struct a
+  # detected block is converted to for a given language module.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeClassifier
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.Token
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+
+  alias CodeQA.AST.Nodes.{
+    AttributeNode,
+    CodeNode,
+    DocNode,
+    FunctionNode,
+    ImportNode,
+    ModuleNode,
+    TestNode
+  }
+
+  alias CodeQA.Languages.Code.Native.Go
+  alias CodeQA.Languages.Code.Native.Rust
+  alias CodeQA.Languages.Code.Scripting.Python
+  alias CodeQA.Languages.Code.Scripting.Ruby
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+  alias CodeQA.Languages.Code.Web.JavaScript
+  alias CodeQA.Languages.Code.Web.TypeScript
+  alias CodeQA.Languages.Unknown
+
+  # Tokenizes `code`, detects blocks with the chosen language module and classifies
+  # the first block. `opts[:language_module]` picks the module; Unknown when absent.
+  defp classify_first(code, opts \\ []) do
+    lang_mod = opts[:language_module] || Unknown
+
+    [block | _] =
+      code
+      |> TokenNormalizer.normalize_structural()
+      |> Parser.detect_blocks(lang_mod)
+
+    NodeClassifier.classify(block, lang_mod)
+  end
+
+  # Wraps hand-built tokens in a childless, single-line Node.
+  defp node_with_tokens(tokens) do
+    %Node{
+      tokens: tokens,
+      line_count: 1,
+      children: []
+    }
+  end
+
+  describe "classify/1 — function detection" do
+    test "def → FunctionNode" do
+      assert %FunctionNode{} =
+               classify_first("def foo(x), do: x + 1", language_module: ElixirLang)
+    end
+
+    test "defp → FunctionNode" do
+      assert %FunctionNode{} = classify_first("defp bar(x), do: x", language_module: ElixirLang)
+    end
+
+    test "defmacro → FunctionNode" do
+      assert %FunctionNode{} =
+               classify_first("defmacro my_macro(x), do: x", language_module: ElixirLang)
+    end
+
+    test "function keyword → FunctionNode" do
+      assert %FunctionNode{} =
+               classify_first("function foo(x) {\n  return x\n}", language_module: JavaScript)
+    end
+
+    test "func keyword → FunctionNode" do
+      assert %FunctionNode{} =
+               classify_first("func Foo(x int) int {\n  return x\n}", language_module: Go)
+    end
+
+    test "fn keyword → FunctionNode" do
+      assert %FunctionNode{} =
+               classify_first("fn main() {\n  println!(\"hello\")\n}", language_module: Rust)
+    end
+  end
+
+  describe "classify/1 — module detection" do
+    test "defmodule → ModuleNode" do
+      assert %ModuleNode{} =
+               classify_first("defmodule Foo do\n  :ok\nend", language_module: ElixirLang)
+    end
+
+    test "class → ModuleNode" do
+      assert %ModuleNode{} = classify_first("class Foo:\n  pass", language_module: Python)
+    end
+
+    test "module → ModuleNode" do
+      assert %ModuleNode{} =
+               classify_first("module Foo\n  def bar; end\nend", language_module: Ruby)
+    end
+
+    test "interface → ModuleNode" do
+      assert %ModuleNode{} =
+               classify_first("interface Foo {\n  bar(): void\n}", language_module: TypeScript)
+    end
+
+    test "struct → ModuleNode" do
+      assert %ModuleNode{} =
+               classify_first("struct Point {\n  x: f64,\n  y: f64,\n}", language_module: Rust)
+    end
+  end
+
+  describe "classify/1 — import detection" do
+    test "import → ImportNode" do
+      assert %ImportNode{} = classify_first("import Foo", language_module: ElixirLang)
+    end
+
+    test "alias → ImportNode" do
+      assert %ImportNode{} = classify_first("alias Foo.Bar", language_module: ElixirLang)
+    end
+
+    test "use → ImportNode" do
+      assert %ImportNode{} =
+               classify_first("use ExUnit.Case, async: true", language_module: ElixirLang)
+    end
+
+    test "require → ImportNode" do
+      assert %ImportNode{} = classify_first("require Logger", language_module: ElixirLang)
+    end
+
+    test "from keyword → ImportNode" do
+      assert %ImportNode{} = classify_first("from os import path", language_module: Python)
+    end
+  end
+
+  describe "classify/1 — test detection" do
+    test "test macro → TestNode" do
+      assert %TestNode{} =
+               classify_first(~s(test "something" do\n  :ok\nend), language_module: ElixirLang)
+    end
+
+    test "describe → TestNode" do
+      assert %TestNode{} =
+               classify_first(~s(describe "some context" do\n  :ok\nend),
+                 language_module: ElixirLang
+               )
+    end
+
+    test "it → TestNode" do
+      code = "it \"behaves correctly\" do\n  :ok\nend"
+      assert %TestNode{} = classify_first(code, language_module: JavaScript)
+    end
+  end
+
+  describe "classify/1 — doc detection" do
+    test "<DOC> token → DocNode" do
+      # A standalone triple-quoted string starts directly with the <DOC> token
+      assert %DocNode{} = classify_first(~s("""\nSome doc.\n"""))
+    end
+
+    test "direct <DOC> token in node → DocNode" do
+      doc_token = %Token{kind: "<DOC>", content: ~s("""), line: 1, col: 0}
+      nl = %Token{kind: "<NL>", content: "\n", line: 2, col: 0}
+      node = node_with_tokens([doc_token, nl])
+      assert %DocNode{} = NodeClassifier.classify(node, Unknown)
+    end
+  end
+
+  describe "classify/1 — attribute detection" do
+    test "@spec → AttributeNode with kind: :typespec" do
+      result = classify_first("@spec foo(integer()) :: :ok", language_module: ElixirLang)
+      assert %AttributeNode{kind: :typespec} = result
+    end
+
+    test "@type → AttributeNode with kind: :typespec" do
+      result = classify_first("@type user_id :: integer()", language_module: ElixirLang)
+      assert %AttributeNode{kind: :typespec} = result
+    end
+
+    test "@typep → AttributeNode with kind: :typespec" do
+      result = classify_first("@typep internal :: atom()", language_module: ElixirLang)
+      assert %AttributeNode{kind: :typespec} = result
+    end
+
+    test "@callback → AttributeNode with kind: :typespec" do
+      result =
+        classify_first("@callback fetch(term()) :: {:ok, term()}", language_module: ElixirLang)
+
+      assert %AttributeNode{kind: :typespec} = result
+    end
+
+    test "@enforce_keys → AttributeNode with kind: nil" do
+      result = classify_first("@enforce_keys [:name, :age]", language_module: ElixirLang)
+      assert %AttributeNode{kind: nil} = result
+    end
+
+    test "all Elixir typespec attributes are recognized" do
+      for attr <- ~w[spec type typep opaque callback macrocallback] do
+        result = classify_first("@#{attr} foo :: bar", language_module: ElixirLang)
+
+        assert %AttributeNode{kind: :typespec} = result,
+               "expected AttributeNode(kind: :typespec) for @#{attr}"
+      end
+    end
+  end
+
+  describe "classify/1 — code fallback" do
+    test "unrecognized token → CodeNode" do
+      assert %CodeNode{} = classify_first("x = 1 + 2")
+    end
+
+    test "empty-like node with only whitespace tokens → CodeNode" do
+      nl = %Token{kind: "<NL>", content: "\n", line: 1, col: 0}
+      node = node_with_tokens([nl])
+
+      assert %CodeNode{} =
+               NodeClassifier.classify(node, Unknown)
+    end
+  end
+
+  describe "classify/1 — ambiguity resolution" do
+    test "test beats function (test is not defp-style)" do
+      # 'test' is in TestSignal; FunctionSignal does not include 'test'
+      result = classify_first(~s(test "foo" do\n  :ok\nend), language_module: ElixirLang)
+      assert %TestNode{} = result
+    end
+
+    test "@inside code body at indent > 0 does not make block :attribute" do
+      code = "def foo do\n  @cache true\n  :ok\nend"
+      # FunctionSignal sees 'def' at indent 0 → :function wins
+      # AttributeSignal sees '@cache' but at indent 2, not 0 → no vote
+      result = classify_first(code, language_module: ElixirLang)
+      assert %FunctionNode{} = result
+    end
+  end
+
+  describe "classify/1 — field preservation" do
+    test "preserves tokens, line_count, children, start/end_line" do
+      tokens =
+        "def foo, do: :ok"
+        |> TokenNormalizer.normalize_structural()
+
+      [node] = Parser.detect_blocks(tokens, ElixirLang)
+      result = NodeClassifier.classify(node, ElixirLang)
+
+      assert result.tokens == node.tokens
+      assert result.line_count == node.line_count
+      assert result.children == node.children
+      assert result.start_line == node.start_line
+      assert result.end_line == node.end_line
+    end
+  end
+
+  describe "classify/3 — sub-block parent context" do
+    test "alias-list sub-block classifies as :import when parent_context contains alias keyword" do
+      # Simulates a multi-line `alias Foo.{Bar, Baz}` where the BracketSignal
+      # has split off the `{Bar, Baz}` sub-block, leaving `alias` in the parent.
+      code = """
+      alias Foo.{
+        Bar,
+        Baz
+      }
+      """
+
+      tokens = TokenNormalizer.normalize_structural(code)
+      [parent] = Parser.detect_blocks(tokens, ElixirLang)
+      [sub_block] = parent.children
+
+      # Premise: in isolation, the sub-block is :code (no alias keyword visible).
+      assert %CodeNode{} = NodeClassifier.classify(sub_block, ElixirLang)
+
+      # With parent context (the parent's tokens that come BEFORE the sub-block),
+      # the classifier should see the `alias` keyword and vote :import.
+      parent_context = parent_tokens_before(parent, sub_block)
+
+      assert %ImportNode{} = NodeClassifier.classify(sub_block, ElixirLang, parent_context)
+    end
+
+    test "attribute-list sub-block classifies as :attribute when parent_context contains @name" do
+      code = """
+      @all_signals [
+        :a,
+        :b
+      ]
+      """
+
+      tokens = TokenNormalizer.normalize_structural(code)
+      [parent] = Parser.detect_blocks(tokens, ElixirLang)
+      [sub_block] = parent.children
+
+      assert %CodeNode{} = NodeClassifier.classify(sub_block, ElixirLang)
+
+      parent_context = parent_tokens_before(parent, sub_block)
+
+      assert %AttributeNode{} = NodeClassifier.classify(sub_block, ElixirLang, parent_context)
+    end
+
+    test "classify/3 with nil parent_context behaves identically to classify/2" do
+      code = "def foo, do: :ok"
+      [block] = code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(ElixirLang)
+
+      assert NodeClassifier.classify(block, ElixirLang) ==
+               NodeClassifier.classify(block, ElixirLang, nil)
+    end
+  end
+
+  # Returns the parent's tokens that come strictly before the sub-block's first token.
+  # Look-back is bounded to the current source line (everything since the last newline).
+  # Token kinds are strings (see the hand-built "<NL>" tokens above), so the newline
+  # check compares against the string; an atom would never match and the bound be lost.
+  defp parent_tokens_before(parent, sub_block) do
+    sub_first = List.first(sub_block.tokens)
+
+    parent.tokens
+    |> Enum.take_while(fn t -> t != sub_first end)
+    |> Enum.reverse()
+    |> Enum.take_while(fn t -> t.kind != "<NL>" end)
+    |> Enum.reverse()
+  end
+end
diff --git a/test/codeqa/ast/classification/node_protocol_test.exs b/test/codeqa/ast/classification/node_protocol_test.exs
new file mode 100644
index 00000000..5e79a00d
--- /dev/null
+++ b/test/codeqa/ast/classification/node_protocol_test.exs
@@ -0,0 +1,104 @@
+defmodule CodeQA.AST.NodeProtocolTest.FakeNode do
+  # Stand-in struct with its own NodeProtocol implementation, so the protocol's
+  # functions can be exercised on something other than the real node structs.
+  defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label]
+
+  defimpl CodeQA.AST.Classification.NodeProtocol do
+    alias CodeQA.AST.Classification.NodeProtocol
+
+    def tokens(%{tokens: tokens}), do: tokens
+    def line_count(%{line_count: count}), do: count
+    def children(%{children: children}), do: children
+    def start_line(%{start_line: line}), do: line
+    def end_line(%{end_line: line}), do: line
+    def label(%{label: label}), do: label
+
+    # A node without children yields its own tokens; otherwise only the
+    # flattened tokens of its children are returned.
+    def flat_tokens(%{children: [], tokens: own}), do: own
+    def flat_tokens(%{children: kids}), do: Enum.flat_map(kids, &NodeProtocol.flat_tokens/1)
+  end
+end
+
+defmodule CodeQA.AST.NodeProtocolTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.NodeProtocolTest.FakeNode
+
+  @node %FakeNode{
+    label: "foo.ex:1",
+    start_line: 1,
+    end_line: 3,
+    line_count: 3,
+    tokens: [:a, :b],
+    children: []
+  }
+
+  # One test per accessor, each comparing the protocol call on @node with the
+  # value stored in the corresponding field.
+  for {accessor, expected} <- [
+        tokens: [:a, :b],
+        line_count: 3,
+        children: [],
+        start_line: 1,
+        end_line: 3,
+        label: "foo.ex:1"
+      ] do
+    test "#{accessor}/1" do
+      assert apply(NodeProtocol, unquote(accessor), [@node]) == unquote(expected)
+    end
+  end
+
+  describe "flat_tokens/1" do
+    test "leaf node returns own tokens" do
+      assert NodeProtocol.flat_tokens(leaf([:a, :b])) == [:a, :b]
+    end
+
+    test "non-leaf node returns flattened descendant tokens" do
+      parent = %Node{tokens: [:x], line_count: 2, children: [leaf([:a]), leaf([:b, :c])]}
+      assert NodeProtocol.flat_tokens(parent) == [:a, :b, :c]
+    end
+
+    test "deeply nested node returns all leaf tokens" do
+      mid = %Node{tokens: [:y], line_count: 1, children: [leaf([:z])]}
+      root = %Node{tokens: [:x], line_count: 2, children: [mid]}
+      assert NodeProtocol.flat_tokens(root) == [:z]
+    end
+  end
+
+  describe "Node implements NodeProtocol" do
+    setup do
+      fields = [
+        tokens: [:x, :y],
+        line_count: 3,
+        children: [],
+        start_line: 1,
+        end_line: 3,
+        label: "f.ex:1"
+      ]
+
+      [node: struct!(Node, fields)]
+    end
+
+    test "tokens/1", %{node: node} do
+      assert [:x, :y] == NodeProtocol.tokens(node)
+    end
+
+    test "children/1", %{node: node} do
+      assert [] == NodeProtocol.children(node)
+    end
+
+    test "start_line/1", %{node: node} do
+      assert 1 == NodeProtocol.start_line(node)
+    end
+
+    test "label/1", %{node: node} do
+      assert "f.ex:1" == NodeProtocol.label(node)
+    end
+  end
+
+  # Builds a childless one-line Node holding `tokens`.
+  defp leaf(tokens), do: %Node{tokens: tokens, line_count: 1, children: []}
+end
diff --git a/test/codeqa/ast/classification/node_type_detector_test.exs b/test/codeqa/ast/classification/node_type_detector_test.exs
new file mode 100644
index 00000000..f4c97530
--- /dev/null
+++ b/test/codeqa/ast/classification/node_type_detector_test.exs
@@ -0,0 +1,113 @@
+defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do
+  # Tests NodeTypeDetector.detect_types/2, both on blocks parsed from source
+  # text and on hand-built Node structs.
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Classification.NodeTypeDetector
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.Token
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode, FunctionNode}
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+  alias CodeQA.Languages.Unknown
+
+  # Runs source text through tokenizing, block detection and type detection.
+  defp detect_types(code, lang_mod \\ ElixirLang) do
+    blocks = code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(lang_mod)
+    NodeTypeDetector.detect_types(blocks, lang_mod)
+  end
+
+  # Wraps `tokens` in a childless Node spanning lines 1..line_count and returns
+  # the single node detect_types/2 produces for it with the Elixir language module.
+  defp detect_single(tokens, line_count) do
+    node = %Node{
+      tokens: tokens,
+      line_count: line_count,
+      children: [],
+      start_line: 1,
+      end_line: line_count
+    }
+
+    [result] = NodeTypeDetector.detect_types([node], ElixirLang)
+    result
+  end
+
+  describe "detect_types/1" do
+    test "block with <TRIP_QUOTES> gets type :doc" do
+      assert [%DocNode{}] = detect_types(~s(@moduledoc """\nSome doc.\n"""))
+    end
+
+    test "block with @spec gets type :typespec" do
+      assert [%AttributeNode{kind: :typespec}] =
+               detect_types("@spec fetch_user(integer()) :: {:ok, term()}")
+    end
+
+    test "block with @type gets type :typespec" do
+      assert [%AttributeNode{kind: :typespec}] = detect_types("@type user_id :: integer()")
+    end
+
+    test "block starting with def gets type :function" do
+      assert [%FunctionNode{}] = detect_types("def foo(x), do: x + 1")
+    end
+
+    test "@ attribute inside function body does not make block :attribute" do
+      # FunctionSignal sees 'def' first → :function wins
+      # AttributeSignal sees '@cache' but at indent > 0 → no vote
+      def_token? = fn token -> token.kind == "<ID>" and token.content == "def" end
+
+      def_block =
+        "def foo do\n  @cache true\n  :ok\nend"
+        |> detect_types()
+        |> Enum.find(fn block -> Enum.any?(block.tokens, def_token?) end)
+
+      assert %FunctionNode{} = def_block
+    end
+
+    test "returns same number of blocks as input" do
+      assert [_, _] = detect_types("@spec foo() :: :ok\n\n\ndef foo, do: :ok")
+    end
+
+    test "all @typespec_attributes are recognized" do
+      for attr <- ~w[spec type typep opaque callback macrocallback] do
+        [block] = detect_types("@#{attr} foo :: bar")
+
+        assert match?(%AttributeNode{kind: :typespec}, block),
+               "expected AttributeNode with kind: :typespec for @#{attr}"
+      end
+    end
+
+    test "empty list returns empty list" do
+      assert NodeTypeDetector.detect_types([], Unknown) == []
+    end
+  end
+
+  describe "detect_types/1 — typed struct output" do
+    test "returns DocNode for doc blocks" do
+      tokens = [
+        %Token{kind: "<DOC>", content: ~s("""), line: 1, col: 0},
+        %Token{kind: "<NL>", content: "\n", line: 2, col: 0}
+      ]
+
+      assert %DocNode{} = detect_single(tokens, 2)
+    end
+
+    test "returns AttributeNode for typespec blocks" do
+      tokens = [
+        %Token{kind: "@", content: "@", line: 1, col: 0},
+        %Token{kind: "<ID>", content: "spec", line: 1, col: 1},
+        %Token{kind: "<NL>", content: "\n", line: 1, col: 5}
+      ]
+
+      assert %AttributeNode{kind: :typespec} = detect_single(tokens, 1)
+    end
+
+    test "returns CodeNode for unclassified blocks" do
+      tokens = [
+        %Token{kind: "<ID>", content: "foo", line: 1, col: 0},
+        %Token{kind: "<NL>", content: "\n", line: 1, col: 3}
+      ]
+
+      assert %CodeNode{} = detect_single(tokens, 1)
+    end
+  end
+end
diff --git a/test/codeqa/ast/classification/typed_node_kind_test.exs b/test/codeqa/ast/classification/typed_node_kind_test.exs
new file mode 100644
index 00000000..84149cd6
--- /dev/null
+++ b/test/codeqa/ast/classification/typed_node_kind_test.exs
@@ -0,0 +1,32 @@
+defmodule CodeQA.AST.Classification.TypedNodeKindTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.TypedNodeKind
+
+  alias CodeQA.AST.Nodes.{
+    AttributeNode,
+    CodeNode,
+    DocNode,
+    FunctionNode,
+    ImportNode,
+    ModuleNode,
+    TestNode
+  }
+
+  test "maps each typed node struct to its kind atom" do
+    # Each pair is {empty node struct, kind atom TypedNodeKind.of/1 must return}.
+    expected_kinds = [
+      {%DocNode{}, :doc},
+      {%AttributeNode{}, :attribute},
+      {%FunctionNode{}, :function},
+      {%ModuleNode{}, :module},
+      {%ImportNode{}, :import},
+      {%TestNode{}, :test},
+      {%CodeNode{}, :code}
+    ]
+
+    for {node, kind} <- expected_kinds do
+      assert TypedNodeKind.of(node) == kind
+    end
+  end
+end
diff --git a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs
new file mode 100644
index 00000000..3a6adbb6
--- /dev/null
+++ b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs
@@ -0,0 +1,143 @@
+defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do
+  # Generates one test per block assertion found in the @fixture entries and checks
+  # that some compound node built from the fixture's code satisfies that assertion.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Classification.NodeTypeDetector
+  alias CodeQA.AST.Enrichment.CompoundNode
+  alias CodeQA.AST.Enrichment.CompoundNodeBuilder
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Languages.Unknown
+
+  Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false)
+
+  # Elixir fixtures
+  use Test.Fixtures.Elixir.Calculator
+  use Test.Fixtures.Elixir.EventBus
+  use Test.Fixtures.Elixir.RateLimiter
+
+  # Python fixtures
+  use Test.Fixtures.Python.Calculator
+  use Test.Fixtures.Python.CsvPipeline
+  use Test.Fixtures.Python.ConfigParser
+
+  # JavaScript fixtures
+  use Test.Fixtures.JavaScript.Calculator
+  use Test.Fixtures.JavaScript.FormValidator
+  use Test.Fixtures.JavaScript.ShoppingCart
+
+  # Go fixtures
+  use Test.Fixtures.Go.Calculator
+  use Test.Fixtures.Go.HttpMiddleware
+  use Test.Fixtures.Go.CliParser
+
+  # Rust fixtures
+  use Test.Fixtures.Rust.Calculator
+  use Test.Fixtures.Rust.Tokenizer
+  use Test.Fixtures.Rust.RingBuffer
+
+  # Ruby fixtures
+  use Test.Fixtures.Ruby.Calculator
+  use Test.Fixtures.Ruby.OrmLite
+  use Test.Fixtures.Ruby.MarkdownRenderer
+
+  # TypeScript fixtures
+  use Test.Fixtures.TypeScript.UserProfileStore
+  use Test.Fixtures.TypeScript.EventEmitter
+  use Test.Fixtures.TypeScript.DependencyInjection
+
+  # Java fixtures
+  use Test.Fixtures.Java.BuilderPattern
+  use Test.Fixtures.Java.RepositoryPattern
+  use Test.Fixtures.Java.StrategyPattern
+
+  # C# fixtures
+  use Test.Fixtures.CSharp.LinqPipeline
+  use Test.Fixtures.CSharp.AsyncTaskManager
+  use Test.Fixtures.CSharp.PluginSystem
+
+  # Swift fixtures
+  use Test.Fixtures.Swift.ResultType
+  use Test.Fixtures.Swift.CombineStream
+  use Test.Fixtures.Swift.ActorModel
+
+  # Kotlin fixtures
+  use Test.Fixtures.Kotlin.SealedState
+  use Test.Fixtures.Kotlin.CoroutineFlow
+  use Test.Fixtures.Kotlin.ExtensionLibrary
+
+  # C++ fixtures
+  use Test.Fixtures.Cpp.SmartPointer
+  use Test.Fixtures.Cpp.TemplateContainer
+  use Test.Fixtures.Cpp.ObserverPattern
+
+  # Scala fixtures
+  use Test.Fixtures.Scala.CaseClassAlgebra
+  use Test.Fixtures.Scala.TypeclassPattern
+  use Test.Fixtures.Scala.ActorMessages
+
+  # Dart fixtures
+  use Test.Fixtures.Dart.WidgetState
+  use Test.Fixtures.Dart.FuturesAsync
+  use Test.Fixtures.Dart.MixinComposition
+
+  # Zig fixtures
+  use Test.Fixtures.Zig.AllocatorInterface
+  use Test.Fixtures.Zig.TaggedUnion
+  use Test.Fixtures.Zig.IteratorProtocol
+
+  # Lua fixtures
+  use Test.Fixtures.Lua.ClassSystem
+  use Test.Fixtures.Lua.EventSystem
+  use Test.Fixtures.Lua.StateMachine
+
+  # Generate tests for fixtures with block_assertions.
+  # Each @fixture entry is read as {language, code, block_assertions}; an assertion
+  # provides :description and :all_of, and may provide :none_of.
+  for {language, code, block_assertions} <- @fixture, block_assertion <- block_assertions do
+    test "[#{language}] #{block_assertion.description}" do
+      assertion = unquote(Macro.escape(block_assertion))
+      required = assertion.all_of
+      forbidden = Map.get(assertion, :none_of, [])
+
+      matched? =
+        unquote(code)
+        |> compound_nodes()
+        |> Enum.any?(&compound_satisfies?(all_tokens(&1), required, forbidden))
+
+      assert matched?,
+             "No compound node found matching: #{unquote(block_assertion.description)}"
+    end
+  end
+
+  # Tokenizes `code` and runs block detection, type detection and compound building,
+  # always with the Unknown language module.
+  defp compound_nodes(code) do
+    blocks = code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(Unknown)
+    blocks |> NodeTypeDetector.detect_types(Unknown) |> CompoundNodeBuilder.build()
+  end
+
+  # Tokens of every doc, typespec and code node of the compound, in that order.
+  defp all_tokens(%CompoundNode{} = compound) do
+    Enum.flat_map(compound.docs ++ compound.typespecs ++ compound.code, &NodeProtocol.tokens/1)
+  end
+
+  # {:exact, field, value} requires the token field to equal `value`;
+  # {:partial, field, value} requires the token field to contain `value`.
+  defp matches?({:exact, field, value}, token), do: Map.get(token, field) == value
+
+  defp matches?({:partial, field, value}, token) do
+    token |> Map.get(field, "") |> String.contains?(value)
+  end
+
+  # True when every `all_of` matcher matches at least one token and no `none_of`
+  # matcher matches any token.
+  defp compound_satisfies?(tokens, all_of, none_of) do
+    present? = fn matcher -> Enum.any?(tokens, &matches?(matcher, &1)) end
+
+    Enum.all?(all_of, present?) and not Enum.any?(none_of, present?)
+  end
+end
diff --git a/test/codeqa/ast/enrichment/compound_node_builder_test.exs b/test/codeqa/ast/enrichment/compound_node_builder_test.exs
new file mode 100644
index 00000000..00a10065
--- /dev/null
+++ b/test/codeqa/ast/enrichment/compound_node_builder_test.exs
@@ -0,0 +1,141 @@
+defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do
+  # Tests CompoundNodeBuilder.build/1 on typed nodes parsed from Elixir source and
+  # on hand-built typed node structs.
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeTypeDetector
+  alias CodeQA.AST.Enrichment.CompoundNode
+  alias CodeQA.AST.Enrichment.CompoundNodeBuilder
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode}
+  alias CodeQA.AST.Parsing.Parser
+
+  # Runs `code` through tokenizing, block detection and type detection with the
+  # Elixir language module, then builds compound nodes from the typed nodes.
+  defp build(code) do
+    lang_mod = CodeQA.Languages.Code.Vm.Elixir
+
+    code
+    |> TokenNormalizer.normalize_structural()
+    |> Parser.detect_blocks(lang_mod)
+    |> NodeTypeDetector.detect_types(lang_mod)
+    |> CompoundNodeBuilder.build()
+  end
+
+  describe "build/1" do
+    test "returns CompoundNode structs" do
+      [compound | _] = build("def foo, do: :ok")
+      assert %CompoundNode{} = compound
+    end
+
+    test "bare code block wraps in compound with empty docs and typespecs" do
+      [compound] = build("def foo, do: :ok")
+      assert compound.docs == []
+      assert compound.typespecs == []
+      assert length(compound.code) == 1
+    end
+
+    test "@doc block attaches to following code block" do
+      code = ~s(@doc """\nSome doc.\n"""\ndef foo, do: :ok)
+      [compound] = build(code)
+      assert length(compound.docs) == 1
+      assert length(compound.code) == 1
+    end
+
+    test "@spec block attaches to following code block" do
+      code = "@spec foo() :: :ok\ndef foo, do: :ok"
+      [compound] = build(code)
+      assert length(compound.typespecs) == 1
+      assert length(compound.code) == 1
+    end
+
+    test "consecutive code clauses accumulate in same compound" do
+      code = "def foo(:a), do: 1\ndef foo(:b), do: 2\ndef foo(_), do: 3"
+      [compound] = build(code)
+      assert length(compound.code) == 3
+    end
+
+    test "doc after code starts a new compound" do
+      code = ~s(def foo do\n  :ok\nend\n\n\n@doc """\nSome doc.\n"""\ndef bar, do: :ok)
+      compounds = build(code)
+      assert length(compounds) == 2
+      [first, second] = compounds
+      assert first.docs == []
+      assert length(second.docs) == 1
+    end
+
+    test "two blank lines between code blocks starts a new compound" do
+      code = "def foo, do: :ok\n\n\ndef bar, do: :ok"
+      compounds = build(code)
+      assert length(compounds) == 2
+    end
+
+    test "single blank line between code blocks does NOT start a new compound" do
+      code = "def foo(:a), do: 1\n\ndef foo(:b), do: 2"
+      [compound] = build(code)
+      assert length(compound.code) == 2
+    end
+
+    test "start_line is set from first non-whitespace token" do
+      [compound] = build("def foo, do: :ok")
+      assert is_integer(compound.start_line)
+      assert compound.start_line >= 1
+    end
+
+    test "start_col is set from first non-whitespace token" do
+      [compound] = build("def foo, do: :ok")
+      assert is_integer(compound.start_col)
+    end
+
+    test "typespec block before any code attaches to compound (no flush)" do
+      # NOTE(review): same input and assertions as "@spec block attaches to following
+      # code block" above — confirm whether a distinct scenario was intended.
+      code = "@spec foo() :: :ok\ndef foo, do: :ok"
+      [compound] = build(code)
+      assert length(compound.typespecs) == 1
+      assert length(compound.code) == 1
+    end
+
+    test "end_line is set from last non-whitespace token" do
+      [compound] = build("def foo, do: :ok")
+      assert is_integer(compound.end_line)
+    end
+
+    test "end_col is set from last non-whitespace token" do
+      [compound] = build("def foo, do: :ok")
+      assert is_integer(compound.end_col)
+    end
+
+    test "empty list returns empty list" do
+      assert [] == CompoundNodeBuilder.build([])
+    end
+  end
+
+  describe "build/1 with typed node structs" do
+    test "routes DocNode to docs bucket" do
+      doc = %DocNode{tokens: [:d], line_count: 1, children: [], start_line: 1, end_line: 1}
+      code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3}
+
+      [compound] = CompoundNodeBuilder.build([doc, code])
+      assert length(compound.docs) == 1
+      assert is_struct(hd(compound.docs), DocNode)
+    end
+
+    test "routes AttributeNode to typespecs bucket" do
+      attr = %AttributeNode{
+        tokens: [:a],
+        line_count: 1,
+        children: [],
+        start_line: 1,
+        end_line: 1,
+        kind: :typespec
+      }
+
+      code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3}
+
+      [compound] = CompoundNodeBuilder.build([attr, code])
+      assert length(compound.typespecs) == 1
+      assert is_struct(hd(compound.typespecs), AttributeNode)
+    end
+  end
+end
diff --git a/test/codeqa/ast/enrichment/node_analyzer_test.exs b/test/codeqa/ast/enrichment/node_analyzer_test.exs
new file mode 100644
index 00000000..6f3e4398
--- /dev/null
+++ b/test/codeqa/ast/enrichment/node_analyzer_test.exs
@@ -0,0 +1,62 @@
+defmodule CodeQA.AST.Enrichment.NodeAnalyzerTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Enrichment.NodeAnalyzer
+  alias CodeQA.AST.Lexing.TokenNormalizer
+
+  defp tokenize(code), do: TokenNormalizer.normalize_structural(code)
+  defp bound(code), do: code |> tokenize() |> NodeAnalyzer.bound_variables()
+
+  describe "bound_variables/1" do
+    test "simple assignment binds the LHS identifier" do
+      assert "user" in bound("user = Repo.get!(id)")
+    end
+
+    test "assignment RHS identifiers are NOT bound" do
+      result = bound("user = Repo.get!(id)")
+      # Cover both casings of `Repo` so the check holds whether or not names are down-cased.
+      assert MapSet.disjoint?(result, MapSet.new(~w(Repo repo id)))
+    end
+
+    test "with-clause binding (<-) binds the LHS identifier" do
+      assert "user" in bound("{:ok, user} <- fetch_user(id)")
+    end
+
+    test "multiple assignments in a block are all bound" do
+      result = bound("a = foo()\nb = bar()\nc = baz()")
+      assert "a" in result
+      assert "b" in result
+      assert "c" in result
+    end
+
+    test "assignment from a call binds only the LHS identifier" do
+      # TODO: a compound LHS such as `x.field = val` is not exercised here yet.
+      result = bound("result = compute(x)")
+      assert "result" in result
+      refute "x" in result
+    end
+
+    test "== operator does not create a binding" do
+      result = bound("x == y")
+      refute "x" in result
+      refute "y" in result
+    end
+
+    test "=> fat arrow does not create a binding" do
+      result = bound("key => value")
+      refute "key" in result
+    end
+
+    test "=~ regex match does not create a binding" do
+      result = bound("str =~ pattern")
+      refute "str" in result
+    end
+
+    test "returns MapSet" do
+      assert %MapSet{} = bound("x = 1")
+    end
+
+    test "empty token list returns empty MapSet" do
+      assert MapSet.new() == NodeAnalyzer.bound_variables([])
+    end
+  end
+end
diff --git a/test/codeqa/ast/lexing/string_token_test.exs b/test/codeqa/ast/lexing/string_token_test.exs
new file mode 100644
index 00000000..0a99e9e0
--- /dev/null
+++ b/test/codeqa/ast/lexing/string_token_test.exs
@@ -0,0 +1,145 @@
+defmodule CodeQA.AST.Lexing.StringTokenTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.StringToken
+  alias CodeQA.AST.Lexing.TokenNormalizer
+
+  # Tokenizes `code` and keeps only the tokens whose kind equals `kind`.
+  defp tokens_of_kind(code, kind) do
+    code
+    |> TokenNormalizer.normalize_structural()
+    |> Enum.filter(&(&1.kind == kind))
+  end
+
+  describe "StringToken struct" do
+    test "has kind, content, line, col, interpolations, multiline, and quotes fields" do
+      tok = %StringToken{
+        kind: "<STR>",
+        content: ~s("hello"),
+        line: 1,
+        col: 0,
+        interpolations: nil
+      }
+
+      assert tok.kind == "<STR>"
+      assert tok.content == ~s("hello")
+      assert tok.line == 1
+      assert tok.col == 0
+      assert tok.interpolations == nil
+      assert tok.multiline == false
+      assert tok.quotes == :double
+    end
+
+    test "interpolations defaults to nil" do
+      tok = %StringToken{kind: "<STR>", content: ~s("hello")}
+      assert tok.interpolations == nil
+    end
+
+    test "multiline defaults to false" do
+      tok = %StringToken{kind: "<STR>", content: ~s("hello")}
+      assert tok.multiline == false
+    end
+
+    test "quotes defaults to :double" do
+      tok = %StringToken{kind: "<STR>", content: ~s("hello")}
+      assert tok.quotes == :double
+    end
+
+    test "multiline triple-quote struct" do
+      tok = %StringToken{kind: "<DOC>", content: ~s("""), multiline: true, quotes: :double}
+      assert tok.multiline == true
+      assert tok.quotes == :double
+    end
+  end
+
+  describe "TokenNormalizer emits StringToken for strings" do
+    test "plain string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~s("hello"), "<STR>")
+    end
+
+    test "plain string StringToken has nil interpolations" do
+      [tok] = tokens_of_kind(~s("hello"), "<STR>")
+      assert tok.interpolations == nil
+    end
+
+    test "Elixir/Ruby interpolated string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~S|"hello #{name}"|, "<STR>")
+    end
+
+    test "JS/TS backtick interpolated string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~S|`hello ${name}`|, "<STR>")
+    end
+
+    test "Kotlin/Dart/Scala interpolated string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~S|"hello ${name}"|, "<STR>")
+    end
+
+    test "Swift interpolated string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~S|"hello \(name)"|, "<STR>")
+    end
+
+    test "plain backtick string emits a StringToken" do
+      assert [%StringToken{}] = tokens_of_kind(~S|`hello`|, "<STR>")
+    end
+
+    test "non-string tokens are still plain Token structs" do
+      tokens = TokenNormalizer.normalize_structural("foo = 42")
+      id = Enum.find(tokens, &(&1.kind == "<ID>"))
+      refute match?(%StringToken{}, id)
+    end
+  end
+
+  describe "quotes field" do
+    test "double-quoted string has quotes :double" do
+      [tok] = tokens_of_kind(~s("hello"), "<STR>")
+      assert tok.quotes == :double
+    end
+
+    test "single-quoted string has quotes :single" do
+      [tok] = tokens_of_kind("'hello'", "<STR>")
+      assert tok.quotes == :single
+    end
+
+    test "backtick string has quotes :backtick" do
+      [tok] = tokens_of_kind(~S|`hello`|, "<STR>")
+      assert tok.quotes == :backtick
+    end
+
+    test "backtick interpolated string has quotes :backtick" do
+      [tok] = tokens_of_kind(~S|`hello ${name}`|, "<STR>")
+      assert tok.quotes == :backtick
+    end
+
+    test "Elixir interpolated string has quotes :double" do
+      [tok] = tokens_of_kind(~S|"hello #{name}"|, "<STR>")
+      assert tok.quotes == :double
+    end
+  end
+
+  describe "multiline field" do
+    test "regular string has multiline false" do
+      [tok] = tokens_of_kind(~s("hello"), "<STR>")
+      assert tok.multiline == false
+    end
+
+    test "double triple-quote token has multiline true" do
+      [tok | _] = tokens_of_kind(~s("""\nhello\n"""), "<DOC>")
+      assert tok.multiline == true
+    end
+
+    test "single triple-quote token has multiline true" do
+      [tok | _] = tokens_of_kind("'''\nhello\n'''", "<DOC>")
+      assert tok.multiline == true
+    end
+
+    test "triple-quote token quotes :double for \"\"\"" do
+      [tok | _] = tokens_of_kind(~s("""\nhello\n"""), "<DOC>")
+      assert tok.quotes == :double
+    end
+
+    test "triple-quote token quotes :single for '''" do
+      [tok | _] = tokens_of_kind("'''\nhello\n'''", "<DOC>")
+      assert tok.quotes == :single
+    end
+  end
+end
diff --git a/test/codeqa/ast/lexing/token_normalizer_test.exs b/test/codeqa/ast/lexing/token_normalizer_test.exs
new file mode 100644
index 00000000..19a886ae
--- /dev/null
+++ b/test/codeqa/ast/lexing/token_normalizer_test.exs
@@ -0,0 +1,293 @@
+defmodule CodeQA.AST.Lexing.TokenNormalizerTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Lexing.StringToken
+  alias CodeQA.AST.Lexing.TokenNormalizer
+
+  defp kinds(tokens), do: Enum.map(tokens, & &1.kind)
+
+  # Tokenizes `code` and returns its single `<STR>` token (crashes unless exactly one).
+  defp str_token(code) do
+    [tok] =
+      code
+      |> TokenNormalizer.normalize_structural()
+      |> Enum.filter(&(&1.kind == "<STR>"))
+
+    tok
+  end
+
+  describe "normalize_structural/1" do
+    test "emits <NL> between lines" do
+      result = TokenNormalizer.normalize_structural("a\nb")
+      assert "<NL>" in kinds(result)
+    end
+
+    test "a blank line produces two or more consecutive <NL> tokens" do
+      result = TokenNormalizer.normalize_structural("a\n\nb")
+
+      nl_runs =
+        result
+        |> Enum.chunk_by(&(&1.kind == "<NL>"))
+        |> Enum.filter(fn [h | _] -> h.kind == "<NL>" end)
+        |> Enum.map(&length/1)
+
+      assert Enum.any?(nl_runs, &(&1 >= 2))
+    end
+
+    test "emits one <WS> token per 2 leading spaces" do
+      result = TokenNormalizer.normalize_structural("    foo")
+      assert Enum.count(result, &(&1.kind == "<WS>")) == 2
+    end
+
+    test "emits one <WS> token per tab" do
+      result = TokenNormalizer.normalize_structural("\t\tfoo")
+      assert Enum.count(result, &(&1.kind == "<WS>")) == 2
+    end
+
+    test "normalizes identifiers to <ID>" do
+      result = TokenNormalizer.normalize_structural("foo bar")
+      assert kinds(result) == ["<ID>", "<ID>"]
+    end
+
+    test "normalizes numbers to <NUM>" do
+      result = TokenNormalizer.normalize_structural("x = 42")
+      assert "<NUM>" in kinds(result)
+    end
+
+    test "empty string returns empty list" do
+      assert TokenNormalizer.normalize_structural("") == []
+    end
+
+    test "single leading space produces zero <WS> tokens (below threshold)" do
+      result = TokenNormalizer.normalize_structural(" foo")
+      refute Enum.any?(result, &(&1.kind == "<WS>"))
+    end
+
+    test "punctuation tokens like ( and : survive as individual tokens" do
+      result = TokenNormalizer.normalize_structural("foo(x):")
+      assert "(" in kinds(result)
+      assert ")" in kinds(result)
+      assert ":" in kinds(result)
+    end
+
+    test "tokens carry line numbers" do
+      result = TokenNormalizer.normalize_structural("foo\nbar")
+      lines = Enum.map(result, & &1.line)
+      assert 1 in lines
+      assert 2 in lines
+    end
+
+    test "tokens carry col offsets" do
+      result = TokenNormalizer.normalize_structural("foo")
+      [tok] = result
+      assert tok.col == 0
+    end
+
+    test "identifier token preserves original content" do
+      result = TokenNormalizer.normalize_structural("myVar")
+      [tok] = result
+      assert tok.kind == "<ID>"
+      assert tok.content == "myVar"
+    end
+
+    test "keyword content is preserved (not normalized away)" do
+      result = TokenNormalizer.normalize_structural("def foo")
+      contents = Enum.map(result, & &1.content)
+      assert "def" in contents
+    end
+
+    test "string token content is the original literal" do
+      result = TokenNormalizer.normalize_structural(~s("hello"))
+      tok = Enum.find(result, &(&1.kind == "<STR>"))
+      assert tok.content == ~s("hello")
+    end
+
+    # multi-char operator tests
+
+    test ">= is a single token" do
+      result = TokenNormalizer.normalize_structural("x >= y")
+      assert ">=" in kinds(result)
+      refute ">" in kinds(result)
+    end
+
+    test "<= is a single token" do
+      result = TokenNormalizer.normalize_structural("x <= y")
+      assert "<=" in kinds(result)
+      refute "<" in kinds(result)
+    end
+
+    test "== is a single token" do
+      result = TokenNormalizer.normalize_structural("x == y")
+      assert "==" in kinds(result)
+    end
+
+    test "!= is a single token" do
+      result = TokenNormalizer.normalize_structural("x != y")
+      assert "!=" in kinds(result)
+      refute "!" in kinds(result)
+    end
+
+    test "=== is a single token (not == + =)" do
+      result = TokenNormalizer.normalize_structural("x === y")
+      assert "===" in kinds(result)
+      refute "==" in kinds(result)
+    end
+
+    test "!== is a single token" do
+      result = TokenNormalizer.normalize_structural("x !== y")
+      assert "!==" in kinds(result)
+      refute "!=" in kinds(result)
+    end
+
+    test "|> is a single token (Elixir pipe)" do
+      result = TokenNormalizer.normalize_structural("x |> f")
+      assert "|>" in kinds(result)
+      refute "|" in kinds(result)
+    end
+
+    test "<> is a single token (Elixir concat)" do
+      result = TokenNormalizer.normalize_structural(~s("a" <> "b"))
+      assert "<>" in kinds(result)
+    end
+
+    test "<- is a single token (Elixir/Go arrow)" do
+      result = TokenNormalizer.normalize_structural("x <- y")
+      assert "<-" in kinds(result)
+      refute "<" in kinds(result)
+    end
+
+    test "-> is a single token" do
+      result = TokenNormalizer.normalize_structural("x -> y")
+      assert "->" in kinds(result)
+      refute "-" in kinds(result)
+    end
+
+    test "=> is a single token (fat arrow)" do
+      result = TokenNormalizer.normalize_structural("k => v")
+      assert "=>" in kinds(result)
+    end
+
+    test "=~ is a single token (regex match)" do
+      result = TokenNormalizer.normalize_structural("x =~ y")
+      assert "=~" in kinds(result)
+    end
+
+    test "&& is a single token" do
+      result = TokenNormalizer.normalize_structural("a && b")
+      assert "&&" in kinds(result)
+      refute "&" in kinds(result)
+    end
+
+    test "|| is a single token" do
+      result = TokenNormalizer.normalize_structural("a || b")
+      assert "||" in kinds(result)
+      refute "|" in kinds(result)
+    end
+
+    test ":: is a single token" do
+      result = TokenNormalizer.normalize_structural("Foo::Bar")
+      assert "::" in kinds(result)
+      refute ":" in kinds(result)
+    end
+
+    test ".. is a single token" do
+      result = TokenNormalizer.normalize_structural("1..10")
+      assert ".." in kinds(result)
+    end
+
+    test "... is a single token (not .. + .)" do
+      result = TokenNormalizer.normalize_structural("1...10")
+      assert "..." in kinds(result)
+      refute ".." in kinds(result)
+    end
+
+    test "multi-char operator value equals content (no normalization)" do
+      result = TokenNormalizer.normalize_structural("x >= y")
+      tok = Enum.find(result, &(&1.kind == ">="))
+      assert tok.content == ">="
+    end
+  end
+
+  describe "interpolated string tokens are normalised to <STR>" do
+    test "Elixir/Ruby \#{} emits <STR> with interpolations" do
+      assert str_token(~S|"hello #{name}"|).interpolations == ["name"]
+    end
+
+    test "JS/TS backtick with \${} emits <STR> with interpolations" do
+      assert str_token(~S|`hello ${name}`|).interpolations == ["name"]
+    end
+
+    test "JS/TS backtick static content has interpolation stripped" do
+      assert str_token(~S|`hello ${name} world`|).content == "`hello  world`"
+    end
+
+    test "JS/TS backtick two interpolations are both captured" do
+      assert str_token(~S|`${a} and ${b}`|).interpolations == ["a", "b"]
+    end
+
+    test "plain backtick string without interpolation emits <STR> with nil interpolations" do
+      assert str_token(~S|`hello world`|).interpolations == nil
+    end
+
+    test "Kotlin/Dart/Scala \${} emits <STR> with interpolations" do
+      assert str_token(~S|"hello ${name}"|).interpolations == ["name"]
+    end
+
+    test "Kotlin/Dart/Scala static content has interpolation stripped" do
+      assert str_token(~S|"hello ${name} world"|).content == ~S|"hello  world"|
+    end
+
+    test "Kotlin/Dart/Scala two interpolations are both captured" do
+      assert str_token(~S|"${a} and ${b}"|).interpolations == ["a", "b"]
+    end
+
+    test "Swift \\(...) emits <STR> with interpolations" do
+      assert str_token(~S|"hello \(name)"|).interpolations == ["name"]
+    end
+
+    test "Swift static content has interpolation stripped" do
+      assert str_token(~S|"hello \(name) world"|).content == ~S|"hello  world"|
+    end
+
+    test "Swift two interpolations are both captured" do
+      assert str_token(~S|"\(a) and \(b)"|).interpolations == ["a", "b"]
+    end
+
+    test "plain double-quoted string has nil interpolations" do
+      assert str_token(~s("hello")).interpolations == nil
+    end
+  end
+
+  describe "<DOC> token (triple quotes)" do
+    test "triple double-quotes emits a StringToken with kind <DOC>" do
+      tokens = TokenNormalizer.normalize_structural(~s("""))
+
+      assert [%StringToken{kind: "<DOC>", content: ~s("""), multiline: true, quotes: :double}] =
+               tokens
+    end
+
+    test "triple single-quotes emits a StringToken with kind <DOC>" do
+      tokens = TokenNormalizer.normalize_structural("'''")
+
+      assert [%StringToken{kind: "<DOC>", content: "'''", multiline: true, quotes: :single}] =
+               tokens
+    end
+
+    test "triple-quote is not consumed as empty string + bare quote" do
+      tokens = TokenNormalizer.normalize_structural(~s("""))
+      refute Enum.any?(tokens, &(&1.kind == "<STR>"))
+    end
+
+    test "content between triple-quotes is tokenized normally" do
+      code = ~s("""\nhello world\n""")
+      tokens = TokenNormalizer.normalize_structural(code)
+      trip_count = Enum.count(tokens, &(&1.kind == "<DOC>"))
+      assert trip_count == 2
+      assert Enum.any?(tokens, &(&1.kind == "<ID>" and &1.content == "hello"))
+    end
+
+    test "regular double-quoted string still works" do
+      tokens = TokenNormalizer.normalize_structural(~s("hello"))
+      assert [%StringToken{kind: "<STR>"}] = tokens
+    end
+  end
+end
diff --git a/test/codeqa/ast/lexing/token_protocol_test.exs b/test/codeqa/ast/lexing/token_protocol_test.exs
new file mode 100644
index 00000000..340d94a9
--- /dev/null
+++ b/test/codeqa/ast/lexing/token_protocol_test.exs
@@ -0,0 +1,142 @@
+defmodule CodeQA.AST.Lexing.TokenProtocolTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.StringToken
+  alias CodeQA.AST.Lexing.Token
+  alias CodeQA.AST.Lexing.TokenProtocol
+
+  describe "Token implementation" do
+    setup do
+      {:ok, token: %Token{kind: "<ID>", content: "foo", line: 3, col: 7}}
+    end
+
+    test "kind/1", %{token: t} do
+      assert TokenProtocol.kind(t) == "<ID>"
+    end
+
+    test "content/1", %{token: t} do
+      assert TokenProtocol.content(t) == "foo"
+    end
+
+    test "line/1", %{token: t} do
+      assert TokenProtocol.line(t) == 3
+    end
+
+    test "col/1", %{token: t} do
+      assert TokenProtocol.col(t) == 7
+    end
+
+    test "nil location fields are preserved" do
+      t = %Token{kind: "<NL>", content: "\n", line: nil, col: nil}
+      assert TokenProtocol.line(t) == nil
+      assert TokenProtocol.col(t) == nil
+    end
+  end
+
+  describe "StringToken implementation" do
+    setup do
+      {:ok,
+       token: %StringToken{
+         kind: "<STR>",
+         content: "\"hello\"",
+         line: 10,
+         col: 2,
+         interpolations: nil
+       }}
+    end
+
+    test "kind/1", %{token: t} do
+      assert TokenProtocol.kind(t) == "<STR>"
+    end
+
+    test "content/1", %{token: t} do
+      assert TokenProtocol.content(t) == "\"hello\""
+    end
+
+    test "line/1", %{token: t} do
+      assert TokenProtocol.line(t) == 10
+    end
+
+    test "col/1", %{token: t} do
+      assert TokenProtocol.col(t) == 2
+    end
+
+    test "works with interpolated string token" do
+      t = %StringToken{
+        kind: "<STR>",
+        content: "\"\#{x}\"",
+        line: 5,
+        col: 0,
+        interpolations: ["x"]
+      }
+
+      assert TokenProtocol.kind(t) == "<STR>"
+      assert TokenProtocol.content(t) == "\"\#{x}\""
+    end
+  end
+
+  describe "StringToken <DOC> (multiline) via protocol" do
+    setup do
+      {:ok,
+       token: %StringToken{
+         kind: "<DOC>",
+         content: ~s("""),
+         line: 2,
+         col: 0,
+         multiline: true,
+         quotes: :double
+       }}
+    end
+
+    test "kind/1", %{token: t} do
+      assert TokenProtocol.kind(t) == "<DOC>"
+    end
+
+    test "content/1", %{token: t} do
+      assert TokenProtocol.content(t) == ~s(""")
+    end
+
+    test "line/1", %{token: t} do
+      assert TokenProtocol.line(t) == 2
+    end
+
+    test "col/1", %{token: t} do
+      assert TokenProtocol.col(t) == 0
+    end
+
+    test "single-quote variant" do
+      t = %StringToken{
+        kind: "<DOC>",
+        content: "'''",
+        line: 5,
+        col: 0,
+        multiline: true,
+        quotes: :single
+      }
+
+      assert TokenProtocol.kind(t) == "<DOC>"
+      assert t.quotes == :single
+    end
+  end
+
+  describe "polymorphic use" do
+    test "mixed token list can be processed uniformly" do
+      tokens = [
+        %Token{kind: "<ID>", content: "x", line: 1, col: 0},
+        %StringToken{kind: "<STR>", content: "\"hi\"", line: 1, col: 4},
+        %StringToken{
+          kind: "<DOC>",
+          content: ~s("""),
+          line: 2,
+          col: 0,
+          multiline: true,
+          quotes: :double
+        },
+        %Token{kind: "<NL>", content: "\n", line: 2, col: 3}
+      ]
+
+      kinds = Enum.map(tokens, &TokenProtocol.kind/1)
+      assert kinds == ["<ID>", "<STR>", "<DOC>", "<NL>"]
+    end
+  end
+end
diff --git a/test/codeqa/ast/nodes/code_node_test.exs b/test/codeqa/ast/nodes/code_node_test.exs
new file mode 100644
index 00000000..20082f0c
--- /dev/null
+++ b/test/codeqa/ast/nodes/code_node_test.exs
@@ -0,0 +1,55 @@
+defmodule CodeQA.AST.Nodes.CodeNodeTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Nodes.{CodeNode, DocNode}
+
+  @tokens [:a, :b, :c]
+
+  describe "CodeNode" do
+    setup do
+      node = %CodeNode{
+        tokens: @tokens,
+        line_count: 2,
+        children: [],
+        start_line: 1,
+        end_line: 2,
+        label: "f.ex:1"
+      }
+
+      %{node: node}
+    end
+
+    test "implements NodeProtocol", %{node: node} do
+      assert NodeProtocol.tokens(node) == @tokens
+      assert NodeProtocol.line_count(node) == 2
+      assert NodeProtocol.children(node) == []
+      assert NodeProtocol.start_line(node) == 1
+      assert NodeProtocol.end_line(node) == 2
+      assert NodeProtocol.label(node) == "f.ex:1"
+    end
+
+    test "all common fields default to nil except children" do
+      node = %CodeNode{tokens: [], line_count: 0, children: []}
+      assert NodeProtocol.start_line(node) == nil
+      assert NodeProtocol.end_line(node) == nil
+      assert NodeProtocol.label(node) == nil
+    end
+  end
+
+  describe "DocNode" do
+    test "implements NodeProtocol" do
+      node = %DocNode{
+        tokens: @tokens,
+        line_count: 1,
+        children: [],
+        start_line: 5,
+        end_line: 5,
+        label: nil
+      }
+
+      assert NodeProtocol.tokens(node) == @tokens
+      assert NodeProtocol.children(node) == []
+    end
+  end
+end
diff --git a/test/codeqa/ast/nodes/function_node_test.exs b/test/codeqa/ast/nodes/function_node_test.exs
new file mode 100644
index 00000000..a1770bce
--- /dev/null
+++ b/test/codeqa/ast/nodes/function_node_test.exs
@@ -0,0 +1,68 @@
+defmodule CodeQA.AST.Nodes.FunctionNodeTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Nodes.{FunctionNode, ModuleNode}
+
+  describe "FunctionNode" do
+    setup do
+      node = %FunctionNode{
+        tokens: [:a],
+        line_count: 5,
+        children: [],
+        start_line: 10,
+        end_line: 14,
+        label: "foo.ex:10",
+        name: "calculate",
+        arity: 2,
+        visibility: :public
+      }
+
+      %{node: node}
+    end
+
+    test "implements NodeProtocol", %{node: node} do
+      assert NodeProtocol.tokens(node) == [:a]
+      assert NodeProtocol.line_count(node) == 5
+      assert NodeProtocol.start_line(node) == 10
+    end
+
+    test "specific fields are accessible", %{node: node} do
+      assert node.name == "calculate"
+      assert node.arity == 2
+      assert node.visibility == :public
+    end
+
+    test "specific fields default to nil" do
+      node = %FunctionNode{tokens: [], line_count: 0, children: []}
+      assert node.name == nil
+      assert node.arity == nil
+      assert node.visibility == nil
+    end
+  end
+
+  describe "ModuleNode" do
+    test "implements NodeProtocol" do
+      node = %ModuleNode{
+        tokens: [:m],
+        line_count: 20,
+        children: [],
+        start_line: 1,
+        end_line: 20,
+        label: nil,
+        name: "MyApp.Foo",
+        kind: :module
+      }
+
+      assert NodeProtocol.tokens(node) == [:m]
+      assert node.name == "MyApp.Foo"
+      assert node.kind == :module
+    end
+
+    test "specific fields default to nil" do
+      node = %ModuleNode{tokens: [], line_count: 0, children: []}
+      assert node.name == nil
+      assert node.kind == nil
+    end
+  end
+end
diff --git a/test/codeqa/ast/nodes/import_node_test.exs b/test/codeqa/ast/nodes/import_node_test.exs
new file mode 100644
index 00000000..53c4a989
--- /dev/null
+++ b/test/codeqa/ast/nodes/import_node_test.exs
@@ -0,0 +1,74 @@
+defmodule CodeQA.AST.Nodes.ImportNodeTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Classification.NodeProtocol
+  alias CodeQA.AST.Nodes.{AttributeNode, ImportNode, TestNode}
+
+  describe "ImportNode" do
+    test "implements NodeProtocol" do
+      node = %ImportNode{
+        tokens: [:i],
+        line_count: 1,
+        children: [],
+        start_line: 3,
+        end_line: 3,
+        label: nil,
+        target: "MyApp.Repo"
+      }
+
+      assert NodeProtocol.tokens(node) == [:i]
+      assert node.target == "MyApp.Repo"
+    end
+
+    test "target defaults to nil" do
+      node = %ImportNode{tokens: [], line_count: 0, children: []}
+      assert node.target == nil
+    end
+  end
+
+  describe "AttributeNode" do
+    test "implements NodeProtocol" do
+      node = %AttributeNode{
+        tokens: [:a],
+        line_count: 1,
+        children: [],
+        start_line: 2,
+        end_line: 2,
+        label: nil,
+        name: "moduledoc",
+        kind: :annotation
+      }
+
+      assert NodeProtocol.tokens(node) == [:a]
+      assert node.name == "moduledoc"
+      assert node.kind == :annotation
+    end
+
+    test "supports :typespec kind" do
+      node = %AttributeNode{tokens: [], line_count: 0, children: [], kind: :typespec}
+      assert node.kind == :typespec
+    end
+  end
+
+  describe "TestNode" do
+    test "implements NodeProtocol" do
+      node = %TestNode{
+        tokens: [:t],
+        line_count: 4,
+        children: [],
+        start_line: 10,
+        end_line: 13,
+        label: nil,
+        description: "returns the sum"
+      }
+
+      assert NodeProtocol.tokens(node) == [:t]
+      assert node.description == "returns the sum"
+    end
+
+    test "description defaults to nil" do
+      node = %TestNode{tokens: [], line_count: 0, children: []}
+      assert node.description == nil
+    end
+  end
+end
diff --git a/test/codeqa/ast/parsing/parser_languages_test.exs b/test/codeqa/ast/parsing/parser_languages_test.exs
new file mode 100644
index 00000000..5526d10b
--- /dev/null
+++ b/test/codeqa/ast/parsing/parser_languages_test.exs
@@ -0,0 +1,168 @@
+defmodule CodeQA.AST.Parsing.ParserLanguagesTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Language
+  alias CodeQA.Languages.Unknown
+
+  Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false)
+
+  # Elixir fixtures
+  use Test.Fixtures.Elixir.Calculator
+  use Test.Fixtures.Elixir.EventBus
+  use Test.Fixtures.Elixir.RateLimiter
+
+  # Python fixtures
+  use Test.Fixtures.Python.Calculator
+  use Test.Fixtures.Python.CsvPipeline
+  use Test.Fixtures.Python.ConfigParser
+
+  # JavaScript fixtures
+  use Test.Fixtures.JavaScript.Calculator
+  use Test.Fixtures.JavaScript.FormValidator
+  use Test.Fixtures.JavaScript.ShoppingCart
+
+  # Go fixtures
+  use Test.Fixtures.Go.Calculator
+  use Test.Fixtures.Go.HttpMiddleware
+  use Test.Fixtures.Go.CliParser
+
+  # Rust fixtures
+  use Test.Fixtures.Rust.Calculator
+  use Test.Fixtures.Rust.Tokenizer
+  use Test.Fixtures.Rust.RingBuffer
+
+  # Ruby fixtures
+  use Test.Fixtures.Ruby.Calculator
+  use Test.Fixtures.Ruby.OrmLite
+  use Test.Fixtures.Ruby.MarkdownRenderer
+
+  # TypeScript fixtures
+  use Test.Fixtures.TypeScript.UserProfileStore
+  use Test.Fixtures.TypeScript.EventEmitter
+  use Test.Fixtures.TypeScript.DependencyInjection
+
+  # Java fixtures
+  use Test.Fixtures.Java.BuilderPattern
+  use Test.Fixtures.Java.RepositoryPattern
+  use Test.Fixtures.Java.StrategyPattern
+
+  # C# fixtures
+  use Test.Fixtures.CSharp.LinqPipeline
+  use Test.Fixtures.CSharp.AsyncTaskManager
+  use Test.Fixtures.CSharp.PluginSystem
+
+  # Swift fixtures
+  use Test.Fixtures.Swift.ResultType
+  use Test.Fixtures.Swift.CombineStream
+  use Test.Fixtures.Swift.ActorModel
+
+  # Kotlin fixtures
+  use Test.Fixtures.Kotlin.SealedState
+  use Test.Fixtures.Kotlin.CoroutineFlow
+  use Test.Fixtures.Kotlin.ExtensionLibrary
+
+  # C++ fixtures
+  use Test.Fixtures.Cpp.SmartPointer
+  use Test.Fixtures.Cpp.TemplateContainer
+  use Test.Fixtures.Cpp.ObserverPattern
+
+  # Scala fixtures
+  use Test.Fixtures.Scala.CaseClassAlgebra
+  use Test.Fixtures.Scala.TypeclassPattern
+  use Test.Fixtures.Scala.ActorMessages
+
+  # Dart fixtures
+  use Test.Fixtures.Dart.WidgetState
+  use Test.Fixtures.Dart.FuturesAsync
+  use Test.Fixtures.Dart.MixinComposition
+
+  # Zig fixtures
+  use Test.Fixtures.Zig.AllocatorInterface
+  use Test.Fixtures.Zig.TaggedUnion
+  use Test.Fixtures.Zig.IteratorProtocol
+
+  # Lua fixtures
+  use Test.Fixtures.Lua.ClassSystem
+  use Test.Fixtures.Lua.EventSystem
+  use Test.Fixtures.Lua.StateMachine
+
+  # Note: accumulate: true prepends, so Enum.at(0) is the LAST registered fixture.
+  # All @code values use 0 leading spaces, so @indentation_level will always be 0
+  # and the normalization branch below is never taken.
+  @indentation_level @fixture
+                     |> Enum.at(0)
+                     |> elem(1)
+                     |> String.split("\n")
+                     |> List.first()
+                     |> then(&Regex.run(~r/^\s*/, &1))
+                     |> List.first()
+                     |> String.length()
+
+  @normalized_fixtures for {language, code, block_assertions} <- @fixture,
+                           do:
+                             {language,
+                              if @indentation_level > 0 do
+                                code
+                                |> String.split("\n")
+                                |> Enum.map_join(
+                                  "\n",
+                                  &String.replace_leading(
+                                    &1,
+                                    String.duplicate(" ", @indentation_level),
+                                    ""
+                                  )
+                                )
+                              else
+                                code
+                              end, block_assertions}
+
+  # Tokenizes `code` and returns the blocks `Parser.detect_blocks/2` finds for `lang_mod`.
+  # No default for `lang_mod`: every caller passes it, and an unused default warns.
+  defp blocks(code, lang_mod) do
+    code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(lang_mod)
+  end
+
+  # Returns the direct children of every block detected in `code`, flattened into one list.
+  defp children(code, lang_mod) do
+    code
+    |> blocks(lang_mod)
+    |> Enum.flat_map(& &1.children)
+  end
+
+  describe "blocks/2" do
+    for {language, code, _block_assertions} <- @normalized_fixtures do
+      lang_name = language |> String.split() |> hd()
+      lang_mod = Language.find(lang_name)
+
+      test "detects at least 3 blocks for #{language} code" do
+        lang_mod = unquote(lang_mod)
+        result = blocks(unquote(code), lang_mod)
+
+        if unquote(lang_mod) == Unknown do
+          assert result != []
+        else
+          assert length(result) >= 3
+        end
+      end
+
+      test "detects at least 3 sub-blocks for #{language} code" do
+        lang_mod = unquote(lang_mod)
+        result = children(unquote(code), lang_mod)
+
+        if unquote(lang_mod) == Unknown do
+          assert is_list(result)
+        else
+          assert length(result) >= 3
+        end
+      end
+
+      test "detects less sub-blocks than line-numbers for #{language} code" do
+        lang_mod = unquote(lang_mod)
+        result = children(unquote(code), lang_mod)
+        assert length(result) < length(String.split(unquote(code), "\n"))
+      end
+    end
+  end
+end
diff --git a/test/codeqa/ast/parsing/parser_test.exs b/test/codeqa/ast/parsing/parser_test.exs
new file mode 100644
index 00000000..51ead52e
--- /dev/null
+++ b/test/codeqa/ast/parsing/parser_test.exs
@@ -0,0 +1,188 @@
+defmodule CodeQA.AST.Parsing.ParserTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Enrichment.Node
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.Languages.Code.Scripting.Python
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+  alias CodeQA.Languages.Unknown
+
+  defp tokenize(code), do: TokenNormalizer.normalize_structural(code)
+
+  describe "detect_blocks/2" do
+    test "single block for file with no blank lines" do
+      tokens = tokenize("def foo\n  x = 1\nend\n")
+      blocks = Parser.detect_blocks(tokens, ElixirLang)
+      assert length(blocks) == 1
+    end
+
+    test "splits into two blocks at blank line" do
+      tokens = tokenize("def foo\n  x\nend\n\n\ndef bar\n  y\nend\n")
+      blocks = Parser.detect_blocks(tokens, ElixirLang)
+      assert length(blocks) == 2
+    end
+
+    test "each block has correct line_count" do
+      tokens = tokenize("def foo\n  x\nend\n\n\ndef bar\n  y\nend\n")
+      [b1, b2] = Parser.detect_blocks(tokens, ElixirLang)
+      assert b1.line_count >= 3
+      assert b2.line_count >= 3
+    end
+
+    test "empty input returns empty list" do
+      assert Parser.detect_blocks([], Unknown) == []
+    end
+
+    test "detects bracket sub-blocks" do
+      tokens = tokenize("foo(a, b)\nbar(c)\n")
+      [block] = Parser.detect_blocks(tokens, Unknown)
+      assert block.children != []
+    end
+
+    test "detects colon-indent sub-blocks for python language hint" do
+      tokens = tokenize("def foo:\n    return 1\n")
+      [block] = Parser.detect_blocks(tokens, Python)
+      assert block.children != []
+    end
+
+    test "python hint never yields fewer sub-blocks than no hint" do
+      tokens = tokenize("def foo:\n    return 1\n")
+      # Total number of direct children across all top-level blocks.
+      child_total = fn blocks -> blocks |> Enum.flat_map(& &1.children) |> length() end
+      count_without = child_total.(Parser.detect_blocks(tokens, Unknown))
+      count_with = child_total.(Parser.detect_blocks(tokens, Python))
+      assert count_with >= count_without
+    end
+
+    test "block has children_count accessible via Node.children_count/1" do
+      tokens = tokenize("foo(a)\nbar(b)\n")
+      [block] = Parser.detect_blocks(tokens, Unknown)
+      assert Node.children_count(block) == length(block.children)
+    end
+  end
+
+  describe "recursive sub-block nesting" do
+    test "nested bracket calls produce a multi-level sub-block tree" do
+      # def foo(bar(x, y), baz) — the arg list contains another call with its own args
+      tokens = tokenize("def foo(bar(x, y), baz)\n  result\nend\n")
+      [block] = Parser.detect_blocks(tokens, Unknown)
+
+      # depth 1 — the outer argument list
+      args =
+        Enum.find(block.children, fn b ->
+          Enum.any?(b.tokens, &(&1.content == "bar"))
+        end)
+
+      assert args != nil, "expected an arg-list sub-block containing 'bar'"
+
+      # depth 2 — the inner call (x, y) inside bar(...)
+      inner =
+        Enum.find(args.children, fn b ->
+          Enum.any?(b.tokens, &(&1.content == "x"))
+        end)
+
+      assert inner != nil, "expected a sub-block for the inner call (x, y)"
+
+      # depth 3 — (x, y) is a leaf: no further bracket structure inside
+      assert inner.children == []
+    end
+
+    test "triply nested brackets produce three levels of sub-blocks" do
+      tokens = tokenize("def outer(inner(deep(value)))\n  :ok\nend\n")
+      [block] = Parser.detect_blocks(tokens, Unknown)
+
+      # depth 1: (inner(deep(value)))
+      d1 =
+        Enum.find(block.children, fn b ->
+          Enum.any?(b.tokens, &(&1.content == "inner"))
+        end)
+
+      assert d1 != nil
+
+      # depth 2: (deep(value))
+      d2 =
+        Enum.find(d1.children, fn b ->
+          Enum.any?(b.tokens, &(&1.content == "deep"))
+        end)
+
+      assert d2 != nil
+
+      # depth 3: (value) — leaf
+      d3 =
+        Enum.find(d2.children, fn b ->
+          Enum.any?(b.tokens, &(&1.content == "value"))
+        end)
+
+      assert d3 != nil
+      assert d3.children == []
+    end
+  end
+
+  describe "triple-quote protection" do
+    test "blank lines inside a heredoc do not create a new block" do
+      code = """
+      before
+
+
+      \"""
+      Some doc.
+
+      More doc.
+      \"""
+
+      after
+      """
+
+      tokens = TokenNormalizer.normalize_structural(code)
+      blocks = Parser.detect_blocks(tokens, Unknown)
+      # The heredoc (including its blank line) should be ONE block, not split
+      heredoc_block =
+        Enum.find(blocks, fn b ->
+          Enum.any?(b.tokens, &(&1.kind == "<DOC>"))
+        end)
+
+      assert heredoc_block != nil
+      # Ensure no split happened inside — the heredoc block contains both "Some" and "More"
+      contents = Enum.filter(heredoc_block.tokens, &(&1.kind == "<ID>"))
+      names = Enum.map(contents, & &1.content)
+      assert "Some" in names
+      assert "More" in names
+    end
+
+    test "content before and after a heredoc becomes separate blocks" do
+      code = """
+      def foo do
+        :ok
+      end
+
+
+      \"""
+      doc here
+      \"""
+
+
+      def bar do
+        :ok
+      end
+      """
+
+      tokens = TokenNormalizer.normalize_structural(code)
+      blocks = Parser.detect_blocks(tokens, Unknown)
+      # Expect exactly 3 blocks: code-before, heredoc, code-after
+      assert length(blocks) == 3
+      assert Enum.any?(Enum.at(blocks, 0).tokens, &(&1.content == "foo"))
+      assert Enum.any?(Enum.at(blocks, 1).tokens, &(&1.kind == "<DOC>"))
+      assert Enum.any?(Enum.at(blocks, 2).tokens, &(&1.content == "bar"))
+    end
+  end
+
+  describe "language_from_path/1" do
+    test "returns :python for .py files" do
+      assert Parser.language_from_path("lib/foo.py") == :python
+    end
+
+    test "returns :unknown for unknown extensions" do
+      assert Parser.language_from_path("lib/foo.xyz") == :unknown
+    end
+  end
+end
diff --git a/test/codeqa/ast/parsing/signal_registry_test.exs b/test/codeqa/ast/parsing/signal_registry_test.exs
new file mode 100644
index 00000000..f0c07887
--- /dev/null
+++ b/test/codeqa/ast/parsing/signal_registry_test.exs
@@ -0,0 +1,33 @@
+defmodule CodeQA.AST.Parsing.SignalRegistryTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Parsing.SignalRegistry
+  alias CodeQA.AST.Signals.Classification.FunctionSignal
+  alias CodeQA.AST.Signals.Structural.BlankLineSignal
+
+  test "new/0 returns empty registry" do
+    registry = SignalRegistry.new()
+    assert registry.structural == []
+    assert registry.classification == []
+  end
+
+  test "register_structural/2 appends signal" do
+    registry = SignalRegistry.register_structural(SignalRegistry.new(), %BlankLineSignal{})
+    assert length(registry.structural) == 1
+  end
+
+  test "register_classification/2 appends signal" do
+    registry = SignalRegistry.register_classification(SignalRegistry.new(), %FunctionSignal{})
+    assert length(registry.classification) == 1
+  end
+
+  test "default/0 includes all built-in signals" do
+    # Lower bounds only; the exact classification count is pinned by the next test.
+    registry = SignalRegistry.default()
+    assert length(registry.structural) >= 4
+    assert length(registry.classification) >= 6
+  end
+
+  test "default/0 has exactly 10 classification signals" do
+    assert length(SignalRegistry.default().classification) == 10
+  end
+end
diff --git a/test/codeqa/ast/parsing/signal_stream_test.exs b/test/codeqa/ast/parsing/signal_stream_test.exs
new file mode 100644
index 00000000..69cfcaf2
--- /dev/null
+++ b/test/codeqa/ast/parsing/signal_stream_test.exs
@@ -0,0 +1,43 @@
+defmodule CodeQA.AST.SignalStreamTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.Token
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.Support.CounterSignal
+
+  # Builds a token of the given kind and content, pinned to line 1, column 0.
+  defp tok(kind, content), do: %Token{kind: kind, content: content, line: 1, col: 0}
+
+  test "returns one emission list per signal" do
+    tokens = [tok("<ID>", "foo"), tok("<NL>", "\n"), tok("<ID>", "bar")]
+    results = SignalStream.run(tokens, [%CounterSignal{}], [])
+    assert length(results) == 1
+  end
+
+  test "emissions list contains all emitted values from the signal" do
+    tokens = [tok("<ID>", "foo"), tok("<NL>", "\n"), tok("<ID>", "bar")]
+
+    # `assert` turns a mismatch into an ExUnit failure with a diff, not a MatchError.
+    # The values 0 and 2 coincide with the positions of the two <ID> tokens;
+    # presumably CounterSignal reports the token index (confirm in its definition).
+    assert [[{CounterSignal, :test, :id_seen, 0}, {CounterSignal, :test, :id_seen, 2}]] =
+             SignalStream.run(tokens, [%CounterSignal{}], [])
+  end
+
+  test "non-emitting tokens produce no entries" do
+    tokens = [tok("<NL>", "\n"), tok("<NL>", "\n")]
+    # Asserted for the same reason as above: fail as a test, not as a crash.
+    assert [[]] = SignalStream.run(tokens, [%CounterSignal{}], [])
+  end
+
+  test "multiple signals run independently" do
+    tokens = [tok("<ID>", "x")]
+    results = SignalStream.run(tokens, [%CounterSignal{}, %CounterSignal{}], [])
+    assert length(results) == 2
+  end
+
+  test "empty token stream returns empty emissions per signal" do
+    results = SignalStream.run([], [%CounterSignal{}], [])
+    assert results == [[]]
+  end
+end
diff --git a/test/codeqa/ast/parsing/signal_test.exs b/test/codeqa/ast/parsing/signal_test.exs
new file mode 100644
index 00000000..47d72ad6
--- /dev/null
+++ b/test/codeqa/ast/parsing/signal_test.exs
@@ -0,0 +1,56 @@
+defmodule CodeQA.AST.SignalTest do
+  use ExUnit.Case, async: true
+
+  defmodule TestSignal do
+    defstruct []
+
+    defimpl CodeQA.AST.Parsing.Signal do
+      def source(_), do: TestSignal
+      def group(_), do: :split
+      def init(_, _opts), do: %{count: 0}
+
+      def emit(_, _token, state) do
+        new_state = %{state | count: state.count + 1}
+        {MapSet.new([{:tick, state.count}]), new_state}
+      end
+    end
+  end
+
+  defmodule SilentSignal do
+    defstruct []
+
+    defimpl CodeQA.AST.Parsing.Signal do
+      def source(_), do: SilentSignal
+      def group(_), do: :split
+      def init(_, _), do: %{}
+      def emit(_, _token, state), do: {MapSet.new(), state}
+    end
+  end
+
+  alias CodeQA.AST.Parsing.Signal
+
+  test "source returns the implementing module" do
+    assert Signal.source(%TestSignal{}) == TestSignal
+  end
+
+  test "group returns the signal's group atom" do
+    assert Signal.group(%TestSignal{}) == :split
+  end
+
+  test "init returns initial state" do
+    assert Signal.init(%TestSignal{}, []) == %{count: 0}
+  end
+
+  test "emit returns {MapSet of {name, value} pairs, new_state}" do
+    token = %CodeQA.AST.Lexing.Token{kind: "<ID>", content: "foo", line: 1, col: 0}
+    {emissions, new_state} = Signal.emit(%TestSignal{}, token, %{count: 0})
+    assert MapSet.member?(emissions, {:tick, 0})
+    assert new_state == %{count: 1}
+  end
+
+  test "emit may return empty MapSet for no emission" do
+    token = %CodeQA.AST.Lexing.Token{kind: "<NL>", content: "\n", line: 1, col: 0}
+    {emissions, _state} = Signal.emit(%SilentSignal{}, token, %{})
+    assert MapSet.size(emissions) == 0
+  end
+end
diff --git a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs
new file mode 100644
index 00000000..374b191a
--- /dev/null
+++ b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs
@@ -0,0 +1,46 @@
+defmodule CodeQA.AST.Signals.Classification.CommentDensitySignalTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Classification.CommentDensitySignal
+  alias CodeQA.Languages.Code.Scripting.Python
+  alias CodeQA.Languages.Unknown
+
+  # Runs only CommentDensitySignal over `tokens` and returns its emissions as one flat list.
+  defp run(tokens, lang_mod),
+    do: tokens |> SignalStream.run([%CommentDensitySignal{}], lang_mod) |> List.flatten()
+
+  defp t(content, kind \\ "<ID>"), do: %{kind: kind, content: content, line: 1, col: 0}
+  defp nl, do: %{kind: "<NL>", content: "\n", line: 1, col: 0}
+  defp on_line(tokens, line), do: Enum.map(tokens, &%{&1 | line: line})
+
+  test "votes comment when >60% of lines start with #" do
+    tokens =
+      on_line([t("#"), t("license")], 1) ++
+        [nl()] ++
+        on_line([t("#"), t("copyright")], 2) ++
+        [nl()] ++
+        on_line([t("#"), t("author")], 3) ++
+        [nl()] ++
+        on_line([t("def"), t("foo")], 4)
+
+    assert [{CommentDensitySignal, :classification, :comment_vote, _}] = run(tokens, Python)
+  end
+
+  test "does not vote when comment density is low" do
+    tokens =
+      on_line([t("def"), t("foo")], 1) ++
+        [nl()] ++
+        on_line([t("#"), t("note")], 2)
+
+    assert run(tokens, Python) == []
+  end
+
+  test "does not vote when no comment_prefixes provided" do
+    tokens =
+      on_line([t("#"), t("comment")], 1) ++
+        [nl()] ++
+        on_line([t("#"), t("comment")], 2)
+
+    assert run(tokens, Unknown) == []
+  end
+end
diff --git a/test/codeqa/ast/signals/classification/config_signal_test.exs b/test/codeqa/ast/signals/classification/config_signal_test.exs
new file mode 100644
index 00000000..da510c2b
--- /dev/null
+++ b/test/codeqa/ast/signals/classification/config_signal_test.exs
@@ -0,0 +1,28 @@
+defmodule CodeQA.AST.Signals.Classification.ConfigSignalTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Classification.ConfigSignal
+
+  defp run(tokens), do: SignalStream.run(tokens, [%ConfigSignal{}], []) |> List.flatten()
+  defp t(content, kind \\ "<ID>"), do: %{kind: kind, content: content, line: 1, col: 0}
+
+  test "emits config_vote for 'config' keyword at indent 0" do
+    emissions = run([t("config"), t(":app"), t(","), t("key:"), t("val")])
+    assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions
+  end
+
+  test "emits config_vote for 'configure' keyword" do
+    emissions = run([t("configure")])
+    assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions
+  end
+
+  test "does not emit when indented" do
+    emissions = run([t("<WS>", "<WS>"), t("config")])
+    assert emissions == []
+  end
+
+  test "does not emit for 'config' inside brackets" do
+    tokens = [t("(", "("), t("config"), t(")", ")")]
+    assert run(tokens) == []
+  end
+end
diff --git a/test/codeqa/ast/signals/classification/data_signal_test.exs b/test/codeqa/ast/signals/classification/data_signal_test.exs
new file mode 100644
index 00000000..852067bc
--- /dev/null
+++ b/test/codeqa/ast/signals/classification/data_signal_test.exs
@@ -0,0 +1,28 @@
+defmodule CodeQA.AST.Signals.Classification.DataSignalTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Classification.DataSignal
+
+  # Runs only DataSignal over `tokens` and returns its emissions as one flat list.
+  defp run(tokens), do: tokens |> SignalStream.run([%DataSignal{}], []) |> List.flatten()
+
+  defp t(content, kind), do: %{kind: kind, content: content, line: 1, col: 0}
+  defp str(value), do: t(value, "<STR>")
+  defp num(value), do: t(value, "<NUM>")
+  defp id(value), do: t(value, "<ID>")
+
+  test "votes data for high-literal token stream" do
+    tokens = [str("foo"), str("bar"), num("1"), num("2"), id("key")]
+    assert [{DataSignal, :classification, :data_vote, _}] = run(tokens)
+  end
+
+  test "does not vote when control-flow keyword present" do
+    literals_with_branch = [str("foo"), id("if"), str("bar")]
+    assert run(literals_with_branch) == []
+  end
+
+  test "does not vote when literal ratio is low" do
+    mostly_identifiers = [id("foo"), id("bar"), id("baz"), str("one")]
+    assert run(mostly_identifiers) == []
+  end
+end
diff --git a/test/codeqa/ast/signals/classification/type_signal_test.exs b/test/codeqa/ast/signals/classification/type_signal_test.exs
new file mode 100644
index 00000000..aa400d38
--- /dev/null
+++ b/test/codeqa/ast/signals/classification/type_signal_test.exs
@@ -0,0 +1,40 @@
+defmodule CodeQA.AST.Signals.Classification.TypeSignalTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Classification.TypeSignal
+
+  defp run(tokens), do: SignalStream.run(tokens, [%TypeSignal{}], []) |> List.flatten()
+
+  defp t(content, kind \\ "<ID>"), do: %{kind: kind, content: content, line: 1, col: 0}
+
+  test "emits type_vote weight 3 for @type at indent 0" do
+    emissions = run([t("@", "@"), t("type"), t("t"), t("::"), t("integer")])
+    assert [{TypeSignal, :classification, :type_vote, 3}] = emissions
+  end
+
+  test "emits type_vote for @typep" do
+    emissions = run([t("@", "@"), t("typep"), t("t"), t("::")])
+    assert [{TypeSignal, :classification, :type_vote, 3}] = emissions
+  end
+
+  test "emits type_vote for @opaque" do
+    emissions = run([t("@", "@"), t("opaque"), t("t"), t("::")])
+    assert [{TypeSignal, :classification, :type_vote, 3}] = emissions
+  end
+
+  test "does not emit for @spec" do
+    emissions = run([t("@", "@"), t("spec"), t("foo"), t("()")])
+    assert emissions == []
+  end
+
+  test "does not emit for @type inside indented block" do
+    emissions = run([t("<WS>", "<WS>"), t("@", "@"), t("type"), t("t")])
+    assert emissions == []
+  end
+
+  test "emits at most one vote" do
+    tokens = [t("@", "@"), t("type"), t("a"), t("<NL>", "<NL>"), t("@", "@"), t("typep"), t("b")]
+    emissions = run(tokens)
+    assert length(emissions) == 1
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs
new file mode 100644
index 00000000..2a863526
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs
@@ -0,0 +1,49 @@
+defmodule CodeQA.AST.Signals.Structural.AccessModifierSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.AccessModifierSignal
+  alias CodeQA.Languages.Code.Vm.Java
+
+  defp split_values(code, lang_mod) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%AccessModifierSignal{}], lang_mod)
+    for {_src, :split, :access_modifier_split, v} <- emissions, do: v
+  end
+
+  test "no split for first modifier (seen_content == false)" do
+    assert split_values("public void foo() {}\n", Java) == []
+  end
+
+  test "emits split at second public modifier after content" do
+    splits = split_values("public void foo() {}\npublic void bar() {}\n", Java)
+    assert length(splits) == 1
+  end
+
+  test "emits split at private modifier after content" do
+    splits = split_values("public void foo() {}\nprivate void bar() {}\n", Java)
+    assert length(splits) == 1
+  end
+
+  test "does not split when modifier is inside brackets" do
+    splits = split_values("public void foo(private int x) {}\n", Java)
+    assert splits == []
+  end
+
+  test "does not split on identifier that matches modifier but is not at line start" do
+    splits = split_values("public void foo() {}\nfoo.public.bar()\n", Java)
+    assert splits == []
+  end
+
+  test "works at indent > 0 (unlike KeywordSignal)" do
+    # Two indented public declarations, no enclosing brackets — should split
+    splits = split_values("  public void foo() {}\n  public void bar() {}\n", Java)
+    assert length(splits) == 1
+  end
+
+  test "group is :split" do
+    assert Signal.group(%AccessModifierSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs
new file mode 100644
index 00000000..bd76abf1
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs
@@ -0,0 +1,84 @@
+defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.AssignmentFunctionSignal
+
+  defp split_indices(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%AssignmentFunctionSignal{}], [])
+    for {_src, :split, :assignment_function_split, v} <- emissions, do: v
+  end
+
+  test "emits split for identifier = function() pattern (second in file)" do
+    code = """
+    const first = function() {}
+    const foo = function() {}
+    """
+
+    splits = split_indices(code)
+    assert length(splits) == 1
+  end
+
+  test "emits split for arrow function pattern: bar = () => {}" do
+    code = """
+    const first = function() {}
+    const bar = () => {}
+    """
+
+    splits = split_indices(code)
+    assert length(splits) == 1
+  end
+
+  test "emits split for async function pattern: baz = async function() {}" do
+    code = """
+    const first = function() {}
+    const baz = async function() {}
+    """
+
+    splits = split_indices(code)
+    assert length(splits) == 1
+  end
+
+  test "does NOT emit for the first assignment in file (seen_content == false)" do
+    code = "const foo = function() {}\n"
+    splits = split_indices(code)
+    assert splits == []
+  end
+
+  test "does NOT emit for plain assignment: x = 1" do
+    code = """
+    const first = function() {}
+    x = 1
+    """
+
+    splits = split_indices(code)
+    assert splits == []
+  end
+
+  test "does NOT emit when identifier is indented (indent > 0)" do
+    code = """
+    const first = function() {}
+      foo = function() {}
+    """
+
+    splits = split_indices(code)
+    assert splits == []
+  end
+
+  test "emits split for module.exports = function() pattern" do
+    code = """
+    const first = function() {}
+    module.exports = function() {}
+    """
+
+    splits = split_indices(code)
+    assert length(splits) == 1
+  end
+
+  test "group/1 returns :split" do
+    assert Signal.group(%AssignmentFunctionSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/blank_line_signal_test.exs b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs
new file mode 100644
index 00000000..4e7d9d27
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs
@@ -0,0 +1,36 @@
+defmodule CodeQA.AST.Signals.Structural.BlankLineSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.BlankLineSignal
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+
+  defp split_values(code, lang_mod) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%BlankLineSignal{}], lang_mod)
+    for {_src, :split, :blank_split, v} <- emissions, do: v
+  end
+
+  test "no splits for single block" do
+    assert split_values("def foo\n  x\nend\n", ElixirLang) == []
+  end
+
+  test "emits split after blank line following block-end token" do
+    splits = split_values("def foo\n  x\nend\n\n\ndef bar\n  y\nend\n", ElixirLang)
+    assert length(splits) == 1
+  end
+
+  test "no split when blank line does not follow block-end token" do
+    assert split_values("x = 1\n\n\ny = 2\n", ElixirLang) == []
+  end
+
+  test "group is :split" do
+    assert Signal.group(%BlankLineSignal{}) == :split
+  end
+
+  test "source is BlankLineSignal" do
+    assert Signal.source(%BlankLineSignal{}) == BlankLineSignal
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/bracket_signal_test.exs b/test/codeqa/ast/signals/structural/bracket_signal_test.exs
new file mode 100644
index 00000000..611474bd
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/bracket_signal_test.exs
@@ -0,0 +1,43 @@
+defmodule CodeQA.AST.Signals.Structural.BracketSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.BracketSignal
+
+  defp enclosure_values(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%BracketSignal{}], [])
+    for {_src, :enclosure, :bracket_enclosure, v} <- emissions, do: v
+  end
+
+  test "no enclosures for code without brackets" do
+    assert enclosure_values("foo\n") == []
+  end
+
+  test "emits enclosure for a single bracketed expression" do
+    enclosures = enclosure_values("foo(a, b)\n")
+    assert length(enclosures) == 1
+  end
+
+  test "emits only outermost enclosure for nested brackets" do
+    enclosures = enclosure_values("foo(bar(x))\n")
+    assert length(enclosures) == 1
+  end
+
+  test "enclosure value is {start_idx, end_idx} tuple" do
+    [{start, stop}] = enclosure_values("foo(a)\n")
+    assert is_integer(start)
+    assert is_integer(stop)
+    assert stop > start
+  end
+
+  test "mismatched closing bracket is silently skipped" do
+    assert enclosure_values("foo)\n") == []
+  end
+
+  test "group is :enclosure" do
+    assert Signal.group(%BracketSignal{}) == :enclosure
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs
new file mode 100644
index 00000000..320390c9
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs
@@ -0,0 +1,93 @@
+defmodule CodeQA.AST.Signals.Structural.BranchSplitSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.{Signal, SignalStream}
+  alias CodeQA.AST.Signals.Structural.BranchSplitSignal
+  alias CodeQA.Languages.Code.Scripting.PHP
+  alias CodeQA.Languages.Code.Scripting.Python
+  alias CodeQA.Languages.Code.Scripting.Ruby
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+  alias CodeQA.Languages.Code.Vm.Java
+
+  defp split_values(code, lang_mod) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%BranchSplitSignal{}], lang_mod)
+    for {_src, :branch_split, :branch_split, v} <- emissions, do: v
+  end
+
+  test "group is :branch_split" do
+    assert Signal.group(%BranchSplitSignal{}) == :branch_split
+  end
+
+  test "no split for code with no branch keywords" do
+    assert split_values("x = 1\ny = 2\n", ElixirLang) == []
+  end
+
+  test "emits split at else after seen content" do
+    splits = split_values("if x do\n  :a\nelse\n  :b\nend\n", ElixirLang)
+    assert length(splits) == 1
+  end
+
+  test "emits split at elif" do
+    splits = split_values("if x:\n  pass\nelif y:\n  pass\n", Python)
+    assert length(splits) == 1
+  end
+
+  test "emits split at multiple branch keywords" do
+    splits = split_values("if x do\n  :a\nelsif y\n  :b\nelse\n  :c\nend\n", Ruby)
+    assert length(splits) == 2
+  end
+
+  test "does not emit at first keyword (no seen_content yet)" do
+    splits = split_values("if x do\n  :a\nend\n", ElixirLang)
+    assert splits == []
+  end
+
+  test "does not emit when keyword is inside brackets" do
+    splits = split_values("foo(if x do 1 else 2 end)\n", ElixirLang)
+    assert splits == []
+  end
+
+  test "emits split at rescue" do
+    splits = split_values("try do\n  :ok\nrescue\n  _ -> :error\nend\n", ElixirLang)
+    assert length(splits) == 1
+  end
+
+  test "emits split at cond branch" do
+    splits = split_values("x = 1\ncond do\n  x -> :a\nend\n", ElixirLang)
+    assert length(splits) == 1
+  end
+
+  test "emits split at except (Python)" do
+    splits = split_values("try:\n  pass\nexcept ValueError:\n  pass\n", Python)
+    assert length(splits) == 1
+  end
+
+  test "emits split at ensure (Elixir)" do
+    splits =
+      split_values(
+        "try do\n  :ok\nrescue\n  _ -> :error\nensure\n  cleanup()\nend\n",
+        ElixirLang
+      )
+
+    assert length(splits) == 2
+  end
+
+  test "emits split at elseif (PHP)" do
+    splits = split_values("if x then\n  :a\nelseif y then\n  :b\nend\n", PHP)
+    assert length(splits) == 1
+  end
+
+  test "emits split at case label (switch body)" do
+    splits =
+      split_values("switch x\n  case 1:\n    :a\n  case 2:\n    :b\nend\n", Java)
+
+    assert splits != []
+  end
+
+  test "emits split at when keyword" do
+    splits = split_values("x = 1\nwhen x > 0 do\n  :pos\nend\n", ElixirLang)
+    assert length(splits) == 1
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs
new file mode 100644
index 00000000..7ff96a0c
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs
@@ -0,0 +1,30 @@
+defmodule CodeQA.AST.Signals.Structural.ColonIndentSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.ColonIndentSignal
+  alias CodeQA.Languages.Code.Scripting.Python
+  alias CodeQA.Languages.Unknown
+
+  defp enclosure_values(code, lang_mod \\ Python) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%ColonIndentSignal{}], lang_mod)
+    for {_src, :enclosure, :colon_indent_enclosure, v} <- emissions, do: v
+  end
+
+  test "no enclosures for non-python language" do
+    assert enclosure_values("def foo:\n    return 1\n", Unknown) ==
+             []
+  end
+
+  test "emits enclosure for colon-indented block in python" do
+    enclosures = enclosure_values("def foo:\n    return 1\n")
+    assert enclosures != []
+  end
+
+  test "group is :enclosure" do
+    assert Signal.group(%ColonIndentSignal{}) == :enclosure
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs
new file mode 100644
index 00000000..29762cb1
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs
@@ -0,0 +1,52 @@
+defmodule CodeQA.AST.Signals.Structural.CommentDividerSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.CommentDividerSignal
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+  alias CodeQA.Languages.Code.Vm.Java
+  alias CodeQA.Languages.Data.Sql
+
+  defp split_values(code, lang_mod) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%CommentDividerSignal{}], lang_mod)
+    for {_src, :split, :comment_divider_split, v} <- emissions, do: v
+  end
+
+  test "no split for first divider comment (seen_content == false at start of file)" do
+    assert split_values("# ---\n", ElixirLang) == []
+  end
+
+  test "emits split at # --- after prior content" do
+    splits = split_values("x = 1\n# ---\ny = 2\n", ElixirLang)
+    assert length(splits) == 1
+  end
+
+  test "emits split at // === after prior content" do
+    splits = split_values("x = 1\n// ===\ny = 2\n", Java)
+    assert length(splits) == 1
+  end
+
+  test "emits split at -- --- after prior content (SQL style)" do
+    splits = split_values("x = 1\n-- ---\ny = 2\n", Sql)
+    assert length(splits) == 1
+  end
+
+  test "does NOT emit for # followed by identifier (real comment)" do
+    assert split_values("x = 1\n# This is a real comment\n", ElixirLang) == []
+  end
+
+  test "does NOT emit when # is not at line start" do
+    assert split_values("x = 1\nx # ---\n", ElixirLang) == []
+  end
+
+  test "does NOT emit for indented divider comment (inside a block)" do
+    assert split_values("x = 1\n  # ---\n", ElixirLang) == []
+  end
+
+  test "group is :split" do
+    assert Signal.group(%CommentDividerSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/decorator_signal_test.exs b/test/codeqa/ast/signals/structural/decorator_signal_test.exs
new file mode 100644
index 00000000..6a5bb108
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/decorator_signal_test.exs
@@ -0,0 +1,47 @@
+defmodule CodeQA.AST.Signals.Structural.DecoratorSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.DecoratorSignal
+
+  defp split_values(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%DecoratorSignal{}], [])
+    for {_src, :split, :decorator_split, v} <- emissions, do: v
+  end
+
+  test "no split for first @ (seen_content == false at start of file)" do
+    assert split_values("@decorator\ndef foo() {}\n") == []
+  end
+
+  test "emits split at second @decorator after content" do
+    splits = split_values("@decorator\ndef foo() {}\n@decorator\ndef bar() {}\n")
+    assert [_only_split] = splits
+  end
+
+  test "does not emit when @ is inside brackets" do
+    splits = split_values("@decorator\ndef foo(@param x) {}\n")
+    assert splits == []
+  end
+
+  test "does not emit when @ is not at line start (mid-expression)" do
+    splits = split_values("@decorator\ndef foo() { x@y }\n")
+    assert splits == []
+  end
+
+  test "emits split for Rust #[ pattern at line start after content" do
+    splits = split_values("#[derive(Debug)]\nstruct Foo {}\n#[derive(Clone)]\nstruct Bar {}\n")
+    assert [_only_split] = splits
+  end
+
+  test "does not emit for # at line start when next token is not [" do
+    splits = split_values("@decorator\ndef foo() {}\n# comment\ndef bar() {}\n")
+    assert splits == []
+  end
+
+  test "group is :split" do
+    assert Signal.group(%DecoratorSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs
new file mode 100644
index 00000000..ddf8702d
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs
@@ -0,0 +1,55 @@
+defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.DedentToZeroSignal
+
+  defp split_count(code) do
+    normalized = TokenNormalizer.normalize_structural(code)
+    [emitted] = SignalStream.run(normalized, [%DedentToZeroSignal{}], [])
+    Enum.count(emitted, &match?({_source, :split, :dedent_split, _payload}, &1))
+  end
+
+  test "no split in a single flat block (no indentation change)" do
+    source = "foo\nbar\nbaz\n"
+    assert 0 == split_count(source)
+  end
+
+  test "emits split when first token of a new line at indent 0 after indented content" do
+    source = "def foo:\n  return 1\ndef bar:\n"
+    assert 1 == split_count(source)
+  end
+
+  test "does NOT emit when returning to indent 0 from same-level content (no prior indent)" do
+    source = "foo\nbar\n"
+    assert 0 == split_count(source)
+  end
+
+  test "does NOT emit at the very start of file (seen_content == false)" do
+    source = "foo\n  bar\n"
+    # Nothing precedes the first line, so it cannot count as a dedent; expect zero splits
+    assert 0 == split_count(source)
+  end
+
+  test "handles multiple indented blocks with splits" do
+    source = "foo:\n  x = 1\nbar:\n  y = 2\nbaz:\n"
+    # one split expected at "bar" and one at "baz"
+    assert 2 == split_count(source)
+  end
+
+  test "does NOT split if current line also has indent (both lines indented)" do
+    source = "foo:\n  x = 1\n  y = 2\n"
+    assert 0 == split_count(source)
+  end
+
+  test "emits split when a blank line separates an indented block from a new block at indent 0" do
+    source = "def foo:\n  return 1\n\ndef bar:\n"
+    assert 1 == split_count(source)
+  end
+
+  test "group/1 returns :split" do
+    assert :split == Signal.group(%DedentToZeroSignal{})
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs
new file mode 100644
index 00000000..da269e8b
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs
@@ -0,0 +1,44 @@
+defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.DocCommentLeadSignal
+
+  defp split_values(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%DocCommentLeadSignal{}], [])
+    for {_src, :split, :doc_comment_split, v} <- emissions, do: v
+  end
+
+  test "no split for first /// (seen_content == false at start of file)" do
+    assert split_values("/// doc\n") == []
+  end
+
+  test "emits split at /// after prior content (Rust/C# doc comment)" do
+    splits = split_values("fn foo() {}\n/// doc\n")
+    assert [_only_split] = splits
+  end
+
+  test "emits split at /** after prior content (Java/JS JSDoc)" do
+    splits = split_values("function foo() {}\n/**\n * doc\n */\n")
+    assert [_only_split] = splits
+  end
+
+  test "does NOT emit for // followed by identifier (regular line comment)" do
+    assert split_values("x = 1\n// regular comment\n") == []
+  end
+
+  test "does NOT emit for // that is not at line start" do
+    assert split_values("x = 1\nx // doc\n") == []
+  end
+
+  test "does NOT emit for / at line start when next is not *" do
+    assert split_values("x = 1\n/ something\n") == []
+  end
+
+  test "group is :split" do
+    assert Signal.group(%DocCommentLeadSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/keyword_signal_test.exs b/test/codeqa/ast/signals/structural/keyword_signal_test.exs
new file mode 100644
index 00000000..b269c408
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/keyword_signal_test.exs
@@ -0,0 +1,38 @@
+defmodule CodeQA.AST.Signals.Structural.KeywordSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.KeywordSignal
+  alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang
+
+  defp split_values(code, lang_mod) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%KeywordSignal{}], lang_mod)
+    for {_src, :split, :keyword_split, v} <- emissions, do: v
+  end
+
+  test "no split for single def" do
+    assert split_values("def foo\n  x\nend\n", ElixirLang) == []
+  end
+
+  test "emits split at second def keyword at depth 0 indent 0" do
+    splits = split_values("def foo\n  x\nend\ndef bar\n  y\nend\n", ElixirLang)
+    assert [_only_split] = splits
+  end
+
+  test "does not split on def inside a module (indented)" do
+    splits = split_values("defmodule Foo do\n  def foo, do: 1\nend\n", ElixirLang)
+    assert splits == []
+  end
+
+  test "does not split on keyword inside brackets" do
+    splits = split_values("foo(def, bar)\n", ElixirLang)
+    assert splits == []
+  end
+
+  test "group is :split" do
+    assert Signal.group(%KeywordSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/sql_block_signal_test.exs b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs
new file mode 100644
index 00000000..5f89598a
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs
@@ -0,0 +1,60 @@
+defmodule CodeQA.AST.Signals.Structural.SQLBlockSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.SQLBlockSignal
+  alias CodeQA.Languages.Data.Sql
+
+  defp split_values(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%SQLBlockSignal{}], Sql)
+    for {_src, :split, :sql_block_split, v} <- emissions, do: v
+  end
+
+  test "no split for the first statement (seen_content == false)" do
+    assert split_values("CREATE TABLE users (id INT);\n") == []
+  end
+
+  test "emits split at second CREATE TABLE DDL statement" do
+    code = "CREATE TABLE users (id INT);\nCREATE TABLE orders (id INT);\n"
+    splits = split_values(code)
+    assert [_only_split] = splits
+  end
+
+  test "emits split at SELECT when a query follows other content" do
+    code = "CREATE TABLE users (id INT);\nSELECT id FROM users;\n"
+    splits = split_values(code)
+    assert [_only_split] = splits
+  end
+
+  test "emits split at lowercase create (case-insensitive match)" do
+    code = "create table users (id INT);\ncreate table orders (id INT);\n"
+    splits = split_values(code)
+    assert [_only_split] = splits
+  end
+
+  test "emits split at INSERT after prior content" do
+    code = "CREATE TABLE users (id INT);\nINSERT INTO users VALUES (1);\n"
+    splits = split_values(code)
+    assert [_only_split] = splits
+  end
+
+  test "does NOT emit for SQL keyword mid-statement (not at line start)" do
+    # FROM is not at line start; only SELECT is, but it's the first statement
+    code = "SELECT id FROM users;\n"
+    splits = split_values(code)
+    assert splits == []
+  end
+
+  test "does NOT emit for non-SQL identifier at line start" do
+    code = "CREATE TABLE users (id INT);\nusername VARCHAR(255);\n"
+    splits = split_values(code)
+    assert splits == []
+  end
+
+  test "group/1 returns :split" do
+    assert Signal.group(%SQLBlockSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs
new file mode 100644
index 00000000..2b840bd1
--- /dev/null
+++ b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs
@@ -0,0 +1,35 @@
+defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignalTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Signal
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.AST.Signals.Structural.TripleQuoteSignal
+
+  defp split_values(code) do
+    tokens = TokenNormalizer.normalize_structural(code)
+    [emissions] = SignalStream.run(tokens, [%TripleQuoteSignal{}], [])
+    for {_src, :split, :triple_split, v} <- emissions, do: v
+  end
+
+  test "no splits for plain code" do
+    assert split_values("def foo\n  :ok\nend\n") == []
+  end
+
+  test "emits two splits for a complete heredoc" do
+    code = "\"\"\"\nhello\n\"\"\"\n"
+    splits = split_values(code)
+    assert [_first_split, _second_split] = splits
+  end
+
+  test "emits one split for unclosed heredoc (mismatch tolerance)" do
+    # single <DOC> token with no closing pair
+    code = "\"\"\"\nhello\n"
+    splits = split_values(code)
+    assert [_only_split] = splits
+  end
+
+  test "group is :split" do
+    assert Signal.group(%TripleQuoteSignal{}) == :split
+  end
+end
diff --git a/test/codeqa/block_impact/codebase_impact_test.exs b/test/codeqa/block_impact/codebase_impact_test.exs
new file mode 100644
index 00000000..55ef4b44
--- /dev/null
+++ b/test/codeqa/block_impact/codebase_impact_test.exs
@@ -0,0 +1,60 @@
+defmodule CodeQA.BlockImpact.CodebaseImpactTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.BlockImpact.CodebaseImpact
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Languages.Unknown
+
+  @content_a """
+  defmodule A do
+    def foo do
+      x = 1 + 2
+      y = x * 3
+      x + y
+    end
+
+    def bar do
+      :ok
+    end
+  end
+  """
+
+  @content_b """
+  defmodule B do
+    def baz, do: :baz
+  end
+  """
+
+  defp files_map, do: %{"lib/a.ex" => @content_a, "lib/b.ex" => @content_b}
+
+  defp first_block(content) do
+    tokens = TokenNormalizer.normalize_structural(content)
+    [first | _] = Parser.detect_blocks(tokens, Unknown)
+    first
+  end
+
+  describe "compute/4" do
+    test "returns a codebase aggregate map" do
+      node = first_block(@content_a)
+      result = CodebaseImpact.compute("lib/a.ex", @content_a, node, files_map())
+      assert is_map(result)
+      # At least one nested map must contain a key starting with "mean_"
+      all_keys = result |> Map.values() |> Enum.flat_map(&Map.keys/1)
+      assert Enum.any?(all_keys, &String.starts_with?(&1, "mean_"))
+    end
+
+    test "produces a different aggregate than the baseline when a large node is removed" do
+      node = first_block(@content_a)
+
+      if length(node.tokens) >= 10 do
+        baseline = Analyzer.analyze_codebase_aggregate(files_map())
+        without = CodebaseImpact.compute("lib/a.ex", @content_a, node, files_map())
+        # NOTE(review): results are only type-checked, never compared, despite the test name
+        assert is_map(without)
+        assert is_map(baseline)
+      end
+    end
+  end
+end
diff --git a/test/codeqa/block_impact/file_impact_test.exs b/test/codeqa/block_impact/file_impact_test.exs
new file mode 100644
index 00000000..b44f0a9d
--- /dev/null
+++ b/test/codeqa/block_impact/file_impact_test.exs
@@ -0,0 +1,64 @@
+defmodule CodeQA.BlockImpact.FileImpactTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.Parser
+  alias CodeQA.BlockImpact.FileImpact
+  alias CodeQA.Languages.Unknown
+
+  @fixture_content """
+  defmodule MyModule do
+    def foo do
+      x = 1
+      y = 2
+      x + y
+    end
+
+    def bar do
+      :ok
+    end
+  end
+  """
+
+  defp get_first_block(content) do
+    tokens = TokenNormalizer.normalize_structural(content)
+    [first | _] = Parser.detect_blocks(tokens, Unknown)
+    first
+  end
+
+  describe "compute/2" do
+    test "returns a metrics map when node has >= 10 tokens" do
+      node = get_first_block(@fixture_content)
+
+      if length(node.tokens) >= 10 do
+        result = FileImpact.compute(@fixture_content, node)
+        assert is_map(result)
+        assert map_size(result) > 0
+      end
+    end
+
+    test "returns nil for a node with fewer than 10 tokens" do
+      # Parse very short content so that any detected nodes should be small
+      tiny_content = "x = 1"
+      tokens = TokenNormalizer.normalize_structural(tiny_content)
+      nodes = Parser.detect_blocks(tokens, Unknown)
+      # NOTE(review): if no node has < 10 tokens the test passes without asserting anything
+      small_nodes = Enum.filter(nodes, fn n -> length(n.tokens) < 10 end)
+
+      if small_nodes != [] do
+        node = List.first(small_nodes)
+        assert FileImpact.compute(tiny_content, node) == nil
+      end
+    end
+
+    test "reconstructed content does not contain the removed node's first token line" do
+      tokens = TokenNormalizer.normalize_structural(@fixture_content)
+      [node | _] = Parser.detect_blocks(tokens, Unknown)
+      # NOTE(review): no-op for nodes under 10 tokens; only asserts is_map, not the named claim
+      if length(node.tokens) >= 10 do
+        result = FileImpact.compute(@fixture_content, node)
+        assert is_map(result)
+      end
+    end
+  end
+end
diff --git a/test/codeqa/block_impact/refactoring_potentials_test.exs b/test/codeqa/block_impact/refactoring_potentials_test.exs
new file mode 100644
index 00000000..8593dfc3
--- /dev/null
+++ b/test/codeqa/block_impact/refactoring_potentials_test.exs
@@ -0,0 +1,202 @@
+defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.BlockImpact.RefactoringPotentials
+  alias CodeQA.CombinedMetrics.FileScorer
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Engine.Analyzer
+
+  defp file_cosines(fm) do
+    # Convert file metrics via FileScorer.file_to_aggregate/1, then diagnose the result.
+    aggregate = FileScorer.file_to_aggregate(fm)
+    SampleRunner.diagnose_aggregate(aggregate, top: 99_999)
+  end
+
+  describe "compute/5" do
+    test "returns a list of maps with category, behavior, cosine_delta" do
+      nested_source = """
+      defmodule Foo do
+        def bar(a, b, c) do
+          if a do
+            if b do
+              if c do
+                :nested
+              end
+            end
+          end
+        end
+      end
+      """
+
+      baseline_fm = Analyzer.analyze_file("lib/foo.ex", nested_source)
+      flat_source = "defmodule Foo do\n  def bar, do: :ok\nend\n"
+      without_fm = Analyzer.analyze_file("lib/foo.ex", flat_source)
+
+      sources = %{"lib/foo.ex" => nested_source}
+      baseline_agg = Analyzer.analyze_codebase_aggregate(sources)
+      without_agg = Analyzer.analyze_codebase_aggregate(%{"lib/foo.ex" => flat_source})
+
+      baseline_file_cosines = file_cosines(baseline_fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(baseline_agg, top: 99_999)
+
+      potentials =
+        RefactoringPotentials.compute(
+          baseline_file_cosines,
+          without_fm,
+          baseline_codebase_cosines,
+          without_agg
+        )
+
+      assert is_list(potentials)
+
+      for entry <- potentials do
+        assert is_map_key(entry, "category")
+        assert is_map_key(entry, "behavior")
+        assert is_map_key(entry, "cosine_delta")
+        assert is_binary(entry["category"])
+        assert is_binary(entry["behavior"])
+        assert is_float(entry["cosine_delta"])
+      end
+    end
+
+    test "returns at most top N results (default 3)" do
+      source = "defmodule A do\n  def foo, do: 1\nend\n"
+      fm = Analyzer.analyze_file("lib/a.ex", source)
+      agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => source})
+
+      baseline_file_cosines = file_cosines(fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999)
+
+      potentials =
+        RefactoringPotentials.compute(baseline_file_cosines, fm, baseline_codebase_cosines, agg)
+
+      assert length(potentials) <= 3
+    end
+
+    test "respects top: N option" do
+      source = "defmodule A do\n  def foo, do: 1\nend\n"
+      fm = Analyzer.analyze_file("lib/a.ex", source)
+      agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => source})
+
+      baseline_file_cosines = file_cosines(fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999)
+
+      potentials =
+        RefactoringPotentials.compute(baseline_file_cosines, fm, baseline_codebase_cosines, agg,
+          top: 5
+        )
+
+      assert length(potentials) <= 5
+    end
+
+    test "results are sorted descending by cosine_delta" do
+      source = "defmodule A do\n  def foo, do: 1\nend\n"
+      fm = Analyzer.analyze_file("lib/a.ex", source)
+      agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => source})
+
+      baseline_file_cosines = file_cosines(fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999)
+
+      potentials =
+        RefactoringPotentials.compute(baseline_file_cosines, fm, baseline_codebase_cosines, agg,
+          top: 99
+        )
+
+      deltas = for entry <- potentials, do: entry["cosine_delta"]
+      assert deltas == Enum.sort(deltas, :desc)
+    end
+
+    test "skips behaviors whose _excludes_block_types includes block_type" do
+      nested_source = """
+      defmodule Foo do
+        def bar(a, b, c) do
+          if a do
+            if b do
+              if c do
+                :nested
+              end
+            end
+          end
+        end
+      end
+      """
+
+      baseline_fm = Analyzer.analyze_file("lib/foo.ex", nested_source)
+      flat_source = "defmodule Foo do\n  def bar, do: :ok\nend\n"
+      without_fm = Analyzer.analyze_file("lib/foo.ex", flat_source)
+
+      sources = %{"lib/foo.ex" => nested_source}
+      baseline_agg = Analyzer.analyze_codebase_aggregate(sources)
+      without_agg = Analyzer.analyze_codebase_aggregate(%{"lib/foo.ex" => flat_source})
+
+      baseline_file_cosines = file_cosines(baseline_fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(baseline_agg, top: 99_999)
+
+      behavior_map = %{
+        "function_design" => [
+          {"cyclomatic_complexity_under_10", %{"_excludes_block_types" => ["module"]}}
+        ]
+      }
+
+      result_unfiltered =
+        RefactoringPotentials.compute(
+          baseline_file_cosines,
+          without_fm,
+          baseline_codebase_cosines,
+          without_agg,
+          top: 99_999
+        )
+
+      result_module =
+        RefactoringPotentials.compute(
+          baseline_file_cosines,
+          without_fm,
+          baseline_codebase_cosines,
+          without_agg,
+          top: 99_999,
+          block_type: :module,
+          behavior_map: behavior_map
+        )
+
+      result_function =
+        RefactoringPotentials.compute(
+          baseline_file_cosines,
+          without_fm,
+          baseline_codebase_cosines,
+          without_agg,
+          top: 99_999,
+          block_type: :function,
+          behavior_map: behavior_map
+        )
+
+      excluded_present? = fn potentials ->
+        Enum.any?(potentials, fn p ->
+          p["category"] == "function_design" and p["behavior"] == "cyclomatic_complexity_under_10"
+        end)
+      end
+
+      assert excluded_present?.(result_unfiltered),
+             "test premise: excluded behavior must appear when no filter is set"
+
+      refute excluded_present?.(result_module),
+             "behavior should be filtered out for :module block"
+
+      assert excluded_present?.(result_function),
+             "behavior should remain for :function block (not in negative list)"
+    end
+
+    test "no block_type option means no filtering (backwards compat)" do
+      source = "defmodule A do\n  def foo, do: 1\nend\n"
+      fm = Analyzer.analyze_file("lib/a.ex", source)
+      agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => source})
+
+      baseline_file_cosines = file_cosines(fm)
+      baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999)
+
+      potentials =
+        RefactoringPotentials.compute(baseline_file_cosines, fm, baseline_codebase_cosines, agg)
+
+      assert is_list(potentials)
+    end
+  end
+end
diff --git a/test/codeqa/block_impact_analyzer_test.exs b/test/codeqa/block_impact_analyzer_test.exs
new file mode 100644
index 00000000..e793f088
--- /dev/null
+++ b/test/codeqa/block_impact_analyzer_test.exs
@@ -0,0 +1,108 @@
+defmodule CodeQA.BlockImpactAnalyzerTest do
+  # Runs with async: false because the orchestrator uses Task.async_stream internally
+  use ExUnit.Case, async: false
+
+  alias CodeQA.BlockImpactAnalyzer
+  alias CodeQA.Engine.Analyzer
+
+  @fixture_content """
+  defmodule MyModule do
+    def foo do
+      x = 1
+      y = 2
+      x + y
+    end
+
+    def bar do
+      :ok
+    end
+  end
+  """
+
+  describe "analyze/3" do
+    test "adds 'nodes' key to each file entry in the pipeline result" do
+      sources = %{"lib/my_module.ex" => @fixture_content}
+      baseline = Analyzer.analyze_codebase(sources)
+
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources)
+
+      assert is_map_key(enriched, "files")
+      assert is_map_key(enriched["files"], "lib/my_module.ex")
+      file_entry = enriched["files"]["lib/my_module.ex"]
+      assert is_map_key(file_entry, "nodes")
+      assert is_list(file_entry["nodes"])
+    end
+
+    test "each node has required fields" do
+      sources = %{"lib/my_module.ex" => @fixture_content}
+      baseline = Analyzer.analyze_codebase(sources)
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources)
+
+      nodes = enriched["files"]["lib/my_module.ex"]["nodes"]
+
+      for node <- nodes do
+        assert is_map_key(node, "start_line")
+        assert is_map_key(node, "end_line")
+        assert is_map_key(node, "column_start")
+        assert is_map_key(node, "char_length")
+        assert is_map_key(node, "type")
+        assert is_map_key(node, "token_count")
+        assert is_map_key(node, "refactoring_potentials")
+        assert is_map_key(node, "children")
+        assert is_list(node["refactoring_potentials"])
+        assert is_list(node["children"])
+      end
+    end
+
+    test "nodes are sorted by start_line ascending" do
+      sources = %{"lib/my_module.ex" => @fixture_content}
+      baseline = Analyzer.analyze_codebase(sources)
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources)
+
+      nodes = enriched["files"]["lib/my_module.ex"]["nodes"]
+      start_lines = for node <- nodes, do: node["start_line"]
+      assert start_lines == Enum.sort(start_lines)
+    end
+
+    test "preserves existing 'codebase' key in pipeline result" do
+      sources = %{"lib/my_module.ex" => @fixture_content}
+      baseline = Analyzer.analyze_codebase(sources)
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources)
+
+      assert is_map_key(enriched, "codebase")
+      assert enriched["codebase"] == baseline["codebase"]
+    end
+
+    test "nodes_top option limits refactoring_potentials per node" do
+      sources = %{"lib/my_module.ex" => @fixture_content}
+      baseline = Analyzer.analyze_codebase(sources)
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources, nodes_top: 1)
+
+      nodes = enriched["files"]["lib/my_module.ex"]["nodes"]
+
+      for node <- nodes do
+        assert length(node["refactoring_potentials"]) <= 1
+      end
+    end
+
+    test "node['type'] reflects classified block kind, not the always-:code default" do
+      source = """
+      defmodule Foo do
+        @moduledoc "hi"
+
+        def bar, do: :ok
+      end
+      """
+
+      sources = %{"lib/foo.ex" => source}
+      baseline = Analyzer.analyze_codebase(sources)
+      enriched = BlockImpactAnalyzer.analyze(baseline, sources)
+
+      nodes = enriched["files"]["lib/foo.ex"]["nodes"]
+      types = Enum.uniq(for node <- nodes, do: node["type"])
+
+      assert Enum.any?(["module", "function", "doc"], &(&1 in types)),
+             "expected real classification, got only: #{inspect(types)}"
+    end
+  end
+end
diff --git a/test/codeqa/block_matcher_test.exs b/test/codeqa/block_matcher_test.exs
new file mode 100644
index 00000000..da55a099
--- /dev/null
+++ b/test/codeqa/block_matcher_test.exs
@@ -0,0 +1,37 @@
+defmodule Test.NodeMatcherTest do
+  use ExUnit.Case, async: true
+
+  alias Test.NodeMatcher
+
+  describe "exact/2" do
+    test "returns tagged tuple for :content field" do
+      assert NodeMatcher.exact(:content, "add") == {:exact, :content, "add"}
+    end
+
+    test "returns tagged tuple for :value field" do
+      assert NodeMatcher.exact(:value, "identifier") == {:exact, :value, "identifier"}
+    end
+
+    test "raises FunctionClauseError for unsupported field" do
+      assert_raise FunctionClauseError, fn ->
+        NodeMatcher.exact(:type, "something")
+      end
+    end
+  end
+
+  describe "partial/2" do
+    test "returns tagged tuple for :content field" do
+      assert NodeMatcher.partial(:content, "@doc") == {:partial, :content, "@doc"}
+    end
+
+    test "returns tagged tuple for :value field" do
+      assert NodeMatcher.partial(:value, "doc") == {:partial, :value, "doc"}
+    end
+
+    test "raises FunctionClauseError for unsupported field" do
+      assert_raise FunctionClauseError, fn ->
+        NodeMatcher.partial(:type, "something")
+      end
+    end
+  end
+end
diff --git a/test/codeqa/cli_compare_test.exs b/test/codeqa/cli_compare_test.exs
deleted file mode 100644
index f43578af..00000000
--- a/test/codeqa/cli_compare_test.exs
+++ /dev/null
@@ -1,85 +0,0 @@
-defmodule CodeQA.CLI.CompareTest do
-  use ExUnit.Case, async: true
-
-  @moduletag :tmp_dir
-
-  setup %{tmp_dir: tmp_dir} do
-    # Initialize a git repo with one source file and one non-source file
-    System.cmd("git", ["init"], cd: tmp_dir)
-    System.cmd("git", ["config", "user.email", "test@test.com"], cd: tmp_dir)
-    System.cmd("git", ["config", "user.name", "Test"], cd: tmp_dir)
-
-    File.mkdir_p!(Path.join(tmp_dir, "lib"))
-    File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend")
-    System.cmd("git", ["add", "."], cd: tmp_dir)
-    System.cmd("git", ["commit", "-m", "initial"], cd: tmp_dir)
-
-    %{repo: tmp_dir}
-  end
-
-  describe "compare with github format" do
-    test "file changes section shows actual file count when source files changed", %{repo: repo} do
-      File.write!(Path.join(repo, "lib/app.ex"), """
-      defmodule App do
-        def hello, do: :world
-        def goodbye, do: :world
-      end
-      """)
-
-      System.cmd("git", ["add", "."], cd: repo)
-      System.cmd("git", ["commit", "-m", "update app"], cd: repo)
-
-      stdout =
-        ExUnit.CaptureIO.capture_io(fn ->
-          ExUnit.CaptureIO.capture_io(:stderr, fn ->
-            CodeQA.CLI.main(["compare", repo, "--base-ref", "HEAD~1", "--format", "github"])
-          end)
-        end)
-
-      assert stdout =~ "File changes — 1 modified"
-      refute stdout =~ "File changes — no changes"
-    end
-  end
-
-  describe "compare with no source file changes" do
-    test "exits 0 when only non-source files changed", %{repo: repo} do
-      # Create a branch, change only a .md file (not a source file)
-      File.write!(Path.join(repo, "README.md"), "# Hello")
-      System.cmd("git", ["add", "."], cd: repo)
-      System.cmd("git", ["commit", "-m", "add readme"], cd: repo)
-
-      # compare should succeed (not crash) when no source files changed
-      {base_ref, head_ref} = {"HEAD~1", "HEAD"}
-
-      changes = CodeQA.Git.changed_files(repo, base_ref, head_ref)
-      assert changes == [], "expected no source file changes, got: #{inspect(changes)}"
-
-      # Verify the CLI handles this gracefully by calling main
-      # Capture stderr to verify the message
-      output =
-        ExUnit.CaptureIO.capture_io(:stderr, fn ->
-          CodeQA.CLI.main(["compare", repo, "--base-ref", base_ref, "--changes-only", "--format", "json"])
-        end)
-
-      assert output =~ "No source files changed"
-    end
-
-    test "outputs valid JSON with empty comparison", %{repo: repo} do
-      # Change only a non-source file
-      File.write!(Path.join(repo, "README.md"), "# Hello")
-      System.cmd("git", ["add", "."], cd: repo)
-      System.cmd("git", ["commit", "-m", "add readme"], cd: repo)
-
-      # Capture stdout (the JSON output) to verify it's valid
-      stdout =
-        ExUnit.CaptureIO.capture_io(fn ->
-          ExUnit.CaptureIO.capture_io(:stderr, fn ->
-            CodeQA.CLI.main(["compare", repo, "--base-ref", "HEAD~1", "--changes-only", "--format", "json"])
-          end)
-        end)
-
-      assert {:ok, result} = Jason.decode(stdout)
-      assert result["metadata"]["total_files_compared"] == 0
-    end
-  end
-end
diff --git a/test/codeqa/cli_test.exs b/test/codeqa/cli_test.exs
index 2f2b51f6..9abd9911 100644
--- a/test/codeqa/cli_test.exs
+++ b/test/codeqa/cli_test.exs
@@ -1,11 +1,17 @@
 defmodule CodeQA.CLITest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
 
-  @moduletag :tmp_dir
-
-  setup %{tmp_dir: tmp_dir} do
+  setup do
+    CodeQA.Config.reset()
+    tmp_dir = Path.join(System.tmp_dir!(), "codeqa_test_#{System.unique_integer([:positive])}")
     File.mkdir_p!(Path.join(tmp_dir, "lib"))
     File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend\n")
+
+    on_exit(fn ->
+      CodeQA.Config.reset()
+      File.rm_rf!(tmp_dir)
+    end)
+
     %{dir: tmp_dir}
   end
 
@@ -19,23 +25,20 @@ defmodule CodeQA.CLITest do
         - ignored/**
       """)
 
-      output =
-        ExUnit.CaptureIO.capture_io(:stderr, fn ->
-          CodeQA.CLI.main(["analyze", dir])
-        end)
+      json = CodeQA.CLI.main(["analyze", dir, "--show-files"])
+      report = Jason.decode!(json)
 
-      # The ignored file should not be counted
-      refute output =~ "secret.ex"
-      assert output =~ "Analyzing 1 files"
+      # total_files == 1 proves the ignored file was excluded (setup has exactly 2 files)
+      assert report["metadata"]["total_files"] == 1
+      # file paths confirm secret.ex is absent
+      refute Map.has_key?(report["files"], Path.join(dir, "ignored/secret.ex"))
     end
 
     test "works normally when .codeqa.yml is absent", %{dir: dir} do
-      output =
-        ExUnit.CaptureIO.capture_io(:stderr, fn ->
-          CodeQA.CLI.main(["analyze", dir])
-        end)
+      json = CodeQA.CLI.main(["analyze", dir])
+      report = Jason.decode!(json)
 
-      assert output =~ "Analyzing 1 files"
+      assert report["metadata"]["total_files"] == 1
     end
 
     test "config file and --ignore-paths are merged additively", %{dir: dir} do
@@ -49,13 +52,11 @@ defmodule CodeQA.CLITest do
         - ignored_by_config/**
       """)
 
-      output =
-        ExUnit.CaptureIO.capture_io(:stderr, fn ->
-          CodeQA.CLI.main(["analyze", dir, "--ignore-paths", "ignored_by_flag/**"])
-        end)
+      json = CodeQA.CLI.main(["analyze", dir, "--ignore-paths", "ignored_by_flag/**"])
+      report = Jason.decode!(json)
 
-      # Only lib/app.ex should be analyzed
-      assert output =~ "Analyzing 1 files"
+      # Only lib/app.ex should be analyzed — both ignore sources must apply
+      assert report["metadata"]["total_files"] == 1
     end
   end
 end
diff --git a/test/codeqa/collector_test.exs b/test/codeqa/collector_test.exs
index 0a2a3f5c..f2aeb599 100644
--- a/test/codeqa/collector_test.exs
+++ b/test/codeqa/collector_test.exs
@@ -1,7 +1,12 @@
 defmodule CodeQA.CollectorTest do
-  use ExUnit.Case, async: true
+  use ExUnit.Case, async: false
 
-  alias CodeQA.Collector
+  alias CodeQA.Engine.Collector
+
+  setup do
+    CodeQA.Config.reset()
+    on_exit(&CodeQA.Config.reset/0)
+  end
 
   describe "ignored?/2" do
     test "matches simple wildcard pattern" do
@@ -93,7 +98,7 @@ defmodule CodeQA.CollectorTest do
         %{path: "lib/bar.ex", status: "modified"}
       ]
 
-      result = Collector.reject_ignored(items, ["test/*"], & &1.path)
+      result = Collector.reject_ignored(items, & &1.path, ["test/*"])
 
       assert length(result) == 1
       assert hd(result).path == "lib/bar.ex"
@@ -101,7 +106,32 @@ defmodule CodeQA.CollectorTest do
 
     test "empty patterns returns list unchanged" do
       items = [%{path: "test/foo.ex"}]
-      assert Collector.reject_ignored(items, [], & &1.path) == items
+      assert Collector.reject_ignored(items, & &1.path, []) == items
+    end
+  end
+
+  describe "collect_files/2 respects .gitignore" do
+    setup do
+      tmp_dir =
+        Path.join(System.tmp_dir!(), "codeqa_git_collector_#{System.unique_integer([:positive])}")
+
+      File.mkdir_p!(Path.join(tmp_dir, "lib"))
+      # Register cleanup first so a failing step below cannot leak the directory.
+      on_exit(fn -> File.rm_rf!(tmp_dir) end)
+      # Match on exit status 0 so a broken git setup fails here, not in the test.
+      {_, 0} = System.cmd("git", ["init"], cd: tmp_dir)
+      {_, 0} = System.cmd("git", ["config", "user.email", "test@test.com"], cd: tmp_dir)
+      {_, 0} = System.cmd("git", ["config", "user.name", "Test"], cd: tmp_dir)
+      File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend")
+      File.write!(Path.join(tmp_dir, "lib/generated.ex"), "defmodule Gen do\nend")
+      File.write!(Path.join(tmp_dir, ".gitignore"), "lib/generated.ex\n")
+      %{tmp_dir: tmp_dir}
+    end
+
+    test "excludes files listed in .gitignore", %{tmp_dir: tmp_dir} do
+      files = Collector.collect_files(tmp_dir)
+      assert Map.has_key?(files, "lib/app.ex")
+      refute Map.has_key?(files, "lib/generated.ex")
     end
   end
 
@@ -125,9 +155,21 @@ defmodule CodeQA.CollectorTest do
     end
 
     test "with ignore patterns excludes matching files", %{tmp_dir: tmp_dir} do
-      files = Collector.collect_files(tmp_dir, ignore_patterns: ["test/*"])
+      files = Collector.collect_files(tmp_dir, ["test/*"])
       assert Map.has_key?(files, "lib/app.ex")
       refute Map.has_key?(files, "test/app_test.exs")
     end
+
+    test "respects ignore_paths from .codeqa.yml", %{tmp_dir: tmp_dir} do
+      File.mkdir_p!(Path.join(tmp_dir, "generated"))
+      File.write!(Path.join(tmp_dir, "generated/schema.ex"), "defmodule Schema do\nend")
+      File.write!(Path.join(tmp_dir, ".codeqa.yml"), "ignore_paths:\n  - generated/**\n")
+
+      CodeQA.Config.load(tmp_dir)
+      files = Collector.collect_files(tmp_dir)
+
+      assert Map.has_key?(files, "lib/app.ex")
+      refute Map.has_key?(files, "generated/schema.ex")
+    end
   end
 end
diff --git a/test/codeqa/combined_metrics/file_scorer_test.exs b/test/codeqa/combined_metrics/file_scorer_test.exs
new file mode 100644
index 00000000..55ef9334
--- /dev/null
+++ b/test/codeqa/combined_metrics/file_scorer_test.exs
@@ -0,0 +1,295 @@
+defmodule CodeQA.CombinedMetrics.FileScorerTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.CombinedMetrics.FileScorer
+
+  describe "file_to_aggregate/1" do
+    test "prefixes each key with mean_" do
+      input = %{"halstead" => %{"tokens" => 42.0, "effort" => 100.5}}
+
+      assert FileScorer.file_to_aggregate(input) == %{
+               "halstead" => %{"mean_tokens" => 42.0, "mean_effort" => 100.5}
+             }
+    end
+
+    test "handles multiple groups" do
+      input = %{
+        "halstead" => %{"tokens" => 10.0},
+        "branching" => %{"branching_density" => 0.5}
+      }
+
+      result = FileScorer.file_to_aggregate(input)
+
+      assert result == %{
+               "halstead" => %{"mean_tokens" => 10.0},
+               "branching" => %{"mean_branching_density" => 0.5}
+             }
+    end
+
+    test "returns empty map for empty input" do
+      assert FileScorer.file_to_aggregate(%{}) == %{}
+    end
+
+    test "preserves values unchanged" do
+      input = %{"entropy" => %{"normalized_entropy" => 0.87}}
+      result = FileScorer.file_to_aggregate(input)
+      assert get_in(result, ["entropy", "mean_normalized_entropy"]) == 0.87
+    end
+  end
+
+  describe "worst_files_per_behavior/2" do
+    test "returns a map with string keys in category.behavior format" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map, combined_top: 2)
+
+      assert is_map(result)
+
+      for {key, entries} <- result do
+        assert is_binary(key)
+        assert String.contains?(key, ".")
+        assert is_list(entries)
+      end
+    end
+
+    test "each entry has file and cosine keys" do
+      files_map = build_files_map()
+      scored = FileScorer.worst_files_per_behavior(files_map, combined_top: 2)
+
+      # One flat comprehension visits every entry of every behavior; the key
+      # presence checks run before the dot-access type checks.
+      for {_behavior, entries} <- scored, entry <- entries do
+        assert Map.has_key?(entry, :file)
+        assert Map.has_key?(entry, :cosine)
+        assert is_binary(entry.file)
+        assert is_float(entry.cosine)
+      end
+    end
+
+    test "respects combined_top limit" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map, combined_top: 1)
+
+      for {_key, entries} <- result do
+        assert length(entries) <= 1
+      end
+    end
+
+    test "entries are sorted ascending by cosine (most negative first)" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map, combined_top: 99)
+
+      for {_key, entries} <- result do
+        cosines = Enum.map(entries, & &1.cosine)
+        assert cosines == Enum.sort(cosines)
+      end
+    end
+
+    test "skips files with empty metrics" do
+      files_map = %{
+        "lib/empty.ex" => %{"metrics" => %{}, "lines" => 10},
+        "lib/nokey.ex" => %{"lines" => 5}
+      }
+
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result do
+        file_paths = Enum.map(entries, & &1.file)
+        refute "lib/empty.ex" in file_paths
+        refute "lib/nokey.ex" in file_paths
+      end
+    end
+
+    test "uses default combined_top of 2" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result do
+        assert length(entries) <= 2
+      end
+    end
+
+    test "each entry has top_metrics key" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert Map.has_key?(entry, :top_metrics), "missing :top_metrics in #{inspect(entry)}"
+      end
+    end
+
+    test "top_metrics is a list" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert is_list(entry.top_metrics)
+      end
+    end
+
+    test "top_metrics is [] (not nil) when all contributions are zero" do
+      # Single file with no variation — cosines will be near 0
+      files_map = %{
+        "lib/zero.ex" => %{
+          "metrics" => %{
+            "halstead" => %{"tokens" => 0.0}
+          },
+          "lines" => 1,
+          "bytes" => 5
+        }
+      }
+
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert entry.top_metrics == []
+      end
+    end
+
+    test "each entry has top_nodes key" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert Map.has_key?(entry, :top_nodes), "missing :top_nodes in #{inspect(entry)}"
+      end
+    end
+
+    test "top_nodes is [] when file_data has no nodes key" do
+      files_map = build_files_map()
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert entry.top_nodes == []
+      end
+    end
+
+    test "top_nodes is [] when file_data nodes is nil" do
+      files_map =
+        build_files_map()
+        |> Map.new(fn {path, data} -> {path, Map.put(data, "nodes", nil)} end)
+
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert entry.top_nodes == []
+      end
+    end
+
+    test "top_nodes is [] when file_data nodes is []" do
+      files_map =
+        build_files_map()
+        |> Map.new(fn {path, data} -> {path, Map.put(data, "nodes", [])} end)
+
+      result = FileScorer.worst_files_per_behavior(files_map)
+
+      for {_key, entries} <- result, entry <- entries do
+        assert entry.top_nodes == []
+      end
+    end
+  end
+
+  describe "worst_files_per_behavior/2 language filtering" do
+    test "does not include rust-only behaviors when scoring an elixir file" do
+      metrics = %{"halstead" => %{"tokens" => 100.0, "difficulty" => 5.0}}
+      files_map = %{"lib/foo.ex" => %{"metrics" => metrics}}
+      scored = FileScorer.worst_files_per_behavior(files_map)
+
+      # A behavior offends when its YAML entry declares a non-empty `_languages`
+      # list without "elixir" and it still returned entries for the .ex file.
+      # Categories whose YAML file cannot be read are never counted as offenders.
+      offenders =
+        Enum.filter(scored, fn {key, entries} ->
+          [category, behavior] = String.split(key, ".", parts: 2)
+
+          case YamlElixir.read_from_file("priv/combined_metrics/#{category}.yml") do
+            {:ok, yaml} ->
+              languages = get_in(yaml, [behavior, "_languages"]) || []
+              not (languages == [] or "elixir" in languages or entries == [])
+
+            _unreadable ->
+              false
+          end
+        end)
+
+      assert offenders == []
+    end
+  end
+
+  # Builds a hand-written files_map with realistic metric values for two files,
+  # so the scorer has non-trivial numbers to work with. A small fixed map is
+  # used instead of running the full analyzer to keep the tests fast.
+  defp build_files_map do
+    %{
+      "lib/example_a.ex" => %{
+        "metrics" => %{
+          "halstead" => %{
+            "tokens" => 80.0,
+            "vocabulary" => 30.0,
+            "volume" => 400.0,
+            "difficulty" => 12.0,
+            "effort" => 4800.0,
+            "bugs" => 0.1
+          },
+          "branching" => %{
+            "branching_density" => 0.3
+          },
+          "entropy" => %{
+            "normalized_entropy" => 0.75
+          },
+          "function_metrics" => %{
+            "avg_function_length" => 20.0,
+            "max_function_length" => 40.0,
+            "function_count" => 5.0,
+            "avg_params" => 2.0,
+            "max_params" => 4.0
+          },
+          "readability" => %{
+            "readability_score" => 0.6
+          },
+          "indentation" => %{
+            "avg_indent_level" => 2.0,
+            "max_indent_level" => 4.0,
+            "indent_variance" => 0.5
+          }
+        },
+        "lines" => 100,
+        "bytes" => 2048
+      },
+      "lib/example_b.ex" => %{
+        "metrics" => %{
+          "halstead" => %{
+            "tokens" => 200.0,
+            "vocabulary" => 60.0,
+            "volume" => 1200.0,
+            "difficulty" => 30.0,
+            "effort" => 36_000.0,
+            "bugs" => 0.4
+          },
+          "branching" => %{
+            "branching_density" => 0.7
+          },
+          "entropy" => %{
+            "normalized_entropy" => 0.9
+          },
+          "function_metrics" => %{
+            "avg_function_length" => 50.0,
+            "max_function_length" => 120.0,
+            "function_count" => 15.0,
+            "avg_params" => 4.0,
+            "max_params" => 8.0
+          },
+          "readability" => %{
+            "readability_score" => 0.3
+          },
+          "indentation" => %{
+            "avg_indent_level" => 4.0,
+            "max_indent_level" => 8.0,
+            "indent_variance" => 2.0
+          }
+        },
+        "lines" => 300,
+        "bytes" => 8192
+      }
+    }
+  end
+end
diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs
new file mode 100644
index 00000000..692c306a
--- /dev/null
+++ b/test/codeqa/combined_metrics/sample_runner_test.exs
@@ -0,0 +1,148 @@
+defmodule CodeQA.CombinedMetrics.SampleRunnerTest do
+  use ExUnit.Case
+
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+  alias CodeQA.HealthReport.Grader
+
+  setup_all do
+    results = SampleRunner.run(category: "variable_naming", verbose: true)
+    %{results: results}
+  end
+
+  describe "apply_languages/1" do
+    test "returns one entry per requested category" do
+      stats = SampleRunner.apply_languages(category: "variable_naming")
+      assert length(stats) == 1
+      [entry] = stats
+      assert entry.category == "variable_naming"
+      assert is_integer(entry.behaviors_with_languages)
+    end
+
+    test "writes _languages to behaviors that have samples" do
+      SampleRunner.apply_languages(category: "variable_naming")
+      {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml")
+      langs = get_in(data, ["name_is_generic", "_languages"])
+      assert is_list(langs)
+      assert langs != []
+      assert Enum.all?(langs, &is_binary/1)
+    end
+
+    test "behaviors without sample dirs get no _languages key" do
+      SampleRunner.apply_languages(category: "variable_naming")
+      {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml")
+
+      Enum.each(data, fn {_behavior, groups} ->
+        if is_map(groups) do
+          case Map.get(groups, "_languages") do
+            nil -> :ok
+            langs -> assert is_list(langs) and langs != []
+          end
+        end
+      end)
+    end
+
+    test "only includes languages with both good and bad samples" do
+      # uses code_smells which has single-language behaviors
+      SampleRunner.apply_languages(category: "code_smells")
+      {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/code_smells.yml")
+
+      # no_dead_code_after_return has only .ex samples
+      langs = get_in(data, ["no_dead_code_after_return", "_languages"])
+      assert langs == ["elixir"]
+    end
+  end
+
+  describe "diagnose_aggregate/2 language option" do
+    test "accepts :language option without crashing" do
+      # minimal aggregate — behavior will be scored but most will have no scalars
+      agg = %{}
+      result = SampleRunner.diagnose_aggregate(agg, top: 5, language: "elixir")
+      assert is_list(result)
+    end
+
+    test "accepts :languages option without crashing" do
+      agg = %{}
+      result = SampleRunner.diagnose_aggregate(agg, top: 5, languages: ["elixir", "rust"])
+      assert is_list(result)
+    end
+
+    # NOTE: This test uses `<=` intentionally. Before Task 7 + `mix compile --force`,
+    # all behaviors have empty `_languages` in the compiled cache, so no filtering
+    # occurs and all three counts are equal. The `<=` assertion passes in both
+    # pre- and post-Task-7 states.
+    test "with language option returns subset of unfiltered results" do
+      agg =
+        "priv/combined_metrics/samples/variable_naming/name_is_generic/bad"
+        |> Collector.collect_files()
+        |> Analyzer.analyze_codebase()
+        |> get_in(["codebase", "aggregate"])
+
+      all = SampleRunner.diagnose_aggregate(agg, top: 999)
+      elixir_only = SampleRunner.diagnose_aggregate(agg, top: 999, language: "elixir")
+      rust_only = SampleRunner.diagnose_aggregate(agg, top: 999, language: "rust")
+
+      # Filtered sets are subsets (or equal, pre-Task-7) of unfiltered
+      assert length(elixir_only) <= length(all)
+      assert length(rust_only) <= length(all)
+    end
+  end
+
+  describe "score_aggregate/2 language filtering" do
+    test "accepts :languages option without crashing" do
+      result = SampleRunner.score_aggregate(%{}, languages: ["elixir"])
+      assert is_list(result)
+      assert Enum.all?(result, &Map.has_key?(&1, :behaviors))
+    end
+
+    test "with languages option returns no more behaviors than unfiltered" do
+      agg =
+        "priv/combined_metrics/samples/variable_naming/name_is_generic/bad"
+        |> Collector.collect_files()
+        |> Analyzer.analyze_codebase()
+        |> get_in(["codebase", "aggregate"])
+
+      all_count = SampleRunner.score_aggregate(agg) |> Enum.flat_map(& &1.behaviors) |> length()
+
+      elixir_count =
+        SampleRunner.score_aggregate(agg, languages: ["elixir"])
+        |> Enum.flat_map(& &1.behaviors)
+        |> length()
+
+      # The filtered count may equal the unfiltered one, hence `<=` rather than `<`.
+      assert elixir_count <= all_count
+    end
+  end
+
+  describe "grade_cosine_categories/3" do
+    test "returns a list for empty input" do
+      result = Grader.grade_cosine_categories(%{}, %{})
+      assert is_list(result)
+    end
+  end
+
+  describe "run/1" do
+    test "returns a list of results with required keys", %{results: results} do
+      assert is_list(results)
+      assert results != []
+      result = hd(results)
+      assert Map.has_key?(result, :bad_score)
+      assert Map.has_key?(result, :good_score)
+      assert Map.has_key?(result, :ratio)
+      assert Map.has_key?(result, :direction_ok)
+    end
+
+    test "name_is_generic result has good_score > bad_score", %{results: results} do
+      generic = Enum.find(results, &(&1.behavior == "name_is_generic"))
+      assert generic != nil
+      assert generic.good_score > generic.bad_score
+    end
+
+    test "verbose: true populates metric_detail", %{results: results} do
+      [result | _] = results
+      assert is_list(result.metric_detail)
+      # only populated when behavior has scalars configured
+    end
+  end
+end
diff --git a/test/codeqa/combined_metrics/scorer_test.exs b/test/codeqa/combined_metrics/scorer_test.exs
new file mode 100644
index 00000000..42713dd6
--- /dev/null
+++ b/test/codeqa/combined_metrics/scorer_test.exs
@@ -0,0 +1,31 @@
+defmodule CodeQA.CombinedMetrics.ScorerTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.CombinedMetrics.Scorer
+
+  # Metric group names this suite expects the scorer to report as referenced.
+  @expected_groups ["halstead", "ngram", "entropy", "branching", "readability"]
+
+  describe "referenced_file_metric_names/0" do
+    test "returns a MapSet" do
+      assert is_struct(Scorer.referenced_file_metric_names(), MapSet)
+    end
+
+    test "contains heavy hitters that obviously appear in YAMLs" do
+      referenced = Scorer.referenced_file_metric_names()
+
+      Enum.each(@expected_groups, fn group ->
+        assert MapSet.member?(referenced, group),
+               "expected #{group} in referenced file metric names"
+      end)
+    end
+
+    test "excludes meta keys (anything starting with _)" do
+      # Every member is checked; the failure message names the offending key.
+      Enum.each(Scorer.referenced_file_metric_names(), fn metric ->
+        refute String.starts_with?(metric, "_"),
+               "meta key leaked into referenced metrics: #{inspect(metric)}"
+      end)
+    end
+  end
+end
diff --git a/test/codeqa/config_test.exs b/test/codeqa/config_test.exs
new file mode 100644
index 00000000..fced9036
--- /dev/null
+++ b/test/codeqa/config_test.exs
@@ -0,0 +1,118 @@
+defmodule CodeQA.ConfigTest do
+  use ExUnit.Case, async: false
+
+  alias CodeQA.Config
+
+  # The caching tests below expect load/1 to keep the first configuration it
+  # reads until reset/0 is called, so every test starts from, and leaves behind,
+  # a cleared cache.
+  setup do
+    Config.reset()
+    on_exit(&Config.reset/0)
+  end
+
+  describe "load/1 and accessors" do
+    test "returns defaults when no .codeqa.yml exists" do
+      # Use a fresh, empty directory: the shared system tmp dir could already
+      # hold a stray .codeqa.yml, which would make the defaults unobservable.
+      Config.load(empty_tmp_dir())
+
+      assert Config.ignore_paths() == []
+      assert Config.combined_top() == 2
+      assert Config.cosine_significance_threshold() == 0.15
+      assert Config.near_duplicate_blocks_opts() == []
+      assert is_map(Config.impact_map())
+      assert Map.get(Config.impact_map(), "complexity") == 5
+    end
+
+    test "loads ignore_paths from .codeqa.yml" do
+      dir =
+        tmp_dir_with_config("""
+        ignore_paths:
+          - priv/**
+          - docs/**
+        """)
+
+      Config.load(dir)
+
+      assert Config.ignore_paths() == ["priv/**", "docs/**"]
+    end
+
+    test "loads impact overrides" do
+      dir =
+        tmp_dir_with_config("""
+        impact:
+          complexity: 10
+          documentation: 3
+        """)
+
+      Config.load(dir)
+
+      assert Config.impact_map()["complexity"] == 10
+      assert Config.impact_map()["documentation"] == 3
+      assert Config.impact_map()["function_design"] == 4
+    end
+
+    test "loads combined_top" do
+      dir = tmp_dir_with_config("combined_top: 5\n")
+      Config.load(dir)
+      assert Config.combined_top() == 5
+    end
+
+    test "loads cosine_significance_threshold" do
+      dir = tmp_dir_with_config("cosine_significance_threshold: 0.25\n")
+      Config.load(dir)
+      assert Config.cosine_significance_threshold() == 0.25
+    end
+
+    test "loads near_duplicate_blocks opts" do
+      dir =
+        tmp_dir_with_config("""
+        near_duplicate_blocks:
+          max_pairs_per_bucket: 25
+        """)
+
+      Config.load(dir)
+
+      assert Config.near_duplicate_blocks_opts() == [max_pairs_per_bucket: 25]
+    end
+
+    test "caches: second load/1 call is a no-op" do
+      dir1 = tmp_dir_with_config("combined_top: 7\n")
+      dir2 = tmp_dir_with_config("combined_top: 3\n")
+
+      Config.load(dir1)
+      Config.load(dir2)
+
+      assert Config.combined_top() == 7
+    end
+
+    test "reset/0 clears cache so load/1 works again" do
+      dir1 = tmp_dir_with_config("combined_top: 7\n")
+      dir2 = tmp_dir_with_config("combined_top: 3\n")
+
+      Config.load(dir1)
+      Config.reset()
+      Config.load(dir2)
+
+      assert Config.combined_top() == 3
+    end
+  end
+
+  # Creates a uniquely named scratch directory and registers its removal for
+  # when the calling test exits, so runs do not pile up directories in tmp.
+  defp empty_tmp_dir do
+    dir = Path.join(System.tmp_dir!(), "codeqa_config_test_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(dir)
+    on_exit(fn -> File.rm_rf!(dir) end)
+    dir
+  end
+
+  # Like empty_tmp_dir/0, but the directory also contains a .codeqa.yml with
+  # `yaml` as its content. Returns the directory path.
+  defp tmp_dir_with_config(yaml) do
+    dir = empty_tmp_dir()
+    File.write!(Path.join(dir, ".codeqa.yml"), yaml)
+    dir
+  end
+end
diff --git a/test/codeqa/diagnostics_test.exs b/test/codeqa/diagnostics_test.exs
new file mode 100644
index 00000000..4e5db617
--- /dev/null
+++ b/test/codeqa/diagnostics_test.exs
@@ -0,0 +1,48 @@
+defmodule CodeQA.DiagnosticsTest do
+  use ExUnit.Case, async: true
+
+  @small_path Path.expand("../../lib/codeqa/health_report/formatter", __DIR__)
+
+  describe "run/1 aggregate mode" do
+    test "plain format output structure" do
+      output = CodeQA.Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :plain)
+
+      assert output =~ "## Diagnose: aggregate"
+      assert output =~ "| Behavior | Cosine | Score |"
+      assert output =~ "###"
+    end
+
+    test "json format returns valid JSON with issues and categories keys" do
+      output = CodeQA.Diagnostics.run(path: @small_path, mode: :aggregate, top: 5, format: :json)
+
+      decoded = Jason.decode!(output)
+      assert Map.has_key?(decoded, "issues")
+      assert Map.has_key?(decoded, "categories")
+    end
+  end
+
+  describe "run/1 per-file mode" do
+    @tag timeout: 120_000
+    test "runs without error on a small directory" do
+      output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain)
+
+      assert output =~ "## Diagnose: per-file"
+    end
+
+    @tag timeout: 120_000
+    test "output contains per-file table header" do
+      output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :plain)
+
+      assert output =~ "| File | Behavior | Cosine | Score |"
+    end
+
+    @tag timeout: 120_000
+    test "json format returns valid JSON with files key" do
+      output = CodeQA.Diagnostics.run(path: @small_path, mode: :per_file, top: 3, format: :json)
+
+      decoded = Jason.decode!(output)
+      assert Map.has_key?(decoded, "files")
+      assert is_list(decoded["files"])
+    end
+  end
+end
diff --git a/test/codeqa/engine/analyzer_test.exs b/test/codeqa/engine/analyzer_test.exs
new file mode 100644
index 00000000..38886c6c
--- /dev/null
+++ b/test/codeqa/engine/analyzer_test.exs
@@ -0,0 +1,99 @@
+defmodule CodeQA.Engine.AnalyzerTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Engine.Analyzer
+
+  describe "analyze_file/2" do
+    test "returns a metrics map with group keys" do
+      content = "defmodule Foo do\n  def bar, do: :ok\nend\n"
+      result = Analyzer.analyze_file("lib/foo.ex", content)
+      assert is_map(result)
+      assert map_size(result) > 0
+      # Each value should be a map of metric keys to numbers
+      Enum.each(result, fn {_group, keys} ->
+        assert is_map(keys)
+      end)
+    end
+  end
+
+  describe "analyze_codebase_aggregate/2" do
+    test "returns aggregate map with mean_ keys" do
+      files = %{
+        "lib/a.ex" => "defmodule A do\n  def foo, do: :a\nend\n",
+        "lib/b.ex" => "defmodule B do\n  def bar, do: :b\nend\n"
+      }
+
+      agg = Analyzer.analyze_codebase_aggregate(files)
+      assert is_map(agg)
+      # Every key must carry one of the aggregate prefixes: mean_, std_, min_, max_
+      Enum.each(agg, fn {_group, keys} ->
+        Enum.each(keys, fn {key, val} ->
+          assert String.starts_with?(key, "mean_") or String.starts_with?(key, "std_") or
+                   String.starts_with?(key, "min_") or String.starts_with?(key, "max_")
+
+          assert is_float(val) or is_integer(val)
+        end)
+      end)
+    end
+
+    test "does not run codebase metrics (returns quickly for large input)" do
+      # Just assert it returns without error for a reasonable input
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: 1\nend\n"}
+      agg = Analyzer.analyze_codebase_aggregate(files)
+      assert is_map(agg)
+    end
+  end
+
+  describe "analyze_file_for_loo_partial/3" do
+    @sample """
+    defmodule Foo do
+      def bar do
+        x = 1
+        y = 2
+        x + y
+      end
+    end
+    """
+
+    test "result matches analyze_file_for_loo/2 for referenced metrics" do
+      baseline = Analyzer.analyze_file_for_loo("lib/foo.ex", @sample)
+      partial = Analyzer.analyze_file_for_loo_partial("lib/foo.ex", @sample, baseline)
+      referenced = CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names()
+
+      for name <- referenced, Map.has_key?(baseline, name) do
+        assert Map.get(partial, name) == Map.get(baseline, name),
+               "referenced metric #{name} diverges in partial"
+      end
+    end
+
+    test "non-referenced metrics are inherited verbatim from baseline" do
+      baseline = Analyzer.analyze_file_for_loo("lib/foo.ex", @sample)
+      referenced = CodeQA.CombinedMetrics.Scorer.referenced_file_metric_names()
+      sentinel = %{"sentinel_key" => 99.0}
+
+      # Keep referenced metrics as computed and overwrite every other metric
+      # with the sentinel, so an inherited value can be told apart from a
+      # recomputed one in the assertions below.
+      tampered_baseline =
+        Map.new(baseline, fn {name, value} ->
+          if name in referenced, do: {name, value}, else: {name, sentinel}
+        end)
+
+      partial =
+        Analyzer.analyze_file_for_loo_partial("lib/foo.ex", @sample, tampered_baseline)
+
+      # Only metrics outside the referenced set must still carry the sentinel.
+      for {name, value} <- partial, name not in referenced do
+        assert value == sentinel,
+               "non-referenced metric #{name} was recomputed instead of inherited"
+      end
+    end
+
+    test "result has same set of metric names as analyze_file_for_loo/2" do
+      baseline = Analyzer.analyze_file_for_loo("lib/foo.ex", @sample)
+      partial = Analyzer.analyze_file_for_loo_partial("lib/foo.ex", @sample, baseline)
+
+      assert MapSet.new(Map.keys(partial)) == MapSet.new(Map.keys(baseline))
+    end
+  end
+end
diff --git a/test/codeqa/formatter_test.exs b/test/codeqa/formatter_test.exs
deleted file mode 100644
index ccefca53..00000000
--- a/test/codeqa/formatter_test.exs
+++ /dev/null
@@ -1,149 +0,0 @@
-defmodule CodeQA.FormatterTest do
-  use ExUnit.Case, async: true
-
-  alias CodeQA.Formatter
-
-  @sample_comparison %{
-    "metadata" => %{
-      "total_files_compared" => 1,
-      "summary" => "1 modified",
-      "base_ref" => "abc123",
-      "head_ref" => "HEAD"
-    },
-    "files" => %{
-      "lib/foo.ex" => %{
-        "status" => "modified",
-        "base" => %{
-          "metrics" => %{"halstead" => %{"volume" => 1000.0}},
-          "lines" => 100,
-          "bytes" => 3000
-        },
-        "head" => %{
-          "metrics" => %{"halstead" => %{"volume" => 800.0}},
-          "lines" => 95,
-          "bytes" => 2800
-        },
-        "delta" => %{
-          "metrics" => %{"halstead" => %{"volume" => -200.0}},
-          "lines" => -5,
-          "bytes" => -200
-        }
-      }
-    },
-    "codebase" => %{
-      "base" => %{
-        "aggregate" => %{
-          "readability" => %{
-            "mean_flesch_adapted" => 65.0,
-            "mean_fog_adapted" => 8.0,
-            "mean_avg_tokens_per_line" => 7.0,
-            "mean_avg_line_length" => 45.0
-          },
-          "halstead" => %{
-            "mean_difficulty" => 15.0,
-            "mean_effort" => 8000.0,
-            "mean_volume" => 500.0,
-            "mean_estimated_bugs" => 0.2
-          }
-        }
-      },
-      "head" => %{
-        "aggregate" => %{
-          "readability" => %{
-            "mean_flesch_adapted" => 75.0,
-            "mean_fog_adapted" => 7.0,
-            "mean_avg_tokens_per_line" => 6.0,
-            "mean_avg_line_length" => 42.0
-          },
-          "halstead" => %{
-            "mean_difficulty" => 12.0,
-            "mean_effort" => 6000.0,
-            "mean_volume" => 400.0,
-            "mean_estimated_bugs" => 0.15
-          }
-        }
-      },
-      "delta" => %{
-        "aggregate" => %{
-          "readability" => %{
-            "mean_flesch_adapted" => 10.0,
-            "mean_fog_adapted" => -1.0,
-            "mean_avg_tokens_per_line" => -1.0,
-            "mean_avg_line_length" => -3.0
-          },
-          "halstead" => %{
-            "mean_difficulty" => -3.0,
-            "mean_effort" => -2000.0,
-            "mean_volume" => -100.0,
-            "mean_estimated_bugs" => -0.05
-          }
-        }
-      }
-    }
-  }
-
-  describe "format_github/1" do
-    test "includes mermaid chart of head scores" do
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "```mermaid"
-      assert result =~ "xychart-beta"
-      assert result =~ "bar ["
-    end
-
-    test "includes progress bars with base → head" do
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "→"
-    end
-
-    test "includes grade emoji" do
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "🟢" or result =~ "🟡" or result =~ "🟠" or result =~ "🔴"
-    end
-
-    test "wraps file details in collapsible section" do
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "<details>"
-      assert result =~ "</details>"
-    end
-
-    test "shows no changes message when zero files compared" do
-      comparison = put_in(@sample_comparison, ["metadata", "total_files_compared"], 0)
-      result = Formatter.format_github(comparison)
-      assert result =~ "No file changes detected"
-    end
-
-    test "shows 🟢 in aggregate delta for improving high-is-better metric" do
-      # flesch_adapted is good: :high, delta +10.0 → improvement
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "🟢 +10.00"
-    end
-
-    test "file changes section shows actual file counts, not 'no changes'" do
-      result = Formatter.format_github(@sample_comparison)
-      assert result =~ "File changes — 1 modified"
-      refute result =~ "File changes — no changes"
-    end
-
-    test "file changes section reflects metric directions from codebase data" do
-      result = Formatter.format_github(@sample_comparison)
-      # halstead.mean_volume drops 100/500 = 20% → "decreased"; readability rises 10/65 ≈ 15% → "increased slightly"
-      refute result =~ "File changes — 1 modified — all metrics stable"
-    end
-
-    test "shows 🔴 in aggregate delta for worsening low-is-better metric" do
-      # halstead.volume is good: :low, delta +300 → regression
-      worsening =
-        put_in(
-          @sample_comparison,
-          ["codebase", "head", "aggregate", "halstead", "mean_volume"],
-          800.0
-        )
-        |> put_in(["codebase", "delta", "aggregate"], %{
-          "halstead" => %{"mean_volume" => 300.0}
-        })
-
-      result = Formatter.format_github(worsening)
-      assert result =~ "🔴 +300.00"
-    end
-  end
-end
diff --git a/test/codeqa/git_test.exs b/test/codeqa/git_test.exs
new file mode 100644
index 00000000..f1a800cb
--- /dev/null
+++ b/test/codeqa/git_test.exs
@@ -0,0 +1,304 @@
+defmodule CodeQA.GitTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Git
+
+  describe "gitignored_files/2" do
+    test "returns files that are gitignored" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, ".gitignore"), "*.secret\n")
+        File.write!(Path.join(repo, "config.secret"), "password=123")
+        File.write!(Path.join(repo, "app.ex"), "defmodule App do end")
+
+        ignored = Git.gitignored_files(repo, ["config.secret", "app.ex"])
+
+        assert ignored == MapSet.new(["config.secret"])
+      end)
+    end
+
+    test "returns empty set when no files are gitignored" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, ".gitignore"), "*.secret\n")
+        File.write!(Path.join(repo, "app.ex"), "defmodule App do end")
+
+        ignored = Git.gitignored_files(repo, ["app.ex"])
+
+        assert ignored == MapSet.new()
+      end)
+    end
+
+    test "handles empty file list" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, ".gitignore"), "*.secret\n")
+
+        ignored = Git.gitignored_files(repo, [])
+
+        assert ignored == MapSet.new()
+      end)
+    end
+
+    test "respects nested .gitignore files" do
+      in_tmp_git_repo(fn repo ->
+        File.mkdir_p!(Path.join(repo, "subdir"))
+        File.write!(Path.join(repo, "subdir/.gitignore"), "local.ex\n")
+        File.write!(Path.join(repo, "subdir/local.ex"), "# local")
+        File.write!(Path.join(repo, "subdir/other.ex"), "# other")
+
+        ignored = Git.gitignored_files(repo, ["subdir/local.ex", "subdir/other.ex"])
+
+        assert ignored == MapSet.new(["subdir/local.ex"])
+      end)
+    end
+
+    test "handles more than 1000 paths without ARG_MAX issues" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, ".gitignore"), "ignored.ex\n")
+
+        paths = Enum.map(1..1200, fn i -> "file_#{i}.ex" end) ++ ["ignored.ex"]
+
+        ignored = Git.gitignored_files(repo, paths)
+
+        assert ignored == MapSet.new(["ignored.ex"])
+      end)
+    end
+
+    test "filters files inside a gitignored directory" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, ".gitignore"), "/docs/\n")
+
+        ignored =
+          Git.gitignored_files(repo, [
+            "docs/readme.md",
+            "docs/guide/intro.md",
+            "lib/app.ex"
+          ])
+
+        assert ignored == MapSet.new(["docs/readme.md", "docs/guide/intro.md"])
+      end)
+    end
+
+    test "filters gitignored-pattern files even when already tracked by git" do
+      in_tmp_git_repo(fn repo ->
+        File.mkdir_p!(Path.join(repo, "docs"))
+        File.mkdir_p!(Path.join(repo, "lib"))
+        File.write!(Path.join(repo, "docs/readme.md"), "# Docs")
+        File.write!(Path.join(repo, "lib/app.ex"), "defmodule App do end")
+
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        File.write!(Path.join(repo, ".gitignore"), "/docs/\n")
+
+        ignored = Git.gitignored_files(repo, ["docs/readme.md", "lib/app.ex"])
+
+        assert ignored == MapSet.new(["docs/readme.md"])
+      end)
+    end
+  end
+
+  describe "diff_line_ranges/3" do
+    test "parses single-line hunks" do
+      in_tmp_git_repo(fn repo ->
+        # Create initial commit
+        File.write!(Path.join(repo, "foo.ex"), "line1\nline2\nline3\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Modify a single line
+        File.write!(Path.join(repo, "foo.ex"), "line1\nmodified\nline3\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "change"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert Map.has_key?(ranges, "foo.ex")
+        assert {2, 2} in ranges["foo.ex"]
+      end)
+    end
+
+    test "parses multi-line hunks" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "foo.ex"), "a\nb\nc\nd\ne\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Replace lines 2-4
+        File.write!(Path.join(repo, "foo.ex"), "a\nX\nY\nZ\ne\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "change"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert Map.has_key?(ranges, "foo.ex")
+        assert {2, 4} in ranges["foo.ex"]
+      end)
+    end
+
+    test "handles multiple hunks in same file" do
+      in_tmp_git_repo(fn repo ->
+        lines = Enum.map_join(1..20, "\n", &"line#{&1}")
+        File.write!(Path.join(repo, "foo.ex"), lines <> "\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Change line 2 and line 15
+        new_lines =
+          Enum.map_join(1..20, "\n", fn
+            2 -> "changed2"
+            15 -> "changed15"
+            n -> "line#{n}"
+          end)
+
+        File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "change"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert Map.has_key?(ranges, "foo.ex")
+        assert length(ranges["foo.ex"]) == 2
+        assert {2, 2} in ranges["foo.ex"]
+        assert {15, 15} in ranges["foo.ex"]
+      end)
+    end
+
+    test "handles multiple files" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "a.ex"), "a1\na2\n")
+        File.write!(Path.join(repo, "b.ex"), "b1\nb2\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        File.write!(Path.join(repo, "a.ex"), "a1\nchanged\n")
+        File.write!(Path.join(repo, "b.ex"), "b1\nchanged\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "change"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert {2, 2} in ranges["a.ex"]
+        assert {2, 2} in ranges["b.ex"]
+      end)
+    end
+
+    test "handles added lines (insertion)" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "foo.ex"), "a\nb\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Insert new line between a and b
+        File.write!(Path.join(repo, "foo.ex"), "a\nnew\nb\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "insert"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert Map.has_key?(ranges, "foo.ex")
+        # Line 2 is the new line
+        assert {2, 2} in ranges["foo.ex"]
+      end)
+    end
+
+    test "handles deleted lines (no new lines)" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "foo.ex"), "a\nb\nc\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Delete line b
+        File.write!(Path.join(repo, "foo.ex"), "a\nc\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "delete"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        # A pure deletion adds no lines on the new side, so the file must be
+        # either absent from the map or mapped to an empty list.
+        assert Map.get(ranges, "foo.ex", []) == []
+      end)
+    end
+
+    test "returns empty map when no diff" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "foo.ex"), "content\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD", "HEAD")
+
+        assert ranges == %{}
+      end)
+    end
+
+    test "handles new file (no base version)" do
+      in_tmp_git_repo(fn repo ->
+        File.write!(Path.join(repo, "existing.ex"), "existing\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        File.write!(Path.join(repo, "new.ex"), "line1\nline2\nline3\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "add new file"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        assert Map.has_key?(ranges, "new.ex")
+        assert {1, 3} in ranges["new.ex"]
+      end)
+    end
+
+    test "returns ranges in ascending order" do
+      in_tmp_git_repo(fn repo ->
+        lines = Enum.map_join(1..20, "\n", &"line#{&1}")
+        File.write!(Path.join(repo, "foo.ex"), lines <> "\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "initial"])
+
+        # Change lines 2, 10, and 18
+        new_lines =
+          Enum.map_join(1..20, "\n", fn
+            2 -> "changed2"
+            10 -> "changed10"
+            18 -> "changed18"
+            n -> "line#{n}"
+          end)
+
+        File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n")
+        git!(repo, ["add", "."])
+        git!(repo, ["commit", "-m", "change"])
+
+        {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD")
+
+        # Ranges should be in ascending order by start line
+        assert ranges["foo.ex"] == [{2, 2}, {10, 10}, {18, 18}]
+      end)
+    end
+  end
+
+  # Runs `fun` with the path of a freshly initialised git repository and
+  # removes the directory afterwards, even when setup or `fun` raises.
+  defp in_tmp_git_repo(fun) do
+    # OS pid + VM-unique integer keeps concurrent test runs from sharing a directory.
+    dir_name = "codeqa_git_test_#{System.pid()}_#{System.unique_integer([:positive])}"
+    tmp = Path.join(System.tmp_dir!(), dir_name)
+    File.mkdir_p!(tmp)
+
+    try do
+      git!(tmp, ["init"])
+      git!(tmp, ["config", "user.email", "test@test.com"])
+      git!(tmp, ["config", "user.name", "Test"])
+      fun.(tmp)
+    after
+      File.rm_rf!(tmp)
+    end
+  end
+
+  # Runs a git command inside `repo` and returns its combined output.
+  # Fails the test with a MatchError when git exits non-zero, so a broken
+  # fixture surfaces at the failing command rather than in a later assertion.
+  defp git!(repo, args) do
+    {output, 0} = System.cmd("git", args, cd: repo, stderr_to_stdout: true)
+    output
+  end
+end
diff --git a/test/codeqa/health_report/behavior_labels_test.exs b/test/codeqa/health_report/behavior_labels_test.exs
new file mode 100644
index 00000000..0992488e
--- /dev/null
+++ b/test/codeqa/health_report/behavior_labels_test.exs
@@ -0,0 +1,50 @@
+defmodule CodeQA.HealthReport.BehaviorLabelsTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.HealthReport.BehaviorLabels
+
+  describe "label/2" do
+    test "returns human-readable label for known behavior" do
+      label = BehaviorLabels.label("function_design", "no_boolean_parameter")
+
+      assert label == "Boolean parameter increases coupling"
+    end
+
+    test "falls back to humanized behavior name for unknown" do
+      label = BehaviorLabels.label("unknown_cat", "some_weird_behavior")
+
+      assert label == "Some Weird Behavior"
+    end
+
+    test "labels the report's most common false-positive behaviors" do
+      # {category, behavior, expected label}
+      expected_labels = [
+        {"code_smells", "no_debug_print_statements", "Debug print left in code"},
+        {"scope_and_assignment", "used_only_once", "Variable used only once"},
+        {"consistency", "consistent_error_return_shape", "Mixed error-return shapes"}
+      ]
+
+      for {category, behavior, expected} <- expected_labels do
+        assert BehaviorLabels.label(category, behavior) == expected
+      end
+    end
+  end
+
+  describe "action/2" do
+    test "returns action for known behavior" do
+      action = BehaviorLabels.action("function_design", "no_boolean_parameter")
+
+      assert is_binary(action)
+    end
+
+    test "falls back to fix_hint from YAML when no hardcoded action" do
+      action = BehaviorLabels.action("naming_conventions", "filename_matches_module")
+
+      assert is_binary(action)
+    end
+
+    test "returns generic action for completely unknown behavior" do
+      assert BehaviorLabels.action("unknown", "unknown") == "Review this code block"
+    end
+  end
+end
diff --git a/test/codeqa/health_report/categories_test.exs b/test/codeqa/health_report/categories_test.exs
new file mode 100644
index 00000000..0912d46b
--- /dev/null
+++ b/test/codeqa/health_report/categories_test.exs
@@ -0,0 +1,53 @@
+defmodule CodeQA.HealthReport.CategoriesTest do
+  use ExUnit.Case
+
+  alias CodeQA.HealthReport.Categories
+
+  describe "defaults/0" do
+    test "all metrics have fix_hint field" do
+      for metric <- all_default_metrics() do
+        assert Map.has_key?(metric, :fix_hint),
+               "Metric #{metric.name} missing :fix_hint field"
+
+        assert is_binary(metric.fix_hint),
+               "Metric #{metric.name} :fix_hint must be a string"
+
+        assert metric.fix_hint != "",
+               "Metric #{metric.name} :fix_hint cannot be empty"
+      end
+    end
+
+    test "all categories have expected keys" do
+      for category <- Categories.defaults(), key <- [:key, :name, :metrics] do
+        assert Map.has_key?(category, key)
+      end
+    end
+
+    test "all metrics have required threshold keys" do
+      required_keys = [:name, :source, :weight, :good, :thresholds, :fix_hint]
+
+      for metric <- all_default_metrics(), key <- required_keys do
+        assert Map.has_key?(metric, key)
+      end
+    end
+
+    test "fix_hint is accessible via Map.get" do
+      for metric <- all_default_metrics() do
+        hint = Map.get(metric, :fix_hint)
+
+        assert is_binary(hint)
+        assert hint != ""
+      end
+    end
+
+    test "has exactly 24 metrics across 6 categories" do
+      assert length(Categories.defaults()) == 6
+      assert length(all_default_metrics()) == 24
+    end
+  end
+
+  # Collects the metrics of every default category into one flat list.
+  defp all_default_metrics do
+    Enum.flat_map(Categories.defaults(), & &1.metrics)
+  end
+end
diff --git a/test/codeqa/health_report/config_test.exs b/test/codeqa/health_report/config_test.exs
new file mode 100644
index 00000000..da4d6a12
--- /dev/null
+++ b/test/codeqa/health_report/config_test.exs
@@ -0,0 +1,113 @@
+defmodule CodeQA.HealthReport.ConfigTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.HealthReport.Config
+
+  @default_impact %{
+    "complexity" => 5,
+    "file_structure" => 4,
+    "function_design" => 4,
+    "code_smells" => 3,
+    "naming_conventions" => 2,
+    "error_handling" => 2,
+    "consistency" => 2,
+    "documentation" => 1,
+    "testing" => 1
+  }
+
+  describe "load/1 with nil" do
+    test "returns default impact map" do
+      assert Config.load(nil).impact_map == @default_impact
+    end
+
+    test "returns combined_top of 2" do
+      assert Config.load(nil).combined_top == 2
+    end
+
+    test "returns categories and grade_scale" do
+      config = Config.load(nil)
+
+      assert is_list(config.categories)
+      assert is_list(config.grade_scale)
+    end
+  end
+
+  describe "load/1 with YAML path" do
+    test "user impact values override defaults, defaults fill gaps" do
+      config =
+        """
+        impact:
+          complexity: 10
+          testing: 9
+        """
+        |> write_temp_yaml()
+        |> Config.load()
+
+      assert config.impact_map["complexity"] == 10
+      assert config.impact_map["testing"] == 9
+
+      # Every key the YAML leaves out keeps its default weight.
+      for key <- Map.keys(@default_impact) -- ["complexity", "testing"] do
+        assert config.impact_map[key] == @default_impact[key]
+      end
+    end
+
+    test "reads combined_top from YAML" do
+      config =
+        """
+        combined_top: 5
+        """
+        |> write_temp_yaml()
+        |> Config.load()
+
+      assert config.combined_top == 5
+    end
+
+    test "defaults to combined_top: 2 when absent from YAML" do
+      config =
+        """
+        categories: {}
+        """
+        |> write_temp_yaml()
+        |> Config.load()
+
+      assert config.combined_top == 2
+    end
+
+    test "defaults to full default impact map when impact absent from YAML" do
+      config =
+        """
+        categories: {}
+        """
+        |> write_temp_yaml()
+        |> Config.load()
+
+      assert config.impact_map == @default_impact
+    end
+
+    test "returns categories and grade_scale alongside impact fields" do
+      config =
+        """
+        impact:
+          complexity: 5
+        combined_top: 3
+        """
+        |> write_temp_yaml()
+        |> Config.load()
+
+      assert is_list(config.categories)
+      assert is_list(config.grade_scale)
+      assert is_map(config.impact_map)
+      assert is_integer(config.combined_top)
+    end
+  end
+
+  # Writes `content` to a uniquely named YAML file in the system temp directory,
+  # registers its removal for when the calling test exits, and returns the path.
+  defp write_temp_yaml(content) do
+    path = Path.join(System.tmp_dir!(), "test_config_#{System.unique_integer()}.yml")
+    File.write!(path, content)
+    on_exit(fn -> File.rm(path) end)
+    path
+  end
+end
diff --git a/test/codeqa/health_report/delta_test.exs b/test/codeqa/health_report/delta_test.exs
new file mode 100644
index 00000000..6932e0c0
--- /dev/null
+++ b/test/codeqa/health_report/delta_test.exs
@@ -0,0 +1,61 @@
+defmodule CodeQA.HealthReport.DeltaTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.HealthReport.Delta
+
+  # Wraps an aggregate map in the "codebase" => "aggregate" nesting that
+  # these tests hand to Delta.compute/2.
+  defp results_with(aggregate), do: %{"codebase" => %{"aggregate" => aggregate}}
+
+  test "returns base, head, and delta aggregates" do
+    base_aggregate = %{"entropy" => %{"mean_value" => 5.0}}
+    head_aggregate = %{"entropy" => %{"mean_value" => 6.0}}
+
+    comparison = Delta.compute(results_with(base_aggregate), results_with(head_aggregate))
+
+    assert comparison.base.aggregate == base_aggregate
+    assert comparison.head.aggregate == head_aggregate
+    assert comparison.delta.aggregate == %{"entropy" => %{"mean_value" => 1.0}}
+  end
+
+  test "rounds delta to 4 decimal places" do
+    comparison =
+      Delta.compute(
+        results_with(%{"entropy" => %{"mean_value" => 1.0}}),
+        results_with(%{"entropy" => %{"mean_value" => 4.3333}})
+      )
+
+    assert comparison.delta.aggregate["entropy"]["mean_value"] == 3.3333
+  end
+
+  test "handles missing base codebase gracefully" do
+    head = results_with(%{"entropy" => %{"mean_value" => 6.0}})
+
+    assert Delta.compute(%{}, head).delta.aggregate == %{}
+  end
+
+  test "handles missing head codebase gracefully" do
+    base = results_with(%{"entropy" => %{"mean_value" => 5.0}})
+
+    assert Delta.compute(base, %{}).delta.aggregate == %{}
+  end
+
+  test "skips non-numeric metric keys" do
+    base = results_with(%{"entropy" => %{"mean_value" => 5.0, "label" => "x"}})
+    head = results_with(%{"entropy" => %{"mean_value" => 6.0, "label" => "y"}})
+
+    entropy_delta = Delta.compute(base, head).delta.aggregate["entropy"]
+
+    refute Map.has_key?(entropy_delta, "label")
+    assert entropy_delta["mean_value"] == 1.0
+  end
+
+  test "metric key present only in head produces no delta entry" do
+    base = results_with(%{"entropy" => %{"mean_value" => 5.0}})
+    head = results_with(%{"entropy" => %{"mean_value" => 6.0, "new_metric" => 3.0}})
+
+    entropy_delta = Delta.compute(base, head).delta.aggregate["entropy"]
+
+    refute Map.has_key?(entropy_delta, "new_metric")
+  end
+end
diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs
index 1fddec3f..8518d945 100644
--- a/test/codeqa/health_report/formatter_test.exs
+++ b/test/codeqa/health_report/formatter_test.exs
@@ -9,24 +9,49 @@ defmodule CodeQA.HealthReport.FormatterTest do
     overall_grade: "B+",
     categories: [
       %{
+        type: :threshold,
         name: "Readability",
         key: :readability,
         score: 100,
         grade: "A",
+        impact: 3,
         summary: "Excellent",
         metric_scores: [
-          %{name: "flesch_adapted", source: "readability", weight: 0.4, good: :high, value: 102.5, score: 100}
+          %{
+            name: "flesch_adapted",
+            source: "readability",
+            weight: 0.4,
+            good: :high,
+            value: 102.5,
+            score: 100
+          }
         ],
         worst_offenders: [
-          %{path: "lib/foo.ex", score: 75, grade: "B+", lines: 120, bytes: 3840,
-            metric_scores: [%{name: "flesch_adapted", source: "readability", good: :high, value: 65.0, score: 75}]}
+          %{
+            path: "lib/foo.ex",
+            score: 75,
+            grade: "B+",
+            lines: 120,
+            bytes: 3840,
+            metric_scores: [
+              %{
+                name: "flesch_adapted",
+                source: "readability",
+                good: :high,
+                value: 65.0,
+                score: 75
+              }
+            ]
+          }
         ]
       },
       %{
+        type: :threshold,
         name: "Complexity",
         key: :complexity,
         score: 35,
         grade: "D",
+        impact: 5,
         summary: "Critical — requires attention",
         metric_scores: [
           %{name: "difficulty", source: "halstead", weight: 0.35, value: 24.01, score: 65}
@@ -36,6 +61,104 @@ defmodule CodeQA.HealthReport.FormatterTest do
     ]
   }
 
+  # Cosine-type category fixture: two behaviors, only the first of which
+  # lists a worst-offender file.
+  @cosine_category %{
+    type: :cosine,
+    key: "function_design",
+    name: "Function Design",
+    score: 64,
+    grade: "C",
+    impact: 1,
+    behaviors: [
+      %{
+        behavior: "no_boolean_parameter",
+        cosine: 0.12,
+        score: 56,
+        grade: "C",
+        worst_offenders: [
+          %{file: "lib/foo/bar.ex", cosine: -0.71}
+        ]
+      },
+      %{
+        behavior: "single_responsibility",
+        cosine: 0.45,
+        score: 78,
+        grade: "B+",
+        worst_offenders: []
+      }
+    ]
+  }
+
+  # Cosine-type category whose single worst offender also carries per-metric
+  # contributions (:top_metrics) and node start lines (:top_nodes).
+  @enriched_cosine_category %{
+    type: :cosine,
+    key: "function_design",
+    name: "Function Design",
+    score: 64,
+    grade: "C",
+    impact: 1,
+    behaviors: [
+      %{
+        behavior: "no_boolean_parameter",
+        cosine: -0.65,
+        score: 42,
+        grade: "D+",
+        worst_offenders: [
+          %{
+            file: "lib/codeqa/formatter.ex",
+            cosine: -0.65,
+            top_metrics: [
+              %{metric: "branching.mean_depth", contribution: -4.10},
+              %{metric: "halstead.effort", contribution: -3.22}
+            ],
+            top_nodes: [
+              %{"start_line" => 89, "type" => "block"},
+              %{"start_line" => 134, "type" => "block"}
+            ]
+          }
+        ]
+      }
+    ]
+  }
+
+  # Threshold-type category whose worst offender carries :top_nodes entries.
+  @enriched_threshold_category %{
+    type: :threshold,
+    name: "Complexity",
+    key: :complexity,
+    score: 32,
+    grade: "F",
+    impact: 5,
+    summary: "Critical",
+    metric_scores: [
+      %{name: "difficulty", source: "halstead", weight: 0.35, good: :low, value: 39.0, score: 32}
+    ],
+    worst_offenders: [
+      %{
+        path: "lib/foo.ex",
+        score: 32,
+        grade: "F",
+        lines: 491,
+        bytes: 15_872,
+        metric_scores: [
+          %{name: "difficulty", source: "halstead", good: :low, value: 99.0, score: 0}
+        ],
+        top_nodes: [
+          %{"start_line" => 201, "type" => "block"},
+          %{"start_line" => 312, "type" => "block"}
+        ]
+      }
+    ]
+  }
+
+  # @sample_report with @cosine_category appended to its categories.
+  @report_with_cosine %{
+    @sample_report
+    | categories: @sample_report.categories ++ [@cosine_category]
+  }
+
   describe "format_markdown/3 with :plain format" do
     test "produces header with # Code Health Report" do
       result = Formatter.format_markdown(@sample_report, :default, :plain)
@@ -53,29 +170,183 @@ defmodule CodeQA.HealthReport.FormatterTest do
       assert result =~ "## Overall: B+"
     end
 
-    test "includes category table" do
+    test "includes cosine legend" do
       result = Formatter.format_markdown(@sample_report, :default, :plain)
-      assert result =~ "| Readability | A | 100 | Excellent |"
-      assert result =~ "| Complexity | D | 35 |"
+      assert result =~ "cosine similarity"
+      assert result =~ "anti-pattern detected"
     end
 
-    test "includes worst offenders section" do
+    test "includes category table with Impact column" do
       result = Formatter.format_markdown(@sample_report, :default, :plain)
-      assert result =~ "### Worst Offenders"
-      refute result =~ "lib/<br>`foo.ex`"
-      assert result =~ "`lib/foo.ex`"
-      assert result =~ "120 lines · 3.8 KB"
-      assert result =~ "↑ flesch_adapted=65.00 (avg: 102.50)"
-      refute result =~ "↑ flesch_adapted=65.00, "
+      assert result =~ "| Category | Grade | Score | Impact | Summary |"
+      assert result =~ "| Readability | A | 100 | 3 | Excellent |"
+      assert result =~ "| Complexity | D | 35 | 5 |"
     end
 
     test "summary detail omits category sections" do
       result = Formatter.format_markdown(@sample_report, :summary, :plain)
-      refute result =~ "### Worst Offenders"
       refute result =~ "Codebase averages"
     end
   end
 
+  describe "format_markdown/3 plain with cosine category" do
+    test "renders cosine category header" do
+      markdown = Formatter.format_markdown(@report_with_cosine, :default, :plain)
+      assert markdown =~ "## Function Design — C"
+    end
+
+    test "renders cosine behavior table" do
+      markdown = Formatter.format_markdown(@report_with_cosine, :default, :plain)
+      assert markdown =~ "| Behavior | Cosine | Score | Grade |"
+      assert markdown =~ "| no_boolean_parameter | 0.12 | 56 | C |"
+      assert markdown =~ "| single_responsibility | 0.45 | 78 | B+ |"
+    end
+
+    test "cosine category impact shown in overall table" do
+      markdown = Formatter.format_markdown(@report_with_cosine, :default, :plain)
+      assert markdown =~ "| Function Design | C | 64 | 1 |"
+    end
+  end
+
+  describe "plain formatter: PR summary section" do
+    @sample_report_with_pr Map.put(@sample_report, :pr_summary, %{
+                             base_score: 85,
+                             head_score: 77,
+                             score_delta: -8,
+                             base_grade: "B+",
+                             head_grade: "C+",
+                             blocks_flagged: 6,
+                             files_changed: 3,
+                             files_added: 1,
+                             files_modified: 2
+                           })
+
+    test "renders PR summary line when pr_summary present" do
+      markdown = Formatter.format_markdown(@sample_report_with_pr, :default, :plain)
+      # NOTE(review): "B+" is also the report's overall grade and "6" is a single
+      # character, so these two matches may succeed without a PR summary — confirm.
+      assert markdown =~ "B+"
+      assert markdown =~ "C+"
+      assert markdown =~ "-8"
+      assert markdown =~ "6"
+      assert markdown =~ "1 added"
+      assert markdown =~ "2 modified"
+    end
+
+    test "omits PR summary when pr_summary is nil" do
+      markdown = Formatter.format_markdown(@sample_report, :default, :plain)
+      refute markdown =~ "Score:"
+    end
+  end
+
+  describe "plain formatter: delta section" do
+    @delta %{
+      base: %{
+        aggregate: %{
+          "readability" => %{"mean_flesch_adapted" => 65.0},
+          "halstead" => %{"mean_difficulty" => 12.0}
+        }
+      },
+      head: %{
+        aggregate: %{
+          "readability" => %{"mean_flesch_adapted" => 61.0},
+          "halstead" => %{"mean_difficulty" => 15.0}
+        }
+      }
+    }
+
+    @sample_report_with_delta Map.put(@sample_report, :codebase_delta, @delta)
+
+    test "renders metric changes table when codebase_delta present" do
+      markdown = Formatter.format_markdown(@sample_report_with_delta, :default, :plain)
+      assert markdown =~ "Metric Changes"
+      assert markdown =~ "Readability"
+      assert markdown =~ "65.00"
+      assert markdown =~ "61.00"
+    end
+
+    test "omits delta section when codebase_delta is nil" do
+      markdown = Formatter.format_markdown(@sample_report, :default, :plain)
+      refute markdown =~ "Metric Changes"
+    end
+  end
+
+  describe "plain formatter: block section" do
+    # A single critical-severity potential attached to the block fixture below.
+    @block_potential %{
+      category: "function_design",
+      behavior: "cyclomatic_complexity_under_10",
+      cosine_delta: 0.41,
+      severity: :critical,
+      fix_hint: "Reduce branching"
+    }
+
+    @top_blocks [
+      %{
+        path: "lib/foo.ex",
+        status: "modified",
+        start_line: 42,
+        end_line: 67,
+        type: "code",
+        token_count: 84,
+        source: "def foo do\n  :bar\nend",
+        language: "elixir",
+        potentials: [@block_potential]
+      }
+    ]
+
+    @sample_report_with_blocks Map.put(@sample_report, :top_blocks, @top_blocks)
+
+    test "renders block verdict header" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "review required"
+      assert markdown =~ "🔴"
+    end
+
+    test "renders file path with status" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "lib/foo.ex"
+      assert markdown =~ "modified"
+    end
+
+    test "renders block location and type" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "42-67"
+      assert markdown =~ "84 tokens"
+    end
+
+    test "renders severity icon and behavior" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "🔴"
+      assert markdown =~ "CRITICAL"
+      assert markdown =~ "cyclomatic_complexity_under_10"
+      assert markdown =~ "0.41"
+    end
+
+    test "renders fix hint" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "Reduce branching"
+    end
+
+    test "renders source code" do
+      markdown = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain)
+      assert markdown =~ "def foo do"
+      assert markdown =~ ":bar"
+    end
+
+    test "shows green verdict when top_blocks is empty" do
+      report = Map.put(@sample_report, :top_blocks, [])
+      markdown = Formatter.format_markdown(report, :default, :plain)
+      assert markdown =~ "No block-level issues detected"
+    end
+
+    test "shows green verdict when top_blocks key absent" do
+      markdown = Formatter.format_markdown(@sample_report, :default, :plain)
+      # Only checks that the red verdict is absent; the green message is not asserted.
+      refute markdown =~ "review required"
+    end
+  end
+
   describe "format_markdown/3 defaults to :plain" do
     test "two-arity call matches plain output" do
       plain = Formatter.format_markdown(@sample_report, :default, :plain)
@@ -91,6 +358,12 @@ defmodule CodeQA.HealthReport.FormatterTest do
       assert result =~ "(79/100)"
     end
 
+    test "includes cosine legend" do
+      markdown = Formatter.format_markdown(@sample_report, :default, :github)
+      assert markdown =~ "cosine similarity"
+      assert markdown =~ "anti-pattern detected"
+    end
+
     test "includes mermaid chart" do
       result = Formatter.format_markdown(@sample_report, :default, :github)
       assert result =~ "```mermaid"
@@ -130,6 +403,19 @@ defmodule CodeQA.HealthReport.FormatterTest do
     end
   end
 
+  describe "format_markdown/3 github with cosine category" do
+    test "wraps cosine category in details/summary block" do
+      result = Formatter.format_markdown(@report_with_cosine, :default, :github)
+      assert result =~ "<summary><strong>🟠 Function Design — C (64/100)</strong></summary>"
+    end
+
+    test "renders cosine behaviors table inside details" do
+      result = Formatter.format_markdown(@report_with_cosine, :default, :github)
+      assert result =~ "| Behavior | Cosine | Score | Grade |"
+      assert result =~ "| no_boolean_parameter | 0.12 | 56 | C |"
+    end
+  end
+
   describe "format_markdown/4 with :github format and chart: false" do
     test "omits mermaid chart when chart option is false" do
       result = Formatter.format_markdown(@sample_report, :default, :github, chart: false)
@@ -137,4 +423,228 @@ defmodule CodeQA.HealthReport.FormatterTest do
       assert result =~ "████"
     end
   end
+
+  describe "github formatter: block section" do
+    @block_potential %{
+      category: "function_design",
+      behavior: "cyclomatic_complexity_under_10",
+      cosine_delta: 0.41,
+      severity: :critical,
+      fix_hint: "Reduce branching"
+    }
+
+    @top_blocks_gh [
+      %{
+        path: "lib/foo.ex",
+        status: "modified",
+        start_line: 42,
+        end_line: 67,
+        type: "code",
+        token_count: 84,
+        source: "def foo do\n  :bar\nend",
+        language: "elixir",
+        potentials: [@block_potential]
+      }
+    ]
+
+    @report_with_blocks_gh Map.put(@sample_report, :top_blocks, @top_blocks_gh)
+
+    test "renders block section with verdict and details per block" do
+      result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github)
+      assert result =~ "review required"
+      assert result =~ "<details>"
+      assert result =~ "lib/foo.ex"
+    end
+
+    test "renders severity and fix hint" do
+      result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github)
+      assert result =~ "🔴"
+      assert result =~ "cyclomatic_complexity_under_10"
+      assert result =~ "Reduce branching"
+    end
+
+    test "renders source code in collapsed block" do
+      result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github)
+      assert result =~ "```elixir"
+      assert result =~ "def foo do"
+    end
+  end
+
+  describe "github formatter: PR summary and delta" do
+    @pr_summary_gh %{
+      base_score: 85,
+      head_score: 77,
+      score_delta: -8,
+      base_grade: "B+",
+      head_grade: "C+",
+      blocks_flagged: 6,
+      files_changed: 3,
+      files_added: 1,
+      files_modified: 2
+    }
+
+    @delta_gh %{
+      base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}}},
+      head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}}}
+    }
+
+    test "renders PR summary" do
+      report = @sample_report |> Map.put(:pr_summary, @pr_summary_gh)
+      result = Formatter.format_markdown(report, :default, :github)
+      assert result =~ "B+"
+      assert result =~ "C+"
+      assert result =~ "-8"
+    end
+
+    test "renders delta section" do
+      report = @sample_report |> Map.put(:codebase_delta, @delta_gh)
+      result = Formatter.format_markdown(report, :default, :github)
+      assert result =~ "Metric Changes"
+      assert result =~ "65.00"
+      assert result =~ "61.00"
+    end
+  end
+
+  describe "render_parts/2" do
+    test "returns at least 3 parts" do
+      parts = Formatter.render_parts(@sample_report)
+      assert length(parts) >= 3
+    end
+
+    test "each part ends with sentinel comment" do
+      indexed_parts =
+        @sample_report
+        |> Formatter.render_parts()
+        |> Enum.with_index(1)
+      # Sentinel numbering is 1-based: part N must contain marker N.
+      for {part, n} <- indexed_parts, do: assert(part =~ "<!-- codeqa-health-report-#{n} -->")
+    end
+
+    test "part 1 contains header and grade" do
+      [part_1 | _] = Formatter.render_parts(@sample_report)
+      assert part_1 =~ "Code Health: B+"
+      assert part_1 =~ "(79/100)"
+    end
+
+    test "part 1 contains mermaid chart by default" do
+      [part_1 | _] = Formatter.render_parts(@sample_report)
+      assert part_1 =~ "```mermaid"
+    end
+
+    test "part 1 contains progress bars" do
+      [part_1 | _] = Formatter.render_parts(@sample_report)
+      assert part_1 =~ "████"
+    end
+
+    test "part 2 contains category details" do
+      [_, part_2 | _] = Formatter.render_parts(@sample_report)
+      assert part_2 =~ "<details>"
+      assert part_2 =~ "Readability"
+    end
+
+    test "part 3 shows green verdict when no blocks" do
+      [_, _, part_3 | _] = Formatter.render_parts(@sample_report)
+      assert part_3 =~ "No block-level issues detected"
+    end
+
+    test "part 3 contains verdict and blocks when present" do
+      report = Map.put(@sample_report, :top_blocks, @top_blocks_gh)
+      [_, _, part_3 | _] = Formatter.render_parts(report)
+      assert part_3 =~ "lib/foo.ex"
+      assert part_3 =~ "review required"
+    end
+  end
+
+  describe "Github.render_parts_3/2" do
+    alias CodeQA.HealthReport.Formatter.Github
+
+    test "returns single part with blocks (top 10 limit means no slicing needed)" do
+      potential = %{
+        category: "function_design",
+        behavior: "single_responsibility",
+        cosine_delta: 0.35,
+        severity: :high,
+        fix_hint: "Consider extracting helper function"
+      }
+
+      ten_blocks =
+        for n <- 1..10 do
+          %{
+            path: "lib/file_#{n}.ex",
+            status: "modified",
+            start_line: 10,
+            end_line: 30,
+            type: "function",
+            token_count: 150,
+            source: "def foo, do: :bar",
+            language: "elixir",
+            potentials: [potential]
+          }
+        end
+
+      # Per the test name, ten blocks fit the top-10 limit, so a single part is expected.
+      rendered_parts =
+        @sample_report |> Map.put(:top_blocks, ten_blocks) |> Github.render_parts_3()
+
+      assert [_only_part] = rendered_parts
+    end
+
+    test "part ends with sentinel" do
+      blocks = [
+        %{
+          path: "lib/foo.ex",
+          status: nil,
+          start_line: 1,
+          end_line: 10,
+          type: "code",
+          token_count: 50,
+          source: "def foo, do: :bar",
+          language: "elixir",
+          potentials: [
+            %{
+              category: "function_design",
+              behavior: "single_responsibility",
+              cosine_delta: 0.35,
+              severity: :high,
+              fix_hint: nil
+            }
+          ]
+        }
+      ]
+
+      report = Map.put(@sample_report, :top_blocks, blocks)
+      [part] = Github.render_parts_3(report)
+      assert part =~ "<!-- codeqa-health-report-3 -->"
+    end
+
+    test "renders source code in fenced block" do
+      blocks = [
+        %{
+          path: "lib/foo.ex",
+          status: nil,
+          start_line: 1,
+          end_line: 10,
+          type: "code",
+          token_count: 50,
+          source: "def hello do\n  :world\nend",
+          language: "elixir",
+          potentials: [
+            %{
+              category: "function_design",
+              behavior: "single_responsibility",
+              cosine_delta: 0.35,
+              severity: :high,
+              fix_hint: nil
+            }
+          ]
+        }
+      ]
+
+      report = Map.put(@sample_report, :top_blocks, blocks)
+      [part] = Github.render_parts_3(report)
+      assert part =~ "```elixir"
+      assert part =~ "def hello do"
+      assert part =~ ":world"
+    end
+  end
 end
diff --git a/test/codeqa/health_report/grader_test.exs b/test/codeqa/health_report/grader_test.exs
new file mode 100644
index 00000000..6f9ea544
--- /dev/null
+++ b/test/codeqa/health_report/grader_test.exs
@@ -0,0 +1,495 @@
+defmodule CodeQA.HealthReport.GraderTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.CombinedMetrics.SampleRunner
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Engine.Collector
+  alias CodeQA.HealthReport.Grader
+
+  @default_scale CodeQA.HealthReport.Categories.default_grade_scale()
+
+  # -----------------------------------------------------------------------
+  # score_cosine/1
+  # -----------------------------------------------------------------------
+
+  describe "score_cosine/1" do
+    test "cosine 1.0 maps to 100" do
+      assert Grader.score_cosine(1.0) == 100
+    end
+
+    test "cosine -1.0 maps to 0" do
+      assert Grader.score_cosine(-1.0) == 0
+    end
+
+    test "cosine 0.5 (lower bound of top band) maps to 90" do
+      assert Grader.score_cosine(0.5) == 90
+    end
+
+    test "cosine 0.2 (lower bound of second band) maps to 70" do
+      assert Grader.score_cosine(0.2) == 70
+    end
+
+    test "cosine 0.0 (lower bound of third band) maps to 50" do
+      assert Grader.score_cosine(0.0) == 50
+    end
+
+    test "cosine -0.3 (lower bound of fourth band) maps to 30" do
+      assert Grader.score_cosine(-0.3) == 30
+    end
+
+    test "interpolation in [0.0, 0.2) band: cosine 0.12 → 62" do
+      # ratio = 0.12 / 0.2 = 0.6, score = 50 + 0.6 * 20 = 62
+      assert Grader.score_cosine(0.12) == 62
+    end
+
+    test "interpolation in [0.2, 0.5) band: cosine 0.35 → 80" do
+      # ratio = (0.35 - 0.2) / (0.5 - 0.2) = 0.15/0.3 = 0.5, score = 70 + 0.5 * 20 = 80
+      assert Grader.score_cosine(0.35) == 80
+    end
+
+    test "interpolation in [0.5, 1.0] band: cosine 0.75 → 95" do
+      # ratio = (0.75 - 0.5) / (1.0 - 0.5) = 0.25/0.5 = 0.5, score = 90 + 0.5 * 10 = 95
+      assert Grader.score_cosine(0.75) == 95
+    end
+
+    test "interpolation in [-0.3, 0.0) band: cosine -0.15 → 40" do
+      # ratio = (-0.15 - (-0.3)) / (0.0 - (-0.3)) = 0.15/0.3 = 0.5, score = 30 + 0.5 * 20 = 40
+      assert Grader.score_cosine(-0.15) == 40
+    end
+
+    test "interpolation in [-1.0, -0.3) band: cosine -0.65 → 15" do
+      # ratio = (-0.65 - (-1.0)) / (-0.3 - (-1.0)) = 0.35/0.7 = 0.5, score = 0 + 0.5 * 30 = 15
+      assert Grader.score_cosine(-0.65) == 15
+    end
+
+    test "result is always an integer" do
+      Enum.each([-1.0, -0.5, 0.0, 0.1, 0.3, 0.6, 1.0], fn sample ->
+        score = Grader.score_cosine(sample)
+        assert is_integer(score), "expected integer for cosine #{sample}"
+      end)
+    end
+
+    test "result is always in [0, 100]" do
+      samples = [-1.0, -0.9, -0.3, 0.0, 0.2, 0.5, 1.0]
+
+      Enum.each(samples, fn sample ->
+        score = Grader.score_cosine(sample)
+        assert score >= 0 and score <= 100, "score #{score} out of range for cosine #{sample}"
+      end)
+    end
+  end
+
+  # -----------------------------------------------------------------------
+  # overall_score/3 (including backward compat as /2)
+  # -----------------------------------------------------------------------
+
+  describe "overall_score/3" do
+    test "empty list returns {0, 'F'}" do
+      assert Grader.overall_score([], @default_scale) == {0, "F"}
+    end
+
+    test "equal weights produces arithmetic mean (backward compat /2)" do
+      categories = [
+        %{key: :readability, score: 80},
+        %{key: :complexity, score: 60}
+      ]
+
+      {score, _grade} = Grader.overall_score(categories, @default_scale)
+      assert score == 70
+    end
+
+    test "weighted average applies impact_map correctly" do
+      categories = [
+        %{key: :readability, score: 80},
+        %{key: :complexity, score: 60}
+      ]
+
+      # readability has weight 3, complexity has weight 1
+      # weighted = (80*3 + 60*1) / 4 = 300/4 = 75
+      impact_map = %{"readability" => 3, "complexity" => 1}
+      {score, _grade} = Grader.overall_score(categories, @default_scale, impact_map)
+      assert score == 75
+    end
+
+    test "missing keys in impact_map default to 1" do
+      categories = [
+        %{key: :readability, score: 80},
+        %{key: :complexity, score: 60}
+      ]
+
+      # Only readability in map with weight 2; complexity defaults to 1
+      # weighted = (80*2 + 60*1) / 3 = 220/3 ≈ 73
+      impact_map = %{"readability" => 2}
+      {score, _grade} = Grader.overall_score(categories, @default_scale, impact_map)
+      assert score == 73
+    end
+
+    test "backward compat: /2 call with empty impact_map equals arithmetic mean" do
+      categories = [
+        %{key: :readability, score: 90},
+        %{key: :complexity, score: 70},
+        %{key: :naming, score: 50}
+      ]
+
+      {score_two, grade_two} = Grader.overall_score(categories, @default_scale)
+      {score_three, grade_three} = Grader.overall_score(categories, @default_scale, %{})
+
+      assert score_two == score_three
+      assert grade_two == grade_three
+    end
+
+    test "returns grade string along with integer score" do
+      categories = [%{key: :readability, score: 100}]
+      {score, grade} = Grader.overall_score(categories, @default_scale)
+      assert is_integer(score)
+      assert is_binary(grade)
+    end
+
+    test "atom keys are converted to strings for impact_map lookup" do
+      categories = [
+        %{key: :function_design, score: 60},
+        %{key: :variable_naming, score: 40}
+      ]
+
+      impact_map = %{"function_design" => 2, "variable_naming" => 1}
+      {score, _} = Grader.overall_score(categories, @default_scale, impact_map)
+      # (60*2 + 40*1) / 3 = 160/3 ≈ 53
+      assert score == 53
+    end
+  end
+
+  # Shared cosines_by_category for grade_cosine_categories/3 tests — computed once for the module.
+  setup_all do
+    files = Collector.collect_files("lib", [])
+    result = Analyzer.analyze_codebase(files)
+    aggregate = get_in(result, ["codebase", "aggregate"])
+    all_cosines = SampleRunner.diagnose_aggregate(aggregate, top: 99_999)
+    cosines_by_category = Enum.group_by(all_cosines, & &1.category)
+    {:ok, cosines_by_category: cosines_by_category}
+  end
+
+  # -----------------------------------------------------------------------
+  # grade_cosine_categories/3
+  # -----------------------------------------------------------------------
+
+  describe "grade_cosine_categories/3" do
+    test "returns a list", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      assert is_list(result)
+    end
+
+    test "each entry has required top-level keys", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+
+      for cat <- result do
+        assert Map.has_key?(cat, :type), "missing :type in #{inspect(cat)}"
+        assert Map.has_key?(cat, :key), "missing :key"
+        assert Map.has_key?(cat, :name), "missing :name"
+        assert Map.has_key?(cat, :score), "missing :score"
+        assert Map.has_key?(cat, :grade), "missing :grade"
+        assert Map.has_key?(cat, :behaviors), "missing :behaviors"
+      end
+    end
+
+    test "type is :cosine for every entry", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      for cat <- result, do: assert(cat.type == :cosine)
+    end
+
+    test "scores are integers in [0, 100]", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+
+      for cat <- result do
+        assert is_integer(cat.score), "score not integer in #{cat.key}"
+        assert cat.score >= 0 and cat.score <= 100
+      end
+    end
+
+    test "grade is a string", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      for cat <- result, do: assert(is_binary(cat.grade))
+    end
+
+    test "impact key is absent (HealthReport.generate/2 is responsible for embedding impact)", %{
+      cosines_by_category: cosines_by_category
+    } do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      for cat <- result, do: refute(Map.has_key?(cat, :impact))
+    end
+
+    test "name is humanized from key", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+
+      for cat <- result do
+        # name must be a non-empty string, words capitalized
+        assert is_binary(cat.name)
+        assert String.length(cat.name) > 0
+        # key should be a string (category slug)
+        assert is_binary(cat.key)
+      end
+    end
+
+    test "each behavior entry has required keys", %{cosines_by_category: cosines_by_category} do
+      required_keys = [:behavior, :cosine, :score, :grade, :worst_offenders]
+
+      behaviors =
+        cosines_by_category
+        |> Grader.grade_cosine_categories(%{}, @default_scale)
+        |> Enum.flat_map(& &1.behaviors)
+
+      # Fails on the first behavior map that lacks any one of the required keys.
+      for entry <- behaviors, key <- required_keys, do: assert(Map.has_key?(entry, key))
+    end
+
+    test "behavior scores are integers in [0, 100]", %{cosines_by_category: cosines_by_category} do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+
+      for cat <- result, b <- cat.behaviors do
+        assert is_integer(b.score)
+        assert b.score >= 0 and b.score <= 100
+      end
+    end
+
+    test "worst_offenders uses worst_files lookup", %{cosines_by_category: cosines_by_category} do
+      # Baseline run supplies one real category/behavior pair to key the injected entry.
+      [%{key: cat_key, behaviors: [%{behavior: behavior_name} | _]} | _] =
+        Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+
+      sentinel = [%{file: "lib/sentinel.ex", cosine: -0.99}]
+      worst_files = %{"#{cat_key}.#{behavior_name}" => sentinel}
+
+      regraded = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale)
+      %{behaviors: behaviors} = Enum.find(regraded, fn cat -> cat.key == cat_key end)
+      target = Enum.find(behaviors, fn entry -> entry.behavior == behavior_name end)
+
+      assert target.worst_offenders == sentinel
+    end
+
+    test "top_metrics and top_nodes pass through unmodified", %{
+      cosines_by_category: cosines_by_category
+    } do
+      sentinel = [
+        %{
+          file: "lib/sentinel.ex",
+          cosine: -0.99,
+          top_metrics: [%{metric: "foo.bar", contribution: -1.5}],
+          top_nodes: [%{"start_line" => 42, "type" => "block"}]
+        }
+      ]
+
+      [first_cat | _] = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      first_behavior = hd(first_cat.behaviors)
+      lookup_key = "#{first_cat.key}.#{first_behavior.behavior}"
+
+      worst_files = %{lookup_key => sentinel}
+      result = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale)
+
+      found_cat = Enum.find(result, &(&1.key == first_cat.key))
+      found_behavior = Enum.find(found_cat.behaviors, &(&1.behavior == first_behavior.behavior))
+      assert found_behavior.worst_offenders == sentinel
+    end
+
+    test "worst_offenders defaults to [] when key absent", %{
+      cosines_by_category: cosines_by_category
+    } do
+      result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale)
+      for cat <- result, b <- cat.behaviors, do: assert(b.worst_offenders == [])
+    end
+  end
+
+  # -----------------------------------------------------------------------
+  # worst_offenders/4 — top_nodes
+  # -----------------------------------------------------------------------
+
+  describe "worst_offenders/4 top_nodes" do
+    @category %{
+      key: :function_design,
+      name: "Function Design",
+      metrics: [
+        %{
+          source: "halstead",
+          name: "tokens",
+          weight: 1.0,
+          good: :low,
+          thresholds: %{a: 10, b: 20, c: 30, d: 40}
+        }
+      ]
+    }
+
+    test "returns top_nodes: [] when file_data has no nodes key" do
+      # File entry deliberately omits "nodes" altogether.
+      file_data = %{
+        "metrics" => %{"halstead" => %{"tokens" => 50.0}},
+        "lines" => 10,
+        "bytes" => 100
+      }
+
+      offenders = Grader.worst_offenders(@category, %{"lib/foo.ex" => file_data}, 5)
+
+      # Only the first returned entry is inspected.
+      assert [%{top_nodes: []} | _] = offenders
+    end
+
+    test "returns top_nodes: [] when file_data nodes is nil" do
+      files = %{
+        "lib/foo.ex" => %{
+          "metrics" => %{"halstead" => %{"tokens" => 50.0}},
+          "nodes" => nil,
+          "lines" => 10,
+          "bytes" => 100
+        }
+      }
+
+      result = Grader.worst_offenders(@category, files, 5)
+      [entry | _] = result
+      assert entry.top_nodes == []
+    end
+
+    test "returns top_nodes: [] when file_data nodes is []" do
+      files = %{
+        "lib/foo.ex" => %{
+          "metrics" => %{"halstead" => %{"tokens" => 50.0}},
+          "nodes" => [],
+          "lines" => 10,
+          "bytes" => 100
+        }
+      }
+
+      result = Grader.worst_offenders(@category, files, 5)
+      [entry | _] = result
+      assert entry.top_nodes == []
+    end
+
+    test "returns top 3 nodes ranked by refactoring_potentials descending" do
+      nodes = [
+        %{
+          "start_line" => 1,
+          "column_start" => 0,
+          "char_length" => 50,
+          "type" => "function",
+          "token_count" => 20,
+          "refactoring_potentials" => [
+            %{"category" => "function_design", "behavior" => "x", "cosine_delta" => 0.5}
+          ],
+          "children" => []
+        },
+        %{
+          "start_line" => 10,
+          "column_start" => 0,
+          "char_length" => 100,
+          "type" => "function",
+          "token_count" => 40,
+          "refactoring_potentials" => [
+            %{"category" => "function_design", "behavior" => "x", "cosine_delta" => 0.9},
+            %{"category" => "naming", "behavior" => "y", "cosine_delta" => 0.4}
+          ],
+          "children" => []
+        },
+        %{
+          "start_line" => 20,
+          "column_start" => 0,
+          "char_length" => 30,
+          "type" => "function",
+          "token_count" => 10,
+          "refactoring_potentials" => [
+            %{"category" => "function_design", "behavior" => "z", "cosine_delta" => 0.2}
+          ],
+          "children" => []
+        },
+        %{
+          "start_line" => 30,
+          "column_start" => 0,
+          "char_length" => 10,
+          "type" => "function",
+          "token_count" => 5,
+          "refactoring_potentials" => [],
+          "children" => []
+        }
+      ]
+
+      files = %{
+        "lib/foo.ex" => %{
+          "metrics" => %{"halstead" => %{"tokens" => 50.0}},
+          "nodes" => nodes,
+          "lines" => 40,
+          "bytes" => 400
+        }
+      }
+
+      result = Grader.worst_offenders(@category, files, 5)
+      [entry | _] = result
+
+      assert length(entry.top_nodes) == 3
+      # The node with highest sum of cosine_delta comes first (0.9+0.4=1.3)
+      [first | _] = entry.top_nodes
+      assert first["start_line"] == 10
+    end
+
+    test "parent+child overlap: only parent is included when both rank top 3" do
+      child_node = %{
+        "start_line" => 11,
+        "column_start" => 2,
+        "char_length" => 30,
+        "type" => "function",
+        "token_count" => 10,
+        "refactoring_potentials" => [
+          %{"category" => "function_design", "behavior" => "x", "cosine_delta" => 0.8}
+        ],
+        "children" => []
+      }
+
+      nodes = [
+        %{
+          "start_line" => 10,
+          "column_start" => 0,
+          "char_length" => 100,
+          "type" => "function",
+          "token_count" => 40,
+          "refactoring_potentials" => [
+            %{"category" => "function_design", "behavior" => "x", "cosine_delta" => 0.9}
+          ],
+          "children" => [child_node]
+        },
+        %{
+          "start_line" => 20,
+          "column_start" => 0,
+          "char_length" => 50,
+          "type" => "function",
+          "token_count" => 20,
+          "refactoring_potentials" => [
+            %{"category" => "naming", "behavior" => "y", "cosine_delta" => 0.5}
+          ],
+          "children" => []
+        },
+        %{
+          "start_line" => 30,
+          "column_start" => 0,
+          "char_length" => 30,
+          "type" => "function",
+          "token_count" => 10,
+          "refactoring_potentials" => [
+            %{"category" => "naming", "behavior" => "z", "cosine_delta" => 0.3}
+          ],
+          "children" => []
+        }
+      ]
+
+      files = %{
+        "lib/foo.ex" => %{
+          "metrics" => %{"halstead" => %{"tokens" => 50.0}},
+          "nodes" => nodes,
+          "lines" => 40,
+          "bytes" => 400
+        }
+      }
+
+      result = Grader.worst_offenders(@category, files, 5)
+      [entry | _] = result
+
+      # child_node is not top-level, so only top-level nodes are considered
+      assert length(entry.top_nodes) == 3
+      start_lines = Enum.map(entry.top_nodes, & &1["start_line"])
+      refute 11 in start_lines
+    end
+  end
+end
diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs
new file mode 100644
index 00000000..4bef28b2
--- /dev/null
+++ b/test/codeqa/health_report/top_blocks_test.exs
@@ -0,0 +1,471 @@
+defmodule CodeQA.HealthReport.TopBlocksTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Git.ChangedFile
+  alias CodeQA.HealthReport.TopBlocks
+
+  # Builds a childless "code" node spanning lines 1-10 with one function_design potential.
+  defp make_node(cosine_delta, token_count \\ 20) do
+    potential = %{
+      "category" => "function_design",
+      "behavior" => "cyclomatic_complexity_under_10",
+      "cosine_delta" => cosine_delta
+    }
+
+    %{
+      "start_line" => 1,
+      "end_line" => 10,
+      "type" => "code",
+      "token_count" => token_count,
+      "refactoring_potentials" => [potential],
+      "children" => []
+    }
+  end
+
+  # Results fixture: one file, "lib/foo.ex", holding `nodes`; the metadata path is "/tmp".
+  defp make_results(nodes),
+    do: %{"metadata" => %{"path" => "/tmp"}, "files" => %{"lib/foo.ex" => %{"nodes" => nodes}}}
+
+  # Maps the {category, behavior} pair used by make_node/2 to `cosine` (default 0.0).
+  defp lookup(cosine \\ 0.0),
+    do: Map.new([{{"function_design", "cyclomatic_complexity_under_10"}, cosine}])
+
+  describe "severity classification" do
+    test ":critical when severity_ratio > 0.50" do
+      # gap = max(0.01, 1.0 - 0.0) = 1.0, so ratio = 0.60 / 1.0 = 0.60, above 0.50
+      blocks = TopBlocks.build(make_results([make_node(0.60)]), [], lookup())
+      assert [%{potentials: [%{severity: :critical} | _]}] = blocks
+    end
+
+    test ":high when severity_ratio > 0.25 and <= 0.50" do
+      # gap = 1.0, so ratio = 0.30
+      blocks = TopBlocks.build(make_results([make_node(0.30)]), [], lookup())
+      assert [%{potentials: [%{severity: :high} | _]}] = blocks
+    end
+
+    test ":medium when severity_ratio > 0.10 and <= 0.25" do
+      # gap = 1.0, so ratio = 0.15
+      blocks = TopBlocks.build(make_results([make_node(0.15)]), [], lookup())
+      assert [%{potentials: [%{severity: :medium} | _]}] = blocks
+    end
+
+    test "filtered when severity_ratio <= 0.10" do
+      # gap = 1.0, so ratio = 0.05 and the block must not be reported
+      assert [] = TopBlocks.build(make_results([make_node(0.05)]), [], lookup())
+    end
+
+    test "gap floor prevents division by zero when codebase_cosine = 1.0" do
+      # gap = max(0.01, 1.0 - 1.0) = 0.01, so ratio = 0.02 / 0.01 = 2.0 -> :critical
+      blocks = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0))
+      assert [%{potentials: [%{severity: :critical} | _]}] = blocks
+    end
+
+    test "gap handles negative codebase_cosine" do
+      # codebase_cosine = -0.5 gives gap = max(0.01, 1.0 - (-0.5)) = 1.5,
+      # so ratio = 0.60 / 1.5 = 0.40 -> :high
+      blocks = TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5))
+      assert [%{potentials: [%{severity: :high} | _]}] = blocks
+    end
+
+    test "unknown behavior defaults codebase_cosine to 0.0" do
+      # The lookup passed here is empty, so no cosine is stored for the behavior:
+      # gap = 1.0, ratio = 0.60 -> :critical
+      blocks = TopBlocks.build(make_results([make_node(0.60)]), [], %{})
+      assert [%{potentials: [%{severity: :critical} | _]}] = blocks
+    end
+  end
+
+  describe "changed_files filtering" do
+    test "when changed_files is empty, shows all files" do
+      # No ChangedFile entries are passed: the file is still reported, with a nil status.
+      blocks = TopBlocks.build(make_results([make_node(0.60)]), [], lookup())
+      assert [%{path: "lib/foo.ex", status: nil}] = blocks
+    end
+
+    test "when changed_files given, only shows matching files" do
+      other_file = %ChangedFile{path: "lib/other.ex", status: "added"}
+      assert [] = TopBlocks.build(make_results([make_node(0.60)]), [other_file], lookup())
+    end
+
+    test "status comes from ChangedFile struct" do
+      same_file = %ChangedFile{path: "lib/foo.ex", status: "modified"}
+      blocks = TopBlocks.build(make_results([make_node(0.60)]), [same_file], lookup())
+      assert [%{status: "modified"}] = blocks
+    end
+  end
+
+  describe "block filtering" do
+    test "blocks with token_count < 10 are excluded" do
+      assert TopBlocks.build(make_results([make_node(0.60, 9)]), [], lookup()) == []
+    end
+
+    test "blocks are ordered by highest cosine_delta descending" do
+      node_low = make_node(0.20)
+      # Shift both bounds: moving only start_line would leave an inverted 20..10 range.
+      node_high = Map.merge(make_node(0.60), %{"start_line" => 20, "end_line" => 29})
+
+      results = make_results([node_low, node_high])
+
+      blocks = TopBlocks.build(results, [], lookup())
+      deltas = Enum.map(blocks, fn b -> hd(b.potentials).cosine_delta end)
+      # Both blocks must survive filtering, otherwise the sort check passes vacuously.
+      assert length(deltas) == 2
+      assert deltas == Enum.sort(deltas, :desc)
+    end
+
+    test "children nodes are included" do
+      parent = %{
+        "start_line" => 1,
+        "end_line" => 20,
+        "type" => "code",
+        "token_count" => 5,
+        "refactoring_potentials" => [],
+        "children" => [make_node(0.60)]
+      }
+
+      blocks = TopBlocks.build(make_results([parent]), [], lookup())
+      assert length(blocks) == 1
+    end
+  end
+
+  describe "fix hints" do
+    test "includes fix_hint string for known behavior" do
+      # naming_conventions/file_name_matches_primary_export has _fix_hint in YAML
+      category = "naming_conventions"
+      behavior = "file_name_matches_primary_export"
+
+      potential = %{
+        "category" => category,
+        "behavior" => behavior,
+        "cosine_delta" => 0.60
+      }
+
+      # Same node shape as make_node/2 produces; only the potential is swapped.
+      node =
+        make_node(0.60)
+        |> Map.put("refactoring_potentials", [potential])
+
+      hint_lookup = %{{category, behavior} => 0.0}
+      [block] = TopBlocks.build(make_results([node]), [], hint_lookup)
+      [first_potential | _] = block.potentials
+
+      assert is_binary(first_potential.fix_hint)
+    end
+
+    test "fix_hint is nil for unknown behavior" do
+      potential = %{
+        "category" => "unknown_cat",
+        "behavior" => "unknown_beh",
+        "cosine_delta" => 0.60
+      }
+
+      node =
+        make_node(0.60)
+        |> Map.put("refactoring_potentials", [potential])
+
+      # The lookup is empty as well, so it holds no entry for this category/behavior pair.
+      blocks = TopBlocks.build(make_results([node]), [], %{})
+      assert [%{potentials: [%{fix_hint: nil} | _]}] = blocks
+    end
+  end
+
+  describe "source code extraction" do
+    test "includes source code when file exists" do
+      # Scratch directory for a real file on disk. Cleanup is registered with on_exit/1
+      # right after creation so the directory is removed even when an assertion fails.
+      tmp_dir = System.tmp_dir!()
+      test_dir = Path.join(tmp_dir, "top_blocks_test_#{:rand.uniform(100_000)}")
+      File.mkdir_p!(test_dir)
+      on_exit(fn -> File.rm_rf!(test_dir) end)
+      file_path = Path.join(test_dir, "test.ex")
+      File.write!(file_path, "line 1\nline 2\nline 3\nline 4\nline 5")
+
+      results = %{
+        "files" => %{"test.ex" => %{"nodes" => [make_node(0.60) |> Map.put("end_line", 3)]}},
+        "metadata" => %{"path" => test_dir}
+      }
+
+      [block] = TopBlocks.build(results, [], lookup())
+      assert block.source == "line 1\nline 2\nline 3"
+      assert block.language == "elixir"
+    end
+
+    test "source is nil when file does not exist" do
+      results = %{
+        "files" => %{"nonexistent.ex" => %{"nodes" => [make_node(0.60)]}},
+        "metadata" => %{"path" => "/nonexistent/path"}
+      }
+
+      [block] = TopBlocks.build(results, [], lookup())
+      assert block.source == nil
+    end
+  end
+
+  describe "top N limiting" do
+    test "returns at most 10 blocks" do
+      # 15 candidate blocks of 10 lines each, which fits the default 3-20 line range.
+      nodes =
+        Enum.map(1..15, fn idx ->
+          first_line = idx * 20
+
+          Map.merge(make_node(0.60 + idx * 0.01), %{
+            "start_line" => first_line,
+            "end_line" => first_line + 9
+          })
+        end)
+
+      results = make_results(nodes)
+      blocks = TopBlocks.build(results, [], lookup())
+
+      assert length(blocks) == 10
+    end
+  end
+
+  describe "line range filtering" do
+    test "blocks outside line range are excluded" do
+      # Lines 1-2: a 2-line block, below the default minimum of 3.
+      too_short =
+        make_node(0.60)
+        |> Map.put("start_line", 1)
+        |> Map.put("end_line", 2)
+
+      # Lines 10-34: a 25-line block, above the default maximum of 20.
+      too_long =
+        make_node(0.60)
+        |> Map.put("start_line", 10)
+        |> Map.put("end_line", 34)
+
+      results = %{
+        "metadata" => %{"path" => "/tmp"},
+        "files" => %{"lib/foo.ex" => %{"nodes" => [too_short, too_long]}}
+      }
+
+      found = TopBlocks.build(results, [], lookup())
+      assert found == []
+    end
+
+    test "blocks within line range are included" do
+      # Lines 1-10: a 10-line block, inside the default 3-20 range.
+      in_range =
+        make_node(0.60)
+        |> Map.put("start_line", 1)
+        |> Map.put("end_line", 10)
+
+      results = %{
+        "metadata" => %{"path" => "/tmp"},
+        "files" => %{"lib/foo.ex" => %{"nodes" => [in_range]}}
+      }
+
+      found = TopBlocks.build(results, [], lookup())
+      assert [_single] = found
+    end
+
+    test "line range is configurable" do
+      # Lines 1-2: a 2-line block.
+      two_liner =
+        make_node(0.60)
+        |> Map.put("start_line", 1)
+        |> Map.put("end_line", 2)
+
+      results = %{
+        "metadata" => %{"path" => "/tmp"},
+        "files" => %{"lib/foo.ex" => %{"nodes" => [two_liner]}}
+      }
+
+      # With the default 3-20 range the block is dropped.
+      assert [] = TopBlocks.build(results, [], lookup())
+
+      # Widening the range to 1-5 lets it through.
+      custom_range = [block_min_lines: 1, block_max_lines: 5]
+      assert [_single] = TopBlocks.build(results, [], lookup(), custom_range)
+    end
+  end
+
+  describe "diff_line_ranges filtering" do
+    test "when diff_line_ranges is empty map, shows all blocks" do
+      node = make_node(0.60)
+      [block] = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: %{})
+      assert block.path == "lib/foo.ex"
+    end
+
+    test "when diff_line_ranges provided, only shows blocks overlapping diff" do
+      # make_node/2 default: the block spans lines 1-10
+      node = make_node(0.60)
+
+      # Diff touches lines 5-7, which lie inside the block
+      diff_ranges = %{"lib/foo.ex" => [{5, 7}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.path == "lib/foo.ex"
+    end
+
+    test "excludes blocks that don't overlap with diff" do
+      # make_node/2 default: the block spans lines 1-10
+      node = make_node(0.60)
+
+      # Diff touches lines 50-55, well past the block's last line
+      diff_ranges = %{"lib/foo.ex" => [{50, 55}]}
+
+      blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+      assert blocks == []
+    end
+
+    test "excludes blocks when file has no diff ranges" do
+      node = make_node(0.60)
+
+      # The only diff ranges given belong to a different file
+      diff_ranges = %{"lib/other.ex" => [{1, 10}]}
+
+      blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+      assert blocks == []
+    end
+
+    test "includes block with exact overlap" do
+      # The block spans lines 5-15
+      node =
+        make_node(0.60)
+        |> put_in(["start_line"], 5)
+        |> put_in(["end_line"], 15)
+
+      # The diff range equals the block's own range, 5-15
+      diff_ranges = %{"lib/foo.ex" => [{5, 15}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.start_line == 5
+      assert block.end_line == 15
+    end
+
+    test "includes block with partial overlap at start" do
+      # The block spans lines 10-20
+      node =
+        make_node(0.60)
+        |> put_in(["start_line"], 10)
+        |> put_in(["end_line"], 20)
+
+      # Diff touches lines 5-12, so lines 10-12 are shared with the block
+      diff_ranges = %{"lib/foo.ex" => [{5, 12}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.start_line == 10
+    end
+
+    test "includes block with partial overlap at end" do
+      # The block spans lines 10-20
+      node =
+        make_node(0.60)
+        |> put_in(["start_line"], 10)
+        |> put_in(["end_line"], 20)
+
+      # Diff touches lines 18-25, so lines 18-20 are shared with the block
+      diff_ranges = %{"lib/foo.ex" => [{18, 25}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.end_line == 20
+    end
+
+    test "includes block when diff is entirely inside block" do
+      # make_node/2 default: the block spans lines 1-10
+      node = make_node(0.60)
+
+      # Diff touches lines 3-5, fully contained in the block
+      diff_ranges = %{"lib/foo.ex" => [{3, 5}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.path == "lib/foo.ex"
+      assert block.start_line == 1
+      assert block.end_line == 10
+    end
+
+    test "works with multiple diff ranges for same file" do
+      # make_node/2 default: the block spans lines 1-10
+      node = make_node(0.60)
+
+      # Two diff ranges: 50-55 misses the block, 5-7 overlaps it
+      diff_ranges = %{"lib/foo.ex" => [{50, 55}, {5, 7}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.path == "lib/foo.ex"
+      assert block.start_line == 1
+      assert block.end_line == 10
+    end
+
+    test "excludes adjacent but non-overlapping ranges" do
+      # make_node/2 default: the block spans lines 1-10
+      node = make_node(0.60)
+
+      # Diff touches only line 11, the line right after the block's last line (10)
+      diff_ranges = %{"lib/foo.ex" => [{11, 11}]}
+
+      blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+      assert blocks == []
+    end
+
+    test "excludes blocks when file has empty diff ranges list" do
+      node = make_node(0.60)
+
+      # The file has an entry but its range list is empty (e.g., a deletions-only change)
+      diff_ranges = %{"lib/foo.ex" => []}
+
+      blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges)
+      assert blocks == []
+    end
+
+    test "single-line block overlapping single-line diff" do
+      # The block covers line 5 only
+      node =
+        make_node(0.60)
+        |> put_in(["start_line"], 5)
+        |> put_in(["end_line"], 5)
+
+      diff_ranges = %{"lib/foo.ex" => [{5, 5}]}
+
+      # block_min_lines: 1 is needed because a one-line block is below the default minimum of 3
+      [block] =
+        TopBlocks.build(make_results([node]), [], lookup(),
+          diff_line_ranges: diff_ranges,
+          block_min_lines: 1
+        )
+
+      assert block.start_line == 5
+      assert block.end_line == 5
+    end
+
+    test "when both changed_files and diff_line_ranges provided, both filters apply" do
+      node = make_node(0.60)
+      changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}]
+      diff_ranges = %{"lib/foo.ex" => [{5, 7}]}
+
+      [block] =
+        TopBlocks.build(make_results([node]), changed, lookup(), diff_line_ranges: diff_ranges)
+
+      assert block.path == "lib/foo.ex"
+      assert block.status == "modified"
+      assert block.start_line == 1
+      assert block.end_line == 10
+    end
+
+    test "changed_files filter applies before diff_line_ranges filter" do
+      node = make_node(0.60)
+      # lib/foo.ex has diff ranges but is absent from changed_files
+      changed = [%ChangedFile{path: "lib/other.ex", status: "modified"}]
+      diff_ranges = %{"lib/foo.ex" => [{5, 7}]}
+
+      blocks =
+        TopBlocks.build(make_results([node]), changed, lookup(), diff_line_ranges: diff_ranges)
+
+      assert blocks == []
+    end
+  end
+end
diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs
new file mode 100644
index 00000000..80f8575f
--- /dev/null
+++ b/test/codeqa/health_report_test.exs
@@ -0,0 +1,131 @@
+defmodule CodeQA.HealthReportTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.BlockImpactAnalyzer
+  alias CodeQA.Engine.Analyzer
+  alias CodeQA.Git.ChangedFile
+  alias CodeQA.HealthReport
+
+  describe "generate/2 output keys" do
+    @tag :slow
+    test "without base_results: pr_summary and codebase_delta are nil" do
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"}
+      results = Analyzer.analyze_codebase(files)
+      results = BlockImpactAnalyzer.analyze(results, files)
+
+      report = HealthReport.generate(results)
+
+      assert report.pr_summary == nil
+      assert report.codebase_delta == nil
+      assert is_list(report.top_blocks)
+      assert Map.has_key?(report, :overall_score)
+      assert Map.has_key?(report, :overall_grade)
+      assert Map.has_key?(report, :categories)
+      assert Map.has_key?(report, :top_issues)
+    end
+
+    @tag :slow
+    test "without base_results: top_blocks entries expose block keys and nil status" do
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"}
+      results = Analyzer.analyze_codebase(files)
+      results = BlockImpactAnalyzer.analyze(results, files)
+
+      report = HealthReport.generate(results)
+
+      # top_blocks may be empty for this tiny fixture; the checks below cover entry shape only
+      assert is_list(report.top_blocks)
+
+      Enum.each(report.top_blocks, fn block ->
+        assert Map.has_key?(block, :path)
+        assert Map.has_key?(block, :status)
+        assert Map.has_key?(block, :potentials)
+        assert Map.has_key?(block, :source)
+        assert block.status == nil
+      end)
+    end
+
+    @tag :slow
+    test "worst_offenders is always empty in categories" do
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"}
+      results = Analyzer.analyze_codebase(files)
+      results = BlockImpactAnalyzer.analyze(results, files)
+
+      report = HealthReport.generate(results)
+
+      Enum.each(report.categories, fn cat ->
+        assert Map.get(cat, :worst_offenders, []) == []
+      end)
+    end
+  end
+
+  describe "generate/2 with base_results" do
+    @tag :slow
+    test "pr_summary is populated" do
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"}
+
+      # Only the head results are passed through BlockImpactAnalyzer.
+      head_results = files |> Analyzer.analyze_codebase() |> BlockImpactAnalyzer.analyze(files)
+      base_results = Analyzer.analyze_codebase(files)
+
+      opts = [
+        base_results: base_results,
+        changed_files: [%ChangedFile{path: "lib/foo.ex", status: "modified"}]
+      ]
+
+      summary = HealthReport.generate(head_results, opts).pr_summary
+
+      assert %{
+               files_changed: 1,
+               files_added: 0,
+               files_modified: 1,
+               base_grade: _,
+               head_grade: _,
+               base_score: base,
+               head_score: head,
+               score_delta: delta,
+               blocks_flagged: flagged
+             } = summary
+
+      assert is_integer(base)
+      assert is_integer(head)
+      assert is_integer(flagged)
+      assert delta == head - base
+    end
+
+    @tag :slow
+    test "codebase_delta is populated" do
+      files = %{"lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"}
+
+      head_results = files |> Analyzer.analyze_codebase() |> BlockImpactAnalyzer.analyze(files)
+      base_results = Analyzer.analyze_codebase(files)
+
+      delta = HealthReport.generate(head_results, base_results: base_results).codebase_delta
+
+      # Each of the three sections must expose an :aggregate entry.
+      assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = delta
+    end
+
+    @tag :slow
+    test "top_blocks scoped to changed_files" do
+      files = %{
+        "lib/bar.ex" => "defmodule Bar do\n  def baz, do: :ok\nend\n",
+        "lib/foo.ex" => "defmodule Foo do\n  def bar, do: :ok\nend\n"
+      }
+
+      head_results = files |> Analyzer.analyze_codebase() |> BlockImpactAnalyzer.analyze(files)
+      base_results = Analyzer.analyze_codebase(files)
+
+      # Only lib/foo.ex is listed as changed; lib/bar.ex is analysed but not listed.
+      opts = [
+        base_results: base_results,
+        changed_files: [%ChangedFile{path: "lib/foo.ex", status: "modified"}]
+      ]
+
+      report = HealthReport.generate(head_results, opts)
+
+      reported_paths = for block <- report.top_blocks, do: block.path
+
+      refute "lib/bar.ex" in reported_paths
+    end
+  end
+end
diff --git a/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs b/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs
new file mode 100644
index 00000000..1c797761
--- /dev/null
+++ b/test/codeqa/metrics/codebase/near_duplicate_blocks_codebase_test.exs
@@ -0,0 +1,98 @@
+defmodule CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebaseTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.Analysis.FileContextServer
+  alias CodeQA.Metrics.Codebase.NearDuplicateBlocksCodebase
+
+  defp files(pairs), do: Enum.into(pairs, %{})
+  # Starts a FileContextServer and passes its pid to `fun`, returning fun's result.
+  defp with_pid(fun) do
+    {:ok, server} = FileContextServer.start_link()
+    fun.(server)
+  end
+
+  describe "name/0" do
+    # name/0 is expected to return exactly this identifier string.
+    test "returns near_duplicate_blocks_codebase",
+      do: assert(NearDuplicateBlocksCodebase.name() == "near_duplicate_blocks_codebase")
+  end
+
+  describe "analyze/2" do
+    test "returns all count keys d0..d8" do
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(files([{"a.ex", "x = 1\n"}]), file_context_pid: pid)
+
+        for d <- 0..8, do: assert(Map.has_key?(result, "near_dup_block_d#{d}"))
+      end)
+    end
+
+    test "returns all pairs keys d0..d8" do
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(files([{"a.ex", "x = 1\n"}]), file_context_pid: pid)
+
+        for d <- 0..8, do: assert(Map.has_key?(result, "near_dup_block_d#{d}_pairs"))
+      end)
+    end
+
+    test "zero counts for a single trivial file" do
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(files([{"a.ex", "x = 1\n"}]), file_context_pid: pid)
+
+        assert result["near_dup_block_d0"] == 0
+      end)
+    end
+
+    test "detects exact duplicate block across two files" do
+      block = "def foo\n  x = 1\nend\n"
+
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(
+            files([{"a.ex", block}, {"b.ex", block}]),
+            file_context_pid: pid
+          )
+
+        assert result["near_dup_block_d0"] >= 1
+      end)
+    end
+
+    test "pair sources include file paths" do
+      block = "def foo\n  x = 1\nend\n"
+
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(
+            files([{"a.ex", block}, {"b.ex", block}]),
+            file_context_pid: pid
+          )
+
+        all_pairs = result |> Map.values() |> Enum.filter(&is_list/1) |> List.flatten()
+
+        # The same block sits in both files, so at least one pair is expected; matching on a
+        # non-empty list keeps the key checks below from being skipped silently.
+        assert [pair | _] = all_pairs
+        assert Map.has_key?(pair, "source_a")
+        assert Map.has_key?(pair, "source_b")
+      end)
+    end
+
+    test "pairs list is capped at max_pairs_per_bucket" do
+      block = "def foo\n  x = 1\nend\n"
+      many_files = for i <- 1..5, do: {"file#{i}.ex", block}
+
+      with_pid(fn pid ->
+        result =
+          NearDuplicateBlocksCodebase.analyze(
+            files(many_files),
+            file_context_pid: pid,
+            near_duplicate_blocks: [max_pairs_per_bucket: 2]
+          )
+
+        pairs_lists = result |> Map.values() |> Enum.filter(&is_list/1)
+        assert Enum.all?(pairs_lists, &(length(&1) <= 2))
+      end)
+    end
+  end
+end
diff --git a/test/codeqa/metrics/codebase/similarity_test.exs b/test/codeqa/metrics/codebase/similarity_test.exs
new file mode 100644
index 00000000..d20dbf13
--- /dev/null
+++ b/test/codeqa/metrics/codebase/similarity_test.exs
@@ -0,0 +1,79 @@
+defmodule CodeQA.Metrics.Codebase.SimilarityTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.Metrics.Codebase.Similarity
+
+  describe "name/0" do
+    # name/0 is expected to return exactly this identifier string;
+    # nothing else about the module is exercised here.
+    test "returns similarity", do: assert(Similarity.name() == "similarity")
+  end
+
+  describe "analyze/2 with fewer than 2 files" do
+    test "empty codebase returns zero density" do
+      density = Similarity.analyze(%{})["cross_file_density"]
+      assert density == 0.0
+    end
+
+    test "single file returns zero density" do
+      density = Similarity.analyze(%{"a.ex" => "x = 1"})["cross_file_density"]
+      assert density == 0.0
+    end
+
+    test "fewer than 2 files returns empty ncd_pairs" do
+      ncd_pairs = Similarity.analyze(%{"a.ex" => "x = 1"})["ncd_pairs"]
+      assert ncd_pairs == %{}
+    end
+  end
+
+  describe "analyze/2 cross_file_density" do
+    test "returns a non-negative float" do
+      files = %{"a.ex" => "def foo, do: 1", "b.ex" => "def bar, do: 2"}
+      density = Similarity.analyze(files)["cross_file_density"]
+      assert is_float(density)
+      assert density >= 0.0
+    end
+
+    test "identical files produce higher density than dissimilar files" do
+      content = String.duplicate("def foo do\n  x = 1\nend\n", 20)
+      identical = %{"a.ex" => content, "b.ex" => content}
+      dissimilar = %{"a.ex" => content, "b.ex" => String.duplicate("zzz qqq rrr\n", 20)}
+
+      assert Similarity.analyze(identical)["cross_file_density"] >
+               Similarity.analyze(dissimilar)["cross_file_density"]
+    end
+
+    test "does not return ncd_pairs key by default" do
+      files = %{"a.ex" => "x = 1", "b.ex" => "y = 2"}
+      result = Similarity.analyze(files)
+      refute Map.has_key?(result, "ncd_pairs")
+    end
+  end
+
+  describe "analyze/2 with show_ncd: true" do
+    test "returns ncd_pairs key" do
+      # show_ncd: true is the only option passed.
+      result = Similarity.analyze(%{"a.ex" => "x = 1", "b.ex" => "y = 2"}, show_ncd: true)
+      assert is_map_key(result, "ncd_pairs")
+    end
+
+    test "identical files have ncd near 0" do
+      content = String.duplicate("def foo do\n  x = 1\nend\n", 10)
+      opts = [show_ncd: true, ncd_paths: ["a.ex"]]
+
+      pairs = Similarity.analyze(%{"a.ex" => content, "b.ex" => content}, opts)["ncd_pairs"]
+
+      # Every reported score has to stay below 0.2.
+      scores = for entry <- pairs |> Map.values() |> List.flatten(), do: entry["score"]
+      refute Enum.any?(scores, &(&1 >= 0.2))
+    end
+
+    test "ncd_paths restricts which files are compared" do
+      files = %{"a.ex" => "x = 1", "b.ex" => "y = 2", "c.ex" => "z = 3"}
+      pairs = Similarity.analyze(files, show_ncd: true, ncd_paths: ["a.ex"])["ncd_pairs"]
+
+      # a.ex was requested; b.ex and c.ex were not and must not show up as keys.
+      assert Map.has_key?(pairs, "a.ex")
+      for other <- ["b.ex", "c.ex"], do: refute(Map.has_key?(pairs, other))
+    end
+  end
+end
diff --git a/test/codeqa/metrics/file/bradford_test.exs b/test/codeqa/metrics/file/bradford_test.exs
new file mode 100644
index 00000000..db948d9e
--- /dev/null
+++ b/test/codeqa/metrics/file/bradford_test.exs
@@ -0,0 +1,122 @@
+defmodule CodeQA.Metrics.File.BradfordTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.Bradford
+
+  # Bradford zones are built by ranking lines densest-first, then walking down
+  # until each third of total tokens is accumulated:
+  #   zone 1 (core)   — fewest lines needed to reach 1/3 of all tokens
+  #   zone 2 (middle) — fewest additional lines to reach 2/3
+  #   zone 3 (tail)   — all remaining lines
+  #
+  # k1 = zone2_lines / zone1_lines  — how many times as many lines the middle needs as the core
+  # k2 = zone3_lines / zone2_lines  — how many times as many lines the tail needs as the middle
+  # k_ratio = k2 / k1               — > 1 means tail is more stretched; < 1 means core is extreme
+
+  defp ctx(source), do: Pipeline.build_file_context(source)
+  defp result(source), do: source |> ctx() |> Bradford.analyze()
+
+  describe "analyze/1 - edge cases" do
+    # Zero, one or two lines cannot be split into three zones; every value is then 0.0.
+    @all_zero %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0}
+
+    test "returns zeros for empty content" do
+      assert result("") == @all_zero
+    end
+
+    test "returns zeros for a single line" do
+      assert result("a b c") == @all_zero
+    end
+
+    test "returns zeros for two lines" do
+      assert result("a b c\nd e f") == @all_zero
+    end
+  end
+
+  describe "analyze/1 - uniform distribution" do
+    # Input: 9 lines of 3 tokens each, 27 tokens in total, so a third is 9
+    # ranked token counts: [3, 3, 3, 3, 3, 3, 3, 3, 3]
+    # zone 1 takes 3 lines (3+3+3 = 9 ≥ 9)
+    # zone 2 takes 3 lines (3+3+3 = 9 ≥ 9)
+    # zone 3 gets the 3 lines that are left
+    # k1 = 3/3 = 1.0  — the middle needs exactly as many lines as the core
+    # k2 = 3/3 = 1.0  — the tail needs exactly as many lines as the middle
+    # k_ratio = 1.0   — symmetric: no zone is more stretched than another
+    test "uniform file has k = 1" do
+      code = "a b c" |> List.duplicate(9) |> Enum.join("\n")
+      assert result(code) == %{"k_ratio" => 1.0, "k1" => 1.0, "k2" => 1.0}
+    end
+  end
+
+  describe "analyze/1 - Bradford concentration" do
+    # Fixture: one 10-token line, three 3-token lines and nine 1-token lines
+    # total = 10 + 9 + 9 = 28 tokens, so a third ≈ 9.333
+    # ranked counts: [10, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1]  (13 lines)
+    # zone 1: the 10-token line alone (10 ≥ 9.333)              → n1 = 1
+    # zone 2: 3+3+3 = 9 < 9.333, one more line gives 10 ≥ 9.333 → n2 = 4
+    # zone 3: the 8 lines that remain                            → n3 = 8
+    # k1 = 4/1 = 4.0  — the middle takes 4 times as many lines as the dense core
+    # k2 = 8/4 = 2.0  — the tail takes 2 times as many lines as the middle
+    # k_ratio = 2.0 / 4.0 = 0.5 — the core-to-middle step (4×) exceeds the
+    #                    middle-to-tail step (2×): concentration sits at the very top
+    test "concentrated file produces k1=4.0, k2=2.0, k_ratio=0.5" do
+      lines =
+        ["a b c d e f g h i j"] ++ List.duplicate("a b c", 3) ++ List.duplicate("a", 9)
+      metrics = lines |> Enum.join("\n") |> result()
+
+      expected = %{
+        # middle zone: 4 lines against 1 core line
+        "k1" => 4.0,
+        # tail zone: 8 lines against 4 middle lines
+        "k2" => 2.0,
+        # k2 / k1, below 1 because k2 < k1
+        "k_ratio" => 0.5
+      }
+      assert metrics == expected
+    end
+
+    test "concentrated file has higher k1 than uniform" do
+      # k1 compares the middle zone's line count with the core's, which makes it the
+      # concentration signal checked here. A uniform file scores 1.0 on it.
+      uniform = "a b c" |> List.duplicate(9) |> Enum.join("\n")
+
+      concentrated =
+        (["a b c d e f g h i j"] ++ List.duplicate("a b c", 3) ++ List.duplicate("a", 9))
+        |> Enum.join("\n")
+
+      concentrated_k1 = result(concentrated)["k1"]
+      assert concentrated_k1 > result(uniform)["k1"]
+    end
+
+    test "k_ratio < 1 when the core is more extreme than the tail" do
+      # k_ratio = k2 / k1
+      # k_ratio < 1  →  k2 < k1  →  the core-to-middle multiplier is larger than the
+      #                               middle-to-tail multiplier: the spike sits at
+      #                               the very top of the rank list
+      # k_ratio > 1  →  k2 > k1  →  the tail is stretched more than the core jump,
+      #                               typical of many medium lines plus a huge sparse tail
+      code =
+        [
+          "a b c d e f g h i j",
+          "a b c",
+          "a b c",
+          "a b c",
+          "a",
+          "a",
+          "a",
+          "a",
+          "a",
+          "a",
+          "a",
+          "a",
+          "a"
+        ]
+        |> Enum.join("\n")
+
+      ratio = result(code)["k_ratio"]
+
+      assert ratio < 1.0
+    end
+  end
+end
diff --git a/test/codeqa/metrics/branching_test.exs b/test/codeqa/metrics/file/branching_test.exs
similarity index 79%
rename from test/codeqa/metrics/branching_test.exs
rename to test/codeqa/metrics/file/branching_test.exs
index d7947800..c4749b3f 100644
--- a/test/codeqa/metrics/branching_test.exs
+++ b/test/codeqa/metrics/file/branching_test.exs
@@ -1,8 +1,8 @@
-defmodule CodeQA.Metrics.BranchingTest do
+defmodule CodeQA.Metrics.File.BranchingTest do
   use ExUnit.Case, async: true
 
-  alias CodeQA.Metrics.Branching
-  alias CodeQA.Pipeline
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.Branching
 
   defp ctx(code), do: Pipeline.build_file_context(code)
   defp density(code), do: Branching.analyze(ctx(code))["branching_density"]
@@ -27,7 +27,9 @@ defmodule CodeQA.Metrics.BranchingTest do
     for keyword <- Branching.branching_keywords() |> MapSet.to_list() |> Enum.sort() do
       test "counts #{keyword} as a branching token" do
         code = "line_before\n#{unquote(keyword)} condition\nline_after"
-        assert density(code) > 0.0, "expected '#{unquote(keyword)}' to be counted as a branching token"
+
+        assert density(code) > 0.0,
+               "expected '#{unquote(keyword)}' to be counted as a branching token"
       end
     end
   end
diff --git a/test/codeqa/metrics/file/brevity_test.exs b/test/codeqa/metrics/file/brevity_test.exs
new file mode 100644
index 00000000..4f65fa05
--- /dev/null
+++ b/test/codeqa/metrics/file/brevity_test.exs
@@ -0,0 +1,44 @@
+defmodule CodeQA.Metrics.File.BrevityTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.Brevity
+
+  defp ctx(code), do: Pipeline.build_file_context(code)
+  defp result(code), do: Brevity.analyze(ctx(code))
+
+  describe "analyze/1 - edge cases" do
+    test "returns zeros for empty content" do
+      assert result("") == %{"correlation" => 0.0, "slope" => 0.0, "sample_size" => 0}
+    end
+
+    test "returns zeros for fewer than 3 unique tokens" do
+      assert result("a a b b")["correlation"] == 0.0
+      assert result("a a b b")["slope"] == 0.0
+    end
+  end
+
+  describe "analyze/1 - brevity law" do
+    test "negative correlation when shorter tokens are more frequent" do
+      # x(len=1): 10×, to(len=2): 3×, longname(len=8): 1×
+      code = String.duplicate("x ", 10) <> String.duplicate("to ", 3) <> "longname"
+      assert result(code)["correlation"] < 0.0
+    end
+
+    test "positive correlation when longer tokens are more frequent" do
+      # longword(len=8): 4×, a(len=1): 1×, b(len=1): 1×
+      code = String.duplicate("longword ", 4) <> "a b"
+      assert result(code)["correlation"] > 0.0
+    end
+
+    test "sample_size reflects unique token count" do
+      code = "alpha beta gamma alpha beta"
+      assert result(code)["sample_size"] == 3
+    end
+
+    test "slope is negative when brevity law holds" do
+      code = String.duplicate("x ", 10) <> String.duplicate("to ", 3) <> "longname"
+      assert result(code)["slope"] < 0.0
+    end
+  end
+end
diff --git a/test/codeqa/metrics/function_metrics_test.exs b/test/codeqa/metrics/file/function_metrics_test.exs
similarity index 78%
rename from test/codeqa/metrics/function_metrics_test.exs
rename to test/codeqa/metrics/file/function_metrics_test.exs
index caa1f6bf..7f05b906 100644
--- a/test/codeqa/metrics/function_metrics_test.exs
+++ b/test/codeqa/metrics/file/function_metrics_test.exs
@@ -1,8 +1,8 @@
-defmodule CodeQA.Metrics.FunctionMetricsTest do
+defmodule CodeQA.Metrics.File.FunctionMetricsTest do
   use ExUnit.Case, async: true
 
-  alias CodeQA.Metrics.FunctionMetrics
-  alias CodeQA.Pipeline
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.FunctionMetrics
 
   defp ctx(code), do: Pipeline.build_file_context(code)
   defp analyze(code), do: FunctionMetrics.analyze(ctx(code))
@@ -50,8 +50,10 @@ defmodule CodeQA.Metrics.FunctionMetricsTest do
     for keyword <- FunctionMetrics.func_keywords() do
       test "detects function starting with #{keyword}" do
         code = "#{unquote(keyword)} my_func(x) {\n  return x\n}"
-        result = FunctionMetrics.analyze(CodeQA.Pipeline.build_file_context(code))
-        assert result["avg_function_lines"] > 0, "expected '#{unquote(keyword)}' to be detected as function start"
+        result = FunctionMetrics.analyze(Pipeline.build_file_context(code))
+
+        assert result["avg_function_lines"] > 0,
+               "expected '#{unquote(keyword)}' to be detected as function start"
       end
     end
   end
@@ -60,8 +62,10 @@ defmodule CodeQA.Metrics.FunctionMetricsTest do
     for modifier <- FunctionMetrics.access_modifiers() do
       test "detects method starting with #{modifier}" do
         code = "#{unquote(modifier)} void MyMethod() {\n  return;\n}"
-        result = FunctionMetrics.analyze(CodeQA.Pipeline.build_file_context(code))
-        assert result["avg_function_lines"] > 0, "expected '#{unquote(modifier)}' access modifier to trigger method detection"
+        result = FunctionMetrics.analyze(Pipeline.build_file_context(code))
+
+        assert result["avg_function_lines"] > 0,
+               "expected '#{unquote(modifier)}' access modifier to trigger method detection"
       end
     end
   end
diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs
new file mode 100644
index 00000000..cb10540c
--- /dev/null
+++ b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs
@@ -0,0 +1,62 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.NearDuplicateBlocksFile
+
+  defp ctx(code, path \\ "test.ex") do
+    Pipeline.build_file_context(code, path: path)
+  end
+
+  describe "name/0" do
+    test "returns near_duplicate_blocks_file" do
+      assert NearDuplicateBlocksFile.name() == "near_duplicate_blocks_file"
+    end
+  end
+
+  describe "keys/0" do
+    test "returns 11 keys: block_count, sub_block_count, and d0..d8" do
+      keys = NearDuplicateBlocksFile.keys()
+      assert length(keys) == 11
+      assert "block_count" in keys
+      assert "sub_block_count" in keys
+      assert "near_dup_block_d0" in keys
+      assert "near_dup_block_d8" in keys
+    end
+  end
+
+  describe "analyze/1 with nil blocks" do
+    test "returns zeroed map with all keys when blocks is nil" do
+      ctx = Pipeline.build_file_context("x = 1\n", skip_structural: true)
+      result = NearDuplicateBlocksFile.analyze(ctx)
+      assert Map.has_key?(result, "block_count")
+      assert Map.has_key?(result, "sub_block_count")
+      for d <- 0..8, do: assert(Map.has_key?(result, "near_dup_block_d#{d}"))
+      for {_key, value} <- result, do: assert(value == 0)
+    end
+  end
+
+  describe "analyze/1" do
+    test "returns a map with all expected keys" do
+      result = NearDuplicateBlocksFile.analyze(ctx("x = 1\n"))
+      assert Map.has_key?(result, "block_count")
+      assert Map.has_key?(result, "sub_block_count")
+      for d <- 0..8, do: assert(Map.has_key?(result, "near_dup_block_d#{d}"))
+    end
+
+    test "no _pairs keys in output" do
+      result = NearDuplicateBlocksFile.analyze(ctx("x = 1\n"))
+      refute Enum.any?(Map.keys(result), &String.ends_with?(&1, "_pairs"))
+    end
+
+    test "detects exact duplicate blocks at d0" do
+      block = "def foo\n  x = 1\nend\n"
+      result = NearDuplicateBlocksFile.analyze(ctx(block <> "\n\n" <> block))
+      assert result["near_dup_block_d0"] >= 1
+    end
+
+    test "block_count is positive for non-trivial file" do
+      result = NearDuplicateBlocksFile.analyze(ctx("def foo\n  x\nend\n"))
+      assert result["block_count"] >= 1
+    end
+  end
+end
diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_test.exs
new file mode 100644
index 00000000..a65e201e
--- /dev/null
+++ b/test/codeqa/metrics/file/near_duplicate_blocks_test.exs
@@ -0,0 +1,227 @@
+defmodule CodeQA.Metrics.File.NearDuplicateBlocksTest do
+  use ExUnit.Case, async: true
+  alias CodeQA.Metrics.File.NearDuplicateBlocks, as: NDB
+
+  describe "token_edit_distance/2" do
+    test "identical sequences have distance 0" do
+      assert NDB.token_edit_distance(~w[a b c], ~w[a b c]) == 0
+    end
+
+    test "empty vs non-empty equals length of other" do
+      assert NDB.token_edit_distance([], ~w[a b c]) == 3
+      assert NDB.token_edit_distance(~w[a b c], []) == 3
+    end
+
+    test "single substitution" do
+      assert NDB.token_edit_distance(~w[a b c], ~w[a x c]) == 1
+    end
+  end
+
+  describe "find_pairs/2 idf_max_freq option" do
+    defp make_block(tokens, label) do
+      %CodeQA.AST.Enrichment.Node{
+        label: label,
+        tokens: Enum.map(tokens, &%{kind: &1}),
+        line_count: length(tokens),
+        children: []
+      }
+    end
+
+    test "exact duplicates are still detected when all bigrams are high-frequency" do
+      # 30 blocks all sharing bigram [end, nil] → pruned by IDF
+      # Two additional identical blocks → should still match via exact hash index (d0)
+      common = Enum.map(1..30, fn i -> make_block(~w[end nil common_#{i}], "file:#{i}") end)
+      dup = make_block(~w[end nil special unique_token], "dup:1")
+      dup2 = make_block(~w[end nil special unique_token], "dup:2")
+
+      result = NDB.find_pairs(common ++ [dup, dup2], idf_max_freq: 0.05)
+
+      assert result[0].count >= 1
+    end
+
+    test "near-duplicates are detected via non-pruned unique bigrams" do
+      # 50 blocks all sharing [end, nil] → pruned
+      # Two near-duplicates sharing unique bigrams [nil, special], [special, alpha] → not pruned
+      common = Enum.map(1..50, fn i -> make_block(~w[end nil common_#{i}], "common:#{i}") end)
+      near_a = make_block(~w[end nil special alpha beta gamma], "near:1")
+      near_b = make_block(~w[end nil special alpha beta delta], "near:2")
+
+      result = NDB.find_pairs(common ++ [near_a, near_b], idf_max_freq: 0.05)
+
+      total = Map.values(result) |> Enum.map(& &1.count) |> Enum.sum()
+      assert total >= 1
+    end
+  end
+
+  describe "analyze_from_blocks/2 sub_block_count" do
+    test "sub_block_count equals sum of children counts across all blocks" do
+      child = make_block(["x"], "child:1")
+
+      parent = %CodeQA.AST.Enrichment.Node{
+        label: "a:1",
+        tokens: Enum.map(["def", "<ID>", "end"], &%{kind: &1}),
+        line_count: 3,
+        children: [child, child]
+      }
+
+      solo = make_block(["y", "z", "w", "v", "u"], "b:1")
+      result = NDB.analyze_from_blocks([parent, solo], [])
+      assert result["sub_block_count"] == 2
+    end
+
+    test "sub_block_count is zero when no block has children" do
+      a = make_block(["x", "y", "z", "w", "v"], "a:1")
+      b = make_block(["x", "y", "z", "w", "Q"], "b:1")
+      result = NDB.analyze_from_blocks([a, b], [])
+      assert result["sub_block_count"] == 0
+    end
+  end
+
+  describe "canonical_values (via find_pairs)" do
+    test "blocks identical except for leading/trailing newline tokens are detected as d0 exact duplicates" do
+      core = ["def", "<ID>", "end"]
+      trimmed = make_block(core, "a:1")
+      with_nl = make_block(["<NL>"] ++ core ++ ["<NL>"], "b:1")
+      result = NDB.find_pairs([trimmed, with_nl], [])
+      assert Map.get(result, 0, %{count: 0}).count >= 1
+    end
+
+    test "blocks identical except for leading/trailing whitespace tokens are detected as d0 exact duplicates" do
+      core = ["def", "<ID>", "end"]
+      trimmed = make_block(core, "a:1")
+      with_ws = make_block(["<WS>"] ++ core ++ ["<WS>"], "b:1")
+      result = NDB.find_pairs([trimmed, with_ws], [])
+      assert Map.get(result, 0, %{count: 0}).count >= 1
+    end
+  end
+
+  describe "find_pairs/2 near-boundary behavior" do
+    test "pair at exactly d8 boundary (50% edit distance) is detected" do
+      # 10 tokens each, 5 substitutions = exactly 50% edit distance → d8
+      # First 5 tokens identical → 4 shared bigrams, passes shingle filter
+      a = ~w[a b c d e f g h i j]
+      b = ~w[a b c d e X Y Z W V]
+      result = NDB.find_pairs([make_block(a, "x:1"), make_block(b, "x:2")], [])
+      total = Map.values(result) |> Enum.map(& &1.count) |> Enum.sum()
+      assert total >= 1
+    end
+
+    test "pair just over d8 boundary (>50% edit distance) is not reported" do
+      # a: 10 tokens, b: 11 tokens — first 5 identical (4 shared bigrams, passes shingle),
+      # abs(10-11)=1 passes token-length guard, but edit distance = 6 (60%) → nil
+      a = ~w[a b c d e f g h i j]
+      b = ~w[a b c d e X Y Z W V U]
+      result = NDB.find_pairs([make_block(a, "x:1"), make_block(b, "x:2")], [])
+      total = Map.values(result) |> Enum.map(& &1.count) |> Enum.sum()
+      assert total == 0
+    end
+  end
+
+  describe "percent_bucket/2" do
+    test "returns 0 for edit distance 0" do
+      assert NDB.percent_bucket(0, 100) == 0
+    end
+
+    test "returns 1 for 1% difference (within 0–5%)" do
+      assert NDB.percent_bucket(1, 100) == 1
+    end
+
+    test "returns 1 for 5% difference (boundary)" do
+      assert NDB.percent_bucket(5, 100) == 1
+    end
+
+    test "returns 2 for 6% difference" do
+      assert NDB.percent_bucket(6, 100) == 2
+    end
+
+    test "returns 8 for 50% difference" do
+      assert NDB.percent_bucket(50, 100) == 8
+    end
+
+    test "returns nil for >50% difference" do
+      assert NDB.percent_bucket(51, 100) == nil
+    end
+
+    test "returns nil when min_token_count is 0" do
+      assert NDB.percent_bucket(0, 0) == nil
+    end
+
+    test "returns 7 for exactly 40% (d7 upper boundary)" do
+      assert NDB.percent_bucket(40, 100) == 7
+    end
+
+    test "returns 8 for 41% (just above d7 boundary, in d8)" do
+      assert NDB.percent_bucket(41, 100) == 8
+    end
+
+    test "returns 7 for mid-range d7 (35%)" do
+      assert NDB.percent_bucket(35, 100) == 7
+    end
+  end
+
+  describe "analyze/2" do
+    test "returns all expected count keys" do
+      result = NDB.analyze([{"a.ex", "x = 1\n"}], [])
+
+      for d <- 0..8 do
+        assert Map.has_key?(result, "near_dup_block_d#{d}")
+      end
+    end
+
+    test "returns block_count and sub_block_count" do
+      result = NDB.analyze([{"a.ex", "def foo\n  x\nend\n"}], [])
+      assert Map.has_key?(result, "block_count")
+      assert Map.has_key?(result, "sub_block_count")
+    end
+
+    test "block_count reflects detected blocks" do
+      code = "def foo\n  x\nend\n\n\ndef bar\n  y\nend\n"
+      result = NDB.analyze([{"a.ex", code}], [])
+      assert result["block_count"] >= 2
+    end
+
+    test "detects exact duplicate blocks at d0" do
+      # Two identical function-like blocks separated by blank lines
+      block = "def foo\n  x = 1\nend\n"
+      result = NDB.analyze([{"a.ex", block <> "\n\n" <> block}], [])
+      assert result["near_dup_block_d0"] >= 1
+    end
+
+    test "detects near-duplicate blocks (single token difference)" do
+      block_a = "def foo\n  x = 1\nend\n"
+      # one identifier differs
+      block_b = "def bar\n  x = 1\nend\n"
+      result = NDB.analyze([{"a.ex", block_a <> "\n\n" <> block_b}], [])
+      near_dup_total = Enum.sum(for d <- 0..8, do: result["near_dup_block_d#{d}"])
+      assert near_dup_total >= 1
+    end
+
+    test "cross-file detection: same block in two files" do
+      block = "def foo\n  x = 1\nend\n"
+      result = NDB.analyze([{"a.ex", block}, {"b.ex", block}], [])
+      assert result["near_dup_block_d0"] >= 1
+    end
+
+    test "returns only count keys (no pairs keys)" do
+      result = NDB.analyze([{"a.ex", "x = 1\n"}], [])
+      refute Enum.any?(Map.keys(result), &String.ends_with?(&1, "_pairs"))
+    end
+
+    test "include_pairs: true adds *_pairs keys to the result" do
+      # Counterpart of the "no pairs keys" test above: pair data is opt-in via analyze/2 opts.
+      block = "def foo\n  x = 1\nend\n"
+      result = NDB.analyze([{"a.ex", block <> "\n\n" <> block}], include_pairs: true)
+      assert Enum.any?(Map.keys(result), &String.ends_with?(&1, "_pairs"))
+    end
+
+    test "pair sources include file:line format" do
+      block = "def foo\n  x = 1\nend\n"
+      result = NDB.analyze([{"a.ex", block <> "\n\n" <> block}], include_pairs: true)
+      pairs = result["near_dup_block_d0_pairs"]
+      assert pairs != []
+      [first | _] = pairs
+      assert first["source_a"] =~ ~r/a\.ex:\d+/
+      assert first["source_b"] =~ ~r/a\.ex:\d+/
+    end
+  end
+end
diff --git a/test/codeqa/metrics/file/rfc_test.exs b/test/codeqa/metrics/file/rfc_test.exs
new file mode 100644
index 00000000..19716f3a
--- /dev/null
+++ b/test/codeqa/metrics/file/rfc_test.exs
@@ -0,0 +1,97 @@
+defmodule CodeQA.Metrics.File.RFCTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Engine.Pipeline
+  alias CodeQA.Metrics.File.RFC
+
+  defp ctx(code), do: Pipeline.build_file_context(code)
+  defp result(code), do: RFC.analyze(ctx(code))
+
+  describe "analyze/1" do
+    test "returns zero counts for empty content" do
+      r = result("")
+      assert r["rfc_count"] == 0
+      assert r["rfc_density"] == 0.0
+    end
+
+    test "counts function definitions with no calls" do
+      code = """
+      def foo do
+        1
+      end
+      """
+
+      r = result(code)
+      assert r["function_def_count"] == 1
+      assert r["distinct_call_count"] == 0
+      assert r["rfc_count"] == 1
+    end
+
+    test "counts distinct call targets" do
+      code = """
+      def foo do
+        bar()
+        baz()
+        bar()
+      end
+      """
+
+      r = result(code)
+      # bar and baz are distinct call targets (bar appears twice but counts once)
+      assert r["distinct_call_count"] == 2
+      assert r["function_def_count"] == 1
+      assert r["rfc_count"] == 3
+    end
+
+    test "rfc_density is rfc_count normalized by line count" do
+      code = """
+      def foo do
+        bar()
+        baz()
+      end
+      """
+
+      c = ctx(code)
+      r = RFC.analyze(c)
+      assert r["rfc_density"] == Float.round(r["rfc_count"] / c.line_count, 4)
+    end
+
+    test "file with no functions and no calls returns all zeros" do
+      r = result("x = 1\ny = 2")
+      assert r["rfc_count"] == 0
+      assert r["function_def_count"] == 0
+      assert r["distinct_call_count"] == 0
+    end
+
+    test "file with only calls and no function definitions" do
+      code = "foo()\nbar()\nbaz()"
+      r = result(code)
+      assert r["function_def_count"] == 0
+      assert r["distinct_call_count"] == 3
+      assert r["rfc_count"] == 3
+    end
+
+    test "duplicate calls are deduplicated" do
+      code = "foo()\nfoo()\nfoo()"
+      r = result(code)
+      assert r["distinct_call_count"] == 1
+    end
+
+    test "multiple function definitions are counted" do
+      code = """
+      def foo do
+        bar()
+      end
+
+      def baz do
+        qux()
+      end
+      """
+
+      r = result(code)
+      assert r["function_def_count"] == 2
+      assert r["distinct_call_count"] == 2
+      assert r["rfc_count"] == 4
+    end
+  end
+end
diff --git a/test/codeqa/metrics/file/separator_counts_test.exs b/test/codeqa/metrics/file/separator_counts_test.exs
new file mode 100644
index 00000000..2ac2a063
--- /dev/null
+++ b/test/codeqa/metrics/file/separator_counts_test.exs
@@ -0,0 +1,54 @@
+defmodule CodeQA.Metrics.File.SeparatorCountsTest do
+  use ExUnit.Case, async: true
+
+  alias CodeQA.Metrics.File.SeparatorCounts
+
+  describe "name/0" do
+    test "returns separator_counts" do
+      assert SeparatorCounts.name() == "separator_counts"
+    end
+  end
+
+  describe "keys/0" do
+    test "returns four count keys" do
+      assert SeparatorCounts.keys() == [
+               "underscore_count",
+               "hyphen_count",
+               "slash_count",
+               "dot_count"
+             ]
+    end
+  end
+
+  describe "analyze/1" do
+    test "counts separators in source code" do
+      content = "def my_func(a_b) do\n  File.read(\"path/to/file.txt\")\nend"
+
+      result = SeparatorCounts.analyze(%{content: content})
+
+      assert result["underscore_count"] == 2
+      assert result["slash_count"] == 2
+      assert result["dot_count"] == 2
+      assert result["hyphen_count"] == 0
+    end
+
+    test "counts hyphens" do
+      content = "some-component {\n  background-color: red;\n}"
+
+      result = SeparatorCounts.analyze(%{content: content})
+
+      assert result["hyphen_count"] == 2
+    end
+
+    test "returns zeros for empty content" do
+      result = SeparatorCounts.analyze(%{content: ""})
+
+      assert result == %{
+               "underscore_count" => 0,
+               "hyphen_count" => 0,
+               "slash_count" => 0,
+               "dot_count" => 0
+             }
+    end
+  end
+end
diff --git a/test/fixtures/sample.ex b/test/fixtures/sample.ex
index 16d90fc4..625d9cb2 100644
--- a/test/fixtures/sample.ex
+++ b/test/fixtures/sample.ex
@@ -1,4 +1,5 @@
 defmodule Sample do
+  @moduledoc false
   def hello do
     IO.puts("Hello, world!")
   end
diff --git a/test/support/counter_signal.ex b/test/support/counter_signal.ex
new file mode 100644
index 00000000..7ffb5d81
--- /dev/null
+++ b/test/support/counter_signal.ex
@@ -0,0 +1,19 @@
+defmodule CodeQA.Support.CounterSignal do
+  @moduledoc false
+  defstruct []
+end
+
+defimpl CodeQA.AST.Parsing.Signal, for: CodeQA.Support.CounterSignal do
+  def source(_), do: CodeQA.Support.CounterSignal
+  def group(_), do: :test
+  def init(_, _), do: %{idx: 0}
+
+  def emit(_, {_prev, token, _next}, %{idx: i} = state) do
+    emissions =
+      if token.kind == "<ID>",
+        do: MapSet.new([{:id_seen, i}]),
+        else: MapSet.new()
+
+    {emissions, %{state | idx: i + 1}}
+  end
+end
diff --git a/test/support/fixtures/cpp/observer_pattern.ex b/test/support/fixtures/cpp/observer_pattern.ex
new file mode 100644
index 00000000..b536d358
--- /dev/null
+++ b/test/support/fixtures/cpp/observer_pattern.ex
@@ -0,0 +1,71 @@
+defmodule Test.Fixtures.Cpp.ObserverPattern do
+  @moduledoc false
+  use Test.LanguageFixture, language: "cpp observer_pattern"
+
+  @code ~S'''
+  #include <vector>
+  #include <functional>
+
+  template<typename Event>
+  class Observer {
+  public:
+    virtual void onEvent(const Event& event) = 0;
+
+    virtual ~Observer() = default;
+  };
+
+  template<typename Event>
+  class Subject {
+    std::vector<Observer<Event>*> observers;
+
+  public:
+    void attach(Observer<Event>* observer) { observers.push_back(observer); }
+
+    void detach(Observer<Event>* observer) {
+      observers.erase(
+        std::remove(observers.begin(), observers.end(), observer),
+        observers.end()
+      );
+    }
+
+    void notify(const Event& event) {
+      for (auto* obs : observers) obs->onEvent(event);
+    }
+  };
+
+  struct StockEvent {
+    std::string symbol;
+    double price;
+    double previousPrice;
+
+    double change() const { return price - previousPrice; }
+
+    double changePercent() const { return previousPrice > 0 ? change() / previousPrice * 100.0 : 0.0; }
+  };
+
+  class StockTicker : public Subject<StockEvent> {
+    std::map<std::string, double> prices;
+
+  public:
+    void updatePrice(const std::string& symbol, double newPrice) {
+      double prev = prices.count(symbol) ? prices[symbol] : newPrice;
+      prices[symbol] = newPrice;
+      notify(StockEvent{symbol, newPrice, prev});
+    }
+
+    double getPrice(const std::string& symbol) const {
+      auto it = prices.find(symbol);
+      return it != prices.end() ? it->second : 0.0;
+    }
+  };
+
+  class AlertObserver : public Observer<StockEvent> {
+    double threshold;
+
+  public:
+    explicit AlertObserver(double threshold) : threshold(threshold) {}
+
+    void onEvent(const StockEvent& event) override {}
+  };
+  '''
+end
diff --git a/test/support/fixtures/cpp/smart_pointer.ex b/test/support/fixtures/cpp/smart_pointer.ex
new file mode 100644
index 00000000..6e91c9b9
--- /dev/null
+++ b/test/support/fixtures/cpp/smart_pointer.ex
@@ -0,0 +1,87 @@
+defmodule Test.Fixtures.Cpp.SmartPointer do
+  @moduledoc false
+  use Test.LanguageFixture, language: "cpp smart_pointer"
+
+  @code ~S'''
+  #include <memory>
+  #include <functional>
+
+  template<typename T>
+  class UniquePtr {
+    T* ptr;
+    std::function<void(T*)> deleter;
+
+  public:
+    explicit UniquePtr(T* p = nullptr, std::function<void(T*)> d = std::default_delete<T>())
+      : ptr(p), deleter(d) {}
+
+    ~UniquePtr() { if (ptr) deleter(ptr); }
+
+    UniquePtr(const UniquePtr&) = delete;
+
+    UniquePtr& operator=(const UniquePtr&) = delete;
+
+    UniquePtr(UniquePtr&& other) noexcept : ptr(other.ptr), deleter(std::move(other.deleter)) { other.ptr = nullptr; }
+
+    UniquePtr& operator=(UniquePtr&& other) noexcept {
+      if (this != &other) { if (ptr) deleter(ptr); ptr = other.ptr; other.ptr = nullptr; }
+      return *this;
+    }
+
+    T* get() const { return ptr; }
+
+    T& operator*() const { return *ptr; }
+
+    T* operator->() const { return ptr; }
+
+    explicit operator bool() const { return ptr != nullptr; }
+
+    T* release() { T* p = ptr; ptr = nullptr; return p; }
+
+    void reset(T* p = nullptr) { if (ptr) deleter(ptr); ptr = p; }
+  };
+
+  template<typename T>
+  struct SharedControl {
+    T* ptr;
+    int refCount;
+
+    SharedControl(T* p) : ptr(p), refCount(1) {}
+
+    ~SharedControl() { delete ptr; }
+  };
+
+  template<typename T>
+  class SharedPtr {
+    SharedControl<T>* ctrl;
+
+  public:
+    explicit SharedPtr(T* p = nullptr) : ctrl(p ? new SharedControl<T>(p) : nullptr) {}
+
+    SharedPtr(const SharedPtr& other) : ctrl(other.ctrl) { if (ctrl) ++ctrl->refCount; }
+
+    SharedPtr& operator=(const SharedPtr& other) {
+      if (this != &other) { release(); ctrl = other.ctrl; if (ctrl) ++ctrl->refCount; }
+      return *this;
+    }
+
+    ~SharedPtr() { release(); }
+
+    T* get() const { return ctrl ? ctrl->ptr : nullptr; }
+
+    T& operator*() const { return *ctrl->ptr; }
+
+    T* operator->() const { return ctrl->ptr; }
+
+    int useCount() const { return ctrl ? ctrl->refCount : 0; }
+
+  private:
+    void release() { if (ctrl && --ctrl->refCount == 0) { delete ctrl; ctrl = nullptr; } }
+  };
+
+  template<typename T, typename... Args>
+  UniquePtr<T> makeUnique(Args&&... args) {
+    return UniquePtr<T>(new T(std::forward<Args>(args)...));
+  }
+  '''
+end
diff --git a/test/support/fixtures/cpp/template_container.ex b/test/support/fixtures/cpp/template_container.ex
new file mode 100644
index 00000000..6ff7bdb1
--- /dev/null
+++ b/test/support/fixtures/cpp/template_container.ex
@@ -0,0 +1,90 @@
+defmodule Test.Fixtures.Cpp.TemplateContainer do
+  @moduledoc false
+  use Test.LanguageFixture, language: "cpp template_container"
+
+  @code ~S'''
+  #include <stdexcept>
+
+  template<typename T>
+  class Stack {
+    T* data;
+    int capacity;
+    int topIdx;
+
+  public:
+    explicit Stack(int cap = 16) : capacity(cap), topIdx(-1) { data = new T[cap]; }
+
+    ~Stack() { delete[] data; }
+
+    Stack(const Stack&) = delete;
+
+    Stack& operator=(const Stack&) = delete;
+
+    void push(const T& value) {
+      if (topIdx + 1 >= capacity) throw std::overflow_error("Stack overflow");
+      data[++topIdx] = value;
+    }
+
+    T pop() {
+      if (empty()) throw std::underflow_error("Stack underflow");
+      return data[topIdx--];
+    }
+
+    T& top() {
+      if (empty()) throw std::underflow_error("Stack is empty");
+      return data[topIdx];
+    }
+
+    bool empty() const { return topIdx < 0; }
+
+    int size() const { return topIdx + 1; }
+
+    int maxCapacity() const { return capacity; }
+  };
+
+  template<typename T>
+  class Queue {
+    T* data;
+    int capacity;
+    int head;
+    int tail;
+    int count;
+
+  public:
+    explicit Queue(int cap = 16) : capacity(cap), head(0), tail(0), count(0) { data = new T[cap]; }
+
+    ~Queue() { delete[] data; }
+
+    void enqueue(const T& value) {
+      if (count >= capacity) throw std::overflow_error("Queue overflow");
+      data[tail] = value;
+      tail = (tail + 1) % capacity;
+      ++count;
+    }
+
+    T dequeue() {
+      if (empty()) throw std::underflow_error("Queue underflow");
+      T value = data[head];
+      head = (head + 1) % capacity;
+      --count;
+      return value;
+    }
+
+    T& front() { if (empty()) throw std::underflow_error("Queue is empty"); return data[head]; }
+
+    bool empty() const { return count == 0; }
+
+    int size() const { return count; }
+  };
+
+  template<typename T>
+  struct Pair {
+    T first;
+    T second;
+
+    Pair(T a, T b) : first(a), second(b) {}
+
+    bool operator==(const Pair& other) const { return first == other.first && second == other.second; }
+  };
+  '''
+end
diff --git a/test/support/fixtures/csharp/async_task_manager.ex b/test/support/fixtures/csharp/async_task_manager.ex
new file mode 100644
index 00000000..6dd9db7f
--- /dev/null
+++ b/test/support/fixtures/csharp/async_task_manager.ex
@@ -0,0 +1,67 @@
+defmodule Test.Fixtures.CSharp.AsyncTaskManager do
+  @moduledoc false
+  use Test.LanguageFixture, language: "csharp async_task_manager"
+
+  @code ~S'''
+  // TaskManagement namespace — async task scheduling with bounded concurrency
+  using System.Threading.Tasks;
+  using System.Collections.Generic;
+
+  interface ITaskScheduler
+  {
+    Task ScheduleAsync(System.Func<Task> work, System.Threading.CancellationToken ct);
+    Task<T> ScheduleAsync<T>(System.Func<Task<T>> work, System.Threading.CancellationToken ct);
+  }
+
+  interface IWorkQueue
+  {
+    void Enqueue(System.Func<Task> work);
+    Task DrainAsync(System.Threading.CancellationToken ct);
+    int Count { get; }
+  }
+
+  class BoundedTaskScheduler : ITaskScheduler
+  {
+    private readonly System.Threading.SemaphoreSlim semaphore;
+
+    public BoundedTaskScheduler(int maxConcurrency)
+    {
+      semaphore = new System.Threading.SemaphoreSlim(maxConcurrency, maxConcurrency);
+    }
+
+    public async Task ScheduleAsync(System.Func<Task> work, System.Threading.CancellationToken ct)
+    {
+      await semaphore.WaitAsync(ct);
+      try { await work(); }
+      finally { semaphore.Release(); }
+    }
+
+    public async Task<T> ScheduleAsync<T>(System.Func<Task<T>> work, System.Threading.CancellationToken ct)
+    {
+      await semaphore.WaitAsync(ct);
+      try { return await work(); }
+      finally { semaphore.Release(); }
+    }
+  }
+
+  class InMemoryWorkQueue : IWorkQueue
+  {
+    private readonly Queue<System.Func<Task>> queue = new Queue<System.Func<Task>>();
+
+    public void Enqueue(System.Func<Task> work) { queue.Enqueue(work); }
+
+    public int Count => queue.Count;
+
+    public async Task DrainAsync(System.Threading.CancellationToken ct)
+    {
+      while (queue.Count > 0 && !ct.IsCancellationRequested)
+      {
+        var work = queue.Dequeue();
+        await work();
+      }
+    }
+  }
+
+  enum TaskState { Pending, Running, Completed, Failed, Cancelled }
+  '''
+end
diff --git a/test/support/fixtures/csharp/linq_pipeline.ex b/test/support/fixtures/csharp/linq_pipeline.ex
new file mode 100644
index 00000000..677c6eb9
--- /dev/null
+++ b/test/support/fixtures/csharp/linq_pipeline.ex
@@ -0,0 +1,71 @@
+defmodule Test.Fixtures.CSharp.LinqPipeline do # Fixture: C# sample of a LINQ-style transformation pipeline, kept verbatim in @code via ~S.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "csharp linq_pipeline" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  // DataPipeline namespace — LINQ-style transformation pipeline
+  using System.Collections.Generic;
+  using System.Linq;
+
+  interface ITransform<TIn, TOut>
+  {
+    IEnumerable<TOut> Apply(IEnumerable<TIn> input);
+  }
+
+  interface IPipeline<T>
+  {
+    IPipeline<TOut> Pipe<TOut>(ITransform<T, TOut> transform);
+    IEnumerable<T> Execute();
+  }
+
+  class FilterTransform<T> : ITransform<T, T>
+  {
+    private readonly System.Func<T, bool> predicate;
+
+    public FilterTransform(System.Func<T, bool> predicate)
+    {
+      this.predicate = predicate;
+    }
+
+    public IEnumerable<T> Apply(IEnumerable<T> input)
+    {
+      return input.Where(predicate);
+    }
+  }
+
+  class MapTransform<TIn, TOut> : ITransform<TIn, TOut>
+  {
+    private readonly System.Func<TIn, TOut> selector;
+
+    public MapTransform(System.Func<TIn, TOut> selector)
+    {
+      this.selector = selector;
+    }
+
+    public IEnumerable<TOut> Apply(IEnumerable<TIn> input)
+    {
+      return input.Select(selector);
+    }
+  }
+
+  class DataPipeline<T> : IPipeline<T>
+  {
+    private readonly IEnumerable<T> source;
+
+    public DataPipeline(IEnumerable<T> source)
+    {
+      this.source = source;
+    }
+
+    public IPipeline<TOut> Pipe<TOut>(ITransform<T, TOut> transform)
+    {
+      return new DataPipeline<TOut>(transform.Apply(source));
+    }
+
+    public IEnumerable<T> Execute()
+    {
+      return source.ToList();
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/csharp/plugin_system.ex b/test/support/fixtures/csharp/plugin_system.ex
new file mode 100644
index 00000000..ebf7e7fd
--- /dev/null
+++ b/test/support/fixtures/csharp/plugin_system.ex
@@ -0,0 +1,72 @@
+defmodule Test.Fixtures.CSharp.PluginSystem do # Fixture: C# sample of a plugin registry with lifecycle management, kept verbatim in @code via ~S.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "csharp plugin_system" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  // PluginSystem namespace — plugin registry with lifecycle management
+  using System.Collections.Generic;
+
+  interface IPlugin
+  {
+    string Name { get; }
+    string Version { get; }
+    void Initialize(IPluginContext context);
+    void Shutdown();
+  }
+
+  interface IPluginContext
+  {
+    void RegisterService<T>(T service) where T : class;
+    T ResolveService<T>() where T : class;
+    void Log(string message);
+  }
+
+  interface IPluginRegistry
+  {
+    void Register(IPlugin plugin);
+    void Unregister(string name);
+    IPlugin Find(string name);
+    IEnumerable<IPlugin> All();
+  }
+
+  class PluginContext : IPluginContext
+  {
+    private readonly Dictionary<System.Type, object> services = new Dictionary<System.Type, object>();
+
+    public void RegisterService<T>(T service) where T : class { services[typeof(T)] = service; }
+
+    public T ResolveService<T>() where T : class
+    {
+      if (services.TryGetValue(typeof(T), out var svc)) return (T)svc;
+      throw new System.InvalidOperationException("Service not found: " + typeof(T).Name);
+    }
+
+    public void Log(string message) { System.Console.WriteLine("[Plugin] " + message); }
+  }
+
+  class PluginRegistry : IPluginRegistry
+  {
+    private readonly Dictionary<string, IPlugin> plugins = new Dictionary<string, IPlugin>();
+    private readonly IPluginContext context;
+
+    public PluginRegistry(IPluginContext context) { this.context = context; }
+
+    public void Register(IPlugin plugin)
+    {
+      plugin.Initialize(context);
+      plugins[plugin.Name] = plugin;
+    }
+
+    public void Unregister(string name)
+    {
+      if (plugins.TryGetValue(name, out var plugin)) { plugin.Shutdown(); plugins.Remove(name); }
+    }
+
+    public IPlugin Find(string name) { plugins.TryGetValue(name, out var p); return p; }
+
+    public IEnumerable<IPlugin> All() { return plugins.Values; }
+  }
+
+  enum PluginState { Unloaded, Initializing, Active, ShuttingDown }
+  '''
+end
diff --git a/test/support/fixtures/dart/futures_async.ex b/test/support/fixtures/dart/futures_async.ex
new file mode 100644
index 00000000..ff5317a6
--- /dev/null
+++ b/test/support/fixtures/dart/futures_async.ex
@@ -0,0 +1,78 @@
+defmodule Test.Fixtures.Dart.FuturesAsync do # Fixture: Dart sample (AsyncTask/TaskScheduler, RetryPolicy, SimpleTaskScheduler, TaskStatus, TaskResult), kept verbatim in @code via ~S.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "dart futures_async" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  abstract class AsyncTask<T> {
+  Future<T> execute();
+
+  void cancel();
+
+  bool get isCancelled;
+  }
+
+  abstract class TaskScheduler {
+  Future<T> schedule<T>(AsyncTask<T> task);
+
+  Future<List<T>> scheduleAll<T>(List<AsyncTask<T>> tasks);
+
+  void shutdown();
+  }
+
+  class RetryPolicy {
+  final int maxAttempts;
+  final Duration delay;
+  final double backoffMultiplier;
+
+  const RetryPolicy({
+    this.maxAttempts = 3,
+    this.delay = const Duration(milliseconds: 500),
+    this.backoffMultiplier = 2.0,
+  });
+
+  Duration delayForAttempt(int attempt) {
+    final ms = delay.inMilliseconds * (backoffMultiplier * attempt).ceil();
+    return Duration(milliseconds: ms);
+  }
+  }
+
+  class SimpleTaskScheduler implements TaskScheduler {
+  bool _shutdown = false;
+  final List<Future<dynamic>> _pending = [];
+
+  Future<T> schedule<T>(AsyncTask<T> task) async {
+    if (_shutdown) throw StateError("Scheduler is shut down");
+    final future = task.execute();
+    _pending.add(future);
+    return future;
+  }
+
+  Future<List<T>> scheduleAll<T>(List<AsyncTask<T>> tasks) {
+    return Future.wait(tasks.map((t) => schedule(t)).toList());
+  }
+
+  void shutdown() {
+    _shutdown = true;
+    _pending.clear();
+  }
+  }
+
+  enum TaskStatus {
+  pending,
+  running,
+  completed,
+  failed,
+  cancelled
+  }
+
+  class TaskResult<T> {
+  final T? value;
+  final Object? error;
+  final TaskStatus status;
+
+  const TaskResult.success(this.value) : error = null, status = TaskStatus.completed;
+
+  const TaskResult.failure(this.error) : value = null, status = TaskStatus.failed;
+  }
+  '''
+end
diff --git a/test/support/fixtures/dart/mixin_composition.ex b/test/support/fixtures/dart/mixin_composition.ex
new file mode 100644
index 00000000..05013b41
--- /dev/null
+++ b/test/support/fixtures/dart/mixin_composition.ex
@@ -0,0 +1,85 @@
+defmodule Test.Fixtures.Dart.MixinComposition do # Fixture: Dart sample (Serializable/Validatable/Equatable, Address, AddressType, Contact), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample uses only extends/implements — no `mixin`/`with` despite the fixture name; confirm that is intended.
+  use Test.LanguageFixture, language: "dart mixin_composition" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  abstract class Serializable {
+  Map<String, dynamic> toJson();
+
+  String toJsonString() {
+    final map = toJson();
+    return map.entries.map((e) => '"${e.key}": "${e.value}"').join(', ');
+  }
+  }
+
+  abstract class Validatable {
+  List<String> validate();
+
+  bool get isValid => validate().isEmpty;
+
+  void assertValid() {
+    final errors = validate();
+    if (errors.isNotEmpty) throw ArgumentError(errors.join(', '));
+  }
+  }
+
+  abstract class Equatable {
+  List<Object?> get props;
+
+  bool equalsTo(Object other) {
+    if (identical(this, other)) return true;
+    if (other.runtimeType != runtimeType) return false;
+    final otherEquatable = other as Equatable;
+    for (int i = 0; i < props.length; i++) {
+      if (props[i] != otherEquatable.props[i]) return false;
+    }
+    return true;
+  }
+  }
+
+  class Address extends Serializable implements Validatable {
+  final String street;
+  final String city;
+  final String country;
+
+  Address({required this.street, required this.city, required this.country});
+
+  Map<String, dynamic> toJson() => {'street': street, 'city': city, 'country': country};
+
+  List<String> validate() {
+    final errors = <String>[];
+    if (street.isEmpty) errors.add('street is required');
+    if (city.isEmpty) errors.add('city is required');
+    if (country.isEmpty) errors.add('country is required');
+    return errors;
+  }
+
+  List<Object?> get props => [street, city, country];
+  }
+
+  enum AddressType {
+  home,
+  work,
+  billing,
+  shipping
+  }
+
+  class Contact extends Serializable implements Validatable {
+  final String name;
+  final String email;
+  final Address address;
+
+  Contact({required this.name, required this.email, required this.address});
+
+  Map<String, dynamic> toJson() => {'name': name, 'email': email, 'address': address.toJson()};
+
+  List<String> validate() {
+    final errors = <String>[];
+    if (name.isEmpty) errors.add('name is required');
+    if (!email.contains('@')) errors.add('invalid email');
+    errors.addAll(address.validate());
+    return errors;
+  }
+  }
+  '''
+end
diff --git a/test/support/fixtures/dart/widget_state.ex b/test/support/fixtures/dart/widget_state.ex
new file mode 100644
index 00000000..d4a1b048
--- /dev/null
+++ b/test/support/fixtures/dart/widget_state.ex
@@ -0,0 +1,91 @@
+defmodule Test.Fixtures.Dart.WidgetState do # Fixture: Dart sample (Widget, StatefulWidget, State, Element, BuildContext, WidgetLifecycle, RenderObject), kept verbatim in @code via ~S.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "dart widget_state" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  abstract class Widget {
+  String get key;
+
+  Element createElement();
+  }
+
+  abstract class StatefulWidget extends Widget {
+  State createState();
+  }
+
+  abstract class State<T extends StatefulWidget> {
+  T widget;
+
+  State(this.widget);
+
+  void setState(void Function() fn) {
+    fn();
+    markNeedsBuild();
+  }
+
+  void markNeedsBuild() {}
+
+  Widget build();
+
+  void initState() {}
+
+  void dispose() {}
+  }
+
+  class Element {
+  Widget widget;
+  State? state;
+
+  Element(this.widget);
+
+  void mount() {
+    if (widget is StatefulWidget) {
+      state = (widget as StatefulWidget).createState();
+      state!.initState();
+    }
+  }
+
+  void unmount() {
+    state?.dispose();
+  }
+  }
+
+  abstract class BuildContext {
+  Widget get widget;
+
+  Element get element;
+  }
+
+  enum WidgetLifecycle {
+  created,
+  mounted,
+  active,
+  inactive,
+  disposed
+  }
+
+  class RenderObject {
+  double x = 0;
+  double y = 0;
+  double width = 0;
+  double height = 0;
+  bool needsLayout = true;
+  bool needsPaint = true;
+  RenderObject? parent;
+  List<RenderObject> children = [];
+
+  void layout() {
+    needsLayout = false;
+  }
+
+  void paint() {
+    needsPaint = false;
+  }
+
+  void addChild(RenderObject child) {
+    children.add(child);
+    child.parent = this;
+  }
+  }
+  '''
+end
diff --git a/test/support/fixtures/elixir/calculator.ex b/test/support/fixtures/elixir/calculator.ex
new file mode 100644
index 00000000..7657b88b
--- /dev/null
+++ b/test/support/fixtures/elixir/calculator.ex
@@ -0,0 +1,125 @@
+defmodule Test.Fixtures.Elixir.Calculator do # Fixture: Elixir sample (behaviour, protocol + impls, Basic/Scientific calculators, History), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample's validate_positive/1 and format_entry/1 are defined but never called in it — confirm that is intended.
+  use Test.LanguageFixture, language: "elixir calculator" # NOTE(review): presumably picks up @code and @block_assertions under this label — confirm in Test.LanguageFixture.
+  import Test.NodeMatcher # NOTE(review): presumably the source of exact/2 used in @block_assertions below — confirm.
+
+  @code ~S'''
+  defmodule Calculator.Behaviour do
+    @moduledoc "Contract for all calculator implementations."
+    @callback add(number, number) :: number
+    @callback subtract(number, number) :: number
+    @callback multiply(number, number) :: number
+    @callback divide(number, number) :: {:ok, float} | {:error, :division_by_zero}
+  end
+
+  defprotocol Calculator.Displayable do
+    @doc "Converts a result to a human-readable string."
+    def display(value)
+  end
+
+  defmodule Calculator.Basic do
+    @moduledoc "Basic arithmetic calculator."
+    @behaviour Calculator.Behaviour
+
+    @doc "Adds two numbers."
+    @spec add(number, number) :: number
+    def add(a, b), do: a + b
+
+    @doc "Subtracts b from a."
+    @spec subtract(number, number) :: number
+    def subtract(a, b), do: a - b
+
+    @doc "Multiplies two numbers."
+    @spec multiply(number, number) :: number
+    def multiply(a, b), do: a * b
+
+    @doc "Divides a by b, returns error for zero divisor."
+    @spec divide(number, number) :: {:ok, float} | {:error, :division_by_zero}
+    def divide(_a, 0), do: {:error, :division_by_zero}
+    def divide(a, b), do: {:ok, a / b}
+
+    @doc "Absolute value of n."
+    @spec abs_val(number) :: number
+    def abs_val(n) when n < 0, do: -n
+    def abs_val(n), do: n
+  end
+
+  defimpl Calculator.Displayable, for: Integer do
+    def display(value), do: Integer.to_string(value)
+  end
+
+  defimpl Calculator.Displayable, for: Float do
+    def display(value), do: :erlang.float_to_binary(value, [decimals: 4])
+  end
+
+  defmodule Calculator.Scientific do
+    @moduledoc "Scientific calculator with extended math operations."
+    @behaviour Calculator.Behaviour
+
+    @doc "Adds two numbers."
+    @spec add(number, number) :: number
+    def add(a, b), do: a + b
+
+    @doc "Subtracts b from a."
+    @spec subtract(number, number) :: number
+    def subtract(a, b), do: a - b
+
+    @doc "Multiplies two numbers."
+    @spec multiply(number, number) :: number
+    def multiply(a, b), do: a * b
+
+    @doc "Divides, returning an error on zero divisor."
+    @spec divide(number, number) :: {:ok, float} | {:error, :division_by_zero}
+    def divide(_a, 0), do: {:error, :division_by_zero}
+    def divide(a, b), do: {:ok, a / b}
+
+    @doc "Raises a to the power of b."
+    @spec power(number, number) :: number
+    def power(a, b), do: :math.pow(a, b)
+
+    @doc "Returns the square root or an error for negative input."
+    @spec sqrt(number) :: {:ok, float} | {:error, :negative_input}
+    def sqrt(n) when n < 0, do: {:error, :negative_input}
+    def sqrt(n), do: {:ok, :math.sqrt(n)}
+
+    @doc "Natural logarithm, error for non-positive input."
+    @spec log(number) :: {:ok, float} | {:error, :non_positive_input}
+    def log(n) when n <= 0, do: {:error, :non_positive_input}
+    def log(n), do: {:ok, :math.log(n)}
+
+    defp validate_positive(n) when n > 0, do: {:ok, n}
+    defp validate_positive(_n), do: {:error, :non_positive_input}
+  end
+
+  defmodule Calculator.History do
+    @moduledoc "Tracks a history of calculator operations."
+    @type entry :: {atom, list}
+    @type t :: list
+
+    @doc "Creates an empty history."
+    @spec new() :: t
+    def new(), do: []
+
+    @doc "Records an operation entry."
+    @spec record(t, atom, list) :: t
+    def record(history, op, args) when is_list(args), do: [{op, args} | history]
+
+    @doc "Returns the last n entries."
+    @spec last(t, non_neg_integer) :: t
+    def last(history, n \\ 5), do: Enum.take(history, n)
+
+    @doc "Clears the history."
+    @spec clear(t) :: t
+    def clear(_history), do: []
+
+    defp format_entry({op, args}), do: "#{op}(#{Enum.join(args, ", ")})"
+  end
+  '''
+
+  @block_assertions [ # One expectation: some block matches all three content matchers ("add", "doc", "spec").
+    %{
+      description: "a compound block containing add with doc and spec annotations",
+      all_of: [exact(:content, "add"), exact(:content, "doc"), exact(:content, "spec")]
+    }
+  ]
+end
diff --git a/test/support/fixtures/elixir/event_bus.ex b/test/support/fixtures/elixir/event_bus.ex
new file mode 100644
index 00000000..e196e099
--- /dev/null
+++ b/test/support/fixtures/elixir/event_bus.ex
@@ -0,0 +1,71 @@
+defmodule Test.Fixtures.Elixir.EventBus do # Fixture: Elixir sample (EventBus behaviour, Serializable protocol, Topic struct, Dispatcher), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample's EventBus.Topic specs use t() but no @type t is defined in it — confirm the sample is never compiled.
+  use Test.LanguageFixture, language: "elixir event_bus" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  defmodule EventBus.Behaviour do
+    @moduledoc "Contract for event bus implementations."
+    @callback subscribe(topic :: String.t(), pid :: pid()) :: :ok | {:error, term()}
+    @callback unsubscribe(topic :: String.t(), pid :: pid()) :: :ok
+    @callback publish(topic :: String.t(), event :: term()) :: :ok
+    @callback topics() :: [String.t()]
+  end
+
+  defprotocol EventBus.Serializable do
+    @doc "Encodes an event to a binary payload."
+    @spec encode(t()) :: binary()
+    def encode(event)
+
+    @doc "Decodes a binary payload back to an event."
+    @spec decode(t(), binary()) :: term()
+    def decode(schema, payload)
+  end
+
+  defmodule EventBus.Topic do
+    @moduledoc "Represents a named event topic with subscriber tracking."
+    @enforce_keys [:name]
+    defstruct [:name, subscribers: []]
+
+    @doc "Creates a new topic."
+    @spec new(String.t()) :: t()
+    def new(name) when is_binary(name), do: %__MODULE__{name: name}
+
+    @doc "Adds a subscriber pid to the topic."
+    @spec add_subscriber(t(), pid()) :: t()
+    def add_subscriber(%__MODULE__{subscribers: subs} = topic, pid) do
+      %{topic | subscribers: [pid | subs]}
+    end
+
+    @doc "Removes a subscriber pid from the topic."
+    @spec remove_subscriber(t(), pid()) :: t()
+    def remove_subscriber(%__MODULE__{subscribers: subs} = topic, pid) do
+      %{topic | subscribers: List.delete(subs, pid)}
+    end
+
+    @doc "Returns all current subscribers."
+    @spec subscribers(t()) :: [pid()]
+    def subscribers(%__MODULE__{subscribers: subs}), do: subs
+  end
+
+  defmodule EventBus.Dispatcher do
+    @moduledoc "Dispatches events to all topic subscribers."
+
+    @doc "Broadcasts an event to every subscriber of the given topic."
+    @spec broadcast(EventBus.Topic.t(), term()) :: :ok
+    def broadcast(%EventBus.Topic{} = topic, event) do
+      topic
+      |> EventBus.Topic.subscribers()
+      |> Enum.each(&send(&1, {:event, topic.name, event}))
+    end
+
+    @doc "Dispatches to subscribers matching a predicate."
+    @spec dispatch_filtered(EventBus.Topic.t(), term(), (pid() -> boolean())) :: :ok
+    def dispatch_filtered(%EventBus.Topic{} = topic, event, filter_fn) do
+      topic
+      |> EventBus.Topic.subscribers()
+      |> Enum.filter(filter_fn)
+      |> Enum.each(&send(&1, {:event, topic.name, event}))
+    end
+  end
+  '''
+end
diff --git a/test/support/fixtures/elixir/rate_limiter.ex b/test/support/fixtures/elixir/rate_limiter.ex
new file mode 100644
index 00000000..580a2b4b
--- /dev/null
+++ b/test/support/fixtures/elixir/rate_limiter.ex
@@ -0,0 +1,85 @@
+defmodule Test.Fixtures.Elixir.RateLimiter do # Fixture: Elixir sample (RateLimiter behaviour, token Bucket struct, GenServer-backed Server), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample's Server has no handle_call clause for {:stats, key} and never calls default_bucket/1 — confirm the sample is never executed.
+  use Test.LanguageFixture, language: "elixir rate_limiter" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  defmodule RateLimiter.Behaviour do
+    @moduledoc "Contract for rate limiter backends."
+    @callback allow?(key :: term(), cost :: pos_integer()) :: boolean()
+    @callback reset(key :: term()) :: :ok
+    @callback stats(key :: term()) :: {:ok, map()} | {:error, :not_found}
+  end
+
+  defmodule RateLimiter.Bucket do
+    @moduledoc "Token bucket state for a single rate-limited key."
+    @enforce_keys [:capacity, :tokens, :refill_rate]
+    defstruct [:capacity, :tokens, :refill_rate, last_refill: nil]
+
+    @doc "Creates a new bucket with full capacity."
+    @spec new(pos_integer(), pos_integer()) :: t()
+    def new(capacity, refill_rate) when capacity > 0 and refill_rate > 0 do
+      %__MODULE__{capacity: capacity, tokens: capacity, refill_rate: refill_rate, last_refill: System.monotonic_time(:millisecond)}
+    end
+
+    @doc "Consumes tokens from the bucket. Returns updated bucket or error."
+    @spec consume(t(), pos_integer()) :: {:ok, t()} | {:error, :rate_limited}
+    def consume(%__MODULE__{tokens: tokens} = bucket, cost) when tokens >= cost do
+      {:ok, %{bucket | tokens: tokens - cost}}
+    end
+    def consume(%__MODULE__{}, _cost), do: {:error, :rate_limited}
+
+    @doc "Refills the bucket based on elapsed time."
+    @spec refill(t()) :: t()
+    def refill(%__MODULE__{tokens: t, capacity: cap, refill_rate: rate, last_refill: last} = bucket) do
+      now = System.monotonic_time(:millisecond)
+      elapsed_ms = now - last
+      new_tokens = min(cap, t + div(elapsed_ms * rate, 1000))
+      %{bucket | tokens: new_tokens, last_refill: now}
+    end
+  end
+
+  defmodule RateLimiter.Server do
+    @moduledoc "GenServer-backed rate limiter with configurable buckets."
+    @behaviour RateLimiter.Behaviour
+    use GenServer
+
+    @doc "Starts the rate limiter server."
+    @spec start_link(keyword()) :: GenServer.on_start()
+    def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)
+
+    @impl RateLimiter.Behaviour
+    @spec allow?(term(), pos_integer()) :: boolean()
+    def allow?(key, cost \\ 1), do: GenServer.call(__MODULE__, {:allow, key, cost})
+
+    @impl RateLimiter.Behaviour
+    @spec reset(term()) :: :ok
+    def reset(key), do: GenServer.cast(__MODULE__, {:reset, key})
+
+    @impl RateLimiter.Behaviour
+    @spec stats(term()) :: {:ok, map()} | {:error, :not_found}
+    def stats(key), do: GenServer.call(__MODULE__, {:stats, key})
+
+    @impl GenServer
+    def init(opts) do
+      capacity = Keyword.get(opts, :capacity, 100)
+      refill_rate = Keyword.get(opts, :refill_rate, 10)
+      {:ok, %{buckets: %{}, capacity: capacity, refill_rate: refill_rate}}
+    end
+
+    @impl GenServer
+    def handle_call({:allow, key, cost}, _from, state) do
+      bucket = Map.get_lazy(state.buckets, key, fn -> RateLimiter.Bucket.new(state.capacity, state.refill_rate) end)
+      bucket = RateLimiter.Bucket.refill(bucket)
+      case RateLimiter.Bucket.consume(bucket, cost) do
+        {:ok, updated} -> {:reply, true, %{state | buckets: Map.put(state.buckets, key, updated)}}
+        {:error, :rate_limited} -> {:reply, false, %{state | buckets: Map.put(state.buckets, key, bucket)}}
+      end
+    end
+
+    @impl GenServer
+    def handle_cast({:reset, key}, state), do: {:noreply, %{state | buckets: Map.delete(state.buckets, key)}}
+
+    defp default_bucket(state), do: RateLimiter.Bucket.new(state.capacity, state.refill_rate)
+  end
+  '''
+end
diff --git a/test/support/fixtures/go/calculator.ex b/test/support/fixtures/go/calculator.ex
new file mode 100644
index 00000000..e55100a1
--- /dev/null
+++ b/test/support/fixtures/go/calculator.ex
@@ -0,0 +1,53 @@
+defmodule Test.Fixtures.Go.Calculator do # Fixture: Go sample of free-standing arithmetic funcs (Add … Clamp), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample is a fragment — no package clause or imports although it references fmt and math; confirm it is only parsed.
+  use Test.LanguageFixture, language: "go calculator" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  func Add(a, b float64) float64 {
+  	return a + b
+  }
+
+  func Subtract(a, b float64) float64 {
+  	return a - b
+  }
+
+  func Multiply(a, b float64) float64 {
+  	return a * b
+  }
+
+  func Divide(a, b float64) (float64, error) {
+  	if b == 0 {
+  		return 0, fmt.Errorf("division by zero")
+  	}
+  	return a / b, nil
+  }
+
+  func Power(base, exp float64) float64 {
+  	return math.Pow(base, exp)
+  }
+
+  func Sqrt(n float64) (float64, error) {
+  	if n < 0 {
+  		return 0, fmt.Errorf("cannot take sqrt of negative number")
+  	}
+  	return math.Sqrt(n), nil
+  }
+
+  func Abs(n float64) float64 {
+  	if n < 0 {
+  		return -n
+  	}
+  	return n
+  }
+
+  func Clamp(n, min, max float64) float64 {
+  	if n < min {
+  		return min
+  	}
+  	if n > max {
+  		return max
+  	}
+  	return n
+  }
+  '''
+end
diff --git a/test/support/fixtures/go/cli_parser.ex b/test/support/fixtures/go/cli_parser.ex
new file mode 100644
index 00000000..c97c14a1
--- /dev/null
+++ b/test/support/fixtures/go/cli_parser.ex
@@ -0,0 +1,77 @@
+defmodule Test.Fixtures.Go.CliParser do # Fixture: Go sample of a CLI command tree (Flag/Command structs, Execute, parseFlags), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample's parseFlags never consults c.flags and drops a trailing "--x" that has no value — confirm that is acceptable for a fixture.
+  use Test.LanguageFixture, language: "go cli_parser" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  type Flag struct {
+      Name string
+      Short string
+      Description string
+      Required bool
+      Value interface{}
+  }
+
+  type Command struct {
+      Name string
+      Description string
+      flags []*Flag
+      subcommands []*Command
+      action func(args []string, flags map[string]interface{}) error
+  }
+
+  func NewCommand(name, description string) *Command {
+      return &Command{Name: name, Description: description, flags: []*Flag{}, subcommands: []*Command{}}
+  }
+
+  func (c *Command) AddFlag(name, short, description string, required bool) *Flag {
+      f := &Flag{Name: name, Short: short, Description: description, Required: required}
+      c.flags = append(c.flags, f)
+      return f
+  }
+
+  func (c *Command) AddSubcommand(sub *Command) *Command {
+      c.subcommands = append(c.subcommands, sub)
+      return c
+  }
+
+  func (c *Command) Action(fn func(args []string, flags map[string]interface{}) error) {
+      c.action = fn
+  }
+
+  func (c *Command) Execute(args []string) error {
+      if len(args) > 0 {
+          for _, sub := range c.subcommands {
+              if sub.Name == args[0] {
+                  return sub.Execute(args[1:])
+              }
+          }
+      }
+      flags, remaining, err := c.parseFlags(args)
+      if err != nil {
+          return err
+      }
+      if c.action != nil {
+          return c.action(remaining, flags)
+      }
+      return nil
+  }
+
+  func (c *Command) parseFlags(args []string) (map[string]interface{}, []string, error) {
+      result := make(map[string]interface{})
+      remaining := []string{}
+      for i := 0; i < len(args); i++ {
+          arg := args[i]
+          if len(arg) > 2 && arg[:2] == "--" {
+              key := arg[2:]
+              if i+1 < len(args) {
+                  result[key] = args[i+1]
+                  i++
+              }
+          } else {
+              remaining = append(remaining, arg)
+          }
+      }
+      return result, remaining, nil
+  }
+  '''
+end
diff --git a/test/support/fixtures/go/http_middleware.ex b/test/support/fixtures/go/http_middleware.ex
new file mode 100644
index 00000000..e759c854
--- /dev/null
+++ b/test/support/fixtures/go/http_middleware.ex
@@ -0,0 +1,86 @@
+defmodule Test.Fixtures.Go.HttpMiddleware do # Fixture: Go sample of an HTTP router with a middleware chain (Logging/Recovery/Auth), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): the sample's LoggingMiddleware only forwards to next and logs nothing — confirm that is intended.
+  use Test.LanguageFixture, language: "go http_middleware" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  type Handler func(w ResponseWriter, r *Request)
+
+  type Middleware func(Handler) Handler
+
+  type ResponseWriter interface {
+      Write([]byte) (int, error)
+      WriteHeader(statusCode int)
+      Header() map[string][]string
+  }
+
+  type Request struct {
+      Method string
+      Path string
+      Headers map[string]string
+      Body []byte
+  }
+
+  type Router struct {
+      routes map[string]Handler
+      middlewares []Middleware
+  }
+
+  func NewRouter() *Router {
+      return &Router{routes: make(map[string]Handler), middlewares: []Middleware{}}
+  }
+
+  func (r *Router) Use(m Middleware) {
+      r.middlewares = append(r.middlewares, m)
+  }
+
+  func (r *Router) Handle(path string, h Handler) {
+      r.routes[path] = r.wrap(h)
+  }
+
+  func (r *Router) ServeHTTP(w ResponseWriter, req *Request) {
+      h, ok := r.routes[req.Path]
+      if !ok {
+          w.WriteHeader(404)
+          return
+      }
+      h(w, req)
+  }
+
+  func (r *Router) wrap(h Handler) Handler {
+      for i := len(r.middlewares) - 1; i >= 0; i-- {
+          h = r.middlewares[i](h)
+      }
+      return h
+  }
+
+  func LoggingMiddleware(next Handler) Handler {
+      return func(w ResponseWriter, r *Request) {
+          next(w, r)
+      }
+  }
+
+  func RecoveryMiddleware(next Handler) Handler {
+      return func(w ResponseWriter, r *Request) {
+          defer func() {
+              if rec := recover(); rec != nil {
+                  w.WriteHeader(500)
+              }
+          }()
+          next(w, r)
+      }
+  }
+
+  func AuthMiddleware(secret string) Middleware {
+      return func(next Handler) Handler {
+          return func(w ResponseWriter, r *Request) {
+              token, ok := r.Headers["Authorization"]
+              if !ok || token != secret {
+                  w.WriteHeader(401)
+                  return
+              }
+              next(w, r)
+          }
+      }
+  }
+  '''
+end
diff --git a/test/support/fixtures/java/builder_pattern.ex b/test/support/fixtures/java/builder_pattern.ex
new file mode 100644
index 00000000..15cd00d7
--- /dev/null
+++ b/test/support/fixtures/java/builder_pattern.ex
@@ -0,0 +1,81 @@
+defmodule Test.Fixtures.Java.BuilderPattern do # Fixture: Java sample (Validatable/Buildable, Address with nested Builder, Country enum), kept verbatim in @code via ~S.
+  @moduledoc false # NOTE(review): in the sample, isValid() requires country but validationError() returns null when only country is missing — confirm that is acceptable for a fixture.
+  use Test.LanguageFixture, language: "java builder_pattern" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  interface Validatable {
+    boolean isValid();
+    String validationError();
+  }
+
+  interface Buildable<T> {
+    T build();
+  }
+
+  class Address implements Validatable {
+    private final String street;
+    private final String city;
+    private final String country;
+    private final String postalCode;
+
+    private Address(Builder b) {
+      this.street = b.street;
+      this.city = b.city;
+      this.country = b.country;
+      this.postalCode = b.postalCode;
+    }
+
+    public boolean isValid() {
+      return street != null && !street.isEmpty() && city != null && country != null;
+    }
+
+    public String validationError() {
+      if (street == null || street.isEmpty()) return "street is required";
+      if (city == null) return "city is required";
+      return null;
+    }
+
+    public String getStreet() { return street; }
+
+    public String getCity() { return city; }
+
+    public String getCountry() { return country; }
+
+    public String getPostalCode() { return postalCode; }
+
+    public static class Builder implements Buildable<Address> {
+      private String street;
+      private String city;
+      private String country;
+      private String postalCode;
+
+      public Builder street(String street) { this.street = street; return this; }
+
+      public Builder city(String city) { this.city = city; return this; }
+
+      public Builder country(String country) { this.country = country; return this; }
+
+      public Builder postalCode(String postalCode) { this.postalCode = postalCode; return this; }
+
+      public Address build() {
+        Address a = new Address(this);
+        if (!a.isValid()) throw new IllegalStateException(a.validationError());
+        return a;
+      }
+    }
+  }
+
+  enum Country {
+    US("United States"),
+    DE("Germany"),
+    JP("Japan"),
+    BR("Brazil");
+
+    private final String displayName;
+
+    Country(String displayName) { this.displayName = displayName; }
+
+    public String getDisplayName() { return displayName; }
+  }
+  '''
+end
diff --git a/test/support/fixtures/java/repository_pattern.ex b/test/support/fixtures/java/repository_pattern.ex
new file mode 100644
index 00000000..487b5260
--- /dev/null
+++ b/test/support/fixtures/java/repository_pattern.ex
@@ -0,0 +1,76 @@
+defmodule Test.Fixtures.Java.RepositoryPattern do # Fixture: Java sample (Entity/Repository/UserRepository interfaces, User, InMemoryUserRepository), kept verbatim in @code via ~S.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "java repository_pattern" # NOTE(review): presumably picks up @code under this label — confirm in Test.LanguageFixture.
+
+  @code ~S'''
+  interface Entity<ID> {
+    ID getId();
+  }
+
+  interface Repository<T extends Entity<ID>, ID> {
+    T findById(ID id);
+    java.util.List<T> findAll();
+    T save(T entity);
+    void delete(ID id);
+    boolean exists(ID id);
+  }
+
+  interface UserRepository extends Repository<User, Long> {
+    java.util.Optional<User> findByEmail(String email);
+    java.util.List<User> findByRole(String role);
+  }
+
+  class User implements Entity<Long> {
+    private Long id;
+    private String name;
+    private String email;
+    private String role;
+
+    public User(Long id, String name, String email, String role) {
+      this.id = id;
+      this.name = name;
+      this.email = email;
+      this.role = role;
+    }
+
+    public Long getId() { return id; }
+
+    public String getName() { return name; }
+
+    public String getEmail() { return email; }
+
+    public String getRole() { return role; }
+  }
+
+  class InMemoryUserRepository implements UserRepository {
+    private final java.util.Map<Long, User> store = new java.util.HashMap<>();
+    private long nextId = 1L;
+
+    public User findById(Long id) { return store.get(id); }
+
+    public java.util.List<User> findAll() { return new java.util.ArrayList<>(store.values()); }
+
+    public User save(User user) {
+      if (user.getId() == null) {
+        User saved = new User(nextId++, user.getName(), user.getEmail(), user.getRole());
+        store.put(saved.getId(), saved);
+        return saved;
+      }
+      store.put(user.getId(), user);
+      return user;
+    }
+
+    public void delete(Long id) { store.remove(id); }
+
+    public boolean exists(Long id) { return store.containsKey(id); }
+
+    public java.util.Optional<User> findByEmail(String email) {
+      return store.values().stream().filter(u -> u.getEmail().equals(email)).findFirst();
+    }
+
+    public java.util.List<User> findByRole(String role) {
+      return store.values().stream().filter(u -> u.getRole().equals(role)).collect(java.util.stream.Collectors.toList());
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/java/strategy_pattern.ex b/test/support/fixtures/java/strategy_pattern.ex
new file mode 100644
index 00000000..0d129f17
--- /dev/null
+++ b/test/support/fixtures/java/strategy_pattern.ex
@@ -0,0 +1,79 @@
+defmodule Test.Fixtures.Java.StrategyPattern do
+  @moduledoc false
+  use Test.LanguageFixture, language: "java strategy_pattern"
+  # Java sample: payment strategy interfaces, two strategies, a processor and a status enum.
+  @code ~S'''
+  interface PaymentStrategy {
+    boolean validate(double amount);
+    String process(double amount, String currency);
+    String getName();
+  }
+
+  interface TransactionLogger {
+    void log(String strategy, double amount, String result);
+  }
+
+  class CreditCardStrategy implements PaymentStrategy {
+    private final String cardNumber;
+    private final String expiry;
+    private final String cvv;
+
+    public CreditCardStrategy(String cardNumber, String expiry, String cvv) {
+      this.cardNumber = cardNumber;
+      this.expiry = expiry;
+      this.cvv = cvv;
+    }
+
+    public boolean validate(double amount) {
+      return amount > 0 && cardNumber != null && cardNumber.length() == 16;
+    }
+
+    public String process(double amount, String currency) {
+      return "Charged " + amount + " " + currency + " to card ending " + cardNumber.substring(12);
+    }
+
+    public String getName() { return "credit_card"; }
+  }
+
+  class BankTransferStrategy implements PaymentStrategy {
+    private final String accountNumber;
+    private final String routingNumber;
+
+    public BankTransferStrategy(String accountNumber, String routingNumber) {
+      this.accountNumber = accountNumber;
+      this.routingNumber = routingNumber;
+    }
+
+    public boolean validate(double amount) { return amount >= 1.0; }
+
+    public String process(double amount, String currency) {
+      return "Transferred " + amount + " " + currency + " from account " + accountNumber;
+    }
+
+    public String getName() { return "bank_transfer"; }
+  }
+
+  class PaymentProcessor {
+    private PaymentStrategy strategy;
+    private final TransactionLogger logger;
+
+    public PaymentProcessor(PaymentStrategy strategy, TransactionLogger logger) {
+      this.strategy = strategy;
+      this.logger = logger;
+    }
+
+    public void setStrategy(PaymentStrategy strategy) { this.strategy = strategy; }
+
+    public String pay(double amount, String currency) {
+      if (!strategy.validate(amount)) throw new IllegalArgumentException("Invalid payment");
+      String result = strategy.process(amount, currency);
+      logger.log(strategy.getName(), amount, result);
+      return result;
+    }
+  }
+
+  enum PaymentStatus {
+    PENDING, PROCESSING, COMPLETED, FAILED, REFUNDED
+  }
+  '''
+end
diff --git a/test/support/fixtures/javascript/calculator.ex b/test/support/fixtures/javascript/calculator.ex
new file mode 100644
index 00000000..b6d67a5b
--- /dev/null
+++ b/test/support/fixtures/javascript/calculator.ex
@@ -0,0 +1,51 @@
+defmodule Test.Fixtures.JavaScript.Calculator do
+  @moduledoc false
+  use Test.LanguageFixture, language: "javascript calculator"
+  # JavaScript sample: ten standalone arithmetic helper functions (add through average).
+  @code ~S'''
+  function add(a, b) {
+    return a + b;
+  }
+
+  function subtract(a, b) {
+    return a - b;
+  }
+
+  function multiply(a, b) {
+    return a * b;
+  }
+
+  function divide(a, b) {
+    if (b === 0) throw new Error("Cannot divide by zero");
+    return a / b;
+  }
+
+  function power(base, exp) {
+    return Math.pow(base, exp);
+  }
+
+  function sqrt(n) {
+    if (n < 0) throw new Error("Cannot take sqrt of negative number");
+    return Math.sqrt(n);
+  }
+
+  function abs(n) {
+    return Math.abs(n);
+  }
+
+  function clamp(n, min, max) {
+    return Math.min(Math.max(n, min), max);
+  }
+
+  function roundTo(n, decimals) {
+    var factor = Math.pow(10, decimals);
+    return Math.round(n * factor) / factor;
+  }
+
+  function average(values) {
+    if (values.length === 0) return 0;
+    var sum = values.reduce(function(acc, v) { return acc + v; }, 0);
+    return sum / values.length;
+  }
+  '''
+end
diff --git a/test/support/fixtures/javascript/form_validator.ex b/test/support/fixtures/javascript/form_validator.ex
new file mode 100644
index 00000000..017ed520
--- /dev/null
+++ b/test/support/fixtures/javascript/form_validator.ex
@@ -0,0 +1,134 @@
+defmodule Test.Fixtures.JavaScript.FormValidator do
+  @moduledoc false
+  use Test.LanguageFixture, language: "javascript form_validator"
+  # JavaScript sample: chained field validation classes plus validateEmail/validateUrl helpers.
+  @code ~S'''
+  class ValidationError {
+    constructor(field, message) {
+      this.field = field;
+      this.message = message;
+    }
+
+    toString() {
+      return `${this.field}: ${this.message}`;
+    }
+  }
+
+  class ValidationResult {
+    constructor() {
+      this.errors = [];
+    }
+
+    addError(field, message) {
+      this.errors.push(new ValidationError(field, message));
+      return this;
+    }
+
+    isValid() {
+      return this.errors.length === 0;
+    }
+
+    getErrors(field) {
+      return this.errors.filter(function(e) { return e.field === field; });
+    }
+  }
+
+  class FieldValidator {
+    constructor(field, value) {
+      this.field = field;
+      this.value = value;
+      this._rules = [];
+    }
+
+    required() {
+      this._rules.push(function(v) {
+        if (v === null || v === undefined || v === "") {
+          return "is required";
+        }
+        return null;
+      });
+      return this;
+    }
+
+    minLength(n) {
+      this._rules.push(function(v) {
+        if (typeof v === "string" && v.length < n) {
+          return "is too short (minimum " + n + " characters)";
+        }
+        return null;
+      });
+      return this;
+    }
+
+    maxLength(n) {
+      this._rules.push(function(v) {
+        if (typeof v === "string" && v.length > n) {
+          return "is too long (maximum " + n + " characters)";
+        }
+        return null;
+      });
+      return this;
+    }
+
+    matches(pattern, message) {
+      this._rules.push(function(v) {
+        if (typeof v === "string" && !pattern.test(v)) {
+          return message || "is invalid";
+        }
+        return null;
+      });
+      return this;
+    }
+
+    validate() {
+      var errors = [];
+      for (var i = 0; i < this._rules.length; i++) {
+        var error = this._rules[i](this.value);
+        if (error !== null) {
+          errors.push(error);
+        }
+      }
+      return errors;
+    }
+  }
+
+  class FormValidator {
+    constructor(data) {
+      this._data = data;
+      this._fields = [];
+    }
+
+    field(name) {
+      var validator = new FieldValidator(name, this._data[name]);
+      this._fields.push(validator);
+      return validator;
+    }
+
+    validate() {
+      var result = new ValidationResult();
+      for (var i = 0; i < this._fields.length; i++) {
+        var f = this._fields[i];
+        var errors = f.validate();
+        for (var j = 0; j < errors.length; j++) {
+          result.addError(f.field, errors[j]);
+        }
+      }
+      return result;
+    }
+  }
+
+  function validateEmail(value) {
+    var pattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
+    return pattern.test(value);
+  }
+
+  function validateUrl(value) {
+    try {
+      new URL(value);
+      return true;
+    } catch (_) {
+      return false;
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/javascript/shopping_cart.ex b/test/support/fixtures/javascript/shopping_cart.ex
new file mode 100644
index 00000000..e7d8600b
--- /dev/null
+++ b/test/support/fixtures/javascript/shopping_cart.ex
@@ -0,0 +1,100 @@
+defmodule Test.Fixtures.JavaScript.ShoppingCart do
+  @moduledoc false
+  use Test.LanguageFixture, language: "javascript shopping_cart"
+  # JavaScript sample: CartItem, Discount and an event-emitting ShoppingCart class.
+  @code ~S'''
+  class CartItem {
+    constructor(id, name, price, quantity) {
+      this.id = id;
+      this.name = name;
+      this.price = price;
+      this.quantity = quantity;
+    }
+
+    get subtotal() {
+      return this.price * this.quantity;
+    }
+
+    withQuantity(quantity) {
+      return new CartItem(this.id, this.name, this.price, quantity);
+    }
+  }
+
+  class Discount {
+    constructor(code, type, value) {
+      this.code = code;
+      this.type = type;
+      this.value = value;
+    }
+
+    apply(subtotal) {
+      if (this.type === "percent") {
+        return subtotal * (1 - this.value / 100);
+      }
+      if (this.type === "fixed") {
+        return Math.max(0, subtotal - this.value);
+      }
+      return subtotal;
+    }
+  }
+
+  class ShoppingCart {
+    constructor() {
+      this._items = new Map();
+      this._discount = null;
+      this._listeners = [];
+    }
+
+    addItem(item) {
+      var existing = this._items.get(item.id);
+      if (existing) {
+        this._items.set(item.id, existing.withQuantity(existing.quantity + item.quantity));
+      } else {
+        this._items.set(item.id, item);
+      }
+      this._emit("item:added", item);
+      return this;
+    }
+
+    removeItem(id) {
+      this._items.delete(id);
+      this._emit("item:removed", { id: id });
+      return this;
+    }
+
+    applyDiscount(discount) {
+      this._discount = discount;
+      this._emit("discount:applied", discount);
+      return this;
+    }
+
+    get subtotal() {
+      var total = 0;
+      this._items.forEach(function(item) { total += item.subtotal; });
+      return total;
+    }
+
+    get total() {
+      var sub = this.subtotal;
+      return this._discount ? this._discount.apply(sub) : sub;
+    }
+
+    get itemCount() {
+      var count = 0;
+      this._items.forEach(function(item) { count += item.quantity; });
+      return count;
+    }
+
+    on(event, handler) {
+      this._listeners.push({ event: event, handler: handler });
+      return this;
+    }
+
+    _emit(event, data) {
+      this._listeners
+        .filter(function(l) { return l.event === event; })
+        .forEach(function(l) { l.handler(data); });
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/kotlin/coroutine_flow.ex b/test/support/fixtures/kotlin/coroutine_flow.ex
new file mode 100644
index 00000000..efd8f80f
--- /dev/null
+++ b/test/support/fixtures/kotlin/coroutine_flow.ex
@@ -0,0 +1,64 @@
+defmodule Test.Fixtures.Kotlin.CoroutineFlow do
+  @moduledoc false
+  use Test.LanguageFixture, language: "kotlin coroutine_flow"
+  # Kotlin sample: suspend-based Flow/Channel interfaces and four implementing classes.
+  @code ~S'''
+  interface FlowCollector<T> {
+    suspend fun emit(value: T)
+  }
+
+  interface Flow<T> {
+    suspend fun collect(collector: FlowCollector<T>)
+  }
+
+  interface Channel<T> {
+    suspend fun send(value: T)
+    suspend fun receive(): T
+    fun close()
+    val isClosedForSend: Boolean
+  }
+
+  class SimpleFlow<T>(private val block: suspend FlowCollector<T>.() -> Unit) : Flow<T> {
+    override suspend fun collect(collector: FlowCollector<T>) {
+      collector.block()
+    }
+  }
+
+  class TransformFlow<T, R>(
+    private val upstream: Flow<T>,
+    private val transform: suspend (T) -> R
+  ) : Flow<R> {
+    override suspend fun collect(collector: FlowCollector<R>) {
+      upstream.collect(object : FlowCollector<T> {
+        override suspend fun emit(value: T) {
+          collector.emit(transform(value))
+        }
+      })
+    }
+  }
+
+  class FilterFlow<T>(
+    private val upstream: Flow<T>,
+    private val predicate: suspend (T) -> Boolean
+  ) : Flow<T> {
+    override suspend fun collect(collector: FlowCollector<T>) {
+      upstream.collect(object : FlowCollector<T> {
+        override suspend fun emit(value: T) {
+          if (predicate(value)) collector.emit(value)
+        }
+      })
+    }
+  }
+
+  class BufferedChannel<T>(private val capacity: Int) : Channel<T> {
+    private val buffer: ArrayDeque<T> = ArrayDeque()
+    override val isClosedForSend: Boolean get() = false
+
+    override suspend fun send(value: T) { buffer.addLast(value) }
+
+    override suspend fun receive(): T = buffer.removeFirst()
+
+    override fun close() { buffer.clear() }
+  }
+  '''
+end
diff --git a/test/support/fixtures/kotlin/extension_library.ex b/test/support/fixtures/kotlin/extension_library.ex
new file mode 100644
index 00000000..8d0ad7ee
--- /dev/null
+++ b/test/support/fixtures/kotlin/extension_library.ex
@@ -0,0 +1,55 @@
+defmodule Test.Fixtures.Kotlin.ExtensionLibrary do
+  @moduledoc false
+  use Test.LanguageFixture, language: "kotlin extension_library"
+  # Kotlin sample: validator/transformer/pipeline interfaces, their implementations, and an enum.
+  @code ~S'''
+  interface StringValidator {
+    fun validate(value: String): Boolean
+    fun errorMessage(): String
+  }
+
+  interface Transformer<T, R> {
+    fun transform(value: T): R
+  }
+
+  interface Pipeline<T> {
+    fun pipe(step: Transformer<T, T>): Pipeline<T>
+    fun execute(input: T): T
+  }
+
+  class EmailValidator : StringValidator {
+    override fun validate(value: String): Boolean = value.contains("@") && value.contains(".")
+
+    override fun errorMessage(): String = "Invalid email format"
+  }
+
+  class LengthValidator(private val min: Int, private val max: Int) : StringValidator {
+    override fun validate(value: String): Boolean = value.length in min..max
+
+    override fun errorMessage(): String = "Length must be between $min and $max"
+  }
+
+  class TrimTransformer : Transformer<String, String> {
+    override fun transform(value: String): String = value.trim()
+  }
+
+  class LowercaseTransformer : Transformer<String, String> {
+    override fun transform(value: String): String = value.lowercase()
+  }
+
+  class StringPipeline : Pipeline<String> {
+    private val steps: MutableList<Transformer<String, String>> = mutableListOf()
+
+    override fun pipe(step: Transformer<String, String>): Pipeline<String> {
+      steps.add(step)
+      return this
+    }
+
+    override fun execute(input: String): String = steps.fold(input) { acc, step -> step.transform(acc) }
+  }
+
+  enum class ValidationMode {
+    STRICT, LENIENT, DISABLED
+  }
+  '''
+end
diff --git a/test/support/fixtures/kotlin/sealed_state.ex b/test/support/fixtures/kotlin/sealed_state.ex
new file mode 100644
index 00000000..fd0b1fa7
--- /dev/null
+++ b/test/support/fixtures/kotlin/sealed_state.ex
@@ -0,0 +1,63 @@
+defmodule Test.Fixtures.Kotlin.SealedState do
+  @moduledoc false
+  use Test.LanguageFixture, language: "kotlin sealed_state"
+  # Kotlin sample: nested state/action classes (plain, not sealed), a reducer, enum and store.
+  @code ~S'''
+  interface Action
+
+  interface State
+
+  interface Reducer<S : State, A : Action> {
+    fun reduce(state: S, action: A): S
+  }
+
+  class ScreenState {
+    class Loading : ScreenState()
+    class Success(val data: List<String>) : ScreenState()
+    class Error(val message: String, val cause: Throwable?) : ScreenState()
+    class Empty : ScreenState()
+  }
+
+  class ScreenAction {
+    class Load : ScreenAction()
+    class LoadSuccess(val data: List<String>) : ScreenAction()
+    class LoadError(val message: String, val cause: Throwable?) : ScreenAction()
+    class Refresh : ScreenAction()
+    class Clear : ScreenAction()
+  }
+
+  class ScreenReducer : Reducer<ScreenState, ScreenAction> {
+    override fun reduce(state: ScreenState, action: ScreenAction): ScreenState {
+      return when (action) {
+        is ScreenAction.Load -> ScreenState.Loading()
+        is ScreenAction.LoadSuccess -> if (action.data.isEmpty()) ScreenState.Empty() else ScreenState.Success(action.data)
+        is ScreenAction.LoadError -> ScreenState.Error(action.message, action.cause)
+        is ScreenAction.Refresh -> ScreenState.Loading()
+        is ScreenAction.Clear -> ScreenState.Empty()
+        else -> state
+      }
+    }
+  }
+
+  enum class LoadStrategy {
+    EAGER, LAZY, PREFETCH, BACKGROUND
+  }
+
+  class StateStore<S : State, A : Action>(private val reducer: Reducer<S, A>, initialState: S) {
+    private var state: S = initialState
+    private val listeners: MutableList<(S) -> Unit> = mutableListOf()
+
+    fun getState(): S = state
+
+    fun dispatch(action: A) {
+      state = reducer.reduce(state, action)
+      listeners.forEach { it(state) }
+    }
+
+    fun subscribe(listener: (S) -> Unit): () -> Unit {
+      listeners.add(listener)
+      return { listeners.remove(listener) }
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/lua/class_system.ex b/test/support/fixtures/lua/class_system.ex
new file mode 100644
index 00000000..d96769db
--- /dev/null
+++ b/test/support/fixtures/lua/class_system.ex
@@ -0,0 +1,63 @@
+defmodule Test.Fixtures.Lua.ClassSystem do
+  @moduledoc false
+  use Test.LanguageFixture, language: "lua class_system"
+  # Lua sample: metatable-based class, mixin, interface, extend and implements helpers.
+  @code ~S'''
+  function class(parent)
+  local cls = {}
+  cls.__index = cls
+  if parent then
+    setmetatable(cls, { __index = parent })
+  end
+  cls.new = function(...)
+    local instance = setmetatable({}, cls)
+    if instance.init then
+      instance:init(...)
+    end
+    return instance
+  end
+  cls.isInstanceOf = function(self, klass)
+    local mt = getmetatable(self)
+    while mt do
+      if mt == klass then return true end
+      mt = getmetatable(mt)
+    end
+    return false
+  end
+  return cls
+  end
+
+  function mixin(target, source)
+  for key, value in pairs(source) do
+    if type(value) == "function" and not target[key] then
+      target[key] = value
+    end
+  end
+  return target
+  end
+
+  function interface(...)
+  local methods = { ... }
+  return function(obj)
+    for _, method in ipairs(methods) do
+      if type(obj[method]) ~= "function" then
+        error("Missing method: " .. method)
+      end
+    end
+    return true
+  end
+  end
+
+  function extend(parent, definition)
+  local cls = class(parent)
+  for k, v in pairs(definition) do
+    cls[k] = v
+  end
+  return cls
+  end
+
+  function implements(obj, iface)
+  return pcall(iface, obj)
+  end
+  '''
+end
diff --git a/test/support/fixtures/lua/event_system.ex b/test/support/fixtures/lua/event_system.ex
new file mode 100644
index 00000000..4c50cf85
--- /dev/null
+++ b/test/support/fixtures/lua/event_system.ex
@@ -0,0 +1,76 @@
+defmodule Test.Fixtures.Lua.EventSystem do
+  @moduledoc false
+  use Test.LanguageFixture, language: "lua event_system"
+  # Lua sample: EventEmitter constructor function plus pipe and broadcast helpers.
+  @code ~S'''
+  function EventEmitter()
+  local self = { listeners = {}, onceListeners = {} }
+
+  function self:on(event, callback)
+    if not self.listeners[event] then
+      self.listeners[event] = {}
+    end
+    table.insert(self.listeners[event], callback)
+    return self
+  end
+
+  function self:once(event, callback)
+    if not self.onceListeners[event] then
+      self.onceListeners[event] = {}
+    end
+    table.insert(self.onceListeners[event], callback)
+    return self
+  end
+
+  function self:off(event, callback)
+    if self.listeners[event] then
+      for i, cb in ipairs(self.listeners[event]) do
+        if cb == callback then
+          table.remove(self.listeners[event], i)
+          return self
+        end
+      end
+    end
+    return self
+  end
+
+  function self:emit(event, ...)
+    local listeners = self.listeners[event] or {}
+    for _, cb in ipairs(listeners) do
+      cb(...)
+    end
+    local onceListeners = self.onceListeners[event] or {}
+    self.onceListeners[event] = {}
+    for _, cb in ipairs(onceListeners) do
+      cb(...)
+    end
+    return self
+  end
+
+  function self:removeAllListeners(event)
+    if event then
+      self.listeners[event] = nil
+      self.onceListeners[event] = nil
+    else
+      self.listeners = {}
+      self.onceListeners = {}
+    end
+    return self
+  end
+
+  return self
+  end
+
+  function pipe(emitter1, event, emitter2, targetEvent)
+  emitter1:on(event, function(...)
+    emitter2:emit(targetEvent or event, ...)
+  end)
+  end
+
+  function broadcast(emitters, event, ...)
+  for _, emitter in ipairs(emitters) do
+    emitter:emit(event, ...)
+  end
+  end
+  '''
+end
diff --git a/test/support/fixtures/lua/state_machine.ex b/test/support/fixtures/lua/state_machine.ex
new file mode 100644
index 00000000..cba47b8f
--- /dev/null
+++ b/test/support/fixtures/lua/state_machine.ex
@@ -0,0 +1,75 @@
+defmodule Test.Fixtures.Lua.StateMachine do
+  @moduledoc false
+  use Test.LanguageFixture, language: "lua state_machine"
+  # Lua sample: StateMachine constructor plus buildTransitionTable and validateMachine helpers.
+  @code ~S'''
+  function StateMachine(config)
+  local self = {
+    current = config.initial,
+    states = config.states or {},
+    transitions = config.transitions or {},
+    history = {},
+    listeners = {},
+  }
+
+  function self:can(event)
+    local key = self.current .. ":" .. event
+    return self.transitions[key] ~= nil
+  end
+
+  function self:transition(event, data)
+    local key = self.current .. ":" .. event
+    local target = self.transitions[key]
+    if not target then
+      error("No transition from '" .. self.current .. "' on event '" .. event .. "'")
+    end
+    local from = self.current
+    local stateConfig = self.states[from] or {}
+    if stateConfig.onExit then stateConfig.onExit(from, event, data) end
+    table.insert(self.history, { state = from, event = event })
+    self.current = target
+    local targetConfig = self.states[target] or {}
+    if targetConfig.onEnter then targetConfig.onEnter(target, event, data) end
+    for _, cb in ipairs(self.listeners) do
+      cb(from, event, target, data)
+    end
+    return self
+  end
+
+  function self:onTransition(callback)
+    table.insert(self.listeners, callback)
+    return self
+  end
+
+  function self:getHistory()
+    return self.history
+  end
+
+  function self:reset()
+    self.current = config.initial
+    self.history = {}
+    return self
+  end
+
+  return self
+  end
+
+  function buildTransitionTable(transitions)
+  local tbl = {}
+  for _, t in ipairs(transitions) do
+    local key = t.from .. ":" .. t.event
+    tbl[key] = t.to
+  end
+  return tbl
+  end
+
+  function validateMachine(machine, requiredStates)
+  for _, state in ipairs(requiredStates) do
+    if not machine.states[state] then
+      return false, "Missing state: " .. state
+    end
+  end
+  return true, nil
+  end
+  '''
+end
diff --git a/test/support/fixtures/python/calculator.ex b/test/support/fixtures/python/calculator.ex
new file mode 100644
index 00000000..47c9029e
--- /dev/null
+++ b/test/support/fixtures/python/calculator.ex
@@ -0,0 +1,83 @@
+defmodule Test.Fixtures.Python.Calculator do
+  @moduledoc false
+  use Test.LanguageFixture, language: "python calculator"
+  # Python sample: Calculator and ScientificCalculator classes plus four module-level functions.
+  @code ~S'''
+  class Calculator:
+      """A calculator supporting basic arithmetic operations."""
+
+      def add(self, a, b):
+          """Returns the sum of a and b."""
+          return a + b
+
+      def subtract(self, a, b):
+          """Returns a minus b."""
+          return a - b
+
+      def multiply(self, a, b):
+          """Returns the product of a and b."""
+          return a * b
+
+      def divide(self, a, b):
+          """Divides a by b. Raises for zero divisor."""
+          if b == 0:
+              raise ValueError("Cannot divide by zero")
+          return a / b
+
+      def power(self, base, exp):
+          """Returns base to the power of exp."""
+          return base ** exp
+
+      def sqrt(self, n):
+          """Returns the square root. Raises for negative input."""
+          if n < 0:
+              raise ValueError("Cannot take sqrt of negative number")
+          return n ** 0.5
+
+      def abs_val(self, n):
+          """Returns the absolute value of n."""
+          if n < 0:
+              return -n
+          return n
+
+
+  class ScientificCalculator(Calculator):
+      """Extended scientific calculator."""
+
+      def log(self, n, base=10):
+          """Returns log base of n. Raises for non-positive n."""
+          if n <= 0:
+              raise ValueError("Logarithm undefined for non-positive values")
+          import math
+          return math.log(n, base)
+
+      def factorial(self, n):
+          """Returns n factorial. Raises for negative n."""
+          if n < 0:
+              raise ValueError("Factorial undefined for negative numbers")
+          if n == 0:
+              return 1
+          result = 1
+          for i in range(1, n + 1):
+              result *= i
+          return result
+
+
+  def add(a, b):
+      return a + b
+
+
+  def subtract(a, b):
+      return a - b
+
+
+  def multiply(a, b):
+      return a * b
+
+
+  def divide(a, b):
+      if b == 0:
+          raise ValueError("Cannot divide by zero")
+      return a / b
+  '''
+end
diff --git a/test/support/fixtures/python/config_parser.ex b/test/support/fixtures/python/config_parser.ex
new file mode 100644
index 00000000..a58516ef
--- /dev/null
+++ b/test/support/fixtures/python/config_parser.ex
@@ -0,0 +1,89 @@
+defmodule Test.Fixtures.Python.ConfigParser do
+  @moduledoc false
+  use Test.LanguageFixture, language: "python config_parser"
+  # Python sample: database, logging and app config dataclasses with validation and builders.
+  @code ~S'''
+  from dataclasses import dataclass, field
+  from typing import ClassVar, Optional
+
+
+  @dataclass
+  class DatabaseConfig:
+      """Database connection configuration."""
+
+      host: str = "localhost"
+      port: int = 5432
+      name: str = "app"
+      pool_size: int = 10
+      VALID_PORTS: ClassVar[range] = range(1, 65536)
+
+      def __post_init__(self):
+          """Validates configuration after initialisation."""
+          if self.port not in self.VALID_PORTS:
+              raise ValueError(f"Invalid port: {self.port}")
+          if not self.host:
+              raise ValueError("host must not be empty")
+          if self.pool_size < 1:
+              raise ValueError("pool_size must be at least 1")
+
+      def url(self) -> str:
+          """Returns the database connection URL."""
+          return f"postgres://{self.host}:{self.port}/{self.name}"
+
+
+  @dataclass
+  class LoggingConfig:
+      """Logging configuration."""
+
+      level: str = "info"
+      format: str = "text"
+      output: str = "stdout"
+      VALID_LEVELS: ClassVar[list] = ["debug", "info", "warning", "error"]
+      VALID_FORMATS: ClassVar[list] = ["text", "json"]
+
+      def __post_init__(self):
+          """Validates level and format."""
+          if self.level not in self.VALID_LEVELS:
+              raise ValueError(f"Invalid log level: {self.level}")
+          if self.format not in self.VALID_FORMATS:
+              raise ValueError(f"Invalid log format: {self.format}")
+
+
+  @dataclass
+  class AppConfig:
+      """Top-level application configuration."""
+
+      database: DatabaseConfig = field(default_factory=DatabaseConfig)
+      logging: LoggingConfig = field(default_factory=LoggingConfig)
+      debug: bool = False
+      version: str = "1.0.0"
+
+      def is_production(self) -> bool:
+          """Returns True when debug mode is disabled."""
+          return not self.debug
+
+      @classmethod
+      def from_dict(cls, data: dict) -> "AppConfig":
+          """Builds an AppConfig from a plain dictionary."""
+          db_data = data.get("database", {})
+          log_data = data.get("logging", {})
+          return cls(
+              database=DatabaseConfig(**db_data),
+              logging=LoggingConfig(**log_data),
+              debug=data.get("debug", False),
+              version=data.get("version", "1.0.0"),
+          )
+
+      @classmethod
+      def from_env(cls, prefix: str = "APP") -> "AppConfig":
+          """Builds an AppConfig from environment variables."""
+          import os
+          return cls(
+              database=DatabaseConfig(
+                  host=os.getenv(f"{prefix}_DB_HOST", "localhost"),
+                  port=int(os.getenv(f"{prefix}_DB_PORT", "5432")),
+              ),
+              debug=os.getenv(f"{prefix}_DEBUG", "false").lower() == "true",
+          )
+  '''
+end
diff --git a/test/support/fixtures/python/csv_pipeline.ex b/test/support/fixtures/python/csv_pipeline.ex
new file mode 100644
index 00000000..459acf88
--- /dev/null
+++ b/test/support/fixtures/python/csv_pipeline.ex
@@ -0,0 +1,95 @@
+defmodule Test.Fixtures.Python.CsvPipeline do
+  @moduledoc false
+  use Test.LanguageFixture, language: "python csv_pipeline"
+  # Python sample: CsvRow dataclass, row transformer steps and a generator-based CsvPipeline.
+  @code ~S'''
+  from dataclasses import dataclass, field
+  from typing import Iterator, Protocol
+
+
+  @dataclass
+  class CsvRow:
+      """Represents one row of parsed CSV data."""
+
+      fields: dict
+      line_number: int
+
+      def get(self, key: str, default=None):
+          """Returns the value for key or default."""
+          return self.fields.get(key, default)
+
+      def keys(self) -> list:
+          """Returns all field names."""
+          return list(self.fields.keys())
+
+
+  class RowTransformer(Protocol):
+      """Protocol for CSV row transformation steps."""
+
+      def transform(self, row: CsvRow) -> CsvRow:
+          """Transforms a single row."""
+          ...
+
+
+  @dataclass
+  class ColumnRenamer:
+      """Renames columns according to a mapping."""
+
+      mapping: dict = field(default_factory=dict)
+
+      def transform(self, row: CsvRow) -> CsvRow:
+          """Applies column rename mapping to a row."""
+          new_fields = {self.mapping.get(k, k): v for k, v in row.fields.items()}
+          return CsvRow(fields=new_fields, line_number=row.line_number)
+
+
+  @dataclass
+  class TypeCoercer:
+      """Coerces column values to specified types."""
+
+      types: dict = field(default_factory=dict)
+
+      def transform(self, row: CsvRow) -> CsvRow:
+          """Coerces field values using the types mapping."""
+          coerced = {}
+          for key, value in row.fields.items():
+              target_type = self.types.get(key)
+              if target_type is not None:
+                  try:
+                      coerced[key] = target_type(value)
+                  except (ValueError, TypeError):
+                      coerced[key] = value
+              else:
+                  coerced[key] = value
+          return CsvRow(fields=coerced, line_number=row.line_number)
+
+
+  class CsvPipeline:
+      """Streaming CSV pipeline with pluggable transformation steps."""
+
+      def __init__(self, path: str):
+          """Initialises the pipeline for the given CSV file path."""
+          self._path = path
+          self._steps: list = []
+
+      def add_step(self, step: RowTransformer) -> "CsvPipeline":
+          """Adds a transformation step and returns self for chaining."""
+          self._steps.append(step)
+          return self
+
+      def run(self) -> Iterator[CsvRow]:
+          """Yields processed rows from the CSV file."""
+          with open(self._path, "r", newline="") as fh:
+              import csv
+              reader = csv.DictReader(fh)
+              for line_number, raw in enumerate(reader, start=1):
+                  row = CsvRow(fields=dict(raw), line_number=line_number)
+                  for step in self._steps:
+                      row = step.transform(row)
+                  yield row
+
+      def collect(self) -> list:
+          """Collects all processed rows into a list."""
+          return list(self.run())
+  '''
+end
diff --git a/test/support/fixtures/ruby/calculator.ex b/test/support/fixtures/ruby/calculator.ex
new file mode 100644
index 00000000..df469555
--- /dev/null
+++ b/test/support/fixtures/ruby/calculator.ex
@@ -0,0 +1,59 @@
+defmodule Test.Fixtures.Ruby.Calculator do
+  @moduledoc false
+  use Test.LanguageFixture, language: "ruby calculator"
+  # Ruby sample: Calculable mixin module, BasicCalculator and ScientificCalculator subclass.
+  @code ~S'''
+  module Calculable
+    def abs_val(n)
+      n < 0 ? -n : n
+    end
+
+    def clamp(n, min, max)
+      [[n, min].max, max].min
+    end
+  end
+
+  class BasicCalculator
+    include Calculable
+
+    def add(a, b)
+      a + b
+    end
+
+    def subtract(a, b)
+      a - b
+    end
+
+    def multiply(a, b)
+      a * b
+    end
+
+    def divide(a, b)
+      raise ArgumentError, "Cannot divide by zero" if b.zero?
+      a.to_f / b
+    end
+
+    def power(a, b)
+      a ** b
+    end
+  end
+
+  class ScientificCalculator < BasicCalculator
+    def sqrt(n)
+      raise ArgumentError, "Cannot take sqrt of negative number" if n < 0
+      Math.sqrt(n)
+    end
+
+    def log(n, base = 10)
+      raise ArgumentError, "Logarithm undefined for non-positive values" if n <= 0
+      Math.log(n) / Math.log(base)
+    end
+
+    def factorial(n)
+      raise ArgumentError, "Factorial undefined for negative numbers" if n < 0
+      return 1 if n == 0
+      (1..n).reduce(1, :*)
+    end
+  end
+  '''
+end
diff --git a/test/support/fixtures/ruby/markdown_renderer.ex b/test/support/fixtures/ruby/markdown_renderer.ex
new file mode 100644
index 00000000..2e70d263
--- /dev/null
+++ b/test/support/fixtures/ruby/markdown_renderer.ex
@@ -0,0 +1,79 @@
+defmodule Test.Fixtures.Ruby.MarkdownRenderer do # Ruby fixture: Markdown Token struct, Tokenizer and Renderer modules, and a Document class.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "ruby markdown_renderer" # @code below is a ~S heredoc (verbatim, so the Ruby interpolation markers are not evaluated by Elixir). NOTE(review): in real Ruby the HEADING_RE literal looks like it would treat its brace group after the hash sign as interpolation rather than a 1-to-6 quantifier — confirm the fixture is only analysed, never executed.
+
+  @code ~S'''
+  module Markdown
+    Token = Struct.new(:type, :content, :level)
+  end
+
+  module Markdown::Tokenizer
+    HEADING_RE = /^(#{1,6})\s+(.+)$/
+    CODE_BLOCK_RE = /^```(\w*)$/
+    BOLD_RE = /\*\*(.+?)\*\*/
+    ITALIC_RE = /\*(.+?)\*/
+    LINK_RE = /\[(.+?)\]\((.+?)\)/
+
+    def tokenize_line(line)
+      case line
+      when HEADING_RE
+        Markdown::Token.new(:heading, Regexp.last_match(2), Regexp.last_match(1).length)
+      when /^\s*[-*]\s+(.+)/
+        Markdown::Token.new(:list_item, Regexp.last_match(1), 0)
+      when /^\s*$/
+        Markdown::Token.new(:blank, "", 0)
+      else
+        Markdown::Token.new(:paragraph, line, 0)
+      end
+    end
+
+    def inline_format(text)
+      text
+        .gsub(LINK_RE) { "<a href=\"#{Regexp.last_match(2)}\">#{Regexp.last_match(1)}</a>" }
+        .gsub(BOLD_RE) { "<strong>#{Regexp.last_match(1)}</strong>" }
+        .gsub(ITALIC_RE) { "<em>#{Regexp.last_match(1)}</em>" }
+    end
+  end
+
+  module Markdown::Renderer
+    include Markdown::Tokenizer
+
+    def render_token(token)
+      case token.type
+      when :heading
+        "<h#{token.level}>#{inline_format(token.content)}</h#{token.level}>"
+      when :list_item
+        "<li>#{inline_format(token.content)}</li>"
+      when :paragraph
+        "<p>#{inline_format(token.content)}</p>"
+      when :blank
+        ""
+      end
+    end
+
+    def render(markdown)
+      markdown.lines.map { |line| tokenize_line(line.chomp) }.map { |token| render_token(token) }.reject(&:empty?).join("\n")
+    end
+  end
+
+  class Markdown::Document
+    include Markdown::Renderer
+
+    def initialize(source)
+      @source = source
+    end
+
+    def to_html
+      render(@source)
+    end
+
+    def word_count
+      @source.split(/\s+/).length
+    end
+
+    def heading_count
+      @source.lines.count { |l| l.match?(HEADING_RE) }
+    end
+  end
+  '''
+end
diff --git a/test/support/fixtures/ruby/orm_lite.ex b/test/support/fixtures/ruby/orm_lite.ex
new file mode 100644
index 00000000..672b668a
--- /dev/null
+++ b/test/support/fixtures/ruby/orm_lite.ex
@@ -0,0 +1,106 @@
+defmodule Test.Fixtures.Ruby.OrmLite do # Ruby fixture: OrmLite Persistence and Associations mixins plus User and Post classes.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "ruby orm_lite" # @code below is a ~S heredoc (verbatim, so the Ruby interpolation markers are not evaluated by Elixir). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  module OrmLite
+    module Persistence
+      def self.included(base)
+        base.extend(ClassMethods)
+        base.instance_variable_set(:@columns, [])
+        base.instance_variable_set(:@validations, [])
+      end
+
+      module ClassMethods
+        def column(name, type = :string)
+          @columns << { name: name, type: type }
+          attr_accessor name
+        end
+
+        def validates(name, **rules)
+          @validations << { name: name, rules: rules }
+        end
+
+        def columns
+          @columns
+        end
+
+        def validations
+          @validations
+        end
+
+        def find(id)
+          new(id: id)
+        end
+      end
+
+      def initialize(attrs = {})
+        attrs.each do |key, value|
+          send(:"#{key}=", value) if respond_to?(:"#{key}=")
+        end
+      end
+
+      def valid?
+        @errors = []
+        self.class.validations.each do |v|
+          value = send(v[:name])
+          @errors << "#{v[:name]} can't be blank" if v[:rules][:presence] && (value.nil? || value.to_s.empty?)
+          @errors << "#{v[:name]} is too short" if v[:rules][:min_length] && value.to_s.length < v[:rules][:min_length]
+        end
+        @errors.empty?
+      end
+
+      def errors
+        @errors ||= []
+      end
+
+      def save
+        return false unless valid?
+        true
+      end
+    end
+
+    module Associations
+      def self.included(base)
+        base.extend(ClassMethods)
+      end
+
+      module ClassMethods
+        def has_many(name)
+          define_method(name) do
+            []
+          end
+        end
+
+        def belongs_to(name)
+          attr_accessor :"#{name}_id"
+          define_method(name) do
+            nil
+          end
+        end
+      end
+    end
+  end
+
+  class User
+    include OrmLite::Persistence
+    include OrmLite::Associations
+    column :name, :string
+    column :email, :string
+    column :age, :integer
+    has_many :posts
+    validates :name, presence: true, min_length: 2
+    validates :email, presence: true
+  end
+
+  class Post
+    include OrmLite::Persistence
+    include OrmLite::Associations
+    column :title, :string
+    column :body, :text
+    belongs_to :user
+    validates :title, presence: true
+    validates :body, presence: true
+  end
+  '''
+end
diff --git a/test/support/fixtures/rust/calculator.ex b/test/support/fixtures/rust/calculator.ex
new file mode 100644
index 00000000..a47df574
--- /dev/null
+++ b/test/support/fixtures/rust/calculator.ex
@@ -0,0 +1,70 @@
+defmodule Test.Fixtures.Rust.Calculator do # Rust fixture: Calculator trait, BasicCalculator impls, and free add/subtract/multiply/divide functions.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "rust calculator" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  trait Calculator {
+      fn add(&self, a: f64, b: f64) -> f64;
+      fn subtract(&self, a: f64, b: f64) -> f64;
+      fn multiply(&self, a: f64, b: f64) -> f64;
+      fn divide(&self, a: f64, b: f64) -> Option<f64>;
+  }
+
+  struct BasicCalculator;
+
+  impl Calculator for BasicCalculator {
+      fn add(&self, a: f64, b: f64) -> f64 {
+          a + b
+      }
+
+      fn subtract(&self, a: f64, b: f64) -> f64 {
+          a - b
+      }
+
+      fn multiply(&self, a: f64, b: f64) -> f64 {
+          a * b
+      }
+
+      fn divide(&self, a: f64, b: f64) -> Option<f64> {
+          if b == 0.0 { return None; }
+          Some(a / b)
+      }
+  }
+
+  impl BasicCalculator {
+      fn new() -> Self {
+          BasicCalculator
+      }
+
+      fn power(&self, base: f64, exp: f64) -> f64 {
+          base.powf(exp)
+      }
+
+      fn sqrt(&self, n: f64) -> Option<f64> {
+          if n < 0.0 { return None; }
+          Some(n.sqrt())
+      }
+
+      fn abs(&self, n: f64) -> f64 {
+          n.abs()
+      }
+  }
+
+  fn add(a: f64, b: f64) -> f64 {
+      a + b
+  }
+
+  fn subtract(a: f64, b: f64) -> f64 {
+      a - b
+  }
+
+  fn multiply(a: f64, b: f64) -> f64 {
+      a * b
+  }
+
+  fn divide(a: f64, b: f64) -> Option<f64> {
+      if b == 0.0 { return None; }
+      Some(a / b)
+  }
+  '''
+end
diff --git a/test/support/fixtures/rust/ring_buffer.ex b/test/support/fixtures/rust/ring_buffer.ex
new file mode 100644
index 00000000..eba5a762
--- /dev/null
+++ b/test/support/fixtures/rust/ring_buffer.ex
@@ -0,0 +1,86 @@
+defmodule Test.Fixtures.Rust.RingBuffer do # Rust fixture: generic RingBuffer<T> struct, its impl blocks, and a fill_buffer helper.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "rust ring_buffer" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  struct RingBuffer<T> {
+      data: Vec<Option<T>>,
+      head: usize,
+      tail: usize,
+      len: usize,
+      capacity: usize,
+  }
+
+  impl<T> RingBuffer<T> {
+      fn new(capacity: usize) -> Self {
+          let data = (0..capacity).map(|_| None).collect();
+          RingBuffer { data, head: 0, tail: 0, len: 0, capacity }
+      }
+
+      fn push(&mut self, value: T) -> bool {
+          if self.len == self.capacity {
+              return false;
+          }
+          self.data[self.tail] = Some(value);
+          self.tail = (self.tail + 1) % self.capacity;
+          self.len += 1;
+          true
+      }
+
+      fn pop(&mut self) -> Option<T> {
+          if self.len == 0 {
+              return None;
+          }
+          let value = self.data[self.head].take();
+          self.head = (self.head + 1) % self.capacity;
+          self.len -= 1;
+          value
+      }
+
+      fn peek(&self) -> Option<&T> {
+          if self.len == 0 { None } else { self.data[self.head].as_ref() }
+      }
+
+      fn is_empty(&self) -> bool {
+          self.len == 0
+      }
+
+      fn is_full(&self) -> bool {
+          self.len == self.capacity
+      }
+
+      fn len(&self) -> usize {
+          self.len
+      }
+
+      fn capacity(&self) -> usize {
+          self.capacity
+      }
+
+      fn clear(&mut self) {
+          for slot in self.data.iter_mut() {
+              *slot = None;
+          }
+          self.head = 0;
+          self.tail = 0;
+          self.len = 0;
+      }
+  }
+
+  impl<T: Clone> RingBuffer<T> {
+      fn to_vec(&self) -> Vec<T> {
+          (0..self.len)
+              .filter_map(|i| self.data[(self.head + i) % self.capacity].clone())
+              .collect()
+      }
+  }
+
+  fn fill_buffer<T: Clone>(items: &[T], capacity: usize) -> RingBuffer<T> {
+      let mut buf = RingBuffer::new(capacity);
+      for item in items {
+          buf.push(item.clone());
+      }
+      buf
+  }
+  '''
+end
diff --git a/test/support/fixtures/rust/tokenizer.ex b/test/support/fixtures/rust/tokenizer.ex
new file mode 100644
index 00000000..09257745
--- /dev/null
+++ b/test/support/fixtures/rust/tokenizer.ex
@@ -0,0 +1,112 @@
+defmodule Test.Fixtures.Rust.Tokenizer do # Rust fixture: TokenKind enum, Token struct, Lexer, and a free tokenize function.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "rust tokenizer" # @code below is a ~S heredoc (verbatim, so Rust escapes such as '\n' are kept as written). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  #[derive(Debug, PartialEq, Clone)]
+  enum TokenKind {
+      Number(f64),
+      Plus,
+      Minus,
+      Star,
+      Slash,
+      LParen,
+      RParen,
+      Eof,
+  }
+
+  #[derive(Debug, Clone)]
+  struct Token {
+      kind: TokenKind,
+      lexeme: String,
+      line: usize,
+  }
+
+  impl Token {
+      fn new(kind: TokenKind, lexeme: &str, line: usize) -> Self {
+          Token { kind, lexeme: lexeme.to_string(), line }
+      }
+
+      fn is_operator(&self) -> bool {
+          matches!(self.kind, TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash)
+      }
+  }
+
+  impl std::fmt::Display for Token {
+      fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+          write!(f, "{:?}({})", self.kind, self.lexeme)
+      }
+  }
+
+  struct Lexer {
+      source: Vec<char>,
+      pos: usize,
+      line: usize,
+  }
+
+  impl Lexer {
+      fn new(source: &str) -> Self {
+          Lexer { source: source.chars().collect(), pos: 0, line: 1 }
+      }
+
+      fn peek(&self) -> Option<char> {
+          self.source.get(self.pos).copied()
+      }
+
+      fn advance(&mut self) -> Option<char> {
+          let ch = self.source.get(self.pos).copied();
+          self.pos += 1;
+          ch
+      }
+
+      fn skip_whitespace(&mut self) {
+          while let Some(c) = self.peek() {
+              if c == '\n' { self.line += 1; self.pos += 1; }
+              else if c.is_whitespace() { self.pos += 1; }
+              else { break; }
+          }
+      }
+
+      fn read_number(&mut self) -> Token {
+          let start = self.pos;
+          while let Some(c) = self.peek() {
+              if c.is_ascii_digit() || c == '.' { self.pos += 1; }
+              else { break; }
+          }
+          let lexeme: String = self.source[start..self.pos].iter().collect();
+          let value: f64 = lexeme.parse().unwrap_or(0.0);
+          Token::new(TokenKind::Number(value), &lexeme, self.line)
+      }
+
+      fn next_token(&mut self) -> Token {
+          self.skip_whitespace();
+          match self.advance() {
+              Some('+') => Token::new(TokenKind::Plus, "+", self.line),
+              Some('-') => Token::new(TokenKind::Minus, "-", self.line),
+              Some('*') => Token::new(TokenKind::Star, "*", self.line),
+              Some('/') => Token::new(TokenKind::Slash, "/", self.line),
+              Some('(') => Token::new(TokenKind::LParen, "(", self.line),
+              Some(')') => Token::new(TokenKind::RParen, ")", self.line),
+              Some(c) if c.is_ascii_digit() => { self.pos -= 1; self.read_number() }
+              None => Token::new(TokenKind::Eof, "", self.line),
+              _ => Token::new(TokenKind::Eof, "", self.line),
+          }
+      }
+
+      fn tokenize(&mut self) -> Vec<Token> {
+          let mut tokens = Vec::new();
+          loop {
+              let t = self.next_token();
+              let done = t.kind == TokenKind::Eof;
+              tokens.push(t);
+              if done { break; }
+          }
+          tokens
+      }
+  }
+
+  fn tokenize(source: &str) -> Vec<Token> {
+      Lexer::new(source).tokenize()
+  }
+  '''
+end
diff --git a/test/support/fixtures/scala/actor_messages.ex b/test/support/fixtures/scala/actor_messages.ex
new file mode 100644
index 00000000..73f150c3
--- /dev/null
+++ b/test/support/fixtures/scala/actor_messages.ex
@@ -0,0 +1,70 @@
+defmodule Test.Fixtures.Scala.ActorMessages do # Scala fixture: Message and ActorState hierarchies, Behaviour trait, EchoBehaviour, and Supervisor.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "scala actor_messages" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  trait Message
+
+  class Request(val id: String, val payload: Map[String, String]) extends Message
+
+  class Response(val id: String, val status: Int, val body: String) extends Message
+
+  class Broadcast(val topic: String, val data: String) extends Message
+
+  class Shutdown(val reason: String) extends Message
+
+  trait ActorState
+
+  class Active(val processedCount: Int) extends ActorState
+
+  class Paused(val since: Long, val reason: String) extends ActorState
+
+  class Stopped(val at: Long) extends ActorState
+
+  trait Behaviour {
+  def receive(message: Message, state: ActorState): (List[Message], ActorState)
+
+  def onStart(): ActorState
+
+  def onStop(state: ActorState): Unit
+  }
+
+  class EchoBehaviour extends Behaviour {
+  def receive(message: Message, state: ActorState): (List[Message], ActorState) =
+    message match {
+      case req: Request =>
+        val reply = new Response(req.id, 200, req.payload.mkString(","))
+        val newState = state match {
+          case a: Active => new Active(a.processedCount + 1)
+          case other => other
+        }
+        (List(reply), newState)
+      case _: Shutdown => (List.empty, new Stopped(System.currentTimeMillis()))
+      case _ => (List.empty, state)
+    }
+
+  def onStart(): ActorState = new Active(0)
+
+  def onStop(state: ActorState): Unit = {}
+  }
+
+  class Supervisor {
+  private var actors: Map[String, Behaviour] = Map.empty
+  private var states: Map[String, ActorState] = Map.empty
+
+  def spawn(id: String, behaviour: Behaviour): Unit = {
+    actors = actors + (id -> behaviour)
+    states = states + (id -> behaviour.onStart())
+  }
+
+  def send(id: String, message: Message): List[Message] =
+    actors.get(id).map { b =>
+      val (replies, newState) = b.receive(message, states(id))
+      states = states + (id -> newState)
+      replies
+    }.getOrElse(List.empty)
+
+  def stop(id: String): Unit = actors.get(id).foreach { b => b.onStop(states(id)); actors = actors - id }
+  }
+  '''
+end
diff --git a/test/support/fixtures/scala/case_class_algebra.ex b/test/support/fixtures/scala/case_class_algebra.ex
new file mode 100644
index 00000000..7a2f002f
--- /dev/null
+++ b/test/support/fixtures/scala/case_class_algebra.ex
@@ -0,0 +1,73 @@
+defmodule Test.Fixtures.Scala.CaseClassAlgebra do # Scala fixture: Expr and EvalResult hierarchies, SafeEvaluator, and InfixPrinter.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "scala case_class_algebra" # @code below is a ~S heredoc (verbatim, so Scala's s"..." interpolation is kept as written). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  trait Expr
+
+  class Num(val value: Double) extends Expr
+
+  class Add(val left: Expr, val right: Expr) extends Expr
+
+  class Sub(val left: Expr, val right: Expr) extends Expr
+
+  class Mul(val left: Expr, val right: Expr) extends Expr
+
+  class Div(val left: Expr, val right: Expr) extends Expr
+
+  class Neg(val expr: Expr) extends Expr
+
+  trait EvalResult
+
+  class EvalOk(val value: Double) extends EvalResult
+
+  class EvalError(val message: String) extends EvalResult
+
+  trait Evaluator {
+  def eval(expr: Expr): EvalResult
+  }
+
+  class SafeEvaluator extends Evaluator {
+  def eval(expr: Expr): EvalResult = expr match {
+    case n: Num => new EvalOk(n.value)
+    case neg: Neg => eval(neg.expr) match {
+      case ok: EvalOk => new EvalOk(-ok.value)
+      case err => err
+    }
+    case add: Add => combine(add.left, add.right)(_ + _)
+    case sub: Sub => combine(sub.left, sub.right)(_ - _)
+    case mul: Mul => combine(mul.left, mul.right)(_ * _)
+    case div: Div => eval(div.right) match {
+      case ok: EvalOk if ok.value == 0.0 => new EvalError("Division by zero")
+      case ok: EvalOk => eval(div.left) match {
+        case lOk: EvalOk => new EvalOk(lOk.value / ok.value)
+        case err => err
+      }
+      case err => err
+    }
+  }
+
+  private def combine(l: Expr, r: Expr)(op: (Double, Double) => Double): EvalResult =
+    (eval(l), eval(r)) match {
+      case (lv: EvalOk, rv: EvalOk) => new EvalOk(op(lv.value, rv.value))
+      case (err: EvalError, _) => err
+      case (_, err: EvalError) => err
+    }
+  }
+
+  trait Printer {
+  def print(expr: Expr): String
+  }
+
+  class InfixPrinter extends Printer {
+  def print(expr: Expr): String = expr match {
+    case n: Num => n.value.toString
+    case neg: Neg => s"-${print(neg.expr)}"
+    case add: Add => s"(${print(add.left)} + ${print(add.right)})"
+    case sub: Sub => s"(${print(sub.left)} - ${print(sub.right)})"
+    case mul: Mul => s"(${print(mul.left)} * ${print(mul.right)})"
+    case div: Div => s"(${print(div.left)} / ${print(div.right)})"
+  }
+  }
+  '''
+end
diff --git a/test/support/fixtures/scala/typeclass_pattern.ex b/test/support/fixtures/scala/typeclass_pattern.ex
new file mode 100644
index 00000000..0a14c37e
--- /dev/null
+++ b/test/support/fixtures/scala/typeclass_pattern.ex
@@ -0,0 +1,64 @@
+defmodule Test.Fixtures.Scala.TypeclassPattern do # Scala fixture: Show/Eq/Ord/Functor/Monoid traits, Identity, and syntax wrapper classes.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "scala typeclass_pattern" # @code below is a ~S heredoc (verbatim, so Scala's s"..." interpolation is kept as written). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  trait Show[A] {
+  def show(value: A): String
+  }
+
+  trait Eq[A] {
+  def eqv(a: A, b: A): Boolean
+
+  def neqv(a: A, b: A): Boolean = !eqv(a, b)
+  }
+
+  trait Ord[A] extends Eq[A] {
+  def compare(a: A, b: A): Int
+
+  def lt(a: A, b: A): Boolean = compare(a, b) < 0
+
+  def lte(a: A, b: A): Boolean = compare(a, b) <= 0
+
+  def gt(a: A, b: A): Boolean = compare(a, b) > 0
+
+  def gte(a: A, b: A): Boolean = compare(a, b) >= 0
+
+  def eqv(a: A, b: A): Boolean = compare(a, b) == 0
+  }
+
+  trait Functor[F[_]] {
+  def map[A, B](fa: F[A])(f: A => B): F[B]
+  }
+
+  class Identity[A](val value: A)
+
+  class IdentityInstances {
+  val identityFunctor: Functor[Identity] = new Functor[Identity] {
+    def map[A, B](fa: Identity[A])(f: A => B): Identity[B] = new Identity(f(fa.value))
+  }
+
+  val identityShow: Show[Identity[String]] = new Show[Identity[String]] {
+    def show(value: Identity[String]): String = s"Identity(${value.value})"
+  }
+  }
+
+  class ShowSyntax[A](value: A, ev: Show[A]) {
+  def show: String = ev.show(value)
+  }
+
+  class OrdSyntax[A](value: A, ev: Ord[A]) {
+  def <(other: A): Boolean = ev.lt(value, other)
+
+  def >(other: A): Boolean = ev.gt(value, other)
+
+  def ===(other: A): Boolean = ev.eqv(value, other)
+  }
+
+  trait Monoid[A] {
+  def empty: A
+
+  def combine(a: A, b: A): A
+  }
+  '''
+end
diff --git a/test/support/fixtures/swift/actor_model.ex b/test/support/fixtures/swift/actor_model.ex
new file mode 100644
index 00000000..3e651744
--- /dev/null
+++ b/test/support/fixtures/swift/actor_model.ex
@@ -0,0 +1,81 @@
+defmodule Test.Fixtures.Swift.ActorModel do # Swift fixture: ActorMessage enum, ActorBehaviour protocol, ActorRef, ActorSystem, and StateActor.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "swift actor_model" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  enum ActorMessage {
+    case ping(replyTo: String)
+    case pong(from: String)
+    case shutdown
+    case updateState(key: String, value: String)
+  }
+
+  protocol ActorBehaviour {
+    var id: String { get }
+    func receive(_ message: ActorMessage) -> [ActorMessage]
+    func preStart()
+    func postStop()
+  }
+
+  struct ActorRef {
+    let id: String
+    private let mailbox: [ActorMessage]
+
+    init(id: String) {
+      self.id = id
+      self.mailbox = []
+    }
+  }
+
+  class ActorSystem {
+    private var actors: [String: ActorBehaviour] = [:]
+    private var mailboxes: [String: [ActorMessage]] = [:]
+
+    func spawn(id: String, behaviour: ActorBehaviour) {
+      actors[id] = behaviour
+      mailboxes[id] = []
+      behaviour.preStart()
+    }
+
+    func send(to id: String, message: ActorMessage) {
+      mailboxes[id, default: []].append(message)
+    }
+
+    func process(actorId: String) {
+      guard let actor = actors[actorId] else { return }
+      let messages = mailboxes[actorId] ?? []
+      mailboxes[actorId] = []
+      for message in messages {
+        let replies = actor.receive(message)
+        for reply in replies { self.processReply(reply) }
+      }
+    }
+
+    func stop(actorId: String) {
+      actors[actorId]?.postStop()
+      actors.removeValue(forKey: actorId)
+      mailboxes.removeValue(forKey: actorId)
+    }
+
+    private func processReply(_ message: ActorMessage) {}
+  }
+
+  struct StateActor: ActorBehaviour {
+    let id: String
+    private var state: [String: String] = [:]
+
+    func receive(_ message: ActorMessage) -> [ActorMessage] {
+      switch message {
+      case .ping(let replyTo): return [.pong(from: id)]
+      case .updateState(let key, let value): return []
+      case .shutdown: return []
+      default: return []
+      }
+    }
+
+    func preStart() {}
+
+    func postStop() {}
+  }
+  '''
+end
diff --git a/test/support/fixtures/swift/combine_stream.ex b/test/support/fixtures/swift/combine_stream.ex
new file mode 100644
index 00000000..1faf9e4e
--- /dev/null
+++ b/test/support/fixtures/swift/combine_stream.ex
@@ -0,0 +1,66 @@
+defmodule Test.Fixtures.Swift.CombineStream do # Swift fixture: Publisher/Subscriber protocols, Completion, AnySubscriber, Just, MapPublisher, and sink.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "swift combine_stream" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  protocol Publisher {
+    associatedtype Output
+    associatedtype Failure: Error
+    func subscribe(_ subscriber: AnySubscriber<Output, Failure>)
+  }
+
+  protocol Subscriber {
+    associatedtype Input
+    associatedtype Failure: Error
+    func receive(_ input: Input)
+    func receiveCompletion(_ completion: Completion<Failure>)
+  }
+
+  enum Completion<Failure: Error> {
+    case finished
+    case failure(Failure)
+  }
+
+  struct AnySubscriber<Input, Failure: Error> {
+    private let receiveValue: (Input) -> Void
+    private let receiveCompletion: (Completion<Failure>) -> Void
+
+    init(receiveValue: @escaping (Input) -> Void, receiveCompletion: @escaping (Completion<Failure>) -> Void) {
+      self.receiveValue = receiveValue
+      self.receiveCompletion = receiveCompletion
+    }
+
+    func receive(_ input: Input) { receiveValue(input) }
+
+    func receiveCompletion(_ completion: Completion<Failure>) { self.receiveCompletion(completion) }
+  }
+
+  struct Just<Output>: Publisher {
+    typealias Failure = Never
+    let value: Output
+
+    func subscribe(_ subscriber: AnySubscriber<Output, Never>) {
+      subscriber.receive(value)
+      subscriber.receiveCompletion(.finished)
+    }
+  }
+
+  struct MapPublisher<Upstream: Publisher, Output>: Publisher {
+    typealias Failure = Upstream.Failure
+    let upstream: Upstream
+    let transform: (Upstream.Output) -> Output
+
+    func subscribe(_ subscriber: AnySubscriber<Output, Failure>) {
+      let mapped = AnySubscriber<Upstream.Output, Failure>(
+        receiveValue: { self.upstream.subscribe(AnySubscriber(receiveValue: { _ in }, receiveCompletion: { _ in })); subscriber.receive(self.transform($0)) },
+        receiveCompletion: subscriber.receiveCompletion
+      )
+      upstream.subscribe(mapped)
+    }
+  }
+
+  func sink<T>(receiveValue: @escaping (T) -> Void) -> AnySubscriber<T, Never> {
+    return AnySubscriber(receiveValue: receiveValue, receiveCompletion: { _ in })
+  }
+  '''
+end
diff --git a/test/support/fixtures/swift/result_type.ex b/test/support/fixtures/swift/result_type.ex
new file mode 100644
index 00000000..5ce1179a
--- /dev/null
+++ b/test/support/fixtures/swift/result_type.ex
@@ -0,0 +1,63 @@
+defmodule Test.Fixtures.Swift.ResultType do # Swift fixture: ValidationError/ParseError enums, Email, Username, UserRegistration, and mapResult.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "swift result_type" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  enum ValidationError: Error {
+    case empty(field: String)
+    case tooShort(field: String, minimum: Int)
+    case tooLong(field: String, maximum: Int)
+    case invalidFormat(field: String, pattern: String)
+  }
+
+  enum ParseError: Error {
+    case invalidJSON
+    case missingField(String)
+    case typeMismatch(field: String, expected: String)
+  }
+
+  struct Email {
+    let value: String
+
+    static func parse(_ raw: String) -> Result<Email, ValidationError> {
+      guard !raw.isEmpty else { return .failure(.empty(field: "email")) }
+      guard raw.contains("@") else { return .failure(.invalidFormat(field: "email", pattern: "must contain @")) }
+      return .success(Email(value: raw.lowercased()))
+    }
+  }
+
+  struct Username {
+    let value: String
+
+    static func parse(_ raw: String) -> Result<Username, ValidationError> {
+      guard !raw.isEmpty else { return .failure(.empty(field: "username")) }
+      guard raw.count >= 3 else { return .failure(.tooShort(field: "username", minimum: 3)) }
+      guard raw.count <= 32 else { return .failure(.tooLong(field: "username", maximum: 32)) }
+      return .success(Username(value: raw))
+    }
+  }
+
+  struct UserRegistration {
+    let email: Email
+    let username: Username
+
+    static func validate(email rawEmail: String, username rawUsername: String) -> Result<UserRegistration, ValidationError> {
+      switch Email.parse(rawEmail) {
+      case .failure(let e): return .failure(e)
+      case .success(let email):
+        switch Username.parse(rawUsername) {
+        case .failure(let e): return .failure(e)
+        case .success(let username): return .success(UserRegistration(email: email, username: username))
+        }
+      }
+    }
+  }
+
+  func mapResult<T, U, E: Error>(_ result: Result<T, E>, _ transform: (T) -> U) -> Result<U, E> {
+    switch result {
+    case .success(let value): return .success(transform(value))
+    case .failure(let error): return .failure(error)
+    }
+  }
+  '''
+end
diff --git a/test/support/fixtures/typescript/dependency_injection.ex b/test/support/fixtures/typescript/dependency_injection.ex
new file mode 100644
index 00000000..38bb9b8a
--- /dev/null
+++ b/test/support/fixtures/typescript/dependency_injection.ex
@@ -0,0 +1,66 @@
+defmodule Test.Fixtures.TypeScript.DependencyInjection do # TypeScript fixture: Token/Provider/Container interfaces, DIContainer class, and token/provider factory functions.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "typescript dependency_injection" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  interface Token<T> {
+    readonly name: string;
+  }
+
+  interface Provider<T> {
+    token: Token<T>;
+    factory: (container: Container) => T;
+    singleton: boolean;
+  }
+
+  interface Container {
+    register<T>(provider: Provider<T>): void;
+    resolve<T>(token: Token<T>): T;
+    has<T>(token: Token<T>): boolean;
+  }
+
+  class DIContainer implements Container {
+    private providers: Map<string, Provider<unknown>>;
+    private singletons: Map<string, unknown>;
+
+    constructor() {
+      this.providers = new Map();
+      this.singletons = new Map();
+    }
+
+    register<T>(provider: Provider<T>): void {
+      this.providers.set(provider.token.name, provider as Provider<unknown>);
+    }
+
+    resolve<T>(token: Token<T>): T {
+      const provider = this.providers.get(token.name);
+      if (!provider) {
+        throw new Error("No provider registered for token: " + token.name);
+      }
+      if (provider.singleton) {
+        if (!this.singletons.has(token.name)) {
+          this.singletons.set(token.name, provider.factory(this));
+        }
+        return this.singletons.get(token.name) as T;
+      }
+      return provider.factory(this) as T;
+    }
+
+    has<T>(token: Token<T>): boolean {
+      return this.providers.has(token.name);
+    }
+  }
+
+  function createToken<T>(name: string): Token<T> {
+    return { name };
+  }
+
+  function singleton<T>(token: Token<T>, factory: (c: Container) => T): Provider<T> {
+    return { token, factory, singleton: true };
+  }
+
+  function transient<T>(token: Token<T>, factory: (c: Container) => T): Provider<T> {
+    return { token, factory, singleton: false };
+  }
+  '''
+end
diff --git a/test/support/fixtures/typescript/event_emitter.ex b/test/support/fixtures/typescript/event_emitter.ex
new file mode 100644
index 00000000..8f1fed31
--- /dev/null
+++ b/test/support/fixtures/typescript/event_emitter.ex
@@ -0,0 +1,68 @@
+defmodule Test.Fixtures.TypeScript.EventEmitter do # TypeScript fixture: EventMap/Listener interfaces, generic EventEmitter class, and createEmitter.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "typescript event_emitter" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  interface EventMap {
+    [event: string]: unknown;
+  }
+
+  interface Listener<T> {
+    callback: (data: T) => void;
+    once: boolean;
+  }
+
+  class EventEmitter<T extends EventMap> {
+    private listeners: Map<keyof T, Array<Listener<unknown>>>;
+
+    constructor() {
+      this.listeners = new Map();
+    }
+
+    on<K extends keyof T>(event: K, callback: (data: T[K]) => void): this {
+      if (!this.listeners.has(event)) {
+        this.listeners.set(event, []);
+      }
+      this.listeners.get(event)!.push({ callback: callback as (data: unknown) => void, once: false });
+      return this;
+    }
+
+    once<K extends keyof T>(event: K, callback: (data: T[K]) => void): this {
+      if (!this.listeners.has(event)) {
+        this.listeners.set(event, []);
+      }
+      this.listeners.get(event)!.push({ callback: callback as (data: unknown) => void, once: true });
+      return this;
+    }
+
+    off<K extends keyof T>(event: K, callback: (data: T[K]) => void): this {
+      const list = this.listeners.get(event);
+      if (list) {
+        this.listeners.set(event, list.filter(function(l) { return l.callback !== callback; }));
+      }
+      return this;
+    }
+
+    emit<K extends keyof T>(event: K, data: T[K]): boolean {
+      const list = this.listeners.get(event);
+      if (!list || list.length === 0) return false;
+      list.forEach(function(listener) { listener.callback(data); });
+      this.listeners.set(event, list.filter(function(l) { return !l.once; }));
+      return true;
+    }
+
+    removeAllListeners(event?: keyof T): this {
+      if (event) {
+        this.listeners.delete(event);
+      } else {
+        this.listeners.clear();
+      }
+      return this;
+    }
+  }
+
+  function createEmitter<T extends EventMap>(): EventEmitter<T> {
+    return new EventEmitter<T>();
+  }
+  '''
+end
diff --git a/test/support/fixtures/typescript/user_profile_store.ex b/test/support/fixtures/typescript/user_profile_store.ex
new file mode 100644
index 00000000..2242e438
--- /dev/null
+++ b/test/support/fixtures/typescript/user_profile_store.ex
@@ -0,0 +1,72 @@
+defmodule Test.Fixtures.TypeScript.UserProfileStore do # TypeScript fixture: UserProfile/StoreState/Action interfaces, UserProfileStore class, and createUserProfileStore.
+  @moduledoc false # Test-support module; hidden from generated docs.
+  use Test.LanguageFixture, language: "typescript user_profile_store" # @code below is a ~S heredoc (verbatim, no interpolation/escapes). NOTE(review): how Test.LanguageFixture consumes it is not visible here — confirm.
+
+  @code ~S'''
+  interface UserProfile {
+    id: string;
+    name: string;
+    email: string;
+    role: "admin" | "member" | "guest";
+  }
+
+  interface StoreState {
+    users: Record<string, UserProfile>;
+    loading: boolean;
+    error: string | null;
+  }
+
+  interface Action {
+    type: string;
+    payload?: unknown;
+  }
+
+  class UserProfileStore {
+    private state: StoreState;
+    private subscribers: Array<(state: StoreState) => void>;
+
+    constructor() {
+      this.state = { users: {}, loading: false, error: null };
+      this.subscribers = [];
+    }
+
+    getState(): StoreState {
+      return this.state;
+    }
+
+    dispatch(action: Action): void {
+      this.state = this.reduce(this.state, action);
+      this.notify();
+    }
+
+    subscribe(listener: (state: StoreState) => void): () => void {
+      this.subscribers.push(listener);
+      return () => {
+        this.subscribers = this.subscribers.filter(function(s) { return s !== listener; });
+      };
+    }
+
+    private reduce(state: StoreState, action: Action): StoreState {
+      switch (action.type) {
+        case "SET_LOADING":
+          return { ...state, loading: action.payload as boolean };
+        case "SET_ERROR":
+          return { ...state, error: action.payload as string };
+        case "UPSERT_USER":
+          const user = action.payload as UserProfile;
+          return { ...state, users: { ...state.users, [user.id]: user } };
+        default:
+          return state;
+      }
+    }
+
+    private notify(): void {
+      this.subscribers.forEach(function(listener) { listener(this.state); }.bind(this));
+    }
+  }
+
+  function createUserProfileStore(): UserProfileStore {
+    return new UserProfileStore();
+  }
+  '''
+end
diff --git a/test/support/fixtures/zig/allocator_interface.ex b/test/support/fixtures/zig/allocator_interface.ex
new file mode 100644
index 00000000..f11d6800
--- /dev/null
+++ b/test/support/fixtures/zig/allocator_interface.ex
@@ -0,0 +1,72 @@
+defmodule Test.Fixtures.Zig.AllocatorInterface do
+  @moduledoc false
+  use Test.LanguageFixture, language: "zig allocator_interface"
+  # Raw (~S) Zig sample: Allocator + VTable, ArenaAllocator, AllocError, alignment/size helpers.
+  @code ~S'''
+  const Allocator = struct {
+  ptr: *anyopaque,
+  vtable: *const VTable,
+
+  pub const VTable = struct {
+    alloc: *const fn (ctx: *anyopaque, len: usize, alignment: u8) ?[*]u8,
+    free: *const fn (ctx: *anyopaque, buf: [*]u8, len: usize) void,
+    resize: *const fn (ctx: *anyopaque, buf: [*]u8, old_len: usize, new_len: usize) bool,
+  };
+
+  pub fn alloc(self: Allocator, comptime T: type, n: usize) ![]T {
+    const ptr = self.vtable.alloc(self.ptr, @sizeOf(T) * n, @alignOf(T)) orelse return error.OutOfMemory;
+    return @as([*]T, @ptrCast(@alignCast(ptr)))[0..n];
+  }
+
+  pub fn free(self: Allocator, slice: anytype) void {
+    const T = @TypeOf(slice[0]);
+    self.vtable.free(self.ptr, @as([*]u8, @ptrCast(slice.ptr)), slice.len * @sizeOf(T));
+  }
+  };
+
+  const ArenaAllocator = struct {
+  backing: Allocator,
+  buffer: []u8,
+  pos: usize,
+
+  pub fn init(backing: Allocator, size: usize) !ArenaAllocator {
+    const buf = try backing.alloc(u8, size);
+    return ArenaAllocator{ .backing = backing, .buffer = buf, .pos = 0 };
+  }
+
+  pub fn deinit(self: *ArenaAllocator) void {
+    self.backing.free(self.buffer);
+  }
+
+  pub fn alloc(self: *ArenaAllocator, comptime T: type, n: usize) ![]T {
+    const size = @sizeOf(T) * n;
+    if (self.pos + size > self.buffer.len) return error.OutOfMemory;
+    const slice = self.buffer[self.pos .. self.pos + size];
+    self.pos += size;
+    return @as([*]T, @ptrCast(@alignCast(slice.ptr)))[0..n];
+  }
+
+  pub fn reset(self: *ArenaAllocator) void {
+    self.pos = 0;
+  }
+  };
+
+  const AllocError = error{
+  OutOfMemory,
+  AlignmentError,
+  InvalidSize,
+  };
+
+  fn alignForward(addr: usize, alignment: usize) usize {
+  return (addr + alignment - 1) & ~(alignment - 1);
+  }
+
+  fn isPowerOfTwo(n: usize) bool {
+  return n > 0 and (n & (n - 1)) == 0;
+  }
+
+  fn sizeOf(comptime T: type) comptime_int {
+  return @sizeOf(T);
+  }
+  '''
+end
diff --git a/test/support/fixtures/zig/iterator_protocol.ex b/test/support/fixtures/zig/iterator_protocol.ex
new file mode 100644
index 00000000..52848ef3
--- /dev/null
+++ b/test/support/fixtures/zig/iterator_protocol.ex
@@ -0,0 +1,87 @@
+defmodule Test.Fixtures.Zig.IteratorProtocol do
+  @moduledoc false
+  use Test.LanguageFixture, language: "zig iterator_protocol"
+  # Raw (~S) Zig sample: Iterator, RangeIterator, SliceIterator, MapIterator generics and take.
+  @code ~S'''
+  fn Iterator(comptime T: type) type {
+  return struct {
+    const Self = @This();
+    pub const Item = T;
+    ptr: *anyopaque,
+    nextFn: *const fn (ptr: *anyopaque) ?T,
+
+    pub fn next(self: *Self) ?T {
+      return self.nextFn(self.ptr);
+    }
+
+    pub fn count(self: *Self) usize {
+      var n: usize = 0;
+      while (self.next() != null) n += 1;
+      return n;
+    }
+
+    pub fn forEach(self: *Self, callback: fn (T) void) void {
+      while (self.next()) |item| callback(item);
+    }
+  };
+  }
+
+  fn RangeIterator(comptime T: type) type {
+  return struct {
+    current: T,
+    end: T,
+    step: T,
+
+    pub fn init(start: T, end: T, step: T) @This() {
+      return .{ .current = start, .end = end, .step = step };
+    }
+
+    pub fn next(self: *@This()) ?T {
+      if (self.current >= self.end) return null;
+      const value = self.current;
+      self.current += self.step;
+      return value;
+    }
+  };
+  }
+
+  fn SliceIterator(comptime T: type) type {
+  return struct {
+    slice: []const T,
+    index: usize,
+
+    pub fn init(slice: []const T) @This() {
+      return .{ .slice = slice, .index = 0 };
+    }
+
+    pub fn next(self: *@This()) ?T {
+      if (self.index >= self.slice.len) return null;
+      const item = self.slice[self.index];
+      self.index += 1;
+      return item;
+    }
+
+    pub fn reset(self: *@This()) void {
+      self.index = 0;
+    }
+  };
+  }
+
+  fn MapIterator(comptime In: type, comptime Out: type) type {
+  return struct {
+    inner: SliceIterator(In),
+    transform: *const fn (In) Out,
+
+    pub fn next(self: *@This()) ?Out {
+      const item = self.inner.next() orelse return null;
+      return self.transform(item);
+    }
+  };
+  }
+
+  fn take(comptime T: type, iter: *SliceIterator(T), n: usize) []const T {
+  _ = n;
+  return iter.slice;
+  }
+  '''
+end
diff --git a/test/support/fixtures/zig/tagged_union.ex b/test/support/fixtures/zig/tagged_union.ex
new file mode 100644
index 00000000..fed8b31b
--- /dev/null
+++ b/test/support/fixtures/zig/tagged_union.ex
@@ -0,0 +1,90 @@
+defmodule Test.Fixtures.Zig.TaggedUnion do
+  @moduledoc false
+  use Test.LanguageFixture, language: "zig tagged_union"
+  # Raw (~S) Zig sample: TokenKind enum, Token struct, Value union, ParseError, number parsers.
+  @code ~S'''
+  const TokenKind = enum {
+  identifier,
+  integer,
+  float,
+  string_literal,
+  operator,
+  keyword,
+  comment,
+  eof,
+  };
+
+  const Token = struct {
+  kind: TokenKind,
+  start: usize,
+  end: usize,
+  line: u32,
+  column: u32,
+
+  pub fn length(self: Token) usize {
+    return self.end - self.start;
+  }
+
+  pub fn isLiteral(self: Token) bool {
+    return self.kind == .integer or self.kind == .float or self.kind == .string_literal;
+  }
+  };
+
+  const Value = union(enum) {
+  int: i64,
+  float: f64,
+  boolean: bool,
+  string: []const u8,
+  null_value: void,
+
+  pub fn typeName(self: Value) []const u8 {
+    return switch (self) {
+      .int => "int",
+      .float => "float",
+      .boolean => "boolean",
+      .string => "string",
+      .null_value => "null",
+    };
+  }
+
+  pub fn isTruthy(self: Value) bool {
+    return switch (self) {
+      .int => |v| v != 0,
+      .float => |v| v != 0.0,
+      .boolean => |v| v,
+      .string => |v| v.len > 0,
+      .null_value => false,
+    };
+  }
+  };
+
+  const ParseError = error{
+  UnexpectedToken,
+  UnexpectedEof,
+  InvalidLiteral,
+  StackOverflow,
+  };
+
+  fn parseInteger(source: []const u8) !i64 {
+  var result: i64 = 0;
+  for (source) |ch| {
+    if (ch < '0' or ch > '9') return ParseError.InvalidLiteral;
+    result = result * 10 + @as(i64, ch - '0');
+  }
+  return result;
+  }
+
+  fn parseFloat(source: []const u8) !f64 {
+  var result: f64 = 0;
+  var decimal = false;
+  var scale: f64 = 1;
+  for (source) |ch| {
+    if (ch == '.') { decimal = true; continue; }
+    if (ch < '0' or ch > '9') return ParseError.InvalidLiteral;
+    if (decimal) { scale /= 10; result += @as(f64, ch - '0') * scale; }
+    else { result = result * 10 + @as(f64, ch - '0'); }
+  }
+  return result;
+  }
+  '''
+end
diff --git a/test/support/language_fixture.ex b/test/support/language_fixture.ex
new file mode 100644
index 00000000..b83b44a2
--- /dev/null
+++ b/test/support/language_fixture.ex
@@ -0,0 +1,61 @@
+defmodule Test.LanguageFixture do
+  @moduledoc """
+  Turns a module into a reusable source-code fixture for one language and domain.
+
+  ## Defining a fixture
+
+      defmodule Test.Fixtures.Elixir.EventBus do
+        use Test.LanguageFixture, language: "elixir event bus"
+
+        @code ~S'''
+        defmodule EventBus do
+          ...
+        end
+        '''
+      end
+
+  ## Consuming fixtures in a test module
+
+      defmodule MyTest do
+        Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false)
+        use Test.Fixtures.Elixir.EventBus
+        use Test.Fixtures.Python.CsvPipeline
+      end
+  """
+
+  # Stores the language label on the fixture module and hooks `__before_compile__/1` into it.
+  defmacro __using__(opts) do
+    label = Keyword.fetch!(opts, :language)
+
+    quote do
+      @language unquote(label)
+      @before_compile unquote(__MODULE__)
+    end
+  end
+
+  # Reads `@code`, `@language` and the optional `@block_assertions` of the fixture module, then
+  # gives that module a `__using__/1` macro that sets `@fixture` to a tuple of the three values.
+  defmacro __before_compile__(%{module: fixture_mod} = env) do
+    source = Module.get_attribute(fixture_mod, :code)
+    label = Module.get_attribute(fixture_mod, :language)
+    block_assertions = Module.get_attribute(fixture_mod, :block_assertions) || []
+
+    if !source do
+      message = "#{fixture_mod} uses Test.LanguageFixture but @code is not set"
+      raise CompileError, file: env.file, line: env.line, description: message
+    end
+
+    quote do
+      defmacro __using__(_opts) do
+        fixture_language = unquote(label)
+        fixture_code = unquote(source)
+        fixture_block_assertions = unquote(Macro.escape(block_assertions))
+
+        quote do
+          @fixture {unquote(fixture_language), unquote(fixture_code),
+                    unquote(Macro.escape(fixture_block_assertions))}
+        end
+      end
+    end
+  end
+end
diff --git a/test/support/node_matcher.ex b/test/support/node_matcher.ex
new file mode 100644
index 00000000..679dc921
--- /dev/null
+++ b/test/support/node_matcher.ex
@@ -0,0 +1,17 @@
+defmodule Test.NodeMatcher do
+  @moduledoc """
+  Builds the tagged matcher tuples used when asserting on tokens inside `CompoundNode` structures.
+
+  - `exact(:content, "add")` — matches a token whose `content` is exactly `"add"`
+  - `partial(:content, "@doc")` — matches a token whose `content` has `"@doc"` as a substring
+  - pass `:value` as the field to match the normalized token value rather than raw source content
+  """
+
+  @fields [:content, :value]
+
+  @spec exact(:content | :value, String.t()) :: {:exact, :content | :value, String.t()}
+  def exact(field, value) when field in @fields, do: {:exact, field, value}
+
+  @spec partial(:content | :value, String.t()) :: {:partial, :content | :value, String.t()}
+  def partial(field, value) when field in @fields, do: {:partial, field, value}
+end