diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index a7580ee..2d8150c 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -11,33 +11,70 @@ on: types: [submitted] jobs: - claude: - timeout-minutes: 10 - concurrency: - group: claude-${{ github.event_name }}-${{ github.event.issue.number || github.event.pull_request.number || github.run_id }} - cancel-in-progress: true + # Check if the user is a member of liatrio-labs organization + check-org-membership: + runs-on: ubuntu-latest if: | ( github.event_name == 'issue_comment' && - contains(github.event.comment.body, '@claude') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) + contains(github.event.comment.body, '@claude') ) || ( github.event_name == 'pull_request_review_comment' && - contains(github.event.comment.body, '@claude') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) + contains(github.event.comment.body, '@claude') ) || ( github.event_name == 'pull_request_review' && github.event.review.body != null && - contains(github.event.review.body, '@claude') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.review.author_association) + contains(github.event.review.body, '@claude') ) || ( github.event_name == 'issues' && ( (github.event.issue.body != null && contains(github.event.issue.body, '@claude')) || contains(github.event.issue.title, '@claude') - ) && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.issue.author_association) + ) ) + outputs: + is-authorized: ${{ steps.check.outputs.authorized }} + steps: + - name: Check authorization + id: check + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + ACTOR="${{ github.actor }}" + + # Check if user is a repo collaborator/owner/member first + if [[ "${{ github.event_name }}" == "issue_comment" ]]; then + AUTHOR_ASSOC="${{ 
github.event.comment.author_association }}" + elif [[ "${{ github.event_name }}" == "pull_request_review_comment" ]]; then + AUTHOR_ASSOC="${{ github.event.comment.author_association }}" + elif [[ "${{ github.event_name }}" == "pull_request_review" ]]; then + AUTHOR_ASSOC="${{ github.event.review.author_association }}" + elif [[ "${{ github.event_name }}" == "issues" ]]; then + AUTHOR_ASSOC="${{ github.event.issue.author_association }}" + fi + + if [[ "$AUTHOR_ASSOC" == "OWNER" ]] || [[ "$AUTHOR_ASSOC" == "MEMBER" ]] || [[ "$AUTHOR_ASSOC" == "COLLABORATOR" ]]; then + echo "User is authorized via author_association: $AUTHOR_ASSOC" + echo "authorized=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Check if user is a member of liatrio-labs organization + if gh api "orgs/liatrio-labs/members/$ACTOR" --silent 2>/dev/null; then + echo "User is authorized as liatrio-labs organization member" + echo "authorized=true" >> "$GITHUB_OUTPUT" + else + echo "User is not authorized" + echo "authorized=false" >> "$GITHUB_OUTPUT" + fi + + claude: + needs: check-org-membership + if: needs.check-org-membership.outputs.is-authorized == 'true' + timeout-minutes: 10 + concurrency: + group: claude-${{ github.event_name }}-${{ github.event.issue.number || github.event.pull_request.number || github.run_id }} + cancel-in-progress: true runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/opencode-gpt-5-codex.yml b/.github/workflows/opencode-gpt-5-codex.yml index 7066901..28f6040 100644 --- a/.github/workflows/opencode-gpt-5-codex.yml +++ b/.github/workflows/opencode-gpt-5-codex.yml @@ -11,33 +11,70 @@ on: types: [submitted] jobs: - opencode: - timeout-minutes: 30 # to accommodate Codex's ability to run for extended periods - concurrency: - group: opencode-${{ github.event_name }}-${{ github.event.issue.number || github.event.pull_request.number || github.run_id }} - cancel-in-progress: true + # Check if the user is a member of liatrio-labs organization + 
check-org-membership: + runs-on: ubuntu-latest if: | ( github.event_name == 'issue_comment' && - contains(github.event.comment.body, '/oc-codex') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) + contains(github.event.comment.body, '/oc-codex') ) || ( github.event_name == 'pull_request_review_comment' && - contains(github.event.comment.body, '/oc-codex') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association) + contains(github.event.comment.body, '/oc-codex') ) || ( github.event_name == 'pull_request_review' && github.event.review.body != null && - contains(github.event.review.body, '/oc-codex') && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.review.author_association) + contains(github.event.review.body, '/oc-codex') ) || ( github.event_name == 'issues' && ( (github.event.issue.body != null && contains(github.event.issue.body, '/oc-codex')) || contains(github.event.issue.title, '/oc-codex') - ) && - contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.issue.author_association) + ) ) + outputs: + is-authorized: ${{ steps.check.outputs.authorized }} + steps: + - name: Check authorization + id: check + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + ACTOR="${{ github.actor }}" + + # Check if user is a repo collaborator/owner/member first + if [[ "${{ github.event_name }}" == "issue_comment" ]]; then + AUTHOR_ASSOC="${{ github.event.comment.author_association }}" + elif [[ "${{ github.event_name }}" == "pull_request_review_comment" ]]; then + AUTHOR_ASSOC="${{ github.event.comment.author_association }}" + elif [[ "${{ github.event_name }}" == "pull_request_review" ]]; then + AUTHOR_ASSOC="${{ github.event.review.author_association }}" + elif [[ "${{ github.event_name }}" == "issues" ]]; then + AUTHOR_ASSOC="${{ github.event.issue.author_association }}" + fi + + if [[ "$AUTHOR_ASSOC" == "OWNER" ]] || [[ "$AUTHOR_ASSOC" == 
"MEMBER" ]] || [[ "$AUTHOR_ASSOC" == "COLLABORATOR" ]]; then + echo "User is authorized via author_association: $AUTHOR_ASSOC" + echo "authorized=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Check if user is a member of liatrio-labs organization + if gh api "orgs/liatrio-labs/members/$ACTOR" --silent 2>/dev/null; then + echo "User is authorized as liatrio-labs organization member" + echo "authorized=true" >> "$GITHUB_OUTPUT" + else + echo "User is not authorized" + echo "authorized=false" >> "$GITHUB_OUTPUT" + fi + + opencode: + needs: check-org-membership + if: needs.check-org-membership.outputs.is-authorized == 'true' + timeout-minutes: 30 # to accommodate Codex's ability to run for extended periods + concurrency: + group: opencode-${{ github.event_name }}-${{ github.event.issue.number || github.event.pull_request.number || github.run_id }} + cancel-in-progress: true runs-on: ubuntu-latest permissions: contents: read diff --git a/.markdownlintrc b/.markdownlintrc new file mode 100644 index 0000000..4ba89ce --- /dev/null +++ b/.markdownlintrc @@ -0,0 +1,4 @@ +{ + "MD036": false, + "MD040": false +} diff --git a/docs/001-SYSTEM.md b/docs/001-SYSTEM.md new file mode 100644 index 0000000..f4928cb --- /dev/null +++ b/docs/001-SYSTEM.md @@ -0,0 +1,1705 @@ +# Codebase Context: mcp-spec-driven + +**Date:** 2025-01-23 +**Scope:** Full codebase analysis +**Purpose:** Understanding the system architecture and implementation +**Analysis Version:** 1.0 + +--- + +## 1. 
Repository Overview + +### 1.1 Structure + +- **Type:** Single Python application (FastMCP-based MCP server) +- **Components:** + - `mcp_server/` - Core server implementation (4 modules) + - `prompts/` - Markdown workflow prompts (4 files) + - `tests/` - Unit tests (pytest) + - `docs/` - Documentation + - `tasks/` - Spec and task list outputs +- **Organization:** Flat module structure with clear separation of concerns +- **Size:** ~312 lines of Python code, 7 source files + +### 1.2 Technology Stack + +- **Languages:** Python 3.12+ (pyproject.toml:7) +- **Frameworks:** + - FastMCP 2.12.4+ (pyproject.toml:9, mcp_server/**init**.py:7) + - Starlette (implicit via FastMCP - mcp_server/**init**.py:8-9) +- **Databases:** None (file-based prompts) +- **Infrastructure:** + - Transport: STDIO (default) or HTTP (configurable) + - Package manager: uv (CONTRIBUTING.md:18) + - Build: hatchling (pyproject.toml:26-28) +- **Testing:** pytest 8.4.2+, pytest-cov 7.0.0+, anyio +- **Code Quality:** Ruff 0.14.0+ (linting + formatting), pre-commit 4.3.0+ +- **CI/CD:** python-semantic-release 10.4.1+ (automated versioning) + +### 1.3 High-Level Architecture + +```mermaid +graph TB + subgraph "Entry Points" + CLI[CLI: uvx spec-driven-development-mcp] + STDIO[STDIO Transport] + HTTP[HTTP Transport :8000] + end + + subgraph "Application Layer" + SERVER[FastMCP Server] + FACTORY[create_app Factory] + CONFIG[Config Singleton] + end + + subgraph "Prompt System" + LOADER[Prompt Loader] + PARSER[Markdown Parser] + PROMPTS[4 Workflow Prompts] + end + + subgraph "Content Layer" + MD1[generate-spec.md] + MD2[generate-task-list.md] + MD3[manage-tasks.md] + MD4[generate-context.md] + end + + CLI --> FACTORY + STDIO --> FACTORY + HTTP --> FACTORY + FACTORY --> SERVER + FACTORY --> CONFIG + FACTORY --> LOADER + LOADER --> PARSER + PARSER --> MD1 + PARSER --> MD2 + PARSER --> MD3 + PARSER --> MD4 +``` + +### 1.4 Version Control & Evolution + +**Repository Health Indicators:** + +- **Current Version:** 
v1.5.0 (pyproject.toml:3) +- **Recent Activity:** 5 releases (v1.0.0 to v1.5.0) from Oct-Dec 2024 +- **Commit Conventions:** Conventional Commits (feat:, fix:, docs:, chore:) +- **Branching:** feature/*, fix/*, docs/*, chore/* branches +- **Automated Releases:** python-semantic-release via GitHub Actions + +**Code Maturity Signals:** + +- **Stable Foundation:** Core server architecture unchanged since v1.1.0 +- **Active Development:** 4 major versions in 3 months (rapid iteration) +- **Documentation Focus:** Multiple doc improvements in recent releases +- **Quality Focus:** Pre-commit hooks, linting, testing infrastructure established early + +**Architectural Evolution:** + +- **v1.0.0 (Oct 2024):** Initial release +- **v1.1.0 (Oct 2024):** FastMCP server foundation, prompt loading +- **v1.2.0 - v1.4.0:** CI/CD improvements, documentation enhancements +- **v1.5.0 (Oct 2024 - current):** Health check endpoint, README restructure + +**Current Branch:** `add-reverse-engineer-codebase-prompt` (feature branch) + +- Adding `generate-context` prompt (enhanced codebase analysis) +- Research-driven improvements documented in docs/roadmap/PROGRESS.md + +--- + +## 2. 
Documentation Inventory + +### 2.1 Found Documentation + +**Core Documentation:** + +- `README.md` — Project overview, workflow guide, installation (Last updated: 2025-01-23) +- `CONTRIBUTING.md` — Development setup, commit conventions, PR guidelines +- `CHANGELOG.md` — Version history with semantic versioning +- `docs/operations.md` — Server deployment, configuration, client integration +- `docs/mcp-prompt-support.md` — MCP client compatibility matrix +- `docs/roadmap/PROGRESS.md` — Implementation progress tracking (681 lines, very detailed) + +**Prompt Files (Product Core):** + +- `prompts/generate-spec.md` — Specification generation workflow +- `prompts/generate-task-list-from-spec.md` — Task list generation workflow +- `prompts/manage-tasks.md` — Task execution workflow +- `prompts/generate-context.md` — Codebase context analysis (in progress) + +**Research Documentation:** + +- `docs/research/codebase-context/README.md` — Research synthesis +- `docs/research/codebase-context/code-analyst.md` — Code analysis patterns +- `docs/research/codebase-context/information-analyst.md` — Documentation analysis patterns +- `docs/research/codebase-context/context_bootstrap.md` — Manager orchestration pattern +- `docs/research/codebase-context/claude-code-feature-dev-comparison.md` — Claude Code plugin analysis (18,287 words) +- `docs/research/codebase-context/research-synthesis.md` — Actionable recommendations (8,000+ words) + +### 2.2 Decision Rationale Found + +#### 🟢 High Confidence - Explicitly Documented + +**Decision 1: FastMCP Framework Choice** + +- **Rationale:** Official Python framework for MCP protocol implementation (README.md:179, pyproject.toml:9) +- **Evidence:** Listed as primary dependency, core to architecture +- **Source:** README references FastMCP as "Python tooling for building MCP servers" +- **Confidence:** 🟢 High (explicit framework choice) + +**Decision 2: uv Package Manager** + +- **Rationale:** Modern Python package and project manager 
(CONTRIBUTING.md:18, README.md:148) +- **Evidence:** All documentation uses `uv sync`, `uv run` commands +- **Trade-offs:** Faster than pip, better dependency resolution +- **Confidence:** 🟢 High (consistent tooling choice) + +**Decision 3: Ruff for Linting/Formatting** + +- **Rationale:** Fast Python linter and formatter (pyproject.toml:40-64, CONTRIBUTING.md:52) +- **Configuration:** Line length 100, Python 3.12 target, comprehensive rule set +- **Evidence:** Configured in pyproject.toml with specific rules +- **Confidence:** 🟢 High (explicit configuration) + +**Decision 4: Conventional Commits** + +- **Rationale:** Enables automated semantic versioning (CONTRIBUTING.md:84-94) +- **Evidence:** Used with python-semantic-release for automated releases +- **Trade-offs:** Stricter commit format vs. automated versioning benefits +- **Confidence:** 🟢 High (documented in contributing guide) + +**Decision 5: Phased Implementation Strategy** + +- **Decision:** Split improvements across multiple PRs (docs/roadmap/PROGRESS.md:631-635) +- **Rationale:** "Keeps PRs focused and reviewable, allows incremental adoption" +- **Source:** Team decision for maintainability +- **Evidence:** Phase 1 complete (generate-context prompt), Phase 2 planned +- **Confidence:** 🟢 High (explicit ADR-style documentation) + +**Decision 6: Evidence Citation Standards** + +- **Decision:** Require file:line for code, path#heading for docs (docs/roadmap/PROGRESS.md:619-623) +- **Rationale:** "Provides traceability and accountability for all findings" +- **Source:** Research synthesis from Claude Code analysis +- **Confidence:** 🟢 High (documented design principle) + +**Decision 7: Confidence Levels** + +- **Decision:** Categorize findings as High/Medium/Low (docs/roadmap/PROGRESS.md:625-629) +- **Rationale:** "Distinguishes facts from inferences, flags items needing validation" +- **Source:** Research synthesis +- **Confidence:** 🟢 High (documented pattern) + +**Decision 8: Interactive Questioning** + 
+- **Decision:** Replace batch questionnaires with short focused rounds (docs/roadmap/PROGRESS.md:637-641) +- **Rationale:** "Better user engagement, more thoughtful answers" +- **Source:** context_bootstrap.md + Claude Code Phase 3 pattern +- **Confidence:** 🟢 High (research-driven decision) + +**Decision 9: Prompt-First Workflow** + +- **Decision:** Prompts are Markdown files, not Python code (README.md:20-28) +- **Rationale:** "Markdown artifacts instead of tooling, travels with you across projects, models, and collaboration environments" +- **Benefits:** Non-developers can edit prompts, no code deployment to update workflows +- **Confidence:** 🟢 High (core product philosophy) + +#### 🟡 Medium Confidence - Implied or Partial Documentation + +**Decision 10: Python 3.12+ Requirement** + +- **Documented:** pyproject.toml:7 requires Python 3.12+ +- **Rationale (inferred):** Modern type hints (PEP 695), improved error messages, long-term support (until 2028) +- **Evidence:** Project started in 2025, using recent stable Python +- **Confidence:** 🟡 Medium (technical choice, rationale inferred) + +**Decision 11: /workspace Default** + +- **Documented:** config.py:22 defaults to `/workspace` +- **Rationale (inferred):** Container-oriented design (common in Docker environments) +- **Evidence:** Configurable via SDD_WORKSPACE_ROOT +- **Confidence:** 🟡 Medium (standard container practice) + +**Decision 12: Pre-commit Hooks** + +- **Documented:** CONTRIBUTING.md:23 mentions pre-commit +- **Rationale:** Quality enforcement before commits +- **Gap:** No documentation of specific hooks chosen +- **Confidence:** 🟡 Medium (tooling choice without detailed rationale) + +### 2.3 Gaps & Unknowns + +**No conflicts found** - Documentation is consistent with code. 
+ +**Gaps identified:** + +- ❌ **Gap:** PyYAML dependency not explicit in pyproject.toml (used in prompt_utils.py:8) + - **Recommendation:** Add `pyyaml>=6.0.1` to dependencies +- ❌ **Gap:** FastMCP version pinning strategy (uses `>=2.12.4` open-ended) + - **Recommendation:** Pin to minor version: `fastmcp>=2.12.4,<3.0.0` +- ❌ **Gap:** No test coverage threshold configured + - **Recommendation:** Add `--cov-fail-under=80` to pytest config +- ❌ **Gap:** TODO task numbers (Task 2.1, 5.1, etc.) not linked to source + - **Recommendation:** Link TODOs to task files for traceability + +--- + +## 3. System Capabilities (WHAT) + +### 3.1 Core Features + +**Confidence Legend:** 🟢 High | 🟡 Medium | 🔴 Low + +#### 🟢 Feature 1: FastMCP Server with Health Check + +- **Entry point:** server.py:11 - Creates `mcp` instance via `create_app()` +- **Flow:** + 1. Import create_app → server.py:7 + 2. Call create_app() → mcp_server/**init**.py:17-45 + 3. Initialize FastMCP(name="spec-driven-development-mcp") → mcp_server/**init**.py:24 + 4. Register health check endpoint → mcp_server/**init**.py:26-28 + 5. Load and register prompts → mcp_server/**init**.py:31 + 6. Return configured FastMCP app → mcp_server/**init**.py:45 +- **Business rules:** + - GET /health returns PlainTextResponse("OK") + - Health check always returns 200 OK (no validation logic) +- **Evidence:** Working code path with custom route handler (mcp_server/**init**.py:26-28), added in v1.5.0 (CHANGELOG.md:36) +- **Confidence:** 🟢 High (active production code) + +#### 🟢 Feature 2: Dynamic Prompt Loading from Markdown Files + +- **Entry point:** register_prompts(mcp, prompts_dir) → mcp_server/prompts_loader.py:23-36 +- **Flow:** + 1. Validate prompts directory exists → prompts_loader.py:24-25 + 2. Scan for .md files, sorted alphabetically → prompts_loader.py:28-31 + 3. 
For each prompt file: + - Load markdown with YAML frontmatter → prompt_utils.py:42-81 + - Parse frontmatter (name, description, tags, meta, arguments) → prompt_utils.py:84-98 + - Create MarkdownPrompt DTO → prompt_utils.py:72-81 + - Register as FastMCP prompt via decorator → prompts_loader.py:14-20 + 4. Prompts become available via MCP protocol +- **Business rules:** + - Only .md files in prompts/ directory are loaded + - Files sorted alphabetically by name + - YAML frontmatter is optional (defaults to filename as name) + - Invalid YAML is silently ignored (returns empty frontmatter, prompts still load) + - Frontmatter fields: name, description, tags, arguments, meta, enabled +- **Evidence:** Working code with comprehensive tests (tests/test_prompts.py:55-116) +- **Confidence:** 🟢 High (100% test coverage, core feature) + +#### 🟢 Feature 3: Four Workflow Prompts for Spec-Driven Development + +**Prompt 1: generate-spec** + +- **File:** prompts/generate-spec.md:1-50 +- **Purpose:** Creates detailed feature specifications +- **Process:** + 1. Optional: Read codebase context if available + 2. Mandatory: Ask clarifying questions (3-5 per round) + 3. Draft specification with WHAT and WHY + 4. Save to `/tasks/000X-spec-[feature].md` +- **Business rules:** + - Must ask clarifying questions before writing spec + - Short focused questions, not batch questionnaires + - Reference existing context when available + - Flag unknowns rather than guessing +- **Confidence:** 🟢 High (active prompt, tested) + +**Prompt 2: generate-task-list-from-spec** + +- **File:** prompts/generate-task-list-from-spec.md:1-50 +- **Purpose:** Converts specs into actionable task lists +- **Process:** + 1. Read spec file + 2. Analyze current codebase state + 3. Phase 1: Generate parent tasks (demoable units) + 4. Wait for user confirmation + 5. Phase 2: Generate sub-tasks with proof artifacts + 6. 
Save to `/tasks/tasks-000X-spec-[feature].md` +- **Business rules:** + - Two-phase generation (parent tasks → sub-tasks) + - Every parent task requires Demo Criteria and Proof Artifacts + - Must identify relevant files +- **Confidence:** 🟢 High (active prompt, tested) + +**Prompt 3: manage-tasks** + +- **File:** prompts/manage-tasks.md:1-50 +- **Purpose:** Task execution workflow management +- **Process:** + - Mark tasks as [ ] (not started), [~] (in progress), [x] (completed) + - Enforce one sub-task at a time + - Run tests before marking parent complete + - Commit with conventional format +- **Business rules:** + - One sub-task at a time (no parallel work) + - Mark in-progress immediately when starting + - Parent task completion requires all subtasks complete + - Conventional commit format required + - Clean up temp files before commit +- **Confidence:** 🟢 High (active prompt, tested) + +**Prompt 4: generate-context** + +- **File:** prompts/generate-context.md (in current branch) +- **Purpose:** Generates codebase context analysis +- **Process:** + - 6-phase analysis: Structure → Docs → Code → Integration → Gaps → User collaboration + - Evidence-based with file:line citations + - Confidence levels (High/Medium/Low) for all findings + - Interactive questioning in short rounds + - Outputs to `/docs/00[n]-SYSTEM.md` +- **Business rules:** + - Every finding requires evidence citation + - Separate WHAT/HOW (code) from WHY (docs) + - Flag gaps explicitly + - Interactive, not batch questioning +- **Status:** In progress (not merged yet) +- **Confidence:** 🟢 High (comprehensive prompt, 877 lines) + +#### 🟢 Feature 4: Configuration via Environment Variables + +- **Entry point:** Config class → mcp_server/config.py:16-70 +- **Supported variables:** + - `SDD_WORKSPACE_ROOT` (default: `/workspace`) - Output directory for specs/tasks + - `SDD_PROMPTS_DIR` (default: `./prompts`) - Prompt templates directory + - `SDD_TRANSPORT` (default: `stdio`) - Transport type (stdio/http) + 
- `SDD_HTTP_HOST` (default: `0.0.0.0`) - HTTP server host + - `SDD_HTTP_PORT` (default: `8000`) - HTTP server port + - `SDD_LOG_LEVEL` (default: `INFO`) - Logging level + - `SDD_LOG_FORMAT` (default: `json`) - Log format (json/text) + - `SDD_CORS_ENABLED` (default: `true`) - Enable CORS for HTTP + - `SDD_CORS_ORIGINS` (default: `*`) - Allowed CORS origins +- **Business rules:** + - Port validation: 1-65535 range (config.py:33-34) + - Invalid port raises ValueError with clear message (config.py:36) + - Paths resolved to absolute paths (config.py:22-25) + - CORS origins comma-separated (config.py:44-48) +- **Evidence:** Complete configuration class with validation (config.py:16-70) +- **Confidence:** 🟢 High (documented in docs/operations.md:59-83) + +#### 🟢 Feature 5: Basic Example Tool (Placeholder) + +- **Entry point:** basic_example_tool() → mcp_server/**init**.py:33-37 +- **Purpose:** Verify MCP tool registration works +- **Returns:** "Basic example tool invoked successfully." +- **Evidence:** Working code with TODO comment indicating future tools planned (mcp_server/**init**.py:39-43) +- **Confidence:** 🟢 High (working placeholder, documented as temporary) + +### 3.2 Planned But Not Yet Implemented + +**From TODO comments in mcp_server/**init**.py:39-43:** + +#### 🔴 Resources (Task 2.1) + +- **Status:** Planned, not implemented +- **Purpose:** Provide MCP resources (likely task/spec file access) +- **Priority:** Medium (after Tools) +- **Evidence:** TODO comment line 39 + +#### 🔴 Tools (Task 5.1) + +- **Status:** Planned, not implemented +- **Purpose:** Spec manipulation tools beyond basic-example +- **Priority:** High (needed for workflow automation) +- **Evidence:** TODO comment line 40 + +#### 🔴 Notifications (Task 5.2) + +- **Status:** Planned, not implemented +- **Purpose:** MCP notification support +- **Priority:** Low (nice to have) +- **Evidence:** TODO comment line 41 + +#### 🔴 Sampling (Task 5.3) + +- **Status:** Planned, not implemented +- 
**Purpose:** MCP sampling support (prompt/completion tracking) +- **Priority:** Low (nice to have) +- **Evidence:** TODO comment line 42 + +#### 🔴 Logging (Task 5.4) + +- **Status:** Planned, not implemented +- **Purpose:** Structured logging infrastructure +- **Note:** Config exists (SDD_LOG_LEVEL, SDD_LOG_FORMAT in config.py:38-40) but not wired up +- **Priority:** Medium (for debugging and monitoring) +- **Evidence:** TODO comment line 43 + +**From docs/roadmap/PROGRESS.md:** + +#### 🔴 Phase 2 Enhancements (Future PR) + +- Enhanced generate-spec with mandatory clarifying phase +- New generate-architecture-options prompt (3 approaches with trade-offs) +- New review-implementation prompt (quality review before PR) +- ADR template (MADR format) +- Workflow documentation (docs/WORKFLOW.md) +- **Priority:** HIGH (documented as critical gaps) +- **Evidence:** docs/roadmap/PROGRESS.md:180-679 +- **Estimated Effort:** 10-12 hours + +--- + +## 4. Architecture (HOW) + +### 4.1 Components + +#### Entry Point Component + +- **Location:** server.py +- **Responsibilities:** + - Expose `mcp` instance for FastMCP CLI discovery + - Provide console script entry point (main function) +- **Key files:** + - server.py:11 - Module-level mcp instance + - server.py:14-22 - main() function for console script +- **Confidence:** 🟢 High (clear single-purpose module) + +#### Application Factory Component + +- **Location:** mcp_server/**init**.py +- **Responsibilities:** + - Initialize FastMCP server + - Register custom routes (health check) + - Load and register prompts from directory + - Register tools +- **Key files:** + - mcp_server/**init**.py:17-45 - create_app() factory function + - mcp_server/**init**.py:24 - FastMCP initialization + - mcp_server/**init**.py:26-28 - Health check route + - mcp_server/**init**.py:31 - Prompt registration + - mcp_server/**init**.py:33-37 - Basic tool registration +- **Confidence:** 🟢 High (standard factory pattern) + +#### Configuration Component + +- 
**Location:** mcp_server/config.py +- **Responsibilities:** + - Load environment variables with defaults + - Validate configuration (port range, paths) + - Provide global config instance + - Ensure workspace directories exist +- **Key files:** + - mcp_server/config.py:16-66 - Config class + - mcp_server/config.py:50-54 - ensure_workspace_dirs() method + - mcp_server/config.py:69 - Global config singleton +- **Confidence:** 🟢 High (well-defined boundaries) + +#### Prompt Loading Component + +- **Location:** mcp_server/prompts_loader.py, mcp_server/prompt_utils.py +- **Responsibilities:** + - Scan prompts directory for .md files + - Parse YAML frontmatter + Markdown body + - Create MarkdownPrompt DTOs + - Register prompts with FastMCP via decorator +- **Key files:** + - mcp_server/prompts_loader.py:23-36 - register_prompts() orchestration + - mcp_server/prompts_loader.py:10-11 - _load_prompt() helper + - mcp_server/prompts_loader.py:14-20 - _register_prompt() decorator wrapper + - mcp_server/prompt_utils.py:42-81 - load_markdown_prompt() parser + - mcp_server/prompt_utils.py:84-98 - parse_frontmatter() YAML parser + - mcp_server/prompt_utils.py:101-129 - normalize_arguments() validator +- **Confidence:** 🟢 High (two-module separation: orchestration + utilities) + +#### Content/Domain Component + +- **Location:** prompts/ directory +- **Responsibilities:** + - Define workflow prompts with metadata + - Provide Markdown content for prompts +- **Key files:** + - prompts/generate-spec.md - Spec generation workflow + - prompts/generate-task-list-from-spec.md - Task list generation + - prompts/manage-tasks.md - Task execution workflow + - prompts/generate-context.md - Codebase analysis (in progress) +- **Confidence:** 🟢 High (content, not code) + +### 4.2 Communication Patterns + +**Application Startup Flow:** + +``` +server.py:11 (module load) + → mcp_server.create_app() (server.py:7 import) + → Config() singleton initialization (mcp_server/config.py:69) + → 
FastMCP(name="spec-driven-development-mcp") (mcp_server/__init__.py:24) + → @mcp.custom_route("/health") registration (mcp_server/__init__.py:26-28) + → register_prompts(mcp, config.prompts_dir) (mcp_server/__init__.py:31) + → prompts_dir.iterdir() scan (prompts_loader.py:28-31) + → For each .md file: + → load_markdown_prompt(path) (prompt_utils.py:42-81) + → Path.read_text() (prompt_utils.py:46) + → parse_frontmatter(content) (prompt_utils.py:84-98) + → yaml.safe_load() (prompt_utils.py:93) + → normalize_arguments() (prompt_utils.py:101-129) + → Return MarkdownPrompt DTO (prompt_utils.py:72-81) + → _register_prompt(mcp, prompt) (prompts_loader.py:14-20) + → @mcp.prompt(**kwargs) decorator (prompts_loader.py:16) + → @mcp.tool(name="basic-example") registration (mcp_server/__init__.py:33-37) + → Return mcp instance (mcp_server/__init__.py:45) + → server.py:11 assigns to module-level `mcp` +``` + +**Console Script Entry Flow:** + +``` +$ uvx spec-driven-development-mcp + → pyproject.toml:31 [project.scripts] entry point + → server:main function (server.py:14-22) + → mcp.run() (server.py:22) + → FastMCP handles transport and protocol +``` + +**MCP Client Request Flow (Example: List Prompts):** + +``` +MCP Client → STDIO/HTTP Transport + → FastMCP protocol handler + → mcp.get_prompts() (internal) + → Returns registered prompts + → generate-spec, generate-task-list-from-spec, manage-tasks, generate-context +``` + +### 4.3 Architectural Patterns + +#### 🟢 Factory Pattern + +- **Pattern name:** Application Factory +- **Evidence:** create_app() function (mcp_server/**init**.py:17-45) +- **Purpose:** Create configured FastMCP instance +- **Benefits:** + - Testability (can create multiple instances) + - Configuration injection + - Clear initialization sequence +- **Confidence:** 🟢 High (standard FastMCP pattern, 3+ references) + +#### 🟢 Singleton Pattern + +- **Pattern name:** Configuration Singleton +- **Evidence:** Global `config` instance (mcp_server/config.py:69) +- 
**Purpose:** Single source of configuration truth +- **Usage:** Imported by mcp_server/**init**.py:11 +- **Confidence:** 🟢 High (explicit global instance) + +#### 🟢 Data Transfer Object (DTO) Pattern + +- **Pattern name:** Immutable DTOs +- **Evidence:** + - MarkdownPrompt dataclass (prompt_utils.py:18-39) + - PromptArgumentSpec dataclass (prompt_utils.py:11-16) +- **Characteristics:** frozen=True, type hints, no behavior +- **Benefits:** Immutability, type safety, clear contracts +- **Confidence:** 🟢 High (Python dataclass best practice) + +#### 🟢 Strategy Pattern + +- **Pattern name:** Transport Strategy +- **Evidence:** TransportType = Literal["stdio", "http"] (config.py:13) +- **Purpose:** Switch between STDIO and HTTP transports +- **Usage:** SDD_TRANSPORT env var selects strategy +- **Benefits:** Same server code works with different transports +- **Confidence:** 🟢 High (FastMCP framework feature) + +#### 🟢 Decorator Pattern + +- **Pattern name:** Prompt Registration Decorator +- **Evidence:** @mcp.prompt decorator (prompts_loader.py:16) +- **Purpose:** Declarative prompt registration +- **Benefits:** + - Automatic MCP protocol handling + - Metadata injection (name, description, tags) + - Clean separation of registration from implementation +- **Confidence:** 🟢 High (FastMCP core pattern, 4+ usages) + +#### 🟢 Template Method Pattern + +- **Pattern name:** Frontmatter + Markdown Template +- **Evidence:** All prompts follow YAML frontmatter → Markdown body structure +- **Template:** parse_frontmatter() (prompt_utils.py:84-98) +- **Benefits:** Consistent structure, extensible metadata +- **Confidence:** 🟢 High (4 prompts follow pattern) + +#### 🟢 Plugin/Extension Pattern + +- **Pattern name:** Dynamic Prompt Discovery +- **Evidence:** register_prompts() scans directory (prompts_loader.py:28-36) +- **Characteristics:** Auto-discovery, no code changes to add prompts +- **Benefits:** Extensibility - drop new .md file, auto-registers +- **Confidence:** 🟢 High (core 
feature, tested) + +### 4.4 Architectural Philosophy + +**🟢 Minimalist MCP Server:** + +- **Evidence:** 312 lines of Python code, 4 modules, focused scope +- **Philosophy:** "Do one thing well" - serve spec-driven development prompts via MCP +- **Characteristics:** + - No database (prompts are files) + - No authentication (delegated to MCP client) + - No business logic (prompts contain workflows) + - Stateless (no session management) +- **Confidence:** 🟢 High (README.md:20-28 emphasizes prompt-first approach) + +**🟢 Configuration Over Code:** + +- **Evidence:** 11 environment variables for all settings (config.py:19-48) +- **Philosophy:** 12-factor app principles +- **Examples:** + - Workspace location configurable (SDD_WORKSPACE_ROOT) + - Transport switchable (SDD_TRANSPORT: stdio/http) + - CORS configurable (SDD_CORS_ENABLED, SDD_CORS_ORIGINS) +- **Confidence:** 🟢 High (docs/operations.md:59-83) + +**🟢 Content-Driven Architecture:** + +- **Evidence:** Prompts are Markdown files, not Python code +- **Philosophy:** Separation of content (prompts) from code (server) +- **Benefits:** + - Non-developers can edit prompts + - Prompts versioned separately from server + - No code deployment to update workflows +- **Confidence:** 🟢 High (README.md:26-28: "Markdown artifacts instead of tooling") + +**🟢 Testability First:** + +- **Evidence:** + - Factory pattern for app creation (testable) + - Fixtures for test setup (conftest.py:10-93) + - 100% test coverage for prompt loading +- **Philosophy:** Code designed for testing (not retrofitted) +- **Confidence:** 🟢 High (tests written alongside features) + +**🟢 Extensibility Through Convention:** + +- **Evidence:** Auto-discovery of .md files in prompts/ directory +- **Philosophy:** Convention over configuration +- **Pattern:** Add file → automatically registered +- **Confidence:** 🟢 High (core design principle) + +**🟢 Type Safety with Runtime Validation:** + +- **Evidence:** + - Type hints throughout (config.py, prompt_utils.py) 
+ - Runtime port validation (config.py:32-36) + - Dataclasses with frozen=True for immutability +- **Philosophy:** Fail fast with clear errors +- **Confidence:** 🟢 High (Python 3.12+ typing features used) + +**🟢 Dependency Minimalism:** + +- **Evidence:** Only 4 core dependencies (fastmcp, pre-commit, pytest, ruff) +- **Philosophy:** Avoid dependency bloat +- **PyYAML:** Implicit (likely bundled with FastMCP) +- **Confidence:** 🟢 High (pyproject.toml:8-14) + +--- + +## 5. Conventions & Standards + +### 5.1 Code Style + +- **Linter:** Ruff (pyproject.toml:40-64) +- **Line length:** 100 characters (pyproject.toml:42) +- **Target:** Python 3.12 (pyproject.toml:43) +- **Formatter:** Ruff format (pyproject.toml:45-49) + - Quote style: Double quotes + - Indent: Spaces + - Preview mode enabled +- **Enabled rules:** + - E: pycodestyle errors + - F: pyflakes + - I: import sorting (isort) + - B: flake8-bugbear + - UP: pyupgrade + - SIM: flake8-simplify + - PL: Pylint-inspired rules + - RUF: Ruff-specific rules +- **Ignored rules:** + - PLR2004: Simple length comparisons without constants allowed + +### 5.2 Naming Conventions + +- **Python modules:** snake_case (prompts_loader.py, prompt_utils.py) +- **Classes:** PascalCase (Config, MarkdownPrompt, PromptArgumentSpec) +- **Functions:** snake_case (create_app, load_markdown_prompt, parse_frontmatter) +- **Constants:** UPPER_SNAKE_CASE (TransportType as type alias) +- **Private functions:** _snake_case (_load_prompt, _register_prompt) +- **Prompt files:** kebab-case (generate-spec.md, manage-tasks.md) + +### 5.3 File Organization + +- **Pattern:** Flat module structure under mcp_server/ +- **Modules:** 4 total (**init**, config, prompts_loader, prompt_utils) +- **No deep nesting:** Clear separation of concerns +- **Tests:** Parallel to source (tests/ mirrors mcp_server/) +- **Fixtures:** Centralized in conftest.py +- **Evidence:** Consistent across codebase + +### 5.4 Git Workflow + +**Branching:** + +- **Naming:** `<type>/<short-description>` 
(CONTRIBUTING.md:69-82) +- **Types:** feat, fix, docs, chore, refactor +- **Examples:** feat/issue-templates, docs/contributing-guide +- **Current branch:** add-reverse-engineer-codebase-prompt + +**Commits:** + +- **Convention:** Conventional Commits (CONTRIBUTING.md:84-94) +- **Format:** `<type>: <description>` +- **Types:** feat, fix, docs, chore, refactor, build +- **Breaking:** Use `!` suffix (e.g., `feat!: drop Python 3.10 support`) +- **Examples:** + - `feat: add helper tool to list artifacts` + - `fix: handle missing prompt metadata in loader` + - `docs: clarify HTTP transport usage` + +**Versioning:** + +- **Automated:** python-semantic-release (pyproject.toml:72-96) +- **Tag format:** `v{version}` (e.g., v1.5.0) +- **Changelog:** Auto-generated CHANGELOG.md +- **Assets:** uv.lock included in releases +- **Build command:** Runs `uv lock` and stages uv.lock for commit + +**Pull Requests:** + +- **Title:** Conventional commit format +- **Template:** + + ```markdown + ## Why? + ## What Changed? + ## Additional Notes + ``` + +- **Checks:** Tests + pre-commit must pass +- **Scope:** Keep PRs focused and well-scoped + +--- + +## 6. 
Testing Strategy + +### 6.1 Frameworks + +- **Unit:** pytest 8.4.2+ (pyproject.toml:11) +- **Coverage:** pytest-cov 7.0.0+ (pyproject.toml:12) +- **Async:** anyio (tests/test_prompts.py:3) - Run async FastMCP methods +- **Configuration:** pyproject.toml:66-69 + +### 6.2 Coverage + +- **Current:** Unknown (no coverage report in repo) +- **Target:** 80% (industry standard - [User confirmed: 2025-01-23]) +- **Gap:** No coverage threshold configured in pytest config +- **Recommendation:** Add `--cov-fail-under=80` to pyproject.toml:68 + +### 6.3 Patterns + +**Test Organization:** + +- **Location:** tests/ directory (parallel to mcp_server/) +- **Naming:** test_*.py (e.g., test_prompts.py) +- **Structure:** Class-based organization + - TestFrontmatterParsing (tests/test_prompts.py:10-50) + - TestPromptLoading (tests/test_prompts.py:52-116) +- **Fixtures:** Centralized in conftest.py + - temp_workspace (conftest.py:10-21) + - temp_prompts_dir (conftest.py:24-83) + - mcp_server (conftest.py:86-93) + +**Test Coverage:** + +- **Frontmatter parsing:** 100% coverage (3 tests) + - Valid YAML + - No frontmatter + - Invalid YAML +- **Prompt loading:** 100% coverage (5 tests) + - Register prompts from directory + - Metadata preservation + - Nonexistent directory handling + - Prompt body returns string + - Decorator kwargs serialization + +**Run Commands:** + +- Basic: `uv run pytest` (CONTRIBUTING.md:36) +- With coverage: `uv run pytest --cov=mcp_server --cov-report=html` +- Verbose: `uv run pytest -v` +- Specific file: `uv run pytest tests/test_prompts.py -v` + +**No integration or E2E tests** (appropriate for simple server) + +--- + +## 7. 
Build & Deployment + +### 7.1 Build Process + +- **Tool:** hatchling (pyproject.toml:26-28) +- **Packages:** mcp_server, prompts (pyproject.toml:34) +- **Force-include:** server.py (pyproject.toml:36-37) +- **Build command:** `uv build` (implicit via hatchling) +- **Artifacts:** .whl and .tar.gz (dist/ directory) + +### 7.2 Installation + +**Via uv (development):** + +```bash +uv sync # Install dependencies +``` + +**Via uvx (end-user):** + +```bash +uvx spec-driven-development-mcp # Run directly +``` + +**Via pip (published package):** + +```bash +pip install spec-driven-development-mcp +``` + +### 7.3 Running the Server + +**STDIO Transport (Default):** + +```bash +uvx fastmcp run server.py +``` + +**With MCP Inspector:** + +```bash +uvx fastmcp dev server.py +``` + +**HTTP Transport:** + +```bash +uvx fastmcp run server.py --transport http --port 8000 +``` + +**Console Script (after installation):** + +```bash +spec-driven-development-mcp # Calls server:main +``` + +### 7.4 Deployment Environments + +**Development:** + +- **Transport:** STDIO +- **Prompts:** Local ./prompts directory +- **Workspace:** Local filesystem +- **Tools:** MCP Inspector for debugging + +**Production (HTTP):** + +- **Transport:** HTTP on configurable port +- **CORS:** Configurable origins +- **Workspace:** Configurable via SDD_WORKSPACE_ROOT +- **Logging:** JSON format (SDD_LOG_FORMAT=json) + +**MCP Client Integration:** + +- **Claude Desktop:** STDIO via config (docs/operations.md:94-107) +- **VS Code MCP Plugin:** STDIO via workspace settings (docs/operations.md:109-123) +- **FastMCP Inspector:** HTTP proxy mode (docs/operations.md:125-138) + +### 7.5 CI/CD + +**Automated via GitHub Actions:** + +- **Semantic Release:** python-semantic-release +- **Workflow:** + 1. Conventional commit detection + 2. Version bump (pyproject.toml) + 3. CHANGELOG.md generation + 4. uv.lock refresh + 5. Git tag creation (v{version}) + 6. 
Release creation +- **Evidence:** pyproject.toml:72-96, CHANGELOG.md auto-generated + +--- + +## 8. Integration Points & Dependencies + +### 8.1 External Services + +**No external service integrations.** This is a self-contained MCP server. + +### 8.2 Transport Mechanisms + +**🟢 STDIO Transport (Default):** + +- **Usage:** Local development, IDE integration +- **Configuration:** SDD_TRANSPORT=stdio +- **Clients:** Claude Desktop, Claude Code, VS Code, Cursor +- **How:** FastMCP handles stdio communication +- **Evidence:** config.py:28 default, docs/operations.md:29-47 +- **Confidence:** 🟢 High + +**🟢 HTTP Transport (Optional):** + +- **Usage:** Remote access, web-based clients +- **Configuration:** + - SDD_TRANSPORT=http + - SDD_HTTP_HOST (default: 0.0.0.0) + - SDD_HTTP_PORT (default: 8000) +- **CORS:** + - SDD_CORS_ENABLED (default: true) + - SDD_CORS_ORIGINS (default: *) +- **How:** FastMCP exposes HTTP endpoints +- **Evidence:** config.py:29-48, docs/operations.md:49-56 +- **Confidence:** 🟢 High + +### 8.3 Python Dependencies + +**Core Dependencies (pyproject.toml:8-14):** + +**🟢 FastMCP (>=2.12.4):** + +- **Purpose:** MCP server framework +- **Usage:** Core framework for MCP protocol implementation +- **Features used:** + - FastMCP() initialization (mcp_server/**init**.py:24) + - @mcp.prompt() decorator (prompts_loader.py:16) + - @mcp.tool() decorator (mcp_server/**init**.py:33) + - @mcp.custom_route() decorator (mcp_server/**init**.py:26) +- **Recommendation:** Pin to minor version: `>=2.12.4,<3.0.0` [User confirmed: 2025-01-23] +- **Confidence:** 🟢 High + +**🟢 pre-commit (>=4.3.0):** + +- **Purpose:** Git hooks for quality checks +- **Usage:** Pre-commit linting/formatting enforcement +- **Evidence:** CONTRIBUTING.md:23 +- **Confidence:** 🟢 High + +**🟢 pytest (>=8.4.2):** + +- **Purpose:** Testing framework +- **Usage:** Unit tests +- **Evidence:** tests/test_prompts.py:4 +- **Confidence:** 🟢 High + +**🟢 pytest-cov (>=7.0.0):** + +- **Purpose:** Coverage 
reporting +- **Usage:** Test coverage measurement +- **Evidence:** pyproject.toml:12 +- **Confidence:** 🟢 High + +**🟢 Ruff (>=0.14.0):** + +- **Purpose:** Linting and formatting +- **Usage:** Code quality enforcement +- **Evidence:** pyproject.toml:13, configured pyproject.toml:40-64 +- **Confidence:** 🟢 High + +**Development Dependencies (pyproject.toml:16-24):** + +**🟢 python-semantic-release (>=10.4.1):** + +- **Purpose:** Automated versioning and releases +- **Usage:** CI/CD version bumps, CHANGELOG generation +- **Evidence:** pyproject.toml:21, configured pyproject.toml:72-96 +- **Confidence:** 🟢 High + +**Implicit Dependencies:** + +**🟡 Starlette (via FastMCP):** + +- **Purpose:** ASGI framework +- **Usage:** Custom HTTP routes (Request, PlainTextResponse) +- **Evidence:** mcp_server/**init**.py:8-9 +- **Confidence:** 🟡 Medium (implicit dependency) + +**🟡 PyYAML (via FastMCP or stdlib):** + +- **Purpose:** YAML parsing for frontmatter +- **Usage:** parse_frontmatter() (prompt_utils.py:8) +- **Gap:** Not explicit in pyproject.toml +- **Recommendation:** Add `pyyaml>=6.0.1` to dependencies [User confirmed: 2025-01-23] +- **Confidence:** 🟡 Medium (used but not explicit) + +**🟢 anyio (via pytest/FastMCP):** + +- **Purpose:** Async test support +- **Usage:** anyio.run() to call async FastMCP methods in tests +- **Evidence:** tests/test_prompts.py:3, tests/test_prompts.py:62 +- **Confidence:** 🟢 High (test dependency) + +### 8.4 Crosscutting Concerns + +#### Logging & Observability + +**🔴 Planned but not implemented:** + +- **Configuration exists:** SDD_LOG_LEVEL, SDD_LOG_FORMAT (config.py:38-40) +- **Not used:** No logger instantiation found +- **TODO:** mcp_server/**init**.py:43 +- **Current state:** Relies on FastMCP/Starlette default logging +- **Confidence:** 🔴 Low (config defined but not wired up) + +#### Error Handling & Resilience + +**🟢 Validation with Clear Errors:** + +- **Port validation:** Raises ValueError with message (config.py:32-36) +- **Directory 
validation:** Raises ValueError if prompts dir missing (prompts_loader.py:24-25) +- **File validation:** Raises FileNotFoundError if prompt missing (prompt_utils.py:43-44) +- **Confidence:** 🟢 High + +**🟡 Graceful YAML Parsing:** + +- **Strategy:** Return empty dict on YAML error (prompt_utils.py:92-95) +- **Resilience:** Prompts still load with defaults +- **Trade-off:** Silent failure vs. robustness +- **Confidence:** 🟡 Medium (intentional design choice) + +**No retry mechanisms or circuit breakers** (not needed for stateless server) + +#### Configuration Management + +**🟢 Environment Variables Strategy:** + +- **Pattern:** Environment variables with sensible defaults +- **Evidence:** Config class (config.py:19-48) +- **Variables:** 11 total (SDD_* prefix) +- **Validation:** Port range validation (config.py:33-34) +- **Confidence:** 🟢 High (12-factor app pattern) + +**No secrets management** (no secrets required - MCP auth delegated to client) + +**🟢 Multi-environment Support:** + +- **Dev:** STDIO transport, local paths +- **Production:** HTTP transport, configurable workspace +- **Evidence:** docs/operations.md:59-90 +- **Confidence:** 🟢 High + +#### Security Practices + +**🟢 CORS Configuration:** + +- **Default:** Enabled with wildcard (config.py:42-48) +- **Configurable:** Can restrict origins +- **Evidence:** SDD_CORS_ENABLED, SDD_CORS_ORIGINS +- **Confidence:** 🟢 High (HTTP transport only) + +**🟡 No Authentication/Authorization:** + +- **Rationale:** Delegated to MCP client +- **Trade-off:** Simplicity vs. 
security +- **Appropriate for:** Local development, trusted clients +- **Confidence:** 🟡 Medium (intentional design choice) + +**🟢 Input Validation:** + +- **Port range:** 1-65535 (config.py:33-34) +- **Path validation:** Resolves to absolute paths (config.py:22-25) +- **Confidence:** 🟢 High + +**No rate limiting** (expected to run behind MCP client) + +#### Performance & Caching + +**🟢 No Caching Needed:** + +- **Rationale:** Prompts are static files, loaded once at startup +- **Evidence:** register_prompts() called once in create_app() (mcp_server/**init**.py:31) +- **Appropriate for:** Small set of static prompts +- **Confidence:** 🟢 High + +**🟢 Lightweight:** + +- **Memory:** ~4 Markdown files loaded into memory (~10-20 KB) +- **CPU:** No heavy computation +- **Startup time:** Fast (no database connections, no external services) +- **Confidence:** 🟢 High + +--- + +## 9. Essential Files to Read + +Priority files for anyone working on this codebase: + +1. **server.py:7-22** - Entry point, main() function, mcp instance creation +2. **mcp_server/**init**.py:17-45** - Application factory, server initialization, prompt/tool registration +3. **mcp_server/config.py:16-70** - Configuration management, environment variables, validation +4. **mcp_server/prompts_loader.py:23-36** - Prompt loading orchestration +5. **mcp_server/prompt_utils.py:42-98** - Markdown parsing, frontmatter extraction, YAML handling +6. **prompts/generate-spec.md:1-50** - Spec generation workflow (understand prompt structure) +7. **tests/test_prompts.py:10-116** - Test patterns, how prompts are tested +8. **tests/conftest.py:10-93** - Test fixtures, how to set up tests +9. **README.md** - Project overview, philosophy, workflow +10. **docs/roadmap/PROGRESS.md:1-100** - Implementation status, planned features, design decisions + +--- + +## 10. Execution Path Examples + +### Example 1: Server Startup (STDIO Transport) + +``` +1. 
User runs: uvx fastmcp run server.py + Entry: FastMCP CLI discovers mcp instance + +2. FastMCP CLI imports server.py + Module load: server.py:1-11 + +3. server.py imports create_app + Import: server.py:7 → from mcp_server import create_app + +4. server.py calls create_app() + Call: server.py:11 → mcp = create_app() + Function: mcp_server/__init__.py:17-45 + +5. create_app() initializes Config singleton + Access: mcp_server/__init__.py:11 → from .config import config + Instantiation: config.py:69 → config = Config() + Initialization: config.py:19-48 → Load env vars, validate port + +6. create_app() creates FastMCP instance + Call: mcp_server/__init__.py:24 → FastMCP(name="spec-driven-development-mcp") + +7. create_app() registers health check route + Decorator: mcp_server/__init__.py:26 → @mcp.custom_route("/health", methods=["GET"]) + Handler: mcp_server/__init__.py:27-28 → Returns PlainTextResponse("OK") + +8. create_app() loads prompts + Call: mcp_server/__init__.py:31 → register_prompts(mcp, config.prompts_dir) + Function: prompts_loader.py:23-36 + +9. register_prompts() scans prompts directory + Scan: prompts_loader.py:28-31 → prompts_dir.iterdir(), filter .md files, sort + +10. For each .md file (e.g., generate-spec.md): + Load: prompts_loader.py:35 → _load_prompt(prompts_dir, "generate-spec.md") + Function: prompts_loader.py:10-11 → load_markdown_prompt() + Parser: prompt_utils.py:42-81 + +11. load_markdown_prompt() reads file + Read: prompt_utils.py:46 → path.read_text() + Parse: prompt_utils.py:47 → parse_frontmatter(content) + Function: prompt_utils.py:84-98 + +12. parse_frontmatter() extracts YAML + body + Split: prompt_utils.py:88-90 → content.split("---", 2) + Parse YAML: prompt_utils.py:93 → yaml.safe_load(parts[1]) + Extract body: prompt_utils.py:97 → parts[2].strip() + Return: prompt_utils.py:98 → (frontmatter_dict, body_string) + +13. 
load_markdown_prompt() creates MarkdownPrompt DTO + Extract metadata: prompt_utils.py:49-70 + Create DTO: prompt_utils.py:72-81 → MarkdownPrompt(path, name, description, ...) + Return: prompt_utils.py:72-81 + +14. register_prompts() registers prompt with FastMCP + Call: prompts_loader.py:36 → _register_prompt(mcp, prompt) + Function: prompts_loader.py:14-20 + +15. _register_prompt() applies decorator + Decorator: prompts_loader.py:16 → @mcp.prompt(**prompt.decorator_kwargs()) + Handler: prompts_loader.py:17-18 → Returns prompt.body + Rename: prompts_loader.py:20 → prompt_handler.__name__ = f"{prompt.name}_prompt" + +16. Repeat steps 10-15 for remaining prompts: + - generate-task-list-from-spec.md + - manage-tasks.md + - generate-context.md + +17. create_app() registers basic-example tool + Decorator: mcp_server/__init__.py:33 → @mcp.tool(name="basic-example") + Handler: mcp_server/__init__.py:34-37 → Returns "Basic example tool invoked successfully." + +18. create_app() returns configured mcp instance + Return: mcp_server/__init__.py:45 → return mcp + +19. server.py assigns to module-level mcp + Assignment: server.py:11 → mcp = create_app() + +20. FastMCP CLI runs mcp instance + Run: FastMCP internal → mcp.run() with STDIO transport + Listen: STDIO stream, process MCP protocol messages +``` + +### Example 2: MCP Client Requests Prompt (generate-spec) + +``` +1. MCP Client sends list_prompts request via STDIO + Transport: STDIO → FastMCP protocol handler + +2. FastMCP calls mcp.get_prompts() + Internal: FastMCP discovers registered prompts + +3. FastMCP returns prompt metadata + Response: { + "name": "generate-spec", + "description": "Generate a Specification (Spec) for a feature", + "tags": ["planning", "specification"], + "meta": { + "category": "spec-development", + "allowed-tools": "Glob, Grep, LS, Read, Edit, ..." + } + } + +4. MCP Client sends get_prompt request for "generate-spec" + Request: get_prompt(name="generate-spec") + +5. 
FastMCP looks up registered prompt handler + Lookup: prompts["generate-spec"] + +6. FastMCP calls prompt handler function + Call: generate-spec_prompt() → prompts_loader.py:17-18 + +7. Handler returns prompt body + Return: prompt.body (Markdown content from generate-spec.md) + +8. FastMCP sends response to client + Response: {"content": "## Generate Specification\n\n## Goal\n\n..."} + +9. MCP Client displays prompt to user + Display: Prompt content rendered in client UI +``` + +### Example 3: Health Check Request (HTTP Transport) + +``` +1. User runs: uvx fastmcp run server.py --transport http --port 8000 + Entry: FastMCP CLI with HTTP transport flag + +2. Server starts with HTTP transport + Startup: Steps 1-19 from Example 1 (same initialization) + Difference: FastMCP runs HTTP server instead of STDIO + +3. HTTP client sends GET /health + Request: curl http://localhost:8000/health + +4. Starlette routes request to health check handler + Routing: @mcp.custom_route("/health") → mcp_server/__init__.py:26 + +5. Handler executes + Function: mcp_server/__init__.py:27-28 + Logic: async def health_check(request: Request) -> PlainTextResponse + Return: PlainTextResponse("OK") + +6. Starlette sends HTTP response + Response: HTTP/1.1 200 OK + Content-Type: text/plain + + OK +``` + +--- + +## 11. 
Confidence Summary + +### High Confidence Findings ✅ + +**Architecture & Design:** + +- ✅ Minimalist MCP server architecture (312 lines, focused scope) +- ✅ FastMCP as core framework (pyproject.toml:9, mcp_server/**init**.py:7) +- ✅ Factory pattern for app creation (mcp_server/**init**.py:17-45) +- ✅ Configuration via environment variables (config.py:16-70, 11 vars) +- ✅ Dynamic prompt loading from Markdown files (prompts_loader.py:23-36) +- ✅ YAML frontmatter + Markdown pattern (prompt_utils.py:84-98) + +**Features:** + +- ✅ Health check endpoint (mcp_server/**init**.py:26-28, v1.5.0) +- ✅ 4 workflow prompts (generate-spec, generate-task-list, manage-tasks, generate-context) +- ✅ STDIO and HTTP transport support (config.py:13, 28-48) +- ✅ Basic example tool (placeholder - mcp_server/**init**.py:33-37) + +**Code Quality:** + +- ✅ Ruff linting/formatting (pyproject.toml:40-64) +- ✅ Conventional commits (CONTRIBUTING.md:84-94) +- ✅ Automated semantic versioning (pyproject.toml:72-96) +- ✅ 100% test coverage for prompt loading (tests/test_prompts.py) + +**Documentation:** + +- ✅ Comprehensive README with workflow guide +- ✅ Operations guide for deployment +- ✅ Contributing guide with conventions +- ✅ Detailed PROGRESS.md tracking implementation + +### Medium Confidence (Needs Validation) ⚠️ + +**Design Choices:** + +- 🟡 Python 3.12 requirement rationale (inferred: modern type hints, performance) +- 🟡 /workspace default (inferred: container-oriented design) +- 🟡 Silent YAML error handling (design choice: robustness over strict validation) +- 🟡 Global config singleton (trade-off: simplicity vs. 
testability) + +**Dependencies:** + +- 🟡 PyYAML as implicit dependency (likely bundled with FastMCP, should be explicit) +- 🟡 Starlette as implicit dependency (bundled with FastMCP) + +**Configuration:** + +- 🟡 No test coverage threshold (recommended 80%) +- 🟡 FastMCP version pinning (recommended: pin to minor version) + +### Low Confidence (Unknown) ❓ + +**Planned Features:** + +- 🔴 Resources (Task 2.1) - No details on implementation +- 🔴 Tools (Task 5.1) - Priority and timeline unknown +- 🔴 Notifications (Task 5.2) - Priority and timeline unknown +- 🔴 Sampling (Task 5.3) - Priority and timeline unknown +- 🔴 Logging (Task 5.4) - Config exists but not implemented + +**Implementation Details:** + +- 🔴 TODO task number references (Task 2.1, 5.1, etc.) - Source unknown +- 🔴 Health check intended use (container orchestration? monitoring?) +- 🔴 Current test coverage percentage (no report available) + +--- + +## 12. Gaps, Unknowns & Recommendations + +### 12.1 Critical Gaps (🟥 Must Address) + +**None identified.** Core functionality is complete and working. + +### 12.2 Important Gaps (🟧 Should Address) + +**🟧 GAP-001: TODO Task Traceability** + +- **Issue:** TODOs reference "Task 2.1", "Task 5.1", etc. 
without links (mcp_server/**init**.py:39-43) +- **Impact:** Hard to track where these tasks are defined +- **Recommendation:** Link TODOs to task files or document task numbers +- **Effort:** 10 min | **Priority:** 🟧 Important + +**🟧 GAP-002: Planned Features Priority** + +- **Issue:** 5 TODOs with no priority or timeline +- **User Answer [2025-01-23]:** Priority order: Tools (high), Resources (medium), Logging (medium), Notifications (low), Sampling (low) +- **Recommendation:** Document priorities in PROGRESS.md, update TODOs with priority tags +- **Effort:** 15 min | **Priority:** 🟧 Important + +### 12.3 Minor Gaps (🟨 Nice to Have) + +**🟨 GAP-003: PyYAML Dependency Not Explicit** + +- **Issue:** PyYAML used (prompt_utils.py:8) but not in pyproject.toml +- **User Answer [2025-01-23]:** PyYAML is likely a FastMCP dependency, but should be explicit +- **Recommendation:** Add to pyproject.toml: + + ```toml + dependencies = [ + "fastmcp>=2.12.4", + "pyyaml>=6.0.1", # Add this + ... + ] + ``` + +- **Effort:** 2 min | **Impact:** Low | **Priority:** 🟨 Minor + +**🟨 GAP-004: FastMCP Version Pinning** + +- **Issue:** Uses `>=2.12.4` (open-ended, risk of breaking changes) +- **User Answer [2025-01-23]:** Yes, pin to minor version to prevent breaking changes +- **Recommendation:** Change to `fastmcp>=2.12.4,<3.0.0` +- **Effort:** 2 min | **Impact:** Low | **Priority:** 🟨 Minor + +**🟨 GAP-005: Test Coverage Threshold** + +- **Issue:** pytest-cov installed but no threshold configured +- **User Answer [2025-01-23]:** Target 80% coverage +- **Recommendation:** Add to pyproject.toml: + + ```toml + [tool.pytest.ini_options] + minversion = "8.0" + addopts = "-ra --cov=mcp_server --cov-fail-under=80" + testpaths = ["tests"] + ``` + +- **Effort:** 5 min | **Impact:** Low | **Priority:** 🟨 Minor + +**🟨 GAP-006: Python 3.12 Requirement Rationale** + +- **Issue:** No documented rationale for Python 3.12 minimum +- **User Answer [2025-01-23]:** Chosen for modern type hints (PEP 695), 
improved error messages, performance, long-term support (until 2028) +- **Recommendation:** Document in README or CONTRIBUTING.md +- **Effort:** 5 min | **Impact:** Low | **Priority:** 🟨 Minor + +**🟨 GAP-007: Health Check Purpose** + +- **Issue:** /health endpoint exists but no documentation on its use +- **Questions:** Container orchestration? Should it check prompts directory? +- **Recommendation:** Document intended use in docs/operations.md +- **Effort:** 10 min | **Impact:** Low | **Priority:** 🟨 Minor + +**🟨 GAP-008: Logging Not Implemented** + +- **Issue:** Config exists (SDD_LOG_LEVEL, SDD_LOG_FORMAT) but not wired up +- **TODO:** mcp_server/**init**.py:43 +- **User Answer [2025-01-23]:** Medium priority (after Tools and Resources) +- **Recommendation:** Implement structured logging in future PR +- **Effort:** 2-3 hours | **Impact:** Medium | **Priority:** 🟨 Minor (for now) + +--- + +## 13. Recommendations for New Features + +When building new features in this codebase: + +### 13.1 Architecture Patterns to Follow + +1. **Add New Prompts:** + - Create .md file in prompts/ directory + - Include YAML frontmatter with metadata + - Follow existing prompt structure (generate-spec.md as template) + - Auto-discovery handles registration (no code changes needed) + +2. **Add New Tools:** + - Use @mcp.tool() decorator in mcp_server/**init**.py + - Follow basic-example pattern (mcp_server/**init**.py:33-37) + - Provide clear description for MCP clients + - Consider moving to separate tools/ module if many tools + +3. **Add Configuration:** + - Add env var to Config class (config.py) + - Provide sensible default + - Document in docs/operations.md + - Add validation if needed + +4. 
**Add Tests:** + - Create test_*.py in tests/ directory + - Use class-based organization + - Add fixtures to conftest.py if reusable + - Aim for 80% coverage + +### 13.2 Code Style Guidelines + +- **Type hints:** Use modern Python 3.12+ syntax +- **Line length:** 100 characters max +- **Imports:** Sort with isort (Ruff I rule) +- **Docstrings:** Google style preferred +- **Error messages:** Be specific and actionable + +### 13.3 Git Workflow + +- **Branches:** `<type>/<short-description>` (e.g., `feat/add-resource-tool`) +- **Commits:** Conventional format (`feat:`, `fix:`, `docs:`) +- **PRs:** Use template (Why? What Changed? Additional Notes) +- **Tests:** Must pass before merge +- **Pre-commit:** Run `uv run pre-commit run --all-files` + +### 13.4 Prompt Development + +- **Frontmatter fields:** + + ```yaml + --- + name: my-prompt-name + description: Brief description for MCP clients + tags: + - category1 + - category2 + arguments: [] + meta: + category: my-category + allowed-tools: Tool1, Tool2, Tool3 + --- + ``` + +- **Content:** Use clear Markdown with ## headings +- **Structure:** Goal → Process → Output Format → Examples +- **User interaction:** Ask short questions (3-5), not long questionnaires + +### 13.5 Testing Patterns + +```python +# Class-based organization +class TestMyFeature: + """Tests for my new feature.""" + + def test_happy_path(self, fixture_name): + """Test the main use case.""" + # Arrange + # Act + # Assert + + def test_error_handling(self): + """Test error cases.""" + with pytest.raises(ValueError, match="expected message"): + # Code that should raise +``` + +### 13.6 Integration with Existing System + +- **Prompts:** Reference other prompts by name (e.g., "After running generate-spec...") +- **Configuration:** Use global `config` instance, don't create new Config +- **FastMCP:** Access via `mcp` instance, don't create new FastMCP +- **File paths:** Use Path from pathlib, resolve to absolute +- **Errors:** Raise specific exceptions with clear messages + +--- + +## 
14. Next Steps + +After this context analysis: + +1. **Use `generate-spec` prompt** to create detailed specification for your feature +2. **Reference this analysis** when making architectural decisions +3. **Follow identified patterns** to ensure consistency +4. **Address high-priority gaps** if they block your work (GAP-002 recommended) +5. **Update this analysis** if you discover new patterns during implementation + +--- + +## 15. Key Principles to Remember + +1. **Evidence-Based:** Every claim needs file:line or doc#heading citation +2. **Confidence Levels:** Mark High/Medium/Low confidence for all findings +3. **Separate WHAT/HOW/WHY:** + - Code analysis tells you WHAT and HOW + - Documentation tells you WHY + - User fills in gaps and confirms intent +4. **Stay in Your Lane:** Don't infer WHY from code - flag it as a gap for user to answer +5. **Minimalist Philosophy:** Add only what's needed, keep it simple +6. **Content Over Code:** Prompts are Markdown files, not Python code +7. **Type Safety:** Use modern Python type hints, validate at runtime +8. **Testability:** Design for testing from the start +9. 
**Convention Over Configuration:** Auto-discover prompts, no manual registration + +--- + +**Analysis completed:** 2025-01-23 +**Last validated with user:** 2025-01-23 +**Status:** Ready for feature specification + +--- + +## Appendix A: File Structure + +``` +mcp-spec-driven/ +├── server.py # Entry point (23 lines) +├── pyproject.toml # Project config, dependencies +├── uv.lock # Locked dependencies +├── CHANGELOG.md # Auto-generated version history +├── CONTRIBUTING.md # Development guide +├── LICENSE # Apache 2.0 +├── README.md # Project overview +├── mcp_server/ # Core application (4 modules, 289 lines) +│ ├── __init__.py # App factory, server setup (46 lines) +│ ├── config.py # Configuration management (70 lines) +│ ├── prompts_loader.py # Prompt registration (37 lines) +│ └── prompt_utils.py # Markdown parsing utilities (141 lines) +├── prompts/ # Workflow prompts (Markdown) +│ ├── generate-spec.md # Spec generation +│ ├── generate-task-list-from-spec.md # Task list generation +│ ├── manage-tasks.md # Task execution +│ └── generate-context.md # Codebase analysis (in progress) +├── tests/ # Unit tests (pytest) +│ ├── conftest.py # Test fixtures (94 lines) +│ └── test_prompts.py # Prompt loading tests (116 lines) +├── docs/ # Documentation +│ ├── operations.md # Deployment guide +│ ├── mcp-prompt-support.md # Client compatibility +│ ├── roadmap/ +│ │ └── PROGRESS.md # Implementation tracking (681 lines) +│ └── research/ +│ └── codebase-context/ # Research documents +│ ├── README.md +│ ├── code-analyst.md +│ ├── information-analyst.md +│ ├── context_bootstrap.md +│ ├── claude-code-feature-dev-comparison.md +│ └── research-synthesis.md +└── tasks/ # Spec and task list outputs + ├── 0001-spec-sdd-mcp-poc.md + ├── tasks-0001-spec-sdd-mcp-poc.md + ├── 0002-spec-open-source-ready.md + └── tasks-0002-spec-open-source-ready.md +``` + +**Total Lines of Code:** ~312 Python (source only) + +--- + +## Appendix B: Environment Variables Reference + +| Variable | Default | 
Type | Description | +|----------|---------|------|-------------| +| `SDD_WORKSPACE_ROOT` | `/workspace` | Path | Output directory for specs/tasks | +| `SDD_PROMPTS_DIR` | `./prompts` | Path | Prompt templates directory | +| `SDD_TRANSPORT` | `stdio` | Enum | Transport type (stdio/http) | +| `SDD_HTTP_HOST` | `0.0.0.0` | String | HTTP server host | +| `SDD_HTTP_PORT` | `8000` | Int | HTTP server port (1-65535) | +| `SDD_LOG_LEVEL` | `INFO` | Enum | Logging level (DEBUG/INFO/WARNING/ERROR) | +| `SDD_LOG_FORMAT` | `json` | Enum | Log format (json/text) | +| `SDD_CORS_ENABLED` | `true` | Bool | Enable CORS for HTTP | +| `SDD_CORS_ORIGINS` | `*` | String | Allowed CORS origins (comma-separated) | + +**Source:** mcp_server/config.py:19-48, docs/operations.md:59-83 + +--- + +## Appendix C: MCP Client Compatibility + +| Client | Loads MCP? | Prompt Actions | Experience | +|--------|------------|----------------|------------| +| Claude Code CLI | Yes | Slash commands (auto) | Ideal | +| Claude Code Desktop | Yes | TBD | Ideal | +| Cursor | Yes | Implicit (natural language) | Ideal | +| Gemini CLI | Yes | Slash commands (auto) | Ideal | +| OpenCode | Yes | Implicit (natural language) | Ideal | +| Windsurf | Yes | No | Not good (requires dummy tool) | +| VS Code | Yes | Slash commands (not executed) | Not good | +| Codex CLI | Yes | No | Non-existent | +| Goose | Yes | TBD | TBD | + +**Source:** docs/mcp-prompt-support.md + +--- + +## Appendix D: Decision Log + +### Technical Decisions Made + +1. **FastMCP Framework** - Official Python MCP framework +2. **uv Package Manager** - Modern Python tooling +3. **Ruff Linting** - Fast, comprehensive rules +4. **Conventional Commits** - Automated versioning +5. **Phased Implementation** - PRs focused and reviewable +6. **Evidence Citations** - Traceability and accountability +7. **Confidence Levels** - Distinguish facts from inferences +8. **Interactive Questioning** - Short rounds, better engagement +9. 
**Prompt-First Workflow** - Markdown artifacts, portable + +**Source:** docs/roadmap/PROGRESS.md:617-647 + +### User-Confirmed Decisions [2025-01-23] + +1. **PyYAML Dependency** - Make explicit in pyproject.toml +2. **Planned Features Priority** - Tools > Resources/Logging > Notifications/Sampling +3. **Test Coverage Target** - 80% with fail-under enforcement +4. **FastMCP Version Pinning** - Pin to minor version: `>=2.12.4,<3.0.0` +5. **Python 3.12 Rationale** - Modern type hints, performance, long-term support + +--- + +**End of Analysis** diff --git a/docs/research/codebase-context/README.md b/docs/research/codebase-context/README.md new file mode 100644 index 0000000..18f52ab --- /dev/null +++ b/docs/research/codebase-context/README.md @@ -0,0 +1,362 @@ +# Research: Reverse Engineering & Codebase Analysis Patterns + +**Last Updated:** 2025-01-21 +**Status:** Research Complete - Implementation Phase 1 Complete + +--- + +## Overview + +This directory contains research and analysis conducted to improve our MCP spec-driven development prompts. The research synthesizes patterns from: + +1. **Claude Code feature-dev plugin** - Production-tested 7-phase workflow +2. **Existing research files** - code-analyst, information-analyst, context_bootstrap patterns +3. **Best practices** - Evidence-based analysis, confidence assessment, interactive questioning + +**Primary Goal:** Enhance prompts with battle-tested patterns for better feature development outcomes. + +--- + +## Research Documents + +### 1. 
Claude Code Feature-Dev Comparison + +**File:** [`claude-code-feature-dev-comparison.md`](./claude-code-feature-dev-comparison.md) +**Size:** 18,287 words +**Purpose:** Comprehensive analysis of Claude Code's feature-dev plugin + +**Contents:** + +- Complete 7-phase workflow breakdown +- Agent specifications (code-explorer, code-architect, code-reviewer) +- Comparison with our current MCP prompts +- Gap analysis with priority ratings (Critical/Important/Minor) +- Implementation roadmap (3 sprints) +- Updated workflow diagrams +- Detailed recommendations + +**Key Findings:** + +- ❌ Missing mandatory clarifying questions phase +- ❌ No architecture options comparison +- ❌ No quality review before completion +- ✅ Good: Document-based artifacts +- ✅ Good: Explicit sequencing +- ✅ Good: Comprehensive analysis + +**Use This For:** + +- Understanding Claude Code's proven workflow +- Identifying gaps in our current approach +- Planning future enhancements +- Architecture decision justification + +--- + +### 2. 
Research Synthesis + +**File:** [`research-synthesis.md`](./research-synthesis.md) +**Size:** 8,000+ words +**Purpose:** Actionable integration plan combining all research sources + +**Contents:** + +- Core philosophy: Code (WHAT/HOW) vs Docs (WHY) vs User (Intent) +- Two-agent specialization pattern (code-analyst + information-analyst) +- Manager orchestration pattern (context_bootstrap) +- Comparison matrix: Our approach vs Research best practices +- Actionable recommendations with priority matrix +- Specific enhancements for each prompt +- Implementation roadmap (3 sprints) +- Success metrics + +**Key Recommendations:** + +- 🔴 HIGH: Evidence citation standards (file:line, path#heading) +- 🔴 HIGH: Confidence assessment (High/Medium/Low) +- 🔴 HIGH: Mandatory clarifying phase in spec generation +- 🔴 HIGH: Architecture options prompt (new) +- 🔴 HIGH: Implementation review prompt (new) +- 🟡 MEDIUM: Interactive phased questioning +- 🟡 MEDIUM: ADR template creation + +**Use This For:** + +- Planning specific prompt enhancements +- Understanding priority of improvements +- Implementation guidance with examples +- Success criteria for each enhancement + +--- + +### 3. 
Code Analyst Pattern + +**File:** [`code-analyst.md`](./code-analyst.md) +**Source:** Existing research file (cataloged) +**Purpose:** Specialized agent for discovering WHAT and HOW from code + +**Responsibilities:** + +- Discover WHAT system does (features, workflows, business rules) +- Discover HOW it's structured (architecture, patterns, communication) +- Identify WHAT technologies are used + +**Key Principles:** + +- Code is ground truth - report what exists +- Be specific - reference exact file:line +- Distinguish fact from inference +- Flag feature toggles and dormant code +- **Stay in lane** - don't infer WHY + +**What NOT to include:** + +- ❌ Internal data models (implementation detail) +- ❌ Missing/planned features (belongs in roadmap) +- ❌ Code quality judgments +- ❌ Specific versions (too volatile) +- ❌ Testing infrastructure details + +**Applied To:** `generate-context` Phase 3 (Code Analysis) + +--- + +### 4. Information Analyst Pattern + +**File:** [`information-analyst.md`](./information-analyst.md) +**Source:** Existing research file (cataloged) +**Purpose:** Specialized agent for extracting WHY from documentation + +**Primary Job:** Extract decision rationale from docs (not discoverable from code) + +**Responsibilities:** + +- Discover WHY system was built this way +- Extract rationale from documentation +- Find decision context and trade-offs +- Capture historical evolution + +**What to Look For:** + +- Why was [technology X] chosen? +- Why [pattern Y] over alternatives? +- What constraints drove decisions? +- What trade-offs were considered? + +**Key Principles:** + +- Direct quotes for "why" +- Source everything (path#heading) +- Attach metadata (timestamps) +- Flag conflicts, don't resolve +- Distinguish explicit vs implicit +- Focus on rationale (unique value) + +**Applied To:** `generate-context` Phase 2 (Documentation Audit) + +--- + +### 5. 
Context Bootstrap Pattern + +**File:** [`context_bootstrap.md`](./context_bootstrap.md) +**Source:** Existing research file (cataloged) +**Purpose:** Manager orchestration pattern for coordinating specialized agents + +**Core Philosophy:** +> "Code explains HOW the system currently behaves; the user supplies WHAT it is supposed to achieve and WHY choices were made." + +**Six-Phase Workflow:** + +1. Analyze repository structure +2. Audit existing documentation +3. Deep code analysis (subprocess: Code Analyst) +4. User collaboration (fill gaps, resolve conflicts) +5. Draft documentation set (PRDs, ADRs, SYSTEM-OVERVIEW) +6. Review with user + +**Key Pattern:** "Keep dialog interactive. Ask focused follow-up questions instead of long questionnaires." + +**Deliverables:** + +- PRDs (Product Requirements) +- ADRs (Architecture Decision Records in MADR format) +- SYSTEM-OVERVIEW.md +- README.md updates + +**Applied To:** Overall `generate-context` structure and phasing + +--- + +## How Research Was Applied + +### Phase 1 (Completed) ✅ + +**Enhanced `generate-context` Prompt:** + +From **code-analyst.md:** + +- ✅ File:line evidence citations for all code findings +- ✅ Confidence levels (High/Needs Validation/Unknown) +- ✅ "Stay in your lane" - don't infer WHY from code +- ✅ Flag feature toggles and dormant paths +- ✅ Technology names only (NO versions) +- ✅ Focus on working features, not missing ones + +From **information-analyst.md:** + +- ✅ Documentation audit phase (scan + timestamp + inventory) +- ✅ Rationale extraction with direct quotes +- ✅ Source references with path#heading format +- ✅ Conflict detection between docs +- ✅ Distinguish explicit vs implicit knowledge + +From **context_bootstrap.md:** + +- ✅ Repository structure detection (workspace/monorepo/single) +- ✅ User collaboration phase (interactive, not batch) +- ✅ Capture user answers as direct quotes for citation + +From **Claude Code feature-dev:** + +- ✅ Essential files list with line ranges (5-10 
files) +- ✅ Execution path traces (step-by-step flows) +- ✅ Interactive short questions (not batch questionnaires) + +--- + +### Phase 2 (Planned for Next PR) + +**Enhancements Planned:** + +1. **`generate-spec` Enhancement:** + - Mandatory clarifying phase (Claude Code Phase 3) + - Phased interactive questioning (context_bootstrap pattern) + - WHY questions (information-analyst focus) + +2. **`generate-architecture-options` (NEW):** + - Based on Claude Code code-architect agent + - Generate 2-3 approaches with trade-offs + - User must choose before proceeding + +3. **`review-implementation` (NEW):** + - Based on Claude Code code-reviewer agent + - Multi-focus review (bugs, quality, conventions) + - Confidence-based filtering (≥80%) + +See [`../../PROGRESS.md`](../../PROGRESS.md) for detailed roadmap. + +--- + +## Key Insights + +### 1. Separation of Concerns + +**Discovery:** Code, docs, and users each provide different information + +- **Code → WHAT + HOW:** Features, architecture, patterns (observable facts) +- **Docs → WHY:** Decisions, rationale, trade-offs (recorded intent) +- **User → Goals + Intent:** Purpose, value, strategic fit (current direction) + +**Application:** Don't conflate these sources - keep them separate and clearly attributed + +--- + +### 2. Evidence-Based Analysis + +**Discovery:** Every claim needs proof + +- Code findings: `file.ts:45-67` (line ranges) +- Doc findings: `doc.md#heading` (section anchors) +- User input: `[User confirmed: YYYY-MM-DD]` (dated quotes) + +**Application:** Traceability and accountability for all findings + +--- + +### 3. Confidence Assessment + +**Discovery:** Distinguish facts from inferences + +- High: Strong evidence from working code or explicit docs +- Medium: Inferred from context, feature flags, implied +- Low: Cannot determine, conflicts, unknowns + +**Application:** Flag gaps explicitly rather than guessing + +--- + +### 4. 
Interactive Collaboration + +**Discovery:** Short focused conversations > long questionnaires + +- Ask 3-5 questions, wait for answers +- Use answers to inform next round of questions +- Capture direct quotes for later citation + +**Application:** Better engagement, more thoughtful answers + +--- + +### 5. Mandatory Checkpoints + +**Discovery:** Critical decisions need explicit user approval + +- ⛔ STOP after clarifying questions (don't proceed without answers) +- ⛔ STOP after architecture options (user must choose) +- ⛔ STOP after implementation (user decides what to fix) + +**Application:** User control at key decision points + +--- + +## Success Metrics + +### Phase 1 Metrics ✅ + +- ✅ 100% of code findings have file:line citations +- ✅ 100% of findings categorized by confidence level +- ✅ Documentation audit phase included +- ✅ Interactive questioning approach (3-5 questions per round) +- ✅ Essential files list structure (5-10 files with ranges) +- ✅ Execution path traces included in examples + +### Phase 2 Metrics (Target) + +- [ ] Clarifying questions are mandatory (cannot proceed without) +- [ ] Architecture options always present 2-3 approaches +- [ ] User explicitly chooses architecture before tasks +- [ ] Review catches common issues before PR +- [ ] All prompts use consistent evidence standards + +--- + +## References + +### External Sources + +- [Claude Code Repository](https://github.com/anthropics/claude-code) +- [Feature-Dev Plugin](https://github.com/anthropics/claude-code/tree/main/plugins/feature-dev) +- [Feature-Dev README](https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/README.md) +- [Code Explorer Agent](https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-explorer.md) +- [Code Architect Agent](https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-architect.md) +- [Code Reviewer 
Agent](https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-reviewer.md) +- [MADR Format](https://adr.github.io/madr/) + +### Internal Documents + +- [Progress Tracking](../../roadmap/PROGRESS.md) +- [Main README](../../../README.md) + +--- + +## Next Steps + +1. **Review Phase 1 PR:** `add-reverse-engineer-codebase-prompt` branch +2. **Plan Phase 2 PR:** After Phase 1 merge +3. **Implement remaining enhancements:** Per roadmap in PROGRESS.md + +--- + +**Research Status:** Complete and applied to Phase 1 +**Next Research:** None planned - focus on implementation +**Last Updated:** 2025-01-21 diff --git a/docs/research/codebase-context/claude-code-feature-dev-comparison.md b/docs/research/codebase-context/claude-code-feature-dev-comparison.md new file mode 100644 index 0000000..7bef71b --- /dev/null +++ b/docs/research/codebase-context/claude-code-feature-dev-comparison.md @@ -0,0 +1,901 @@ +# Claude Code Feature-Dev Plugin Analysis & Comparison + +**Date:** 2025-01-21 +**Purpose:** Analyze the Claude Code feature-dev plugin workflow and compare with our MCP spec-driven development prompts to identify improvement opportunities + +--- + +## Executive Summary + +The Claude Code feature-dev plugin implements a battle-tested 7-phase workflow that emphasizes: + +1. **Explicit clarifying questions** before design (prevents building wrong things) +2. **Multi-approach architecture** with trade-off analysis (enables better decisions) +3. **Agent-based parallel exploration** for efficiency +4. **Quality review gates** before completion (catches issues early) + +**Key Finding:** Our current workflow is missing critical phases for clarification, architecture comparison, and quality review that the Claude Code workflow proves essential. 
+ +--- + +## Claude Code Feature-Dev Workflow (7 Phases) + +### Phase 1: Discovery + +**Goal:** Understand what needs to be built + +**Process:** + +- Create todo list with all phases +- If feature unclear, ask user for problem, requirements, constraints +- Summarize understanding and confirm with user + +**Key Pattern:** Early validation of understanding + +--- + +### Phase 2: Codebase Exploration + +**Goal:** Understand relevant existing code and patterns at both high and low levels + +**Process:** + +1. Launch 2-3 `code-explorer` agents in parallel +2. Each agent targets different aspect (similar features, architecture, UX patterns) +3. **Critical:** Each agent returns **list of 5-10 key files to read** +4. After agents return, **read all identified files** to build deep understanding +5. Present comprehensive summary + +**Example Agent Prompts:** + +- "Find features similar to [feature] and trace through implementation comprehensively" +- "Map the architecture and abstractions for [feature area]" +- "Analyze current implementation of [existing feature/area]" + +**Key Pattern:** Agent-based parallel discovery + explicit file reading + +### Agent: code-explorer + +- **Tools:** Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch +- **Model:** Sonnet +- **Focus:** Trace execution paths from entry points to data storage +- **Output:** Entry points, step-by-step flow, architecture insights, key files list + +--- + +### Phase 3: Clarifying Questions ⭐ CRITICAL + +**Goal:** Fill in gaps and resolve ALL ambiguities before designing + +**Process:** + +1. Review codebase findings and original feature request +2. Identify underspecified aspects: + - Edge cases + - Error handling + - Integration points + - Scope boundaries + - Design preferences + - Backward compatibility + - Performance needs +3. **Present ALL questions in organized list** +4. 
**WAIT FOR ANSWERS** before proceeding to architecture design + +**Key Pattern:** Explicit stop point - NO assumptions, NO "whatever you think is best" without confirmation + +**Why Critical:** This prevents building the wrong thing. Most feature failures come from misunderstood requirements. + +--- + +### Phase 4: Architecture Design + +**Goal:** Design multiple implementation approaches with different trade-offs + +**Process:** + +1. Launch 2-3 `code-architect` agents in parallel with different focuses: + - **Minimal changes:** Smallest change, maximum reuse + - **Clean architecture:** Maintainability, elegant abstractions + - **Pragmatic balance:** Speed + quality +2. Review all approaches and form opinion on which fits best +3. Present to user: + - Brief summary of each approach + - Trade-offs comparison + - **Recommendation with reasoning** + - Concrete implementation differences +4. **Ask user which approach they prefer** + +**Key Pattern:** Options with trade-offs + recommendation, not just one solution + +### Agent: code-architect + +- **Tools:** Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch +- **Model:** Sonnet +- **Focus:** Design complete architecture with confident decisions +- **Output:** + - Patterns & conventions found (with file:line refs) + - Architecture decision with rationale + - Component design (files, responsibilities, dependencies) + - Implementation map (specific files to create/modify) + - Data flow diagrams + - Build sequence (phased checklist) + - Critical details (error handling, state, testing, performance, security) + +--- + +### Phase 5: Implementation + +**Goal:** Build the feature + +**Process:** + +1. **DO NOT START WITHOUT USER APPROVAL** +2. Wait for explicit user approval +3. Read all relevant files identified in previous phases +4. Implement following chosen architecture +5. Follow codebase conventions strictly +6. Write clean, well-documented code +7. 
Update todos as you progress + +**Key Pattern:** Explicit approval gate before code changes + +--- + +### Phase 6: Quality Review + +**Goal:** Ensure code is simple, DRY, elegant, and functionally correct + +**Process:** + +1. Launch 3 `code-reviewer` agents in parallel with different focuses: + - **Simplicity/DRY/Elegance:** Code quality and maintainability + - **Bugs/Functional Correctness:** Logic errors and bugs + - **Project Conventions/Abstractions:** CLAUDE.md compliance, patterns +2. Consolidate findings and identify highest severity issues +3. **Present findings and ask what user wants to do:** + - Fix now + - Fix later + - Proceed as-is +4. Address issues based on user decision + +**Key Pattern:** Parallel multi-focus review + user decision on fixes + +### Agent: code-reviewer + +- **Tools:** Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch +- **Model:** Sonnet +- **Focus:** Find bugs, quality issues, guideline violations +- **Filtering:** Only report high-confidence issues (≥80% confidence) +- **Output:** + - Critical issues (confidence 75-100) + - Important issues (confidence 50-74) + - Specific fixes with file:line references + - Project guideline references + +--- + +### Phase 7: Summary + +**Goal:** Document what was accomplished + +**Process:** + +1. Mark all todos complete +2. Summarize: + - What was built + - Key decisions made + - Files modified + - Suggested next steps + +**Key Pattern:** Documentation of decisions and outcomes + +--- + +## Our Current MCP Workflow + +### Prompt 1: generate-context (NEW) + +**Goal:** Analyze codebase architecture, patterns, and conventions + +**Process:** + +- Conversational, iterative analysis +- Ask user about repo structure, service architecture, priority areas +- Automated discovery: tech stack, config files, directory structure +- Deep dive into priority areas (database, API, auth, frontend, testing, etc.) 
+- Generate comprehensive analysis document in `/tasks/[n]-analysis-[name].md` + +**Output Structure:** + +- Overview (project type, languages, frameworks) +- Architecture (system design, directory structure) +- Tech stack deep dive +- Data layer (database, ORM, migrations) +- API layer (routes, validation, middleware) +- Auth & authorization +- Frontend architecture (if applicable) +- Testing strategy +- Build & deployment +- Code patterns & conventions +- Integration points +- Git workflow +- Key files reference +- Recommendations for new features +- Open questions + +**Strengths:** + +- Very comprehensive documentation +- Persistent artifact (`.md` file) +- Covers all architectural aspects + +**Gaps vs Claude Code:** + +- No explicit "return 5-10 key files to read" instruction +- Less focused on execution path tracing +- More documentation-oriented than action-oriented + +--- + +### Prompt 2: generate-spec + +**Goal:** Create detailed specification for a feature + +**Process:** + +1. Receive initial prompt +2. Ask clarifying questions (examples provided) +3. Generate spec using structured template +4. Save as `/tasks/[n]-spec-[feature-name].md` +5. Ask if user is satisfied +6. 
Complete when user approves + +**Spec Structure:** + +- Introduction/Overview +- Goals +- User Stories +- Demoable Units of Work +- Functional Requirements +- Non-Goals +- Design Considerations +- Technical Considerations +- Success Metrics +- Open Questions + +**Clarifying Questions (Examples):** + +- Problem/Goal +- Target User +- Core Functionality +- User Stories +- Acceptance Criteria +- Scope/Boundaries +- Data Requirements +- Design/UI +- Edge Cases +- Unit of Work +- Demoability + +**Strengths:** + +- Comprehensive spec structure +- Demoable units focus +- Persistent documentation + +**Gaps vs Claude Code:** + +- Clarifying questions are examples, not a mandatory phase +- No explicit "WAIT FOR ANSWERS" checkpoint +- Happens before codebase exploration (should be after) +- No architecture options phase follows + +--- + +### Prompt 3: generate-task-list-from-spec + +**Goal:** Create detailed task list from spec + +**Process:** + +1. Receive spec reference +2. Analyze spec +3. Define demoable units of work +4. Assess current state (codebase review) +5. **Phase 1:** Generate parent tasks (high-level) +6. Present tasks to user +7. **Wait for "Generate sub tasks" confirmation** +8. **Phase 2:** Generate sub-tasks for each parent +9. Identify relevant files +10. 
Save as `/tasks/tasks-[spec-file-name].md` + +**Output Structure:** + +- Relevant Files (with descriptions) +- Notes (test conventions, commands) +- Tasks (parent + sub-tasks with demo criteria and proof artifacts) + +**Strengths:** + +- Two-phase generation (parent tasks → sub-tasks) +- Explicit user checkpoint +- Demo criteria and proof artifacts for each parent task +- Codebase-aware task generation + +**Gaps vs Claude Code:** + +- No architecture options to choose from +- Codebase assessment is brief, not agent-based +- No "key files to read" from prior analysis + +--- + +### Prompt 4: manage-tasks + +**Goal:** Execute and track task progress + +**Process:** + +- Three task states: `[ ]` not started, `[~]` in-progress, `[x]` completed +- One sub-task at a time +- Mark in-progress immediately +- Completion protocol: + 1. Mark sub-task complete + 2. When all sub-tasks done: run tests + 3. If tests pass: stage changes + 4. Validate against demo criteria + 5. Clean up temporary code + 6. Commit with conventional commit format + 7. 
Mark parent task complete +- Update "Relevant Files" section as work progresses + +**Strengths:** + +- Clear state management +- Test-driven completion +- Demo criteria validation +- Git integration with conventional commits + +**Gaps vs Claude Code:** + +- No quality review phase before completion +- No parallel reviewer agents +- No user checkpoint after implementation + +--- + +## Gap Analysis: What We're Missing + +### 🔴 CRITICAL GAPS + +| Gap | Claude Code | Our Current | Impact | Priority | +|-----|-------------|-------------|--------|----------| +| **Mandatory Clarifying Questions Phase** | Dedicated Phase 3 with explicit WAIT | Questions are examples in spec prompt | Build wrong features | **HIGH** | +| **Multi-Approach Architecture** | 2-3 parallel architect agents with trade-offs | Single spec, no options | Miss better designs | **HIGH** | +| **Quality Review Before Merge** | Phase 6 with parallel reviewers | No formal review step | Ship bugs and tech debt | **HIGH** | + +### 🟡 IMPORTANT GAPS + +| Gap | Claude Code | Our Current | Impact | Priority | +|-----|-------------|-------------|--------|----------| +| **Agent-Based File Discovery** | Agents return "5-10 key files to read" | Manual AI discovery | Less efficient exploration | **MEDIUM** | +| **Explicit Approval Gates** | WAIT commands at phases 3, 4, 5 | Implicit in some prompts | Less user control | **MEDIUM** | +| **Execution Path Tracing** | Code-explorer focuses on flow | Context prompt focuses on structure | Miss runtime behavior insights | **MEDIUM** | + +### 🟢 MINOR GAPS + +| Gap | Claude Code | Our Current | Impact | Priority | +|-----|-------------|-------------|--------|----------| +| **Parallel Agent Execution** | 2-3 agents at once | Sequential single prompt | Slower execution | **LOW** | +| **Summary Phase** | Dedicated Phase 7 | Implicit in task completion | Less visibility on outcomes | **LOW** | + +--- + +## Workflow Comparison + +### Claude Code Flow + +```text +1. 
Discovery → Understand feature request + ↓ +2. Codebase → Launch 2-3 code-explorer agents + Exploration Read identified files + ↓ +3. Clarifying → Ask ALL questions + Questions ↓ + [⛔ WAIT FOR ANSWERS] + ↓ +4. Architecture → Launch 2-3 code-architect agents + Design Present options with trade-offs + ↓ + [⛔ WAIT FOR USER CHOICE] + ↓ +5. Implementation → [⛔ WAIT FOR APPROVAL] + Build feature + ↓ +6. Quality Review → Launch 3 code-reviewer agents + Present findings + ↓ + [⛔ WAIT FOR FIX DECISION] + ↓ +7. Summary → Document outcomes +``` + +### Our Current Flow + +```text +1. generate- → Comprehensive codebase analysis + codebase-context Generate analysis document + ↓ +2. generate-spec → Ask clarifying questions (examples) + Generate spec document + ↓ + [✓ User approval of spec] + ↓ +3. generate-task- → Generate parent tasks + list-from-spec ↓ + [✓ Wait for "Generate sub tasks"] + ↓ + Generate sub-tasks + ↓ +4. manage-tasks → Execute implementation + Run tests + Commit with conventional format +``` + +**Key Differences:** + +- ❌ We have no dedicated clarifying phase with mandatory stop +- ❌ We have no architecture options comparison +- ❌ We have no quality review phase +- ✅ We generate persistent documentation artifacts +- ✅ We have explicit demoable units and proof artifacts + +--- + +## Recommended Improvements + +### 🎯 Phase 1: Critical Enhancements (Do First) + +#### 1. Enhance `generate-spec` with Mandatory Clarifying Phase + +**Current State:** + +```markdown +## Clarifying Questions (Examples) +The AI should adapt its questions based on the prompt... 
+``` + +**Recommended Change:** + +```text +## Phase 1: Initial Understanding +- Receive feature request +- Clarify if unclear + +## Phase 2: Codebase Context Review +- **PREREQUISITE:** Must have run generate-context first +- Read the analysis document +- Review key files identified in analysis +- Understand existing patterns + +## Phase 3: Clarifying Questions ⭐ CRITICAL - DO NOT SKIP +**MANDATORY STOP POINT** + +Based on the feature request and codebase context, identify ALL: +- Edge cases and error scenarios +- Integration points and dependencies +- Scope boundaries (what's in/out) +- Design and UX preferences +- Backward compatibility needs +- Performance requirements +- Security considerations + +**Present ALL questions in an organized list** +**WAIT FOR USER ANSWERS BEFORE PROCEEDING** + +If user says "whatever you think is best", provide recommendation and get explicit confirmation. + +## Phase 4: Generate Specification +- Using answered questions, generate spec +- ... +``` + +**Rationale:** This makes clarifying questions a mandatory checkpoint, preventing requirement misunderstandings. + +--- + +#### 2. Create NEW Prompt: `generate-architecture-options` + +**Location:** `prompts/generate-architecture-options.md` + +**Purpose:** Generate and compare multiple architectural approaches before task generation + +**Process:** + +1. Review spec and codebase context +2. Generate 2-3 approaches: + - **Minimal Changes:** Smallest change, maximum code reuse, fastest to ship + - **Clean Architecture:** Best maintainability, elegant abstractions, extensible + - **Pragmatic Balance:** Balanced trade-off between speed and quality +3. For each approach, document: + - Key architectural decisions + - Components and responsibilities + - Files to create/modify + - Integration approach + - Trade-offs (pros/cons) +4. Provide recommendation with reasoning +5. **WAIT FOR USER CHOICE** +6. 
Save chosen approach to `/tasks/architecture-[spec-number].md` + +**Integration Point:** Run after `generate-spec`, before `generate-task-list-from-spec` + +**Rationale:** Enables better architectural decisions by comparing trade-offs explicitly. + +--- + +#### 3. Create NEW Prompt: `review-implementation` + +**Location:** `prompts/review-implementation.md` + +**Purpose:** Quality review of implemented code before considering feature complete + +**Process:** + +1. **Prerequisite:** Implementation tasks are complete +2. Review all modified/created files +3. Check for: + - **Bugs and Logic Errors:** Functional correctness, edge cases + - **Code Quality:** DRY violations, complexity, readability + - **Project Conventions:** CLAUDE.md compliance, naming, structure + - **Testing:** Test coverage, test quality + - **Performance:** Obvious inefficiencies + - **Security:** Common vulnerabilities +4. Categorize findings: + - Critical (must fix) + - Important (should fix) + - Nice-to-have (optional) +5. **Present findings to user and ask:** + - Fix all issues now? + - Fix only critical issues? + - Fix later (document as tech debt)? + - Proceed as-is? +6. Take action based on user decision + +**Integration Point:** Run after `manage-tasks` completes all tasks, before final commit/PR + +**Rationale:** Catches quality issues and bugs before they reach production. + +--- + +### 🎯 Phase 2: Important Enhancements + +#### 4. Enhance `generate-context` to be More Actionable + +**Current State:** Comprehensive but documentation-focused + +**Recommended Changes:** + +Add to the **Output** section: + +```markdown +## Essential Files to Read + +After completing this analysis, provide a prioritized list of 5-10 essential files that anyone working on features in this codebase should read: + +1. **[path/to/file.ts:45-120]** - Core [domain concept] implementation +2. **[path/to/file.py:10-50]** - Authentication flow entry point +... 
+ +**Rationale for each file:** Briefly explain why this file is essential. +``` + +Add to **Phase 2: Deep Architectural Analysis**: + +```markdown +### Execution Path Tracing + +For key user flows, trace the execution path: +- Entry point (API endpoint, UI component, CLI command) +- Request flow through layers +- Data transformations at each step +- Side effects and state changes +- Output/response generation + +**Example Flow:** + +```text +User Login: + +1. POST /api/auth/login → routes/auth.ts:23 +2. AuthController.login() → controllers/AuthController.ts:45 +3. AuthService.validateCredentials() → services/AuthService.ts:67 +4. UserRepository.findByEmail() → repositories/UserRepository.ts:34 +5. Database query → models/User.ts:89 +6. JWT token generation → utils/jwt.ts:12 +7. Response with token → controllers/AuthController.ts:52 +``` + +**Rationale:** Makes codebase context more action-oriented, similar to code-explorer agent. + +--- + +#### 5. Update `generate-task-list-from-spec` to Reference Architecture + +**Current State:** + +```text +## Process +... +4. Assess current state (codebase review) +5. Generate parent tasks +... +``` + +**Recommended Change:** + +```text +## Process +... +4. **Review Architecture Decision:** + - **PREREQUISITE:** Must have chosen architecture approach from `generate-architecture-options` + - Read the architecture document: `/tasks/architecture-[spec-number].md` + - Understand chosen approach and rationale +5. **Review Codebase Context:** + - Read key files identified in codebase analysis + - Understand existing patterns +6. Generate parent tasks following chosen architecture +... +``` + +**Rationale:** Ensures task generation aligns with chosen architectural approach. + +--- + +### 🎯 Phase 3: Process Improvements + +#### 6. 
Add Explicit Checkpoints to All Prompts + +Add checkpoint markers: + +```text +## Checkpoints + +This prompt has the following user interaction checkpoints: + +- ⛔ **STOP 1:** After clarifying questions - WAIT FOR ANSWERS +- ⛔ **STOP 2:** After presenting spec draft - WAIT FOR APPROVAL +- ✅ **PROCEED:** When user approves, save spec and complete +``` + +**Rationale:** Makes user control points explicit and consistent. + +--- + +#### 7. Document Complete Workflow + +Create `docs/workflow.md`: + +```markdown +# Spec-Driven Development Workflow + +## Complete Flow + +1. **Analyze Codebase** - Run `generate-context` + - Output: Analysis document + key files list + +2. **Create Specification** - Run `generate-spec` + - ⛔ STOP: Answer clarifying questions + - Output: Spec document + +3. **Design Architecture** - Run `generate-architecture-options` + - ⛔ STOP: Choose architectural approach + - Output: Architecture document + +4. **Generate Tasks** - Run `generate-task-list-from-spec` + - ⛔ STOP: Approve parent tasks before sub-tasks + - Output: Task list document + +5. **Execute Implementation** - Run `manage-tasks` + - Output: Code changes, commits + +6. **Review Quality** - Run `review-implementation` + - ⛔ STOP: Decide what issues to fix + - Output: Review findings, fixes + +7. **Complete** - Create PR, deploy, document +``` + +--- + +## Updated Workflow Diagram + +```text +┌─────────────────────────────────────────────────────────────┐ +│ SPEC-DRIVEN DEVELOPMENT │ +└─────────────────────────────────────────────────────────────┘ + +1. generate-context + └─> Output: /tasks/0001-analysis-[name].md + └─> Key files list (5-10 essential files) + └─> Execution path traces + │ + ↓ +2. generate-spec (ENHANCED) + ├─> Phase 1: Initial understanding + ├─> Phase 2: Review codebase context + read key files + ├─> Phase 3: ⛔ CLARIFYING QUESTIONS (MANDATORY STOP) + │ └─> Wait for user answers + └─> Phase 4: Generate spec + └─> Output: /tasks/0001-spec-[feature].md + │ + ↓ +3. 
generate-architecture-options (NEW) + ├─> Generate 3 approaches: + │ ├─> Minimal Changes + │ ├─> Clean Architecture + │ └─> Pragmatic Balance + ├─> Present trade-offs + recommendation + └─> ⛔ WAIT FOR USER CHOICE + └─> Output: /tasks/architecture-0001.md + │ + ↓ +4. generate-task-list-from-spec (ENHANCED) + ├─> Review chosen architecture + ├─> Review key files from context + ├─> Generate parent tasks + ├─> ⛔ WAIT FOR "Generate sub tasks" + └─> Generate sub-tasks + └─> Output: /tasks/tasks-0001-spec-[feature].md + │ + ↓ +5. manage-tasks + ├─> Execute sub-tasks sequentially + ├─> Run tests after each parent task + ├─> Validate demo criteria + └─> Commit with conventional format + │ + ↓ +6. review-implementation (NEW) + ├─> Review for bugs, quality, conventions + ├─> Categorize findings (critical/important/nice-to-have) + ├─> Present to user + └─> ⛔ WAIT FOR FIX DECISION + └─> Apply fixes if requested + │ + ↓ +7. Complete + └─> Create PR, deploy, document decisions +``` + +--- + +## Implementation Priority + +See [docs/roadmap/PROGRESS.md](../../roadmap/PROGRESS.md) for detailed Phase 2 planning, +effort estimates, and acceptance criteria. + +### Sprint 1: Critical Gaps (Week 1) + +- [ ] Enhance `generate-spec` with mandatory clarifying phase +- [ ] Create `generate-architecture-options` prompt +- [ ] Create `review-implementation` prompt +- [ ] Update workflow documentation + +### Sprint 2: Important Improvements (Week 2) + +- [ ] Enhance `generate-context` with key files output +- [ ] Add execution path tracing to context analysis +- [ ] Update `generate-task-list-from-spec` to reference architecture +- [ ] Add explicit checkpoints to all prompts + +### Sprint 3: Polish (Week 3) + +- [ ] Test complete workflow end-to-end +- [ ] Refine based on feedback +- [ ] Document examples and best practices +- [ ] Create tutorial/getting started guide + +--- + +## Key Learnings from Claude Code Plugin + +1. 
**Mandatory Clarification is Critical:** Most feature failures come from misunderstood requirements. An explicit stop point for questions prevents this. + +2. **Architecture Deserves Multiple Options:** There's rarely one "right" architecture. Presenting trade-offs enables better decisions. + +3. **Quality Review Before Merge:** Catching issues before they ship is vastly cheaper than fixing them in production. + +4. **Agent-Based Parallel Execution:** Running multiple focused agents in parallel is more efficient than sequential single-agent work. + +5. **Explicit > Implicit:** User checkpoints should be explicit STOP commands, not implicit in the flow. + +6. **Action-Oriented Context:** Codebase analysis should produce actionable outputs (key files, execution paths) not just comprehensive documentation. + +7. **Focused Agents:** Specialized agents (explorer, architect, reviewer) with narrow focus produce better results than general-purpose analysis. + +--- + +## Appendix: Claude Code Agent Specifications + +### code-explorer Agent + +```yaml +name: code-explorer +description: Deeply analyzes existing codebase features by tracing execution paths +tools: Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch, KillShell, BashOutput +model: sonnet +color: yellow +``` + +**Output Requirements:** + +- Entry points with file:line references +- Step-by-step execution flow with data transformations +- Key components and their responsibilities +- Architecture insights: patterns, layers, design decisions +- Dependencies (external and internal) +- Observations about strengths, issues, opportunities +- **List of 5-10 files essential to understanding the topic** + +--- + +### code-architect Agent + +```yaml +name: code-architect +description: Designs feature architectures by analyzing codebase patterns and providing implementation blueprints +tools: Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch, KillShell, BashOutput +model: sonnet +color: green 
+``` + +**Output Requirements:** + +- **Patterns & Conventions Found:** Existing patterns with file:line references +- **Architecture Decision:** Chosen approach with rationale and trade-offs +- **Component Design:** Each component with file path, responsibilities, dependencies, interfaces +- **Implementation Map:** Specific files to create/modify with detailed change descriptions +- **Data Flow:** Complete flow from entry points through transformations to outputs +- **Build Sequence:** Phased implementation steps as a checklist +- **Critical Details:** Error handling, state management, testing, performance, security + +**Philosophy:** Make confident architectural choices rather than presenting multiple options (when used standalone). Provide file paths, function names, and concrete steps. + +--- + +### code-reviewer Agent + +```yaml +name: code-reviewer +description: Reviews code for bugs, quality issues, and project conventions +tools: Glob, Grep, LS, Read, NotebookRead, WebFetch, TodoWrite, WebSearch, KillShell, BashOutput +model: sonnet +color: blue +``` + +**Focus Areas:** + +- Project guideline compliance (CLAUDE.md) +- Bug detection +- Code quality issues +- Confidence-based filtering (only reports high-confidence issues ≥80) + +**Output Requirements:** + +- Critical issues (confidence 75-100) +- Important issues (confidence 50-74) +- Specific fixes with file:line references +- Project guideline references + +--- + +## References + +- Claude Code Repository: https://github.com/anthropics/claude-code +- Feature-Dev Plugin: https://github.com/anthropics/claude-code/tree/main/plugins/feature-dev +- Feature-Dev README: https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/README.md +- Feature-Dev Command: https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/commands/feature-dev.md +- Code Explorer Agent: https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-explorer.md +- Code Architect Agent: 
https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-architect.md +- Code Reviewer Agent: https://github.com/anthropics/claude-code/blob/main/plugins/feature-dev/agents/code-reviewer.md diff --git a/docs/research/codebase-context/code-analyst.md b/docs/research/codebase-context/code-analyst.md new file mode 100644 index 0000000..b76a0b9 --- /dev/null +++ b/docs/research/codebase-context/code-analyst.md @@ -0,0 +1,288 @@ +--- +name: code-analyst +description: Specialized agent for analyzing source code to discover what the system does, how it's structured, and what patterns it uses. This agent has deep code analysis capabilities including static analysis, execution tracing, dependency mapping, and architectural pattern recognition. + +--- + +# Code Analyst + +You are a specialized code analysis agent, part of the context_bootstrap pattern +(see [context_bootstrap.md](./context_bootstrap.md) for orchestration details). + +You are a Code Analyst with expertise in reverse-engineering systems through source code analysis. Your job is to discover what a system does and how it's built by analyzing its implementation. + +## Your Job + +You work for a manager who needs to document an existing system. Your specific responsibility is **code analysis** - understanding the system through its implementation. You will analyze source code and return structured findings that help the manager create: + +1. **PRDs (Product Requirements)** - What functional capabilities exist +2. **ADRs (Architecture Decisions)** - What technologies and patterns are used +3. **SYSTEM-OVERVIEW** - How components are structured and connected +4. **Core onboarding documents** (for example `README.md`, contribution or runbooks) - Provide accurate current-state behavior, setup expectations, and pointers to other generated artifacts + +## What You're Looking For + +### 1. 
Functional Capabilities (for PRDs) + +**Discover what the system DOES for users RIGHT NOW**: + +- What features WORK? (functioning API endpoints, interactive UI screens, running background jobs) +- What user workflows are SUPPORTED? (trace working code paths) +- What business rules are ENFORCED? (active validation logic, working calculations) +- What external systems does it INTEGRATE WITH? (active API clients, working SDKs) + +**How to find it**: + +- Trace from entry points (API routes, UI components, event handlers) +- Follow execution paths through the code +- Read business logic in services/controllers/handlers +- Check integration points and API clients +- Note feature toggles or dormant code paths and flag them for manager validation + +**DO NOT INCLUDE**: + +- ❌ Internal data models (not external contract - implementation detail) +- ❌ Missing/planned features (belongs in ROADMAP.md, not PRD) +- ❌ Code quality judgments (not your job) +- ❌ Specific dependency versions (for example `[framework]` 3.5.0 — too volatile) +- ❌ Testing infrastructure details (not a user-facing feature) + +### 2. Technology Stack (for ADRs) + +- **Discover what MAJOR technologies are USED**: +- Programming languages (name only, not specific version) +- Major frameworks (for example `[web-framework]`, `[mobile-framework]` — name only) +- Databases and storage systems (for example `[relational-database]`, `[document-store]` — type only) +- Cloud services (for example `[cloud-provider]` — provider only) +- API styles (`REST`, `GraphQL`, `gRPC`, etc., inferred from route definitions) +- Authentication/authorization approaches (for example `[auth-provider]`, `[protocol]` — approach only) + +**How to find it**: + +- Read dependency files (`package.json`, `requirements.txt`, `[build-manifest]`, etc.) 
+- Examine imports and SDK usage +- Check configuration files +- Look at infrastructure-as-code definitions (for example `[iac-tool]`, `[orchestration-config]`) + +**DO NOT INCLUDE**: + +- ❌ Specific versions (for example `[framework]` 3.5.0 — too volatile) +- ❌ Minor libraries (utility packages, helpers - ADR if decision was significant) +- ❌ Testing tools details (belongs in testing docs, not ADRs) + +### 3. Architecture & Patterns (for SYSTEM-OVERVIEW) + +**Discover how it's STRUCTURED**: + +- What components/services exist? (directories, modules, microservices) +- How do they communicate? (API calls, events, message queues) +- What are the boundaries? (imports, dependencies between modules) +- What patterns are used? (event-driven, CQRS, layered, etc.) +- How is it deployed? (serverless, containers, VMs - from infra code) + +**How to find it**: + +- Map directory/module structure +- Analyze import graphs and dependencies +- Identify service boundaries (no cross-database access, etc.) +- Recognize architectural patterns from code organization +- Read infrastructure code for deployment topology + +## What You're NOT Looking For + +**Do NOT try to find**: + +- **Rationale** ("why was X chosen?") - You can't know why from code alone +- **Historical context** ("what was the problem that led to this?") - Not in code +- **Trade-offs considered** ("why X over Y?") - Not discoverable from implementation + +**These come from documentation** - the Information Analyst will handle that. + +## Output Format + +Return a structured summary that the manager can use: + +```markdown +## Code Analysis Summary + +### System Capabilities + +#### Features Discovered +1. **[Feature Name]**: [What it does - from code behavior] + - Entry point: [file:line] + - Key logic: [brief description] + +2. **[Feature Name]**: [What it does] + +#### User Workflows +1. [Workflow description traced through code] +2. 
[Workflow description] + +#### Business Rules +- [Rule 1 found in validation/calculation logic] +- [Rule 2] + +#### External Integrations (WORKING) +- **[Service]**: [How it's used - from active API client code] +- **[Service]**: [How it's used] + +### Technology Stack + +#### Languages & Frameworks +- **Language**: [Name only - NO version] +- **Framework**: [Name only - NO version] - [Usage context] + +#### Data Storage +- **Database**: [Type] - [Evidence: connection string, ORM config] +- **Cache**: [Type] - [Evidence] + +#### Infrastructure +- **Cloud Provider**: [Name] - [Evidence: SDK imports, config] +- **Key Services**: [List from infrastructure code] +- **Deployment**: [Pattern from Dockerfile, K8s manifests, etc.] + +#### Integration Patterns +- **API Style**: [REST/GraphQL/gRPC] - [Evidence: route definitions] +- **Async**: [Events/Queues/None] - [Evidence: pub/sub code] + +### Confidence & Gaps + +#### High Confidence +- [Finding with strong evidence: cite file:line] + +#### Needs Validation +- [Finding tied to feature toggle, dormant path, or incomplete evidence] + +#### Unknowns +- [Areas the code cannot resolve] + +### Architecture + +#### Components/Services +1. 
**[Name]**: + - Location: [directory] + - Purpose: [inferred from code] + - Responsibilities: [what it handles] + +#### Communication Patterns +- [Component A] → [Protocol] → [Component B] + - Evidence: [import/API call at file:line] + - Data: [what's exchanged] + +#### Service Boundaries +- **Proper**: [List components that communicate via APIs/events] +- **Violations**: [Any direct database access across services] + +#### Architectural Patterns +- **[Pattern Name]**: [Evidence from code structure] + - Example: "Event-driven" - found event publishers/subscribers + +### Output Examples: Good vs Bad + +**Good Analysis** (focuses on what exists and works): + +This demonstrates: specific evidence, confidence marking, working features only +```markdown +### System Capabilities +- REST API exposes catalog search, item detail, and purchase flows (Entry point: `services/api/catalog/routes.ts#L12`) +- Authentication workflow integrates with `[auth-provider]` (Evidence: `apps/web/src/auth/client.ts#L8`) +- Background worker processes `[event-type]` messages (Evidence: `services/worker/handlers/events.ts#L30`) + +### Technology Stack +- Language: `[primary-language]` +- Framework: `[web-framework]` +- Data store: `[database-type]` +- Hosting: `[cloud-provider]` + +### Architecture +- Components: `[service-api]`, `[service-worker]`, `[ui-client]` +- Communication: REST APIs between services, async events on `[queue/bus]` +- Pattern: Event-driven orchestration for long-running tasks + +### Confidence & Gaps +- High confidence: Catalog search workflow (full trace, tests observed) +- Needs validation: Feature flag `enable_related_items` currently disabled +- Unknowns: Purpose of experimental `beta` directory not clear from code +``` + +**Bad Analysis** (too detailed, judges code, lists missing features): + +Problems: includes code quality judgments, specific versions, missing features, internal models + +```markdown +### System Capabilities +- REST API with 5 endpoints (GOOD 
CODE QUALITY, well-tested) +- Authentication via `[auth-provider]` (NEEDS IMPROVEMENT - missing MFA) +- Streaming works BUT caching layer not implemented yet +- MISSING: Offline support, push notifications, social features + +### Technology Stack +- `[language]` 5.2.0 (should upgrade to 5.3.0) +- `[web-framework]` 4.18.2 +- `[database-type]` 15.3 with these exact packages: + - `[db-driver]` 8.11.0 + - `[orm-library]` 0.3.17 + +### Data Models +- Song: { id: string, title: string, artist: string, duration: number... } +- User: { id: string, email: string, preferences: {...} } +(Internal models - not external contract) + +### Testing Infrastructure +- `[test-runner]` 29.5.0 +- Coverage: 90% (EXCELLENT!) +- 247 unit tests, 45 integration tests +(Testing is not a user-facing feature) +``` + +**Key Principle**: Report what the system DOES, not what it's missing or how well it's coded. + +### Questions for Manager + +Based on code analysis, manager should ask user: + +1. [Question about ambiguous implementation] +2. [Question about missing context] + +``` +## Analysis Approach + +### Phase 1: Discovery Scan +- Read dependency files to understand tech stack +- Map directory/module structure for components +- Identify entry points (main files, route definitions, handlers) + +### Phase 2: Behavioral Analysis +- Trace execution from entry points +- Follow key workflows through the code +- Extract business rules from logic +- Map data flows + +### Phase 3: Structural Analysis +- Build component dependency graph +- Identify communication patterns +- Map integration points +- Recognize architectural patterns + +### Phase 4: Synthesis +- Organize findings into categories +- Flag uncertainties and gaps +- Prepare questions for manager + +## Key Principles + +1. **Code is ground truth** - What you find in code is what the system actually does +2. **Be specific** - Reference exact files/lines for evidence +3. **Distinguish fact from inference** - Mark when you're inferring vs. 
observing +4. **Flag toggles and dormant paths** - Call out anything that might be disabled or experimental +5. **Flag gaps** - Be clear about what you can't determine from code +6. **Stay in your lane** - Don't guess at "why" - that's not your job +7. **Concise summaries** - Manager needs actionable insights, not code dumps + +## Remember + +You are running in a **subprocess** to do deep code analysis without overwhelming the main context. Do the heavy lifting here - read all the code, trace all the paths, map all the structure. Then return a **concise, structured summary** that gives the manager exactly what they need to document the system. + +Your findings will be combined with the Information Analyst's findings (from docs) to create complete context. + +``` diff --git a/docs/research/codebase-context/context_bootstrap.md b/docs/research/codebase-context/context_bootstrap.md new file mode 100644 index 0000000..c562778 --- /dev/null +++ b/docs/research/codebase-context/context_bootstrap.md @@ -0,0 +1,58 @@ +--- +description: Reverse-engineer codebase into PRDs/ADRs/SYSTEM-OVERVIEW/README/etc + +--- + +# Bootstrap Context Command + +## Mission + +Reverse-engineer an existing codebase into structured, human-readable documentation. Produce: + +- Product requirements overviews (PRDs) describing user-facing capabilities +- Architecture decision records (ADRs) in MADR format capturing rationale +- `SYSTEM-OVERVIEW.md` summarizing architecture and integration patterns +- Updated `README.md` and any other core onboarding documents that are missing or outdated + +## Core Principles + +- Code explains *how* the system currently behaves; the user supplies *what* it is supposed to achieve and *why* choices were made. +- Target stable, business-relevant behavior and architectural rationale. Avoid speculative implementation details. +- Keep the dialog interactive. Ask focused follow-up questions instead of long questionnaires. 
+- Update existing documentation in place when accurate; otherwise, create minimal, clear replacements. +- Record open questions or assumptions explicitly when user confirmation is unavailable. + +## Repository Layout Awareness + +Automatically infer the structure before generating artifacts. Support these common patterns (names are illustrative placeholders): + +- **Multi-service workspace** – multiple peer directories (for example `[service-a]/`, `[service-b]/`) with independent build tooling. Create shared context at the workspace root and service-specific context under each service directory. +- **Monorepo** – a unified repository with grouped packages/apps (for example `packages/[component]/`, `apps/[interface]/`). Provide cross-cutting docs at the root and scoped docs within each relevant package or app. +- **Single application** – a single deployable unit (for example `src/`, `config/`, `tests/`). Generate all artifacts at the repository root. + Document any hybrid layout you discover and adapt scoping rules accordingly. + +## Command Invocation + +- `/bootstrap-context` with no arguments: analyze the entire repository/workspace and emit both workspace-level and component-level artifacts. +- `/bootstrap-context [target ...]`: restrict analysis to the listed directories. Only write PRDs/ADRs and related files inside those targets. Leave workspace-level files untouched unless explicitly instructed by the user. +- `/bootstrap-context help`: return a concise usage guide that mirrors these invocation rules, lists the deliverables (PRDs, ADRs, system overview, README updates), recommends when to run the command (onboarding, auditing existing systems, refreshing stale docs), summarizes the workflow (layout detection, analysis, user collaboration, documentation drafting, review), and restates supported repository layouts (multi-service workspace, monorepo, single application) using placeholders only. 
+- Confirm the inferred repository structure and target scope with the user before modifying files, even when running without arguments. Clarify which directories map to services, packages, or components. + +## Six-Phase Workflow + +Announce each phase clearly to the user, gather input where needed, and proceed only after resolving blockers. + +1. **Analyze repository structure** – detect layout, enumerate components, note detected technologies and entry points. +2. **Audit existing documentation** – catalogue current docs, note currency, capture rationale already recorded, and flag conflicts between docs and code. +3. **Deep code analysis** – identify capabilities, integration points, data flows, dependencies, and implicit product behavior. Produce targeted questions for missing context. +4. **User collaboration** – run short, iterative conversations to confirm behavior, uncover rationale, and resolve conflicts or gaps. Capture explicit quotes or decisions for later citation. +5. **Draft documentation set** – generate PRDs, ADRs (use the MADR structure and populate it with confirmed details; when details are missing, ask the user and only leave clearly marked follow-up items if the gap cannot be resolved), `SYSTEM-OVERVIEW.md`, README updates, and any other onboarding docs required for clarity. Note assumptions and unresolved questions inline, then keep the dialogue open until you either resolve them or document them as tracked gaps. +6. **Review with user** – summarize changes, surface open issues, and offer next steps. Adjust documents based on feedback before finalizing. + +## Subagent Orchestration + +You operate as the manager orchestrating two specialists: + +- **Code Analyst** – inspects source, dependencies, APIs, data models, integrations; returns summarized findings plus validation questions. +- **Information Analyst** – reviews documentation artifacts, diagrams, and in-code commentary; returns inventories, rationale evidence, gaps, and conflicts. 
+ Keep subprocess outputs concise. Integrate their findings into user conversations and documentation. diff --git a/docs/research/codebase-context/information-analyst.md b/docs/research/codebase-context/information-analyst.md new file mode 100644 index 0000000..6e2926b --- /dev/null +++ b/docs/research/codebase-context/information-analyst.md @@ -0,0 +1,284 @@ +--- +name: information-analyst +description: Specialized agent for extracting knowledge from documentation, diagrams, and written artifacts. This agent excels at reading any format (markdown, PDFs, images, diagrams) to discover rationale, decisions, context, and the "why" behind system design. + +--- + +# Information Analyst + +You are an Information Analyst with expertise in extracting knowledge from documentation and visual artifacts. Your job is to discover WHY a system was built the way it was by analyzing written and visual materials. + +## Your Job + +You work for a manager who needs to document an existing system. Your specific responsibility is **information extraction** - understanding the reasoning, decisions, and context from documentation. You will analyze documents and diagrams (from any source) and return structured findings that help the manager create: + +1. **PRDs (Product Requirements)** - Context about what problems the system solves +2. **ADRs (Architecture Decisions)** - WHY technologies and patterns were chosen +3. **SYSTEM-OVERVIEW** - Intended architecture and design rationale +4. **Core onboarding documents** (for example `README.md`, playbooks, runbooks) - Capture origin stories, operating expectations, and references to future or planned work + +## What You're Looking For + +### 1. System Context & Purpose (for PRDs) + +**Discover WHY the system exists**: + +- What problem does it solve? +- Who are the users? +- What business value does it provide? +- What use cases drove requirements? +- What goals or metrics define success? 
+ +**How to find it**: + +- README "About" or "Overview" sections +- Project proposals, RFCs, design docs +- User stories or product specs +- Executive summaries +- Mission statements + +### 2. Decision Rationale (for ADRs) + +**Discover WHY choices were made** (this is your primary job): + +- Why was [technology X] chosen? +- Why [pattern Y] over alternatives? +- What constraints drove decisions? +- What trade-offs were considered? +- What problems were these choices solving? + +**How to find it**: + +- Existing ADRs (if any) +- Design documents with "Rationale" sections +- Architecture docs explaining "Why we chose..." +- Meeting notes about technical decisions +- Comments in docs explaining choices +- Email/chat discussions (if provided) +- Commit messages explaining significant changes +- Record the precise source location (relative path, section heading, anchor, or page number) for each rationale item + +### 3. Intended Architecture (for SYSTEM-OVERVIEW) + +**Discover how it was DESIGNED to work**: + +- What's the intended architecture? (from design docs) +- How should components interact? (from diagrams) +- What patterns were intended? (from architecture docs) +- How was it meant to be deployed? (from deployment docs) + +**How to find it**: + +- Architecture diagrams (extract components, flows, relationships) +- System design documents +- Deployment guides and topology diagrams +- Infrastructure documentation +- API documentation showing intended contracts + +### 4. Historical Context + +**Discover the evolution**: + +- What changed and why? +- What problems were encountered? +- What was tried and didn't work? +- How did decisions evolve? + +**How to find it**: + +- CHANGELOGs and release notes +- "History" or "Background" sections in docs +- Migration guides +- Post-mortems or incident reports +- Version history in wikis + +## What You're Analyzing + +You will analyze ALL documentation - both in-repo and external. 
+ +**Your first job**: Scan the repository for documentation files and capture metadata (path, title, last modified timestamp when available): + +- README files (all levels) +- docs/, documentation/, wiki/ directories +- *.md,*.txt files with documentation +- Architecture diagrams (*.png,*.jpg, *.svg in docs/) +- Design documents (*.pdf in docs/) +- Any other documentation artifacts + +**Then analyze** what the manager provides (if any external materials). + +These can be: + +**Text Documents**: + +- README.md, ARCHITECTURE.md, DESIGN.md (in-repo) +- Wiki pages, knowledge-base docs (external) +- Shared documents (for example `[shared-doc-service]`), PDFs (external) +- Email threads, chat exports (external) +- Existing specs or RFCs (external) + +**Visual Documents**: + +- Architecture diagrams (PNG, JPG, `[diagram-source]`) +- Flowcharts and sequence diagrams +- Whiteboard photos from design sessions +- Screenshots from design tools (for example `[design-tool]`) +- Infrastructure topology diagrams + +**You don't care if it's in-repo or external** - your job is to extract knowledge from whatever the manager gives you. + +## Output Format + +Return a structured summary that the manager can use: + +```markdown +## Information Analysis Summary + +### Documentation Found + +**In Repository**: +- `[path/to/doc.md]` — Title: `[Document Title]` (Last updated: `[YYYY-MM-DD]`, Reference: `[commit-hash-or-link]`) +- `[path/to/diagram.png]` — Diagram: `[Description]` (Last updated: `[YYYY-MM-DD]`) + +**External** (if provided): +- `[Document Name or URL]` — Accessed on `[YYYY-MM-DD]` + +### System Context + +#### Purpose & Goals +- **Problem Solved**: [From docs] +- **Target Users**: [From docs] +- **Business Value**: [From docs] +- **Success Metrics**: [If documented] + +#### Use Cases +1. [Use case from docs] +2. [Use case from docs] + +### Decision Rationale (CRITICAL - This is your main job) + +#### Technology Decisions +1. 
**[Technology]**: + - **Why chosen**: "[Direct quote or paraphrase from docs]" + - **Source**: `[path/to/doc.md#section-heading]` + - **Alternatives considered**: [If mentioned] + - **Trade-offs**: [If mentioned] + +2. **[Technology]**: + - **Why chosen**: "[Quote/paraphrase]" + - **Source**: `[path/to/second-doc.md#section-heading]` + +#### Architecture Decisions +1. **[Pattern/Approach]**: + - **Why chosen**: "[Quote/paraphrase]" + - **Problem it solved**: [From docs] + - **Source**: `[path/to/doc.md#section-heading]` + +#### Constraints & Drivers +- **[Constraint]**: [How it influenced decisions] +- **[Driver]**: [How it shaped architecture] + +### Intended Architecture (from diagrams/docs) + +#### Components (from design) +1. **[Component Name]**: + - **Intended Purpose**: [From docs/diagrams] + - **Responsibilities**: [From design] + +#### Intended Communication +- [Component A] → [Method] → [Component B] + - **Source**: `[docs/diagrams/system-overview.drawio]` + - **Notes**: [Any annotations on diagram] + +#### Design Patterns +- **[Pattern]**: [Evidence from architecture docs] +- **Rationale**: [Why this pattern from docs] + +### Historical Context + +#### Evolution +- [Timeline of major changes from docs] +- [Decisions that were reversed and why] +- [Problems encountered and solutions] + +#### Migrations & Changes +- **[Change]**: [Why it happened - from docs] +- **[Migration]**: [Context from migration guides] + +### Conflicts & Discrepancies + +**Between documents**: +- `[docs/architecture.md]` says [X], `[docs/system-overview.md]` says [Y] +- Diagram dated `[YYYY-MM-DD]` shows [X], newer doc says [Y] + +**Gaps in rationale**: +- [Technology X] is documented but no "why" +- [Decision Y] mentioned but rationale missing + +**Outdated information** (flag for validation): +- `[Document]` appears old (dated `[YYYY-MM-DD]`) - may not reflect current state + +### Confidence Levels + +**High Confidence** (explicit in docs): +- [List findings with clear 
documentation] + +**Medium Confidence** (implied but not explicit): +- [List inferences from context] + +**Low Confidence** (ambiguous or missing): +- [List gaps or unclear information] + +### Questions for Manager + +Based on documentation analysis, manager should ask user: +1. [Question about conflicting information] +2. [Question about missing rationale] +3. [Question about outdated docs] +``` + +## Analysis Approach + +### For Text Documents + +1. **Scan for structure** - Find "Why", "Rationale", "Decision", "Background" sections +2. **Extract direct quotes** - When docs explain why, quote them +3. **Identify sources** - Always note which doc said what +4. **Capture metadata** - Record relative path, heading/anchor, author if noted, and last modified timestamp +5. **Flag dates** - Old docs may be outdated +6. **Compare versions** - If multiple versions exist, note evolution + +### For Diagrams + +1. **Identify components** - What boxes/shapes represent what +2. **Extract relationships** - What arrows/lines show what +3. **Read annotations** - All text on diagrams is valuable context +4. **Note dates/versions** - When was this diagram created? +5. **Infer carefully** - Use standard diagram conventions but note assumptions + +### For All Materials + +1. **Prioritize "why"** - This is your unique value +2. **Note conflicts** - Don't resolve, flag for manager +3. **Assess currency** - Is this current or historical? +4. **Extract evidence** - Quote directly when possible +5. **Tie evidence to references** - Provide anchors or page numbers so the manager can jump straight to the source + +## Key Principles + +1. **Direct quotes for "why"** - When docs explain rationale, quote them verbatim +2. **Source everything** - Always say which doc/diagram +3. **Attach metadata** - Include relative path, heading/anchor, and last modified timestamp for each finding when available +4. **Flag conflicts, don't resolve** - Manager will ask user to clarify +5. 
**Note dates** - Timestamp information when possible +6. **Distinguish explicit vs implicit** - Be clear when you're inferring +7. **Focus on rationale** - This is what you uniquely provide (Code Analyst can't find this) +8. **Concise summaries** - Extract insights, don't repeat entire docs + +## Remember + +You are running in a **subprocess** to do deep information extraction without overwhelming the main context. Read all the documents thoroughly, analyze all the diagrams carefully, extract all the rationale you can find. Then return a **concise, structured summary** focused on the "why" - this is what the manager can't get from code alone. + +Your findings will be combined with the Code Analyst's findings to create complete context. The Code Analyst tells the manager WHAT and HOW from code. You tell the manager WHY from documentation. + +Together, you give the manager everything needed to write accurate PRDs, meaningful ADRs with rationale, and complete SYSTEM-OVERVIEW documentation. diff --git a/docs/research/codebase-context/research-synthesis.md b/docs/research/codebase-context/research-synthesis.md new file mode 100644 index 0000000..208e85a --- /dev/null +++ b/docs/research/codebase-context/research-synthesis.md @@ -0,0 +1,713 @@ +# Research Synthesis: Integrating Best Practices into MCP Prompts + +**Date:** 2025-01-21 +**Purpose:** Synthesize findings from Claude Code feature-dev analysis and existing research files to create actionable recommendations for improving our MCP prompts + +--- + +## Key Sources Analyzed + +1. **Claude Code feature-dev plugin** - Battle-tested 7-phase workflow with agent-based architecture +2. **information-analyst.md** - Specialized agent for extracting "WHY" from documentation +3. **code-analyst.md** - Specialized agent for discovering "WHAT" and "HOW" from code +4. 
**context_bootstrap.md** - Manager agent orchestrating code+info analysts for reverse-engineering + +--- + +## Major Insights from Research Files + +### 🎯 Core Philosophy from context_bootstrap.md + +> "Code explains HOW the system currently behaves; the user supplies WHAT it is supposed to achieve and WHY choices were made." + +This is **critical** - it separates: + +- What can be discovered automatically (code analysis) +- What must be asked (requirements, rationale, decisions) + +**Application to Our Prompts:** + +- `generate-context` should focus on WHAT and HOW (from code) +- Must have explicit phase to ask user for WHY and goals +- Cannot infer intent from code alone + +--- + +### 🔬 Two-Agent Specialization Pattern + +**Pattern:** Separate concerns into specialized agents with clear boundaries + +#### Code Analyst (from code-analyst.md) + +**Responsibilities:** + +- Discover WHAT the system does (features, workflows, business rules) +- Discover HOW it's structured (architecture, patterns, components) +- Identify WHAT technologies are used + +**Output Format:** + +```markdown +## Code Analysis Summary +### System Capabilities +- Features Discovered (with file:line evidence) +- User Workflows (traced through code) +- Business Rules (from validation logic) +- External Integrations (working API clients) + +### Technology Stack +- Languages & Frameworks (names only, NO versions) +- Data Storage (types, evidence) +- Infrastructure (cloud provider, deployment pattern) + +### Architecture +- Components/Services (location, purpose, responsibilities) +- Communication Patterns (with file:line evidence) +- Service Boundaries +- Architectural Patterns (with evidence) + +### Confidence & Gaps +- High Confidence (strong evidence) +- Needs Validation (feature toggles, dormant paths) +- Unknowns (areas code cannot resolve) +``` + +**Key Principles:** + +1. Code is ground truth - report what actually exists +2. Be specific - reference exact file:line for evidence +3. 
Distinguish fact from inference +4. Flag toggles and dormant paths +5. Flag gaps - be clear what you can't determine +6. **Stay in your lane** - don't guess at "why" + +**What NOT to include:** + +- ❌ Internal data models (implementation detail) +- ❌ Missing/planned features (belongs in roadmap) +- ❌ Code quality judgments +- ❌ Specific dependency versions (too volatile) +- ❌ Testing infrastructure details + +--- + +#### Information Analyst (from information-analyst.md) + +**Responsibilities:** + +- Discover WHY the system was built this way +- Extract rationale from documentation +- Find decision context and trade-offs +- Capture historical evolution + +**Primary Job:** Extract "WHY" - this is what code analysis can't provide + +**Output Format:** + +```markdown +## Information Analysis Summary +### Documentation Found +- In Repository (with paths, titles, last updated timestamps) +- External (if provided) + +### System Context +- Purpose & Goals +- Target Users +- Business Value +- Success Metrics +- Use Cases + +### Decision Rationale (CRITICAL) +#### Technology Decisions +- **[Technology]**: + - Why chosen: "[Direct quote from docs]" + - Source: `path/to/doc.md#section-heading` + - Alternatives considered + - Trade-offs + +#### Architecture Decisions +- **[Pattern]**: + - Why chosen + - Problem it solved + - Source reference + +### Intended Architecture (from diagrams/docs) +- Components (intended purpose from design) +- Intended Communication +- Design Patterns with rationale + +### Historical Context +- Evolution timeline +- Migrations & Changes + +### Conflicts & Discrepancies +- Between documents +- Gaps in rationale +- Outdated information + +### Confidence Levels +- High (explicit in docs) +- Medium (implied) +- Low (ambiguous/missing) + +### Questions for Manager +- Conflicting information +- Missing rationale +- Outdated docs +``` + +**Key Principles:** + +1. **Direct quotes for "why"** - quote docs verbatim +2. 
**Source everything** - always say which doc/diagram +3. **Attach metadata** - path, heading/anchor, timestamp +4. **Flag conflicts, don't resolve** +5. **Note dates** - timestamp information +6. **Distinguish explicit vs implicit** +7. **Focus on rationale** - this is your unique value +8. **Concise summaries** - extract insights, don't repeat docs + +--- + +### 🎭 Manager Orchestration Pattern (context_bootstrap.md) + +**Pattern:** Manager coordinates specialized subprocess agents + +**Manager Responsibilities:** + +1. Detect repository structure (workspace, monorepo, single app) +2. Launch Code Analyst subprocess +3. Launch Information Analyst subprocess +4. Integrate findings from both +5. Ask user clarifying questions based on gaps +6. Draft comprehensive documentation +7. Review with user + +**Six-Phase Workflow:** + +1. **Analyze repository structure** +2. **Audit existing documentation** +3. **Deep code analysis** (subprocess: Code Analyst) +4. **User collaboration** (fill gaps, resolve conflicts) +5. **Draft documentation set** (PRDs, ADRs, SYSTEM-OVERVIEW, README) +6. **Review with user** + +**Key Pattern:** "Keep dialog interactive. Ask focused follow-up questions instead of long questionnaires." + +--- + +## Comparison: Our Prompts vs. Research Patterns + +| Aspect | Our Current Approach | Research Best Practice | Gap | +|--------|---------------------|------------------------|-----| +| **Code vs. 
Docs Separation** | Single `generate-context` prompt | Separate Code Analyst + Information Analyst | Not separated - conflates WHAT/HOW with WHY | +| **Evidence Citations** | General descriptions | Explicit file:line references + timestamps | Weak evidence trail | +| **Confidence Levels** | Implicit | Explicit (High/Medium/Low with gaps) | No confidence assessment | +| **Documentation Audit** | Not included | Explicit phase: scan + categorize + date-check | Missing documentation review | +| **Rationale Extraction** | Ad-hoc | Dedicated agent focused on WHY | Not systematic | +| **User Collaboration** | Batch Q&A | Iterative short conversations | Too batch-oriented | +| **Output Artifacts** | Analysis markdown | PRDs + ADRs (MADR format) + SYSTEM-OVERVIEW + README | Different artifact structure | + +--- + +## Actionable Recommendations + +### 🔴 HIGH PRIORITY: Restructure `generate-context` + +**Current State:** Single monolithic prompt trying to do everything + +**Recommended Change:** Split into focused phases matching research patterns + +```markdown +## Phase 1: Repository Structure Analysis +- Detect layout (workspace/monorepo/single app) +- Enumerate components/services +- Identify entry points +- **Output:** Structure summary with component list + +## Phase 2: Documentation Audit +- Scan for documentation files (README, docs/, *.md, diagrams) +- Capture metadata (path, title, last modified) +- Note existing rationale if found +- Flag outdated or conflicting docs +- **Output:** Documentation inventory with timestamps + +## Phase 3: Code Analysis (WHAT + HOW) +Following Code Analyst patterns: +- Discover WHAT: features, workflows, business rules, integrations +- Discover HOW: architecture, patterns, communication, deployment +- Technology stack (names only, NO versions) +- **Provide file:line evidence for ALL findings** +- **Flag confidence levels: High/Needs Validation/Unknown** +- **DO NOT infer WHY** - stay in lane +- **Output:** Code analysis summary with 
evidence + +## Phase 4: Information Analysis (WHY) +Following Information Analyst patterns: +- Extract decision rationale from docs +- Find "why X was chosen" with direct quotes +- Capture alternatives considered and trade-offs +- Note historical context and evolution +- **Provide source references with path#heading** +- **Output:** Rationale summary with citations + +## Phase 5: Gap Identification +- Compare code analysis vs. documentation +- Identify conflicts between docs and code +- List missing rationale (tech used but no "why") +- Flag questions that need user answers +- **Output:** Gap analysis with specific questions + +## Phase 6: User Collaboration ⛔ MANDATORY STOP +**Interactive, not batch:** +- Ask focused questions about gaps +- Resolve conflicts between docs and code +- Confirm assumptions +- **Capture user answers as direct quotes for citation** +- **Wait for answers before proceeding** + +## Phase 7: Generate Analysis Document +- Synthesize all findings +- Include evidence citations (file:line, doc#heading) +- Mark confidence levels +- Document resolved gaps and remaining unknowns +- **Essential Files List:** 5-10 key files with file:line ranges +- **Execution Path Traces:** Key workflows with step-by-step flow +- Save to `/tasks/[n]-context-[name].md` +``` + +### Migration Impact + +This restructuring will change the output format and process. 
Users with existing +codebase context documents should: + +- **Still valid:** All file:line citations remain accurate +- **May need updating:** If they reference specific sections, page numbers may shift +- **Recommended:** Regenerate context analyses using the new prompt format for consistency + +--- + +### 🔴 HIGH PRIORITY: Add Evidence Citation Standards + +Add to ALL prompts that analyze code or docs: + +```markdown +## Evidence Citation Standards + +Every finding MUST include evidence: + +### For Code Findings +- Format: `path/to/file.ts:45-67` (include line range when relevant) +- Example: "Authentication uses JWT tokens (src/auth/AuthService.ts:23)" + +### For Documentation Findings +- Format: `path/to/doc.md#section-heading` (include anchor/page) +- Example: "PostgreSQL chosen for ACID guarantees (docs/architecture.md#database-decision)" +- Include last modified timestamp when available + +### For User-Provided Information +- Format: "[User confirmed: YYYY-MM-DD]" +- Example: "OAuth2 required by compliance team [User confirmed: 2025-01-21]" +- Use direct quotes when possible +``` + +--- + +### 🔴 HIGH PRIORITY: Add Confidence Assessment + +Add to `generate-context` and `review-implementation`: + +```markdown +## Confidence Assessment + +Categorize every finding: + +### High Confidence +- **Criteria:** Strong evidence from code or explicit documentation +- **Examples:** + - Feature exists with working code path + - Technology explicitly listed in dependencies + - Design decision documented in ADR + +### Medium Confidence (Needs Validation) +- **Criteria:** Inferred from context or behind feature flags +- **Examples:** + - Feature toggle currently disabled + - Pattern inferred from code structure + - Technology mentioned in code comments only + +### Low Confidence (Unknown) +- **Criteria:** Cannot determine from available information +- **Examples:** + - Rationale missing from docs and code + - Conflicting information in different sources + - 
Experimental/dormant code paths + +**Always flag low confidence items for user validation** +``` + +--- + +### 🟡 MEDIUM PRIORITY: Enhance `generate-spec` with WHY Questions + +Current `generate-spec` asks about functional requirements. Add a dedicated section: + +```markdown +## Phase 2A: Context Questions (WHY) + +Before designing the feature, understand context: + +### Purpose & Value +1. **What problem does this solve?** + - Who experiences this problem? + - How do they currently work around it? + - What's the business value of solving it? + +### Strategic Fit +2. **Why build this now?** + - What makes this a priority? + - What's driving the timeline? + - Are there dependencies blocking other work? + +### Success Criteria +3. **How will we know it's working?** + - What metrics indicate success? + - What does "good enough" look like? + - What are the acceptance thresholds? + +### Constraints & Context +4. **What constraints exist?** + - Technical limitations + - Regulatory/compliance requirements + - Budget/timeline pressures + - Team/resource constraints + +**Capture answers as direct quotes for later reference in spec** +``` + +--- + +### 🟡 MEDIUM PRIORITY: Create ADR Template + +Based on context_bootstrap.md recommendation for MADR format: + +Create `prompts/templates/adr-template.md`: + +```markdown +# [short title of solved problem and solution] + +**Status:** [proposed | accepted | rejected | deprecated | superseded by [ADR-0005](0005-example.md)] +**Date:** YYYY-MM-DD +**Decision Makers:** [list who was involved] +**Context Source:** [reference to feature spec or analysis document] + +## Context and Problem Statement + +[Describe the context and problem statement in 1-2 sentences. +Include business value and constraints if relevant.] + +## Decision Drivers + +* [driver 1, e.g., a force, facing concern, ...] +* [driver 2, e.g., a force, facing concern, ...] +* ... + +## Considered Options + +* [option 1] +* [option 2] +* [option 3] +* ... 
+ +## Decision Outcome + +Chosen option: "[option 1]", because [justification. e.g., only option that meets k.o. criterion decision driver | which resolves force | ... | comes out best (see below)]. + +### Consequences + +* Good, because [positive consequence, e.g., improvement of one or more quality attributes, follow-up decisions required] +* Bad, because [negative consequence, e.g., compromising one or more quality attributes, follow-up decisions required] +* ... + +### Confirmation + +[Optional: Describe how the decision will be validated] + +## Pros and Cons of the Options + +### [option 1] + +[short description | example | link to more information] + +* Good, because [argument a] +* Good, because [argument b] +* Neutral, because [argument c] +* Bad, because [argument d] +* ... + +### [option 2] + +[same as above] + +### [option 3] + +[same as above] + +## More Information + +[Optional: Links to additional resources, related ADRs, or evidence used in decision making] +``` + +--- + +### 🟡 MEDIUM PRIORITY: Interactive vs. Batch Questioning + +**Current:** `generate-spec` presents all questions at once + +**Research Best Practice:** "Keep dialog interactive. Ask focused follow-up questions instead of long questionnaires." 
+ +**Recommendation:** Phase the questioning: + +```markdown +## Clarifying Questions Approach + +### Phase 1: Core Requirements (3-5 questions) +Ask ONLY about: +- What problem is being solved +- Who the user is +- Core functionality needed + +**STOP - Wait for answers** + +### Phase 2: Context & Constraints (based on answers) +Ask follow-up questions about: +- Edge cases specific to their answers +- Integration points now that we know the domain +- Constraints relevant to the identified problem + +**STOP - Wait for answers** + +### Phase 3: Refinement (based on gaps) +Ask targeted questions about: +- Ambiguities in their previous answers +- Specific unknowns discovered +- Trade-off preferences + +**STOP - Wait for final confirmation** + +**Rationale:** Shorter conversations get better engagement and more thoughtful answers than long questionnaires. +``` + +--- + +### 🟢 LOW PRIORITY: Artifact Structure + +**Research Pattern:** Generate multiple focused documents: + +- PRDs (product requirements) +- ADRs (architecture decisions in MADR format) +- SYSTEM-OVERVIEW.md (architecture summary) +- README.md updates + +**Our Current:** Single large analysis markdown + +**Recommendation:** Consider splitting output but LOW priority - our current structure works well for MCP use case. 
+ +--- + +## Integration Priority Matrix + +| Change | Impact | Effort | Priority | Timeline | +|--------|--------|--------|----------|----------| +| Restructure codebase-context into phases | HIGH | MEDIUM | **P0** | Sprint 1 | +| Add evidence citation standards | HIGH | LOW | **P0** | Sprint 1 | +| Add confidence assessment | HIGH | LOW | **P0** | Sprint 1 | +| Enhance spec with WHY questions | MEDIUM | LOW | **P1** | Sprint 2 | +| Create ADR template | MEDIUM | LOW | **P1** | Sprint 2 | +| Move to interactive questioning | MEDIUM | MEDIUM | **P1** | Sprint 2 | +| Split into specialized sub-agents | LOW | HIGH | **P2** | Future | +| Multi-document artifact structure | LOW | MEDIUM | **P2** | Future | + +--- + +## Specific Prompt Enhancements + +### For `generate-context` + +**Add from code-analyst.md:** + +1. ✅ File:line evidence citations for all findings +2. ✅ Confidence levels (High/Needs Validation/Unknown) +3. ✅ "Stay in your lane" - don't infer WHY from code +4. ✅ Flag feature toggles and dormant paths +5. ✅ Technology names only (NO versions) +6. ✅ Focus on working features, not missing ones +7. ✅ "Essential Files List" with file:line ranges +8. ✅ Execution path traces with step-by-step flows + +**Add from information-analyst.md:** + +1. ✅ Documentation audit phase (scan + timestamp + inventory) +2. ✅ Rationale extraction with direct quotes +3. ✅ Source references with path#heading format +4. ✅ Conflict detection between docs +5. ✅ Distinguish explicit vs. implicit knowledge +6. ✅ Metadata capture (last modified timestamps) + +**Add from context_bootstrap.md:** + +1. ✅ Repository structure detection (workspace/monorepo/single) +2. ✅ User collaboration phase (interactive, not batch) +3. ✅ Capture user answers as direct quotes for citation + +--- + +### For `generate-spec` + +**Add from research:** + +1. ✅ WHY questions (problem, value, strategic fit) +2. ✅ Interactive phased questioning (not batch) +3. ✅ Capture answers as direct quotes +4. 
✅ Reference codebase context document explicitly +5. ✅ Include evidence citations when referencing existing code + +--- + +### For `generate-architecture-options` (NEW) + +**Inspired by code-architect.md:** + +1. ✅ Patterns & conventions found (with file:line refs) +2. ✅ Multiple approaches (minimal/clean/pragmatic) +3. ✅ Complete component design with responsibilities +4. ✅ Implementation map (files to create/modify) +5. ✅ Data flow diagrams +6. ✅ Build sequence as checklist +7. ✅ Critical details (error handling, state, testing, security) + +--- + +### For `review-implementation` (NEW) + +**Inspired by code-reviewer.md:** + +1. ✅ Confidence-based filtering (≥80% confidence) +2. ✅ Categorize findings (Critical/Important/Nice-to-have) +3. ✅ Specific fixes with file:line references +4. ✅ Check against project guidelines (CLAUDE.md) +5. ✅ Flag high-confidence issues only + +--- + +## Key Principles to Embed + +### 1. Separation of Concerns + +- **Code tells you WHAT and HOW** +- **Docs tell you WHY** +- **Users tell you goals and intent** +- Don't conflate these sources + +### 2. Evidence-Based + +- Every claim needs evidence +- File:line for code +- Path#heading for docs +- Direct quotes for users +- Timestamps for currency + +### 3. Confidence Assessment + +- Distinguish fact from inference +- Flag gaps explicitly +- Mark validation needs +- Document unknowns + +### 4. Interactive Collaboration + +- Short focused conversations +- Don't batch questions +- Wait for answers between phases +- Capture responses as quotes + +### 5. 
Actionable Outputs + +- Specific file lists to read +- Execution path traces +- Concrete next steps +- Clear decision points + +--- + +## Implementation Roadmap + +### Sprint 1: Core Evidence & Confidence (Week 1) + +**Goal:** Make analysis evidence-based and trustworthy + +- [ ] Add evidence citation standards to all prompts +- [ ] Add confidence assessment to codebase-context +- [ ] Enhance codebase-context with code-analyst patterns +- [ ] Add documentation audit phase +- [ ] Test on sample codebase + +**Deliverable:** Updated `generate-context` with evidence citations and confidence levels + +--- + +### Sprint 2: Interactive Collaboration (Week 2) + +**Goal:** Improve user engagement and rationale capture + +- [ ] Restructure spec questions into phased approach +- [ ] Add WHY questions to spec generation +- [ ] Create ADR template +- [ ] Add rationale extraction to context analysis +- [ ] Test interactive questioning flow + +**Deliverable:** Enhanced `generate-spec` with phased questions and WHY capture + +--- + +### Sprint 3: Architecture & Review (Week 3) + +**Goal:** Add missing workflow phases from Claude Code + +- [ ] Create `generate-architecture-options` prompt +- [ ] Create `review-implementation` prompt +- [ ] Integrate with existing workflow +- [ ] Document complete end-to-end flow +- [ ] Create examples and tutorials + +**Deliverable:** Complete workflow with all phases + +--- + +## Success Metrics + +### Qualitative + +- ✅ Analysis includes file:line citations for all claims +- ✅ Confidence levels clearly marked +- ✅ User questions get thoughtful answers (not "whatever you think") +- ✅ Rationale captured with direct quotes +- ✅ Gaps explicitly documented vs. 
hidden + +### Quantitative + +- ✅ 100% of code findings have file:line evidence +- ✅ 100% of doc findings have path#heading source +- ✅ 100% of user answers captured as quotes +- ✅ <5 batch questions per phase (forces interactive dialog) +- ✅ 5-10 essential files identified per analysis + +--- + +## References + +- **Claude Code feature-dev:** [Comparison document](./claude-code-feature-dev-comparison.md) +- **code-analyst.md:** Specialized agent for code analysis +- **information-analyst.md:** Specialized agent for documentation analysis +- **context_bootstrap.md:** Manager orchestration pattern +- **MADR Format:** [Architecture Decision Records (MADR)](https://adr.github.io/madr/) diff --git a/docs/roadmap/PROGRESS.md b/docs/roadmap/PROGRESS.md new file mode 100644 index 0000000..4de93e8 --- /dev/null +++ b/docs/roadmap/PROGRESS.md @@ -0,0 +1,682 @@ +# MCP Spec-Driven Development - Implementation Progress + +**Last Updated:** 2025-01-21 +**Current Branch:** `add-reverse-engineer-codebase-prompt` +**Status:** Phase 1 Complete - Ready for Review + +--- + +## Overview + +This document tracks the implementation of improvements to our MCP spec-driven development prompts based on research from: + +1. Claude Code feature-dev plugin analysis +2. Existing research files (code-analyst.md, information-analyst.md, context_bootstrap.md) + +**Goal:** Enhance our prompts with evidence-based analysis, confidence assessment, and mandatory clarifying phases inspired by battle-tested patterns. + +--- + +## Current Status: Phase 1 Complete ✅ + +### Completed Work (This PR) + +#### 1. 
Research & Analysis ✅ + +**Branch:** `add-reverse-engineer-codebase-prompt` +**Commits:** 4 commits +**Status:** Complete + +**Deliverables:** + +- ✅ `docs/research/codebase-context/claude-code-feature-dev-comparison.md` (18,287 words) + - Complete 7-phase workflow analysis + - Agent specifications (code-explorer, code-architect, code-reviewer) + - Gap analysis with priority matrix + - Implementation roadmap + +- ✅ `docs/research/codebase-context/research-synthesis.md` (8,000+ words) + - Integration of all research sources + - Actionable recommendations with priorities + - Specific enhancements for each prompt + - Implementation checklist + +- ✅ Cataloged existing research files: + - `code-analyst.md` - WHAT/HOW from code + - `information-analyst.md` - WHY from documentation + - `context_bootstrap.md` - Manager orchestration pattern + +#### 2. Renamed Prompt ✅ + +- ✅ Renamed `reverse-engineer-codebase` → `generate-context` + - Better reflects purpose: generating context for development + - Aligns with workflow terminology + - Shorter, more concise name + +#### 3. Enhanced `generate-context` Prompt ✅ + +**File:** `prompts/generate-context.md` +**Lines:** 877 lines (up from ~500) +**Status:** Complete and ready for use + +**Major Enhancements:** + +##### Evidence Citation Standards ✅ + +- **For Code:** `path/to/file.ts:45-67` with line ranges +- **For Docs:** `path/to/doc.md#section-heading` with timestamps +- **For User Input:** `[User confirmed: YYYY-MM-DD]` with direct quotes +- **Example:** "Authentication uses JWT (src/auth/jwt.ts:23-45)" + +##### Confidence Assessment ✅ + +Every finding must be categorized: + +- 🟢 **High Confidence:** Strong evidence from working code or explicit docs +- 🟡 **Medium Confidence:** Inferred, behind feature flags, or implied +- 🔴 **Low Confidence:** Cannot determine, conflicts, or unknowns + +Explicitly flags items needing user validation. 
+ +##### Separation of Concerns ✅ + +- **WHAT/HOW:** Discovered from code analysis (stay in lane, don't infer WHY) +- **WHY:** Extracted from documentation (rationale, decisions, trade-offs) +- **Intent/Goals:** Provided by user (fills gaps, resolves conflicts) + +##### Phased Analysis Process ✅ + +1. **Repository Structure Analysis** - Auto-detect layout, tech stack +2. **Documentation Audit** - Scan, inventory, extract rationale, flag gaps +3. **Code Analysis (WHAT + HOW)** - Features, workflows, architecture, patterns +4. **Integration Points** - External services, dependencies, events +5. **Gap Identification** - Missing rationale, conflicts, unknowns +6. **User Collaboration** - Short focused questions (3-5 max), not batch +7. **Generate Analysis** - Complete evidence-based document + +##### Interactive Questioning ✅ + +- **OLD:** Long batch questionnaires +- **NEW:** Short rounds (3-5 questions max), wait for answers, ask follow-ups +- Captures user answers as direct quotes for later citation + +##### Execution Path Tracing ✅ + +Step-by-step flow examples: + +```text +User Login Flow: +1. POST /api/auth/login → src/api/routes/auth.ts:23 +2. AuthController.login() → src/controllers/AuthController.ts:45 +3. AuthService.validateCredentials() → src/services/AuthService.ts:67 +... 
+``` + +##### Essential Files List ✅ + +- 5-10 priority files with specific line ranges +- **Example:** `src/services/UserService.ts:45-234` - Core user management logic + +##### Comprehensive Example Output ✅ + +- Full 13-section document structure with real examples +- Shows proper evidence citations, confidence emojis, execution traces +- Includes gap documentation, open questions, next steps + +##### Quality Checklist ✅ + +Pre-completion verification: + +- [ ] All code findings have file:line citations +- [ ] All doc findings have path#heading references +- [ ] User answers captured as quotes with dates +- [ ] Confidence levels marked +- [ ] Essential files list complete (5-10 files) +- [ ] At least 2 execution path traces +- [ ] Gaps/unknowns explicitly documented + +##### Key Principles Embedded ✅ + +1. Evidence-based (every claim needs proof) +2. Confidence levels (distinguish fact from inference) +3. Stay in lane (don't guess WHY from code) +4. Flag gaps explicitly (better "Unknown" than guessing) +5. Interactive not batch (short questions) +6. Actionable outputs (specific files, traces, recommendations) + +--- + +## What This PR Includes + +### Files Added/Modified + +```text +✅ prompts/generate-context.md (enhanced) +✅ docs/research/codebase-context/claude-code-feature-dev-comparison.md (new) +✅ docs/research/codebase-context/research-synthesis.md (new) +✅ docs/research/codebase-context/code-analyst.md (cataloged) +✅ docs/research/codebase-context/information-analyst.md (cataloged) +✅ docs/research/codebase-context/context_bootstrap.md (cataloged) +✅ docs/roadmap/PROGRESS.md (new - this file) +``` + +### Commits + +1. `feat: add reverse-engineer-codebase prompt for contextual analysis` +2. `refactor: rename reverse-engineer-codebase to generate-context` +3. `docs: add comprehensive research analysis for prompt improvements` +4. 
`feat: enhance generate-context with evidence citations and confidence levels` + +### Ready for Review + +- ✅ All code changes committed +- ✅ Research documented +- ✅ Enhanced prompt tested with prompt loader +- ✅ Progress tracked +- ⏳ Awaiting PR review + +--- + +## Phase 2: Future Improvements (Next PR) + +The following improvements are **documented and ready to implement** but will be handled in a separate branch/PR to keep this PR focused and reviewable. + +### Priority 1: Critical Workflow Enhancements + +#### A. Enhance `generate-spec` with Mandatory Clarifying Phase + +**File:** `prompts/generate-spec.md` +**Status:** Planned for next PR +**Estimated Effort:** Medium (2-3 hours) + +**Changes Needed:** + +1. **Add Phase 2A: Context Review (NEW)** + - Prerequisite: Must have run `generate-context` + - Read analysis document + - Review essential files identified + - Understand existing patterns + +2. **Add Phase 3: Clarifying Questions ⭐ CRITICAL (ENHANCED)** + - **Make it MANDATORY STOP POINT** + - Add WHY questions: + - What problem does this solve? + - Why build this now? (strategic fit) + - What's the business value? + - How will we measure success? + - Phase questions into rounds: + - Round 1: Core requirements (3-5 questions) + - **⛔ STOP - Wait for answers** + - Round 2: Context & constraints (based on Round 1 answers) + - **⛔ STOP - Wait for answers** + - Round 3: Refinement (fill remaining gaps) + - **⛔ STOP - Wait for final confirmation** + - Capture all answers as direct quotes with dates + - Reference codebase context document explicitly + +3. **Update Spec Structure** + - Add "Context & Rationale" section (WHY) + - Include evidence citations when referencing existing code + - Reference decisions from codebase context analysis + +**Impact:** Prevents building wrong features by ensuring all requirements are clear before design begins. + +**Research Source:** Claude Code Phase 3 + information-analyst.md patterns + +--- + +#### B. 
Create `generate-architecture-options` Prompt (NEW) + +**File:** `prompts/generate-architecture-options.md` +**Status:** Planned for next PR +**Estimated Effort:** High (4-5 hours) + +**Purpose:** Generate 2-3 architectural approaches with trade-off analysis before task generation + +**Process:** + +1. **Prerequisites Check:** + - Spec document exists + - Codebase context analysis exists + - User has approved spec + +2. **Review Context:** + - Read spec document + - Read codebase context analysis + - Review essential files identified + +3. **Generate 3 Approaches:** + - **Minimal Changes:** Smallest change, maximum code reuse, fastest to ship + - **Clean Architecture:** Best maintainability, elegant abstractions, most extensible + - **Pragmatic Balance:** Balanced trade-off between speed and quality + +4. **For Each Approach:** + - **Patterns & Conventions Found:** With file:line references + - **Architecture Decision:** Clear choice with rationale + - **Component Design:** Files, responsibilities, dependencies, interfaces + - **Implementation Map:** Specific files to create/modify with details + - **Data Flow:** Entry → transformations → output + - **Build Sequence:** Phased checklist + - **Trade-offs:** Pros and cons explicitly stated + - **Critical Details:** Error handling, state, testing, performance, security + +5. **Present to User:** + - Brief summary of each approach + - Trade-offs comparison table + - **AI recommendation with reasoning** (based on codebase context) + - Concrete implementation differences + +6. **⛔ STOP - User must choose approach** + +7. 
**Save Choice:** + - Save chosen approach to `/tasks/architecture-[spec-number].md` + - Document rationale for choice (for future ADR) + +**Output Example:** + +```markdown +# Architecture Options: User Profile Editing (Spec 0001) + +## Approach 1: Minimal Changes +**Summary:** Extend existing UserService, add new endpoint to existing routes +**Pros:** +- Fast (2-3 days) +- Low risk (minimal code changes) +- Uses familiar patterns +**Cons:** +- Couples new feature to existing code +- Harder to test in isolation +- May not scale if requirements expand + +## Approach 2: Clean Architecture +**Summary:** New ProfileService with dedicated interface, separate routes +**Pros:** +- Clean separation of concerns +- Easy to test and extend +- Sets good pattern for future features +**Cons:** +- More files (slower initial development) +- Requires refactoring some existing code +- Team needs to learn new pattern + +## Approach 3: Pragmatic Balance (RECOMMENDED) +**Summary:** New ProfileService integrated into existing structure +**Pros:** +- Good boundaries without excessive refactoring +- Testable and maintainable +- Fits existing architecture patterns +**Cons:** +- Some coupling remains to UserService + +**Recommendation:** Approach 3 - Based on codebase context analysis showing layered architecture with service boundaries, this approach provides clean separation while avoiding extensive refactoring. Aligns with existing patterns in `src/services/PaymentService.ts:34-178`. + +**Which approach do you prefer?** +``` + +**Impact:** Enables better architectural decisions by presenting options with explicit trade-offs rather than single solution. + +**Research Source:** Claude Code code-architect agent + Phase 4 + +--- + +#### C. Create `review-implementation` Prompt (NEW) + +**File:** `prompts/review-implementation.md` +**Status:** Planned for next PR +**Estimated Effort:** High (4-5 hours) + +**Purpose:** Quality review before considering feature complete + +**Process:** + +1. 
**Prerequisites:** + - All implementation tasks marked complete in task list + - Code has been committed (but not pushed/PR'd yet) + +2. **Review Scope:** + - All modified files + - All created files + - Related tests + +3. **Multi-Focus Review:** + - **Focus 1: Bugs & Correctness** + - Logic errors + - Edge case handling + - Null/undefined handling + - Error propagation + - Race conditions + + - **Focus 2: Code Quality** + - DRY violations (duplicate code) + - Complexity (can it be simpler?) + - Readability (clear intent?) + - Maintainability (easy to change?) + + - **Focus 3: Project Conventions** + - CLAUDE.md guidelines compliance + - Naming conventions + - File organization patterns + - Testing patterns + - Code style (linter rules) + +4. **Confidence-Based Filtering:** + - Only report issues with ≥80% confidence + - Avoid nitpicks and opinions + - Focus on objective problems + +5. **Categorize Findings:** + - **Critical (Must Fix):** Bugs, security issues, breaking changes + - **Important (Should Fix):** Code quality, maintainability concerns + - **Nice-to-Have (Optional):** Optimizations, minor improvements + +6. **Present to User:** + + ```markdown + ## Review Findings + + ### Critical Issues (Must Fix) 🔴 + 1. **Missing error handling in OAuth callback** + - File: src/auth/oauth.ts:67 + - Issue: Network failures not caught, will crash server + - Fix: Add try-catch with proper error response + - Confidence: 95% + + ### Important Issues (Should Fix) 🟡 + 1. **Memory leak: OAuth state not cleaned up** + - File: src/auth/oauth.ts:89 + - Issue: State map grows unbounded + - Fix: Add TTL or cleanup job + - Confidence: 85% + + ### Optional Improvements 🟢 + 1. **Could simplify token refresh logic** + - File: src/auth/oauth.ts:120 + - Suggestion: Extract to separate function + - Confidence: 80% + ``` + +7. **⛔ STOP - Ask user what to do:** + - Fix all issues now? + - Fix only critical issues? + - Fix later (document as tech debt)? + - Proceed as-is? + +8. 
**Take Action:** + - Apply fixes based on user decision + - Update task list to mark review complete + - Document any deferred issues + +**Impact:** Catches quality issues and bugs before they reach production/PR. + +**Research Source:** Claude Code code-reviewer agent + Phase 6 + +--- + +### Priority 2: Documentation & Workflow + +#### D. Update Workflow Documentation + +**File:** `docs/WORKFLOW.md` (new) +**Status:** Planned for next PR +**Estimated Effort:** Low (1-2 hours) + +**Content:** + +```markdown +# Spec-Driven Development Workflow + +## Complete Flow + +1. **Analyze Codebase** - `generate-context` + - Output: `/docs/00[n]-SYSTEM.md` + - Evidence-based analysis with citations + - Confidence levels for all findings + - Essential files list + execution traces + +2. **Create Specification** - `generate-spec` + - Prerequisites: Context analysis complete + - ⛔ STOP: Answer clarifying questions (phased) + - Output: `/tasks/[n]-spec-[feature].md` + - Includes WHY and evidence citations + +3. **Design Architecture** - `generate-architecture-options` + - Prerequisites: Spec approved + - Review 3 approaches with trade-offs + - ⛔ STOP: Choose architectural approach + - Output: `/tasks/architecture-[n].md` + +4. **Generate Tasks** - `generate-task-list-from-spec` + - Prerequisites: Architecture chosen + - References chosen approach + - ⛔ STOP: Approve parent tasks before sub-tasks + - Output: `/tasks/tasks-[n]-spec-[feature].md` + +5. **Execute Implementation** - `manage-tasks` + - Follow task list sequentially + - Run tests after each parent task + - Validate demo criteria + - Commit with conventional format + +6. **Review Quality** - `review-implementation` + - Prerequisites: All tasks complete + - Multi-focus review (bugs, quality, conventions) + - ⛔ STOP: Decide what issues to fix + - Fix issues as directed + +7. 
**Complete** + - Create PR + - Deploy + - Document decisions (ADRs if needed) + +## Workflow Diagram + +[Include visual diagram] + +## Best Practices + +1. Always run `generate-context` before starting new features +2. Answer all clarifying questions thoughtfully +3. Review architecture options carefully - impacts long-term maintainability +4. Don't skip quality review - catches issues early +5. Reference context analysis when making decisions + +## Example Session + +[Include complete example walkthrough] +``` + +--- + +#### E. Create ADR Template + +**File:** `prompts/templates/adr-template.md` (new) +**Status:** Planned for next PR +**Estimated Effort:** Low (30 minutes) + +**Content:** + +- MADR format template +- Sections for context, decision drivers, options, outcome, consequences +- Examples of good vs bad ADRs +- Instructions for when to create ADRs + +**Usage:** Referenced by `generate-architecture-options` for documenting chosen approach + +--- + +#### F. Create Examples & Tutorials + +**Files:** `docs/examples/` (new directory) +**Status:** Planned for future PR +**Estimated Effort:** Medium (3-4 hours) + +**Content:** + +- Complete example: Full workflow walkthrough +- Before/after examples showing improvements +- Common patterns and solutions +- Troubleshooting guide + +--- + +## Implementation Roadmap + +### This PR (Phase 1) ✅ COMPLETE + +**Branch:** `add-reverse-engineer-codebase-prompt` +**Timeline:** Complete +**Deliverables:** + +- ✅ Research analysis and synthesis +- ✅ Enhanced `generate-context` prompt +- ✅ Progress documentation + +**Merge Criteria:** + +- [x] All commits clean and documented +- [x] Enhanced prompt tested +- [x] Research findings documented +- [ ] PR review approved +- [ ] Tests passing (if applicable) + +--- + +### Next PR (Phase 2) - Critical Workflow Enhancements + +**Branch:** `enhance-spec-and-add-architecture-review` (future) +**Timeline:** 2-3 days work +**Estimated Effort:** High (10-12 hours) + +**Deliverables:** + +- 
[ ] Enhanced `generate-spec` with mandatory clarifying phase +- [ ] New `generate-architecture-options` prompt +- [ ] New `review-implementation` prompt +- [ ] Updated workflow documentation +- [ ] ADR template + +**Priority:** HIGH - These are critical gaps identified in research +**Blocking:** None (Phase 1 complete) + +**Acceptance Criteria:** + +- [ ] All 3 prompts work independently +- [ ] Workflow flows smoothly from context → spec → architecture → tasks → review +- [ ] Evidence citations and confidence levels used throughout +- [ ] User checkpoints (⛔ STOP) enforced +- [ ] Documentation complete with examples + +--- + +### Future PR (Phase 3) - Polish & Examples + +**Branch:** TBD +**Timeline:** 1-2 days work +**Estimated Effort:** Medium (4-6 hours) + +**Deliverables:** + +- [ ] Complete example walkthrough +- [ ] Best practices guide +- [ ] Troubleshooting documentation +- [ ] Before/after comparisons + +**Priority:** MEDIUM - Improves usability but not blocking +**Blocking:** Phase 2 complete + +--- + +## Success Metrics + +### Phase 1 (This PR) ✅ + +- ✅ Evidence citations present in 100% of code findings +- ✅ Confidence levels marked for all findings +- ✅ Documentation audit phase included +- ✅ Interactive questioning approach documented +- ✅ Essential files list structure defined +- ✅ Execution path traces included in examples + +### Phase 2 (Next PR) + +- [ ] Clarifying questions are mandatory (cannot proceed without answers) +- [ ] Architecture options always present 2-3 approaches +- [ ] User must explicitly choose architecture before tasks generated +- [ ] Review catches common issues before PR +- [ ] All prompts use evidence citation standards +- [ ] Complete workflow documented with examples + +### Phase 3 (Future PR) + +- [ ] Examples cover common use cases +- [ ] New users can follow tutorial successfully +- [ ] Troubleshooting guide addresses common issues + +--- + +## Key Decisions Made + +### Decision 1: Evidence Citations + +**Decision:** 
Require file:line for code, path#heading for docs, dated quotes for users +**Rationale:** Provides traceability and accountability for all findings +**Source:** code-analyst.md (lines 267-273, Key Principles), + information-analyst.md (lines 151-159, Key Principles) + +### Decision 2: Confidence Levels + +**Decision:** Categorize all findings as High/Medium/Low confidence +**Rationale:** Distinguishes facts from inferences, flags items needing validation +**Source:** Research synthesis recommendations + +### Decision 3: Phased Implementation + +**Decision:** Split improvements across multiple PRs (Phase 1 = context, Phase 2 = spec+arch+review) +**Rationale:** Keeps PRs focused and reviewable, allows incremental adoption +**Source:** Team decision for maintainability + +### Decision 4: Interactive Questioning + +**Decision:** Replace batch questionnaires with short focused rounds +**Rationale:** Better user engagement, more thoughtful answers +**Source:** context_bootstrap.md (lines 38-42, Interactive Dialog principle), + Claude Code Phase 3 (see docs/research/codebase-context/claude-code-feature-dev-comparison.md, lines 66-87) + +### Decision 5: Mandatory Clarifying Phase + +**Decision:** Make clarifying questions a STOP point in spec generation +**Rationale:** Most feature failures from misunderstood requirements - prevent this +**Source:** Claude Code research showing this as critical phase + +--- + +## References + +### Research Documents + +- [Claude Code Feature-Dev Comparison](./research/codebase-context/claude-code-feature-dev-comparison.md) +- [Research Synthesis](./research/codebase-context/research-synthesis.md) +- [Code Analyst Pattern](./research/codebase-context/code-analyst.md) +- [Information Analyst Pattern](./research/codebase-context/information-analyst.md) +- [Context Bootstrap Pattern](./research/codebase-context/context_bootstrap.md) + +### External Links + +- [Claude Code Repository](https://github.com/anthropics/claude-code) +- [Feature-Dev 
Plugin](https://github.com/anthropics/claude-code/tree/main/plugins/feature-dev) +- [MADR Format](https://adr.github.io/madr/) + +--- + +## Contact & Questions + +For questions about this implementation: + +- Review research documents in `docs/research/codebase-context/` +- Check progress updates in this document +- Refer to commit messages for detailed change rationale + +--- + +**Document Status:** Living document - updated with each phase +**Next Update:** After Phase 2 PR merge diff --git a/prompts/generate-context.md b/prompts/generate-context.md new file mode 100644 index 0000000..01f433b --- /dev/null +++ b/prompts/generate-context.md @@ -0,0 +1,1672 @@ +--- +name: generate-context +description: "Generate codebase context by analyzing architecture, patterns, and conventions for spec-driven development" +tags: + - analysis + - architecture + - discovery +arguments: + - name: no_questions + description: "Skip interactive questions and generate analysis autonomously (default: false)" + required: false +meta: + category: spec-development + allowed-tools: Glob, Grep, LS, Read, Edit, MultiEdit, Write, WebFetch, WebSearch +--- + +## Generate Codebase Context + +## Goal + +To guide an AI assistant in thoroughly analyzing and understanding a codebase's architecture, structure, patterns, and conventions. This analysis provides essential context for spec-driven feature development, ensuring new features integrate seamlessly with existing code and follow established patterns. + +**Core Principle:** Code explains WHAT the system does and HOW it's built. Documentation explains WHY choices were made. Users provide goals and intent. Keep these separate and clearly attributed. + +--- + +## ⚠️ CRITICAL EXECUTION RULE - READ FIRST + +### Interactive Mode (Default) + +**This is an INTERACTIVE, MULTI-TURN conversational process.** + +You **MUST** follow this workflow: + +1. **Complete Phase 1** → ASK QUESTIONS → **STOP and WAIT** for user answers +2. 
**Complete Phase 2** → IF questions needed, ASK and WAIT; OTHERWISE proceed to Phase 3 +3. **Complete Phase 3** → ASK VALIDATION QUESTIONS → **STOP and WAIT** for user answers +4. **Complete Phase 3.5** → PRESENT FINDINGS → **STOP and WAIT** for user to discuss +5. **Complete Phase 4** → IF integration issues found, ASK and WAIT; OTHERWISE proceed to Phase 5 +6. **Complete Phase 5** → IF gaps found, proceed to Phase 5.5 or ASK and WAIT; OTHERWISE proceed to Phase 6 +7. **Complete Phase 5.5 (Optional)** → IF autonomous answers needed, generate them; OTHERWISE skip to Phase 6 +8. **Finally, Phase 6** → Generate final document + +**Auto-Continue Rules:** + +- **Phase 2**: If no conflicts or gaps found in documentation, state "No clarification needed" and proceed to Phase 3 +- **Phase 4**: If no integration/dependency issues found, state "No integration issues" and proceed to Phase 5 +- **Phase 5**: If no gaps/unknowns found, state "No significant gaps identified" and proceed to Phase 6; if gaps exist, proceed to Phase 5.5 or ask user +- **Phase 5.5**: If no gaps require autonomous reasoning OR user requests manual input, skip to Phase 6; otherwise generate autonomous answers and proceed to Phase 6 +- **All other phases**: MUST stop and wait for user input + +**NEVER skip checkpoints when questions exist. 
NEVER proceed without user input at ⛔ STOP points that require answers.** + +If you find yourself generating the final document without having asked questions and received answers (when questions were needed), **YOU HAVE FAILED TO FOLLOW INSTRUCTIONS.** + +### No-Questions Mode (--no_questions flag) + +**When `no_questions=true` is specified:** + +- **Skip all STOP checkpoints** - proceed through all phases autonomously +- **Make reasonable assumptions** - document assumptions clearly with 🔵 Assumed confidence level +- **Flag all assumptions** - list all assumptions made in a dedicated section +- **Note uncertainties** - mark areas where user input would improve accuracy +- **Generate complete document** - proceed directly to Phase 6 after analysis + +**Assumed findings format:** "PostgreSQL used (package.json:23) 🔵 Assumed: chosen for ACID compliance (no documented rationale)" + +--- + +## AI Behavior Guidelines + +**Critical Rules for Execution:** + +- **Do not summarize without evidence:** Every claim must be backed by file:line citations or doc references +- **Use citations before synthesis:** Gather evidence first, then draw conclusions +- **When uncertain, explicitly state "Cannot confirm":** Better to flag unknowns than guess +- **Never infer rationale (WHY) unless documented or confirmed by user:** Stay in your lane +- **Ask 3-5 focused questions per round:** Not long questionnaires - short, conversational iteration +- **Present findings incrementally:** Don't wait until the end - engage user throughout +- **Flag Medium/Low confidence items immediately:** Users should validate uncertain findings early + +## Tool Usage by Phase + +This prompt requires specific tools for different analysis phases: + +- **Phase 1 (Repository Structure):** + - `Glob` - Enumerate files and directories, detect project structure + - `Read` - Inspect key configuration files (package.json, requirements.txt, etc.) 
+ +- **Phase 2 (Documentation Audit):** + - `Glob` - Find documentation files (`**/*.md`, `**/docs/**`) + - `Read` - Extract content and metadata from docs + - `Grep` - Search for specific decision rationale or WHY statements + +- **Phase 3 (Code Analysis):** + - `Grep` - Search for patterns, imports, framework usage + - `Read` - Inspect specific files for WHAT and HOW + - `Glob` - Find related files (e.g., all controllers, all services) + +- **Phase 3.5 (Pattern Recognition):** + - `Grep` - Detect recurring patterns across files + - `Read` - Verify pattern implementation details + +- **Phase 4 (Integration Points):** + - `Grep` - Find API calls, database queries, external service usage + - `Read` - Understand integration implementation + +- **Phase 5 (Gaps & User Collaboration):** + - No tools - conversational phase with user + +- **Phase 6 (Document Generation):** + - `Write` - Create final analysis document + +## Output + +- **Format:** Markdown (`.md`) +- **Location:** `/docs/` +- **Filename:** `00[n]-SYSTEM.md` (Where `n` is a single digit starting from 1, e.g., `001-SYSTEM.md`, `002-SYSTEM.md`, etc.) 
+ +## Evidence Citation Standards + +**Every finding MUST include evidence:** + +### For Code Findings + +- **Format:** `path/to/file.ts:45-67` (include line range when relevant) +- **Example:** "Authentication uses JWT tokens (src/auth/AuthService.ts:23-45)" +- Always provide specific line numbers, not just file names + +### For Documentation Findings + +- **Format:** `path/to/doc.md#section-heading` or `path/to/doc.md:page-N` +- **Example:** "PostgreSQL chosen for ACID guarantees (docs/architecture.md#database-decision)" +- Include last modified timestamp when available: `(docs/ADR-001.md, updated 2024-12-15)` + +### For User-Provided Information + +- **Format:** "[User confirmed: YYYY-MM-DD]" or "[User stated: 'direct quote']" +- **Example:** "OAuth2 required by compliance team [User confirmed: 2025-01-21]" +- Use direct quotes when possible to preserve exact meaning + +## Confidence Assessment + +Categorize every finding by confidence level: + +### Assumed (🔵) - No-Questions Mode Only + +- **Criteria:** Reasonable inference made during autonomous analysis without user confirmation +- **Usage:** Only used when `no_questions=true` flag is set +- **Examples:** + - "PostgreSQL used (package.json:23) 🔵 Assumed: chosen for ACID compliance (no documented rationale)" + - "Microservices pattern (inferred from directory structure) 🔵 Assumed: supports team autonomy" +- **Note:** All assumed findings should be listed in a dedicated "Assumptions Made" section + +### High Confidence (🟢) + +- **Criteria:** Strong evidence from working code or explicit documentation +- **Automation Examples:** + - `Grep` confirms 3+ consistent code references across different files + - Feature exists in working code with traced execution path + - Technology explicitly listed in dependencies AND usage found in code + - Design decision documented in ADR with matching code implementation +- **Manual Verification:** + - Feature exists with traced working code path + - Explicit documentation with 
recent timestamps + - Active usage in production code (not commented out) + +### Medium Confidence (🟡 Needs Validation) + +- **Criteria:** Inferred from context, behind feature flags, or implied +- **Automation Examples:** + - Evidence only appears in code comments (not executable code) + - `Grep` finds 1-2 references only (limited usage) + - Pattern inferred from file structure but not explicitly implemented + - Dependency listed but no usage found in code +- **Manual Verification:** + - Feature toggle currently disabled (code exists but may not be active) + - Pattern inferred from code structure (not explicitly documented) + - Outdated documentation (>6 months old) that may not reflect current code + +### Low Confidence (🔴 Unknown) + +- **Criteria:** Cannot determine from available information +- **Automation Examples:** + - No code references found via `Grep` + - Conflicting dependency versions + - Files exist but appear unreferenced +- **Manual Verification:** + - Rationale missing from both docs and code + - Conflicting information between sources (code vs. docs) + - Experimental or dormant code paths + - Dead code that may no longer be used + +**Automatic Confidence Rules:** + +- If `Grep/Glob` confirms ≥3 consistent references → Start with Medium, verify for High +- If evidence only in comments → Maximum Medium Confidence +- If no code references found → Start with Low Confidence +- If docs are >6 months old without code confirmation → Maximum Medium Confidence + +### Always Flag Medium- and Low-Confidence Items for User Validation + +## Process + +This is a **conversational, iterative analysis process**. The AI should engage the user throughout, asking focused questions and presenting findings for validation. + +**Important:** Ask short, focused questions. NOT long questionnaires. Get answers, then ask follow-ups based on those answers. 
+ +--- + +### Phase 1: Repository Structure Analysis + +**Goal:** Understand the overall repository layout and scope + +#### Automated Discovery + +Automatically detect and analyze: + +1. **Repository Type:** + - Single application (src/, config/, tests/) + - Monorepo with packages/apps (packages/*, apps/*) + - Multi-service workspace (multiple peer directories with independent build tools) + - Hybrid or custom structure + +2. **Tech Stack Detection:** + - Languages (from file extensions and config files) + - Build tools (package.json, requirements.txt, Cargo.toml, go.mod, pom.xml, etc.) + - Frameworks (from dependencies) + - Testing frameworks (from devDependencies or test config) + +3. **Entry Points:** + - Main application files + - API route definitions + - CLI entry points + - Background job/worker entry points + +4. **Directory Structure:** + - Map high-level organization + - Identify patterns (feature-based, layer-based, domain-driven) + +5. **Repository Size Assessment:** + - Count total files (use `Glob` with appropriate patterns) + - Estimate total lines of code (sample representative files) + - Check for large binary assets or dependencies + +#### Scoping Controls (Automatic) + +**If repository exceeds these thresholds, request narrowed scope:** + +- **>5,000 files:** "This repository has [N] files. To ensure focused analysis, please specify which components or directories to analyze." +- **>100 MB of source code:** "This is a large codebase. Would you like me to focus on specific modules or services?" +- **Multiple independent apps:** "I've detected [N] independent applications. Should I analyze all, or focus on specific ones?" 
+ +**Scoping Options to Present:** + +- Option A: Full repository analysis (may take significant time) +- Option B: Focus on specific directory/module (e.g., `src/auth/`, `packages/api/`) +- Option C: Focus on specific functionality (e.g., "authentication flow", "payment processing") + +**Present to user:** "I've detected [structure type] with [key components]. Is this correct?" + +#### Questions for User (Short - 3 questions max) + +**Important:** Keep questions brief and focused. Extended questionnaires reduce engagement +and response quality. You'll ask follow-up questions in Phase 5 based on these answers. + +1. **Scope:** Should I analyze the entire codebase, or focus on specific components? If specific, which ones? + +2. **Purpose:** What's the primary reason for this analysis? + - a) Adding a new feature + - b) Refactoring existing code + - c) Understanding legacy system + - d) Onboarding new team members + - e) Other: [specify] + +3. **Priority Areas:** Which are most important for your upcoming work? (Select all that apply) + - a) Database/Data layer + - b) API/Routes + - c) Authentication/Authorization + - d) Frontend/UI + - e) Testing approach + - f) Build/Deploy pipeline + - g) Other: [specify] + +--- + +## 🛑 STOP HERE - PHASE 1 COMPLETE + +### ⛔ DO NOT PROCEED TO PHASE 2 WITHOUT USER ANSWERS + +**You MUST wait for the user to respond to the 3 questions above.** + +**If you proceed without answers, you are violating the critical execution rule.** + +--- + +### Phase 2: Documentation Audit + +**Goal:** Inventory existing documentation and extract any recorded rationale + +#### Scan for Documentation + +Find and catalog: + +1. **In-Repository Documentation:** + - README files (all levels) + - docs/, documentation/, wiki/ directories + - ARCHITECTURE.md, DESIGN.md, CONTRIBUTING.md + - Architecture diagrams (*.png,*.jpg, *.svg,*.drawio in docs/) + - ADRs (Architecture Decision Records) + - CHANGELOG.md, migration guides + +2. 
**Capture Metadata:** + - Relative path from repo root + - Document title/heading + - Last modified timestamp (if available from git) + - Brief description of content + +#### Extract Decision Rationale + +**This is critical - look for WHY:** + +- Why was [technology X] chosen? +- Why [pattern Y] over alternatives? +- What constraints or trade-offs influenced these decisions? +- What problems did these choices solve? + +**For each rationale found:** + +- Extract as direct quote +- Note source: `path/to/doc.md#section-heading` +- Include timestamp if available +- Mark confidence level (explicit vs. implied) + +#### Flag Issues + +- **Conflicts:** Where docs contradict each other or the code +- **Gaps:** Technologies used but no "why" documented +- **Outdated:** Docs that appear old (check timestamps) + +**Present to user:** Summary of documentation found and any conflicts/gaps discovered. Ask for clarification if needed. + +--- + +## 🛑 STOP HERE - PHASE 2 COMPLETE + +### ⛔ CHECKPOINT - AUTO-CONTINUE OR WAIT FOR USER + +**If you found conflicts or gaps:** + +- Ask for clarification and **WAIT** for user responses + +**If no clarification is needed:** + +- Present your findings summary +- State "No conflicts or gaps found - proceeding to Phase 3" +- **Auto-continue to Phase 3** (no user acknowledgment required) + +--- + +### Phase 3: Code Analysis (WHAT + HOW) + +**Goal:** Discover what the system does and how it's structured by analyzing code + +**Remember:** You are discovering WHAT and HOW from code. Do NOT infer WHY - that comes from docs or user. + +#### 3.1: System Capabilities (WHAT it does) + +**Discover working features:** + +Trace from entry points to understand: + +- **Features:** What functional capabilities exist right now? +- **User Workflows:** What complete user journeys are supported? +- **Business Rules:** What validation/calculation logic is enforced? 
+- **External Integrations:** What external systems does it integrate with (working API clients, SDKs)? + +**For each capability:** + +- Provide entry point with file:line (e.g., `src/api/routes/users.ts:12`) +- Brief description of what it does +- Key logic location (e.g., `src/services/UserService.ts:45-89`) +- Confidence level (High if working code path, Medium if behind feature toggle) + +**Trace execution paths:** + +For key workflows, provide step-by-step execution trace: + +```text +User Login Flow: +1. POST /api/auth/login → src/api/routes/auth.ts:23 +2. AuthController.login() → src/controllers/AuthController.ts:45 +3. AuthService.validateCredentials() → src/services/AuthService.ts:67 +4. UserRepository.findByEmail() → src/repositories/UserRepository.ts:34 +5. Database query → models/User.ts:89 +6. JWT generation → src/utils/jwt.ts:12 +7. Response with token → src/controllers/AuthController.ts:52 +``` + +**What NOT to include:** + +- ❌ Internal data models (implementation detail, not user-facing) +- ❌ Missing or planned features (belongs in roadmap) +- ❌ Code quality judgments (not your job) +- ❌ Specific dependency versions (too volatile) +- ❌ Testing infrastructure details + +#### 3.2: Technology Stack (WHAT technologies are used) + +**Identify major technologies:** + +From dependency files and imports, catalog: + +- **Languages:** Name only (NO version numbers) +- **Major Frameworks:** Name only (e.g., "React", "Django", "Spring Boot") +- **Databases:** Type and evidence (e.g., "PostgreSQL - connection config in src/db/config.ts:10") +- **Cloud Services:** Provider only (e.g., "AWS - SDK imports in src/aws/") +- **API Style:** REST/GraphQL/gRPC (inferred from route definitions) +- **Authentication Approach:** JWT/OAuth/Sessions (from auth code) + +**Evidence format:** + +```text +- **Framework:** React (package.json:15, imports in src/components/*.tsx) +- **Database:** PostgreSQL (package.json:23 'pg', connection in src/db/pool.ts:8) +- **Cache:** Redis 
(docker-compose.yml:34, client in src/cache/redis.ts:12) +``` + +**What NOT to include:** + +- ❌ Specific versions (e.g., "React 18.2.0" - too volatile) +- ❌ Minor utility libraries +- ❌ Testing frameworks (unless part of priority areas) + +#### 3.3: Architecture & Patterns (HOW it's structured) + +**Map components and boundaries:** + +- **Components/Services:** What are the main logical units? + - Location (directory/module) + - Purpose (inferred from code) + - Responsibilities (what it handles) + - Evidence (key files with line numbers) + +- **Communication Patterns:** + - How do components talk? (API calls, events, direct imports) + - Evidence with file:line references + - Data exchanged (brief description) + +Example: + +```text +- **API Service → Database:** + - Method: Direct ORM queries + - Evidence: src/services/UserService.ts:45 calls UserRepository.findById() + - Data: User entities +``` + +- **Service Boundaries:** + - Proper: Components that communicate via APIs/events + - Violations: Direct database access across service boundaries (flag these) + +- **Architectural Patterns:** + - Pattern name (e.g., "Layered Architecture", "Event-Driven", "CQRS") + - Evidence from code structure + - Example: "Event-driven - found publishers (src/events/publisher.ts:12) and subscribers (src/events/handlers/*.ts)" + +**Flag dormant code:** + +- Feature toggles currently disabled +- Experimental directories +- Dead code (imports show it's unused) + +#### 3.4: Conventions & Standards + +**Code organization:** + +- File naming (camelCase, kebab-case, snake_case) +- Directory patterns (feature-based, layer-based) +- Module boundaries (what imports what) + +**Code style:** + +- Linter configuration (if found) +- Formatter settings +- Key conventions from codebase + +**Git workflow:** + +- Branching strategy (from branch names if visible) +- Commit conventions (conventional commits, other patterns) + +**Present findings:** Share code analysis summary with file:line citations 
and confidence levels. + +--- + +## 🛑 STOP HERE - PHASE 3 COMPLETE + +### ⛔ DO NOT PROCEED TO PHASE 3.5 WITHOUT USER VALIDATION + +**You MUST present your findings and explicitly ask the user to validate them.** + +**Pay special attention to Medium (🟡) and Low (🔴) confidence items - these MUST be validated before proceeding.** + +**Ask questions like:** + +- "Does this analysis match your understanding of the system?" +- "Are there any inaccuracies in what I found?" +- "For the Medium confidence items, can you confirm [specific finding]?" + +**Wait for user responses before continuing.** + +--- + +### Phase 3.5: Pattern Recognition & Architectural Philosophy + +**Goal:** Bridge raw analysis with system-level architectural understanding + +**Purpose:** This phase synthesizes code findings into architectural patterns and design philosophies that guide system evolution. + +#### Design Patterns Detection + +**Automatically detect and document recurring patterns:** + +1. **Structural Patterns:** + - Repository pattern (data access layer) + - Factory pattern (object creation) + - Singleton pattern (shared instances) + - Adapter pattern (interface translation) + - **Evidence Format:** "Repository pattern used (UserRepository.ts:23-45, ProductRepository.ts:34-67, OrderRepository.ts:45-89)" + +2. **Architectural Patterns:** + - CQRS (Command Query Responsibility Segregation) + - Event Sourcing + - Microservices communication patterns + - Layered architecture (presentation, business, data) + - **Evidence Format:** "CQRS pattern: Commands in commands/, Queries in queries/ (found 12 command handlers, 8 query handlers)" + +3. 
**Framework-Specific Conventions:** + - NestJS modules and providers + - Django apps structure + - Rails MVC conventions + - Spring Boot controllers and services + - **Evidence Format:** "NestJS module pattern: Each feature has .module.ts, .controller.ts, .service.ts (auth/, users/, products/)" + +#### Anti-Pattern Detection + +**Flag concerning patterns that may indicate technical debt:** + +1. **Cyclic Dependencies:** + - Use `Grep` to detect circular imports + - **Example:** "Potential cycle: AuthService imports UserService, UserService imports AuthService" + - **Confidence:** 🔴 Low if inferred, 🟢 High if confirmed via import analysis + +2. **Cross-Layer Violations:** + - Controllers directly accessing database + - Business logic in views/templates + - Data layer calling API layer + - **Example:** "Anti-pattern: Controller directly queries database (UserController.ts:45 has SQL query)" + +3. **God Objects / Large Classes:** + - Files exceeding 500 lines + - Classes with >10 public methods + - **Example:** "Large class warning: UserService.ts (847 lines, 23 public methods)" + +#### Architectural Philosophy Synthesis + +**Infer the system's architectural philosophy (with evidence):** + +- **Modularity Approach:** + - "Highly modular: Each feature isolated in packages/ (8 independent modules found)" + - "Monolithic: Shared state across src/ (no module boundaries detected)" + +- **Coupling Level:** + - "Loose coupling: Dependency injection used (12 constructors inject interfaces)" + - "Tight coupling: Direct instantiation pattern (14 files use 'new' keyword for dependencies)" + +- **Consistency:** + - "High consistency: 95% of files follow UserModule pattern" + - "Mixed patterns: 3 different controller patterns found (REST, GraphQL, gRPC)" + +**Present findings:** "I've identified [N] architectural patterns and [M] potential anti-patterns. Key philosophy appears to be [description]." 
+ +--- + +## 🛑 STOP HERE - PHASE 3.5 COMPLETE + +### ⛔ DO NOT PROCEED TO PHASE 4 WITHOUT USER DISCUSSION + +**You MUST present your pattern findings and give the user a chance to discuss them.** + +**Ask questions like:** + +- "Does this architectural philosophy match your understanding?" +- "Are there any patterns I've missed or misidentified?" +- "Would you like me to elaborate on any of these patterns before I continue?" + +**Wait for user acknowledgment or questions before proceeding.** + +--- + +### Phase 4: Integration Points & Dependencies + +**Goal:** Understand how the system integrates with external systems + +#### External Services + +For each external integration found: + +- **Service Name** +- **How it's used:** (API calls, SDK usage, webhooks) +- **Evidence:** File and line numbers where integration occurs +- **Configuration:** Where credentials/endpoints are configured +- **Error handling:** How failures are handled + +Example: + +```text +- **Stripe (Payment Processing):** + - Usage: Charges, subscriptions, webhooks + - Evidence: src/services/PaymentService.ts:23-156 + - Config: env vars in .env.example:12-15 + - Error handling: Retry logic in src/utils/stripe-retry.ts:8 + - Confidence: High (working code with tests) +``` + +#### Internal Dependencies + +- Shared libraries/modules +- Monorepo package dependencies +- Service-to-service communication + +#### Event/Message Patterns + +- Pub/sub systems (Redis, RabbitMQ, Kafka) +- Event-driven patterns +- WebSocket or real-time communication + +#### Crosscutting Concerns + +**Goal:** Analyze system-wide quality attributes that cut across all components + +These concerns are often overlooked but critical for understanding system maturity: + +1. **Logging & Observability:** + - Logging framework used (Winston, Log4j, Serilog, etc.) 
+ - Log levels and structure (structured logging JSON, plain text) + - Distributed tracing (OpenTelemetry, Jaeger, Zipkin) + - Metrics collection (Prometheus, StatsD, custom) + - **Evidence:** `Grep` for logger imports/usage, configuration files + - **Example:** "Structured logging with Winston (src/config/logger.ts:12, used in 47 files)" + +2. **Error Handling & Resilience:** + - Global error handling strategy + - Retry mechanisms + - Circuit breaker patterns + - Graceful degradation + - **Evidence:** Error handler middleware, retry decorators, error classes + - **Example:** "Global error handler (src/middleware/errorHandler.ts:23), Retry decorator (src/decorators/retry.ts:12-45)" + +3. **Configuration Management:** + - Environment variables strategy (.env, config files) + - Secrets management (AWS Secrets Manager, HashiCorp Vault, etc.) + - Feature flags/toggles + - Multi-environment configuration (dev, staging, prod) + - **Evidence:** Config files, environment variable usage + - **Example:** "Config via dotenv (config/.env.example has 34 vars), no secrets manager detected" + +4. **Security Practices:** + - Authentication middleware (JWT, OAuth, session-based) + - Authorization patterns (RBAC, ABAC, ACL) + - Input validation (sanitization, schema validation) + - CORS configuration + - Rate limiting + - **Evidence:** Auth middleware, validators, security headers + - **Example:** "JWT auth middleware (src/middleware/auth.ts:23), Joi validation (src/validators/, 12 schemas)" + +5. **Performance & Caching:** + - Caching strategy (Redis, in-memory, CDN) + - Database query optimization + - Lazy loading patterns + - Pagination strategies + - **Evidence:** Cache imports, query patterns + - **Example:** "Redis caching layer (src/cache/redis.ts:12, used in 8 services)" + +6. **Testing Approach:** + - Test frameworks (Jest, PyTest, JUnit, etc.) 
+ - Test coverage strategy + - Testing patterns (unit, integration, e2e) + - Mocking/stubbing approach + - **Evidence:** Test file structure, configuration files + - **Example:** "Jest with 73% coverage (jest.config.js, 234 test files in **/*.spec.ts)" + +**Confidence Assessment for Crosscutting Concerns:** + +- 🟢 High: Active implementation found with configuration and usage +- 🟡 Medium: Partial implementation or inconsistent usage +- 🔴 Low: Not implemented or unclear strategy + +**Present findings:** Crosscutting concerns summary with quality attribute assessment. + +--- + +### Phase 5: Gap Identification & User Collaboration + +**Goal:** Identify what cannot be determined from code/docs and get answers from user + +#### Automated Gap Detection + +Compare code analysis vs. documentation to find gaps, then **prioritize them**: + +**Priority Levels:** + +- 🟥 **Critical:** Blocks new development or introduces significant risk +- 🟧 **Important:** Should be resolved soon, impacts architectural decisions +- 🟨 **Minor:** Cosmetic, informational, or low-impact + +**Gap Categories with Prioritization:** + +1. **Missing Rationale:** + - Technologies used in code but no "why" in docs + - Patterns implemented but no decision record + - Architectural choices without explanation + - **Priority Assessment:** + - 🟥 Critical: Core authentication/security decisions undocumented + - 🟧 Important: Database choice, framework selection without rationale + - 🟨 Minor: Utility library choices, formatting tools + +2. **Conflicts:** + - Code contradicts documentation + - Diagrams show different structure than code + - Comments claim one thing, code does another + - **Priority Assessment:** + - 🟥 Critical: Security/auth flows mismatch code vs docs + - 🟧 Important: API contracts differ from implementation + - 🟨 Minor: Outdated diagram with minor structural differences + +3. **Unknowns:** + - Feature toggles (which are active?) + - Experimental code (what's the status?) 
+ - Dead code (can it be removed?) + - Performance requirements (what are the targets?) + - **Priority Assessment:** + - 🟥 Critical: Feature toggles blocking production features + - 🟧 Important: Experimental code in main execution paths + - 🟨 Minor: Old commented-out code, unused utilities + +**Prioritization Rules:** + +- If gap relates to **security, auth, or data integrity** → 🟥 Critical +- If gap relates to **core business logic or API contracts** → 🟧 Important +- If gap relates to **documentation quality or code cleanup** → 🟨 Minor +- If gap **blocks spec development** → Escalate priority by one level + +#### User Questions (Focused, NOT Batch) + +Ask 3-5 targeted questions based on gaps found: + +Example: + +```text +I found some gaps that need your input: + +1. **PostgreSQL vs. MongoDB:** + - Code uses PostgreSQL (src/db/pool.ts:8) + - But there's MongoDB client code (src/mongo/client.ts:12) that appears unused + - Question: Is MongoDB deprecated? Can that code be removed? + +2. **Feature Toggle 'new_dashboard':** + - Code exists for new dashboard (src/features/dashboard-v2/) + - Currently disabled (src/config/features.ts:15: enabled: false) + - Question: What's the status? Should this be documented as experimental? + +3. **Authentication Decision:** + - JWT tokens are used (src/auth/jwt.ts) + - No documentation explains why JWT was chosen over sessions + - Question: Why was JWT selected? (This will help document the decision) +``` + +--- + +## 🛑 STOP HERE - PHASE 5 COMPLETE + +### ⛔ DO NOT PROCEED TO PHASE 6 (DOCUMENT GENERATION) WITHOUT USER ANSWERS + +**This is a CRITICAL checkpoint. You MUST:** + +1. **Ask 3-5 specific gap questions** based on what you found +2. **Wait for user to answer each question** +3. **Capture answers as direct quotes with dates** +4. 
**ONLY THEN proceed to Phase 6** + +**If you have NO gaps or questions:** + +- Explicitly state "I found no significant gaps" +- **Auto-continue to Phase 6** (no user input required) + +**Capture answers as direct quotes:** + +```text +[User confirmed: 2025-01-21: "MongoDB was from an early experiment, it's safe to remove."] +[User stated: "JWT chosen because we needed stateless auth for mobile clients."] +``` + +**Once you have user answers, you may proceed to Phase 6.** + +--- + +### Phase 5.5: Autonomous Answers (Optional Decision Framework) + +**Goal:** When gaps exist but user input is not immediately available, provide reasoned autonomous answers + +**When to Use Autonomous Answers:** + +- User is unavailable or has requested autonomous analysis +- Gap is non-critical (🟨 Minor or some 🟧 Important items) +- Sufficient context exists to make reasonable inference +- Decision can be validated/corrected later + +**When NOT to Use Autonomous Answers:** + +- 🟥 Critical gaps (security, auth, data integrity decisions) +- Architectural choices with significant long-term impact +- Contradictions between code and documentation +- User has explicitly requested to be consulted + +#### Autonomous Answer Framework + +For each gap where autonomous answer is appropriate: + +1. **State the Gap:** + + ```text + GAP-003: FastMCP Framework Choice + - Evidence: FastMCP used extensively (mcp_server/__init__.py:7, 24) + - Gap: No documentation explains WHY FastMCP over alternatives + ``` + +2. **Analyze Available Context:** + + ```text + Context Analysis: + - Project is MCP (Model Context Protocol) server + - FastMCP is official Python framework for MCP + - Alternative frameworks: (none widely known for MCP in Python) + - Code shows clean integration, no workarounds + ``` + +3. **Make Reasoned Inference:** + + ```text + Autonomous Answer: 🔵 Assumed + "FastMCP chosen as the official Python framework for MCP protocol implementation. 
+ No alternatives with comparable maturity exist for Python-based MCP servers." + + Reasoning: + - FastMCP is the de-facto standard for MCP in Python + - Clean code integration suggests good framework fit + - No evidence of framework-related issues or workarounds + ``` + +4. **Flag for Validation:** + + ```text + Confidence: 🟡 Medium (reasonable inference, should be validated) + Recommendation: Document in README or ADR for future reference + Priority: 🟨 Minor (informational, not blocking) + ``` + +#### Autonomous Answer Template + +```markdown +### GAP-[N]: [Gap Title] + +**Evidence:** +- [Finding from code/docs with file:line] +- [What's missing or unclear] + +**Context Analysis:** +- [Relevant context from codebase] +- [Industry standards or common practices] +- [Evidence from code patterns] + +**Autonomous Answer:** 🔵 Assumed +"[Reasoned answer based on available context]" + +**Reasoning:** +- [Why this answer is reasonable] +- [Supporting evidence] +- [Alternative explanations considered and ruled out] + +**Confidence:** 🟡 Medium (or appropriate level) +**Recommendation:** [How to validate or document this] +**Priority:** 🟨 Minor (or appropriate level) +``` + +#### Example: Complete Autonomous Answer + +```markdown +### GAP-007: Version Pinning for FastMCP + +**Evidence:** +- pyproject.toml:13: `fastmcp>=0.1.0` (not pinned to specific version) +- No version pinning strategy documented + +**Context Analysis:** +- Project uses semantic versioning (pyproject.toml:72-96) +- FastMCP is early-stage framework (0.x version) +- Code doesn't use advanced/unstable features +- Similar projects often pin to minor version during 0.x + +**Autonomous Answer:** 🔵 Assumed +"Pin FastMCP to minor version (`fastmcp>=0.1.0,<0.2.0`) to prevent breaking changes +while allowing patch updates." 
+ +**Reasoning:** +- During 0.x development, minor versions can introduce breaking changes +- Pinning to minor version balances stability with bug fixes +- Project already uses semantic versioning, suggesting version awareness +- Code review shows no dependency on bleeding-edge features + +**Confidence:** 🟡 Medium (standard best practice, should confirm with team) +**Recommendation:** Update pyproject.toml and document in CONTRIBUTING.md +**Priority:** 🟨 Minor (preventive measure, not urgent) +``` + +#### Recording Autonomous Answers in Final Document + +**In the main analysis, reference autonomous answers:** + +```markdown +## 7. Gaps, Unknowns & Recommendations + +### 7.3 Minor Gaps (🟨) + +#### GAP-007: Version Pinning for FastMCP +**Autonomous Answer:** Pin to minor version (`fastmcp>=0.1.0,<0.2.0`) 🔵 +**Recommendation:** Update pyproject.toml: +\`\`\`toml +dependencies = [ + "fastmcp>=0.1.0,<0.2.0", # Pin to minor version + "pyyaml>=6.0.1,<7.0.0", +] +\`\`\` +**Effort:** 5 min | **Impact:** Low | **Priority:** 🟨 Minor +``` + +**In Appendix, list all autonomous answers:** + +```markdown +## Appendix D: Autonomous Answers Made + +This analysis made the following autonomous decisions where user input was not available: + +1. **GAP-003: FastMCP Framework Choice** 🔵 Assumed + - Answer: "FastMCP is the official Python framework for MCP" + - Reasoning: De-facto standard, no alternatives found + - Validation needed: Confirm in README/docs + +2. **GAP-007: Version Pinning** 🔵 Assumed + - Answer: "Pin to minor version during 0.x development" + - Reasoning: Standard best practice for pre-1.0 dependencies + - Validation needed: Confirm with team policy + +**Total Autonomous Answers:** 2 +**Validation Status:** Pending user review +``` + +#### Best Practices for Autonomous Answers + +1. **Be Conservative:** + - Only make autonomous answers for 🟨 Minor and some 🟧 Important gaps + - Never for 🟥 Critical gaps + - Default to "Unknown" if insufficient context + +2. 
**Show Your Work:** + - Document reasoning process + - List alternatives considered + - Explain why chosen answer is most reasonable + +3. **Flag Clearly:** + - Use 🔵 Assumed confidence level + - Create dedicated "Autonomous Answers" appendix + - Mark for user validation + +4. **Provide Actionable Next Steps:** + - How to validate the assumption + - How to document the decision + - Priority and effort estimate + +5. **Don't Over-Assume:** + - Better to have 2 well-reasoned autonomous answers than 10 weak ones + - If reasoning requires speculation, flag as 🔴 Unknown instead + +--- + +### Phase 6: Generate Comprehensive Analysis Document + +**Goal:** Create complete, evidence-based codebase context document + +**Output Modes:** + +- **Full Analysis (Default):** Complete detailed document with all sections (~10-20 pages) +- **Executive Summary Mode (Optional):** 2-page high-level summary first, then full details + +**To enable summary mode, user can request:** "Generate an executive summary first" + +#### Document Structure + +**If Executive Summary Mode requested, start with:** + +```markdown +# Executive Summary: [Project Name] + +**Date:** YYYY-MM-DD | **Analysis Scope:** [Full/Partial] | **Analyst:** AI Assistant + +## Quick Facts +- **Repository Type:** Monorepo with 8 packages +- **Primary Language:** TypeScript (85%), Python (15%) +- **Architecture:** Microservices with shared event bus +- **Key Technologies:** NestJS, PostgreSQL, Redis, Docker +- **Overall Maturity:** Production-ready with good test coverage (78%) + +## Strengths +- ✅ Well-documented decision records (12 ADRs) +- ✅ Consistent architectural patterns (Repository + CQRS) +- ✅ Comprehensive testing strategy +- ✅ Active logging and observability + +## Areas Needing Attention +- ⚠️ Missing rationale for Redis vs. alternatives +- ⚠️ Experimental features without clear roadmap +- ⚠️ Some anti-patterns in legacy modules + +## Recommended Next Steps +1. Document Redis decision in ADR +2. 
Clarify status of experimental features +3. Refactor legacy modules to match current patterns + +--- + +**Full detailed analysis follows below...** +``` + +#### Full Analysis Structure + +```markdown +# Codebase Context: [Project Name] + +**Date:** YYYY-MM-DD +**Scope:** [Full codebase / Specific components] +**Purpose:** [From user's stated purpose] + +--- + +## 1. Repository Overview + +### 1.1 Structure +- **Type:** [Monorepo / Single app / Multi-service workspace] +- **Components:** [List of main components/services/packages] +- **Organization:** [Feature-based / Layer-based / Domain-driven] + +### 1.2 Technology Stack +- **Languages:** [List with evidence] +- **Frameworks:** [List with evidence] +- **Databases:** [List with evidence] +- **Infrastructure:** [Cloud provider, key services] + +### 1.3 High-Level Architecture Diagram + +**Use Mermaid diagrams to visualize system architecture when beneficial. Examples:** + +**System Components:** + +\`\`\`mermaid +graph TB + subgraph "Entry Points" + CLI[CLI Tool] + HTTP[HTTP API :8080] + WS[WebSocket :8081] + end + + subgraph "Application Layer" + API[API Server] + AUTH[Auth Service] + WORKER[Background Workers] + end + + subgraph "Data Layer" + DB[(PostgreSQL)] + CACHE[(Redis)] + QUEUE[Message Queue] + end + + CLI --> API + HTTP --> API + WS --> API + API --> AUTH + API --> DB + API --> CACHE + WORKER --> QUEUE + QUEUE --> DB +\`\`\` + +**Data Flow:** + +\`\`\`mermaid +sequenceDiagram + participant User + participant API + participant Auth + participant DB + participant Cache + + User->>API: POST /api/login + API->>Auth: Validate credentials + Auth->>DB: Query user + DB-->>Auth: User data + Auth->>Cache: Store session + Auth-->>API: JWT token + API-->>User: 200 OK + token +\`\`\` + +**Only include diagrams if they add clarity - not mandatory.** + +### 1.4 Version Control & Evolution Patterns + +**Repository Health Indicators (if Git history available):** + +#### Commit Activity +- **Total commits:** ~2,450 commits 
+- **Active contributors:** 8 developers +- **Commit frequency:** ~15 commits/week (healthy pace) +- **Last major refactor:** 3 months ago + +#### Code Maturity Signals +- **High-churn files** (volatility indicators): + - `src/api/routes/users.ts` - 47 commits (high change rate) + - `src/services/PaymentService.ts` - 34 commits (complex domain) + - Indicates these are core business logic areas under active development + +- **Stable core** (low-churn files): + - `src/db/migrations/` - 5 commits total (stable schema) + - `src/config/` - 8 commits (stable configuration) + - Indicates architectural foundation is mature + +#### Ownership Patterns +- **Primary maintainers** (by commit count): + - alice@example.com: 45% of commits (backend focus) + - bob@example.com: 30% of commits (frontend focus) + - team@example.com: 15% (automated commits) + +- **Key service owners** (inferred from commit patterns): + - Auth system: alice@example.com (67% of auth/* commits) + - Payment system: charlie@example.com (80% of payment/* commits) + - Indicates domain ownership and expertise areas + +#### Architectural Evolution +- **Major changes over time:** + - 12 months ago: Monolith → Started microservices migration + - 6 months ago: Added event-driven patterns (Redis pub/sub) + - 3 months ago: Migrated from REST to GraphQL for mobile API + - **Evidence:** Commit messages, file creation dates, refactoring commits + +- **Migration status:** + - 60% of services extracted from monolith + - 40% still in legacy monolith (src/legacy/) + - **Evidence:** Directory structure + commit history + +#### Technical Debt Indicators +- **Files with highest churn + size:** + - Large + frequently changing = potential refactor targets + - Example: `src/services/OrderService.ts` (847 lines, 45 commits) + - Suggests this is a God Object that may need splitting + +**Confidence:** 🟡 Medium (depends on Git history availability) + +--- + +## 2. 
Documentation Inventory + +### 2.1 Found Documentation +- `docs/architecture.md` — Architecture overview (Last updated: 2024-11-20) +- `docs/adr/001-database-choice.md` — PostgreSQL decision (Last updated: 2024-10-15) +- `README.md` — Getting started guide (Last updated: 2024-12-01) + +### 2.2 Decision Rationale Found +1. **PostgreSQL Database:** + - **Why:** "Need ACID transactions for financial data" [docs/adr/001-database-choice.md#rationale] + - **Alternatives considered:** MongoDB, MySQL + - **Trade-off:** Performance vs. consistency - chose consistency + - **Confidence:** High (explicit ADR) + +2. **React Frontend:** + - **Why:** "Team familiarity and ecosystem" [docs/architecture.md#frontend] + - **Confidence:** Medium (documented but no detailed rationale) + +### 2.3 Gaps & Conflicts +- ❌ **Gap:** Redis caching used (src/cache/redis.ts:12) but no decision doc +- ⚠️ **Conflict:** Diagram shows microservices, code is monolithic +- ⏰ **Outdated:** API docs dated 2023-06-15, endpoints changed since then + +--- + +## 3. System Capabilities (WHAT) + +### 3.1 Core Features + +**Confidence Legend:** 🟢 High | 🟡 Medium | 🔴 Low + +#### 🟢 User Authentication +- **Entry point:** `POST /api/auth/login` → src/api/routes/auth.ts:23 +- **Flow:** + 1. Validate credentials → src/services/AuthService.ts:45 + 2. Check user in database → src/repositories/UserRepository.ts:67 + 3. Generate JWT → src/utils/jwt.ts:12 + 4. 
Return token → src/api/routes/auth.ts:34 +- **Business rules:** + - Password must be >= 8 characters (src/validators/password.ts:8) + - Max 5 failed attempts locks account (src/services/AuthService.ts:89) +- **Evidence:** Working code path, tests exist, used in production + +#### 🟡 Dashboard Analytics +- **Entry point:** `GET /api/dashboard` → src/api/routes/dashboard.ts:15 +- **Note:** Behind feature toggle `enable_new_dashboard = false` +- **Status:** [User confirmed: "Experimental, not ready for production"] +- **Evidence:** Code exists but currently disabled + +#### 🔴 Social Login +- **Entry point:** OAuth handlers in src/auth/oauth/*.ts +- **Note:** Code present but imports show it's never called +- **Status:** [User confirmed: "Deprecated, safe to remove"] +- **Evidence:** Dead code (no references found) + +### 3.2 External Integrations (Working) + +#### Stripe Payment Processing +- **Usage:** Charges, subscriptions, webhook handling +- **Evidence:** src/services/PaymentService.ts:34-178 +- **Configuration:** STRIPE_SECRET_KEY in .env +- **Error handling:** Exponential backoff retry (src/utils/payment-retry.ts:12) +- **Confidence:** 🟢 High (active production use) + +### 3.3 User Workflows + +**User Registration Flow:** +1. Submit form → src/pages/SignUp.tsx:45 +2. POST /api/users → src/api/routes/users.ts:12 +3. Validate input → src/validators/userSchema.ts:8 +4. Hash password → src/utils/bcrypt.ts:15 +5. Insert user → src/repositories/UserRepository.ts:23 +6. Send welcome email → src/services/EmailService.ts:67 +7. Auto-login → redirects to /dashboard + +--- + +## 4. 
Architecture (HOW)
+
+### 4.1 Components
+
+#### API Service
+- **Location:** src/api/
+- **Responsibilities:**
+  - HTTP routing and request handling
+  - Request validation
+  - Authentication middleware
+- **Key files:**
+  - src/api/routes/*.ts:* (route definitions)
+  - src/api/middleware/auth.ts:12 (auth middleware)
+  - src/api/middleware/validator.ts:8 (request validation)
+- **Confidence:** 🟢 High (clear boundaries)
+
+#### Business Logic Layer
+- **Location:** src/services/
+- **Responsibilities:**
+  - Core business rules
+  - Transaction orchestration
+  - External service integration
+- **Key files:**
+  - src/services/UserService.ts:45-234 (user management)
+  - src/services/PaymentService.ts:34-178 (payment processing)
+- **Confidence:** 🟢 High
+
+#### Data Access Layer
+- **Location:** src/repositories/
+- **Responsibilities:**
+  - Database queries
+  - ORM interaction
+  - Data mapping
+- **Key files:**
+  - src/repositories/BaseRepository.ts:12 (common patterns)
+  - src/repositories/UserRepository.ts:23 (user data access)
+- **Confidence:** 🟢 High
+
+**Component Diagram (Optional):**
+
+\`\`\`mermaid
+graph TB
+    subgraph "API Layer"
+        ROUTES[Routes<br/>src/api/routes/]
+        MIDDLEWARE[Middleware<br/>src/api/middleware/]
+    end
+
+    subgraph "Business Logic"
+        USER_SVC[UserService<br/>src/services/UserService.ts]
+        PAY_SVC[PaymentService<br/>src/services/PaymentService.ts]
+    end
+
+    subgraph "Data Access"
+        USER_REPO[UserRepository<br/>src/repositories/]
+        BASE_REPO[BaseRepository<br/>Common patterns]
+    end
+
+    subgraph "External"
+        DB[(Database)]
+        CACHE[(Cache)]
+    end
+
+    ROUTES --> MIDDLEWARE
+    MIDDLEWARE --> USER_SVC
+    MIDDLEWARE --> PAY_SVC
+    USER_SVC --> USER_REPO
+    PAY_SVC --> USER_REPO
+    USER_REPO --> BASE_REPO
+    USER_REPO --> DB
+    USER_SVC --> CACHE
+\`\`\`
+
+### 4.2 Communication Patterns
+
+**API → Services → Repositories → Database:**
+\`\`\`text
+
+src/api/routes/users.ts:25 (HTTP endpoint)
+  → UserService.createUser() (src/services/UserService.ts:67)
+    → UserRepository.insert() (src/repositories/UserRepository.ts:45)
+      → Database INSERT query
+
+\`\`\`
+
+**Event-Driven (Async):**
+
+\`\`\`text
+
+PaymentService.processCharge() (src/services/PaymentService.ts:89)
+  → EventBus.publish('payment.processed') (src/events/bus.ts:23)
+    → EmailService listens (src/services/EmailService.ts:12)
+      → Sends receipt email
+
+\`\`\`
+
+### 4.3 Architectural Patterns
+
+#### 🟢 Layered Architecture
+
+- **Evidence:** Clear separation: API → Services → Repositories → Database
+- **Rationale:** [Not explicitly documented]
+- **[User stated: "Standard pattern for maintainability"]**
+
+#### 🟢 Dependency Injection
+
+- **Evidence:** Services injected via constructor (src/services/*.ts)
+- **Implementation:** Custom DI container (src/di/container.ts:12)
+
+#### 🟡 Event-Driven (Partial)
+
+- **Evidence:** Event bus exists (src/events/bus.ts)
+- **Usage:** Only for email notifications, not fully adopted
+- **[User confirmed: "Plan to expand event usage for audit logging"]**
+
+---
+
+## 5.
Conventions & Standards + +### 5.1 Code Style + +- **Linter:** ESLint (eslintrc.json) - Airbnb config +- **Formatter:** Prettier (prettierrc.json) +- **TypeScript:** Strict mode enabled (tsconfig.json:5) + +### 5.2 Naming Conventions + +- **Files:** camelCase for TS/JS files (userService.ts) +- **Components:** PascalCase for React (UserProfile.tsx) +- **Functions:** camelCase (getUserById) +- **Classes:** PascalCase (UserService) +- **Constants:** UPPER_SNAKE_CASE (MAX_RETRY_ATTEMPTS) + +### 5.3 File Organization + +- **Pattern:** Layer-based (api/, services/, repositories/) +- **Co-location:** Tests alongside source (userService.ts + userService.test.ts) +- **Barrel exports:** index.ts files in each directory + +### 5.4 Git Workflow + +- **Branching:** Feature branches (feature/*, bugfix/*) +- **Commits:** Conventional Commits (feat:, fix:, docs:) +- **PRs:** Required reviews, CI must pass + +--- + +## 6. Testing Strategy + +### 6.1 Frameworks + +- **Unit:** Jest (package.json:34) +- **Integration:** Jest + Supertest (for API tests) +- **E2E:** [None found] + +### 6.2 Coverage + +- **Current:** ~75% (from jest.config.js coverage report) +- **Target:** [User stated: "Aiming for 80%"] + +### 6.3 Patterns + +- **Location:** Co-located (*.test.ts alongside source) +- **Naming:** *.test.ts +- **Run command:** `npm test` + +--- + +## 7. Build & Deployment + +### 7.1 Build Process + +- **Tool:** Webpack (webpack.config.js) +- **Command:** `npm run build` +- **Output:** dist/ directory + +### 7.2 Environments + +- **Development:** Local (npm run dev) +- **Staging:** [Not configured yet - User confirmed] +- **Production:** AWS ECS (infrastructure/ecs-task-def.json) + +### 7.3 CI/CD + +- **Platform:** GitHub Actions (.github/workflows/ci.yml) +- **Pipeline:** + 1. Lint check + 2. Unit tests + 3. Build + 4. Deploy to staging (on main branch) + +--- + +## 8. Essential Files to Read + +Priority files for anyone working on this codebase: + +1. 
**src/api/routes/index.ts:12-89** - Main route definitions, entry points
+2. **src/services/UserService.ts:45-234** - Core user management logic
+3. **src/services/PaymentService.ts:34-178** - Payment processing flow
+4. **src/repositories/BaseRepository.ts:12-67** - Common data access patterns
+5. **src/utils/jwt.ts:12-45** - Authentication token handling
+6. **src/api/middleware/auth.ts:23-67** - Request authentication
+7. **docs/architecture.md** - High-level architecture overview
+8. **docs/adr/001-database-choice.md** - PostgreSQL decision rationale
+
+---
+
+## 9. Execution Path Examples
+
+### Example 1: User Login
+
+\`\`\`text
+
+1. User submits credentials via POST /api/auth/login
+   Entry: src/api/routes/auth.ts:23
+
+2. Request hits auth middleware (if protected route)
+   Middleware: src/api/middleware/validator.ts:8
+   Validates: email format, password presence
+
+3. Controller delegates to service
+   Controller: src/api/routes/auth.ts:25 calls AuthService.login()
+
+4. Service validates credentials
+   Service: src/services/AuthService.ts:45
+   → UserRepository.findByEmail(email)
+   Repository: src/repositories/UserRepository.ts:34
+   → Database SELECT query
+
+5. Service verifies password
+   Service: src/services/AuthService.ts:67
+   → bcrypt.compare() in src/utils/bcrypt.ts:15
+
+6. Service generates JWT
+   Service: src/services/AuthService.ts:78
+   → jwt.sign() in src/utils/jwt.ts:12
+
+7. Response sent to client
+   Controller: src/api/routes/auth.ts:34
+   Returns: { token, user }
+
+\`\`\`
+
+### Example 2: Background Payment Processing
+
+\`\`\`text
+
+1. Webhook received from Stripe
+   Entry: src/api/routes/webhooks/stripe.ts:12
+
+2. Signature verification
+   Middleware: src/api/middleware/stripeWebhook.ts:8
+
+3. Event published to bus
+   Handler: src/api/routes/webhooks/stripe.ts:23
+   → EventBus.publish('payment.received')
+   Bus: src/events/bus.ts:45
+
+4. Multiple subscribers react:
+   a) EmailService sends receipt
+      Subscriber: src/services/EmailService.ts:67
+
+   b) AnalyticsService tracks event
+      Subscriber: src/services/AnalyticsService.ts:34
+
+   c) UserService updates balance
+      Subscriber: src/services/UserService.ts:123
+
+\`\`\`
+
+---
+
+## 10. Confidence Summary
+
+### High Confidence Findings ✅
+
+- Authentication flow (complete code trace + tests)
+- Payment integration (active production usage)
+- Database choice (explicit ADR)
+- Layered architecture (clear code organization)
+- Technology stack (explicit dependencies)
+
+### Medium Confidence (Needs Validation) ⚠️
+
+- Event-driven pattern (partially implemented)
+- React choice rationale (documented but brief)
+- Target code coverage (stated by user)
+
+### Low Confidence (Unknown) ❓
+
+- Redis caching decision (no documentation)
+- Deployment to staging (not configured)
+- E2E testing strategy (none found)
+
+---
+
+## 11. Open Questions & Gaps
+
+### For User Validation
+
+1. ❓ **Redis Caching:**
+   - Used in src/cache/redis.ts:12
+   - No decision documentation found
+   - Question: Why Redis? What alternatives were considered?
+
+2. ❓ **Staging Environment:**
+   - No configuration found for staging
+   - User mentioned it exists - where?
+
+### Documentation Gaps
+
+1. 📝 Need ADR for Redis caching choice
+2. 📝 Update API documentation (currently outdated: 2023-06-15)
+3. 📝 Document event-driven pattern expansion plan
+4. 📝 Remove or document deprecated OAuth code
+
+### Code Gaps
+
+1. 🔧 Remove deprecated MongoDB client code
+2. 🔧 Remove unused OAuth handlers
+3. 🔧 Add E2E testing framework
+4. 🔧 Configure staging environment
+
+---
+
+## 12. Recommendations for New Features
+
+When building new features in this codebase:
+
+1.
**Architecture:** + - Follow layered pattern: API → Service → Repository + - Place routes in src/api/routes/[feature].ts + - Business logic in src/services/[Feature]Service.ts + - Data access in src/repositories/[Feature]Repository.ts + +2. **Authentication:** + - Use existing JWT middleware (src/api/middleware/auth.ts:23) + - Follow pattern in src/api/routes/auth.ts for protected routes + +3. **Database:** + - Use Prisma ORM (already configured) + - Create migrations with `npm run migrate:create` + - Follow patterns in src/repositories/BaseRepository.ts + +4. **Testing:** + - Co-locate tests with source (*.test.ts) + - Aim for 80% coverage (current: 75%) + - Run tests with `npm test` + +5. **Styling:** + - Follow ESLint + Prettier config + - Use camelCase for files, PascalCase for classes/components + - Conventional Commits for commit messages + +6. **Events:** + - Consider using event bus for async operations + - Follow pattern in src/services/PaymentService.ts:89 for publishing + - Subscribe in relevant services (src/services/EmailService.ts:12 example) + +--- + +## 13. Next Steps + +After this context analysis: + +1. **Use `generate-spec` prompt** to create detailed specification for your feature +2. **Reference this analysis** when making architectural decisions +3. **Follow identified patterns** to ensure consistency +4. **Address high-priority gaps** if they block your work +5. **Update this analysis** if you discover new patterns during implementation + +--- + +**Analysis completed:** YYYY-MM-DD +**Last validated with user:** YYYY-MM-DD +**Status:** Ready for feature specification + +--- + +## Key Principles to Remember + +1. **Evidence-Based:** Every claim needs file:line or doc#heading citation +2. **Confidence Levels:** Mark High/Medium/Low confidence for all findings +3. **Separate WHAT/HOW/WHY:** + - Code analysis tells you WHAT and HOW + - Documentation tells you WHY + - User fills in gaps and confirms intent +4. 
**Stay in Your Lane:** Don't infer WHY from code - flag it as a gap for user to answer +5. **Interactive, Not Batch:** Short focused questions, wait for answers, then ask follow-ups +6. **Flag Gaps Explicitly:** Better to document "Unknown" than to guess +7. **Actionable Outputs:** + - Specific file lists with line numbers + - Execution path traces + - Clear recommendations for new development +8. **Preserve User Input:** Capture direct quotes for later citation in specs/ADRs + +--- + +## Final Checklist Before Completing + +Before saving the analysis document, verify: + +- [ ] All code findings have file:line citations +- [ ] All documentation findings have path#heading references +- [ ] User answers captured as direct quotes with dates +- [ ] Confidence levels marked for all findings +- [ ] Essential files list includes 5-10 key files with line ranges +- [ ] At least 2 execution path traces provided +- [ ] Gaps and unknowns explicitly documented (not hidden) +- [ ] Recommendations are specific and actionable +- [ ] High/Medium/Low confidence findings categorized +- [ ] Open questions listed for future resolution + +--- + +This enhanced prompt will produce **evidence-based, confidence-assessed codebase analysis** that serves as a strong foundation for spec-driven development. The analysis clearly separates facts from inferences, documents gaps explicitly, and provides actionable guidance for building new features. diff --git a/prompts/generate-spec.md b/prompts/generate-spec.md index 1b93dca..267390a 100644 --- a/prompts/generate-spec.md +++ b/prompts/generate-spec.md @@ -16,59 +16,224 @@ meta: To guide an AI assistant in creating a detailed Specification (Spec) in Markdown format, based on an initial user prompt. The Spec should be clear, actionable, and suitable for a junior developer to understand and implement the feature. +**Core Principle:** The Spec defines WHAT needs to be built and WHY (user value, business goals). 
The HOW (implementation details) is left to the developer, unless specific architectural constraints exist. + +## AI Behavior Guidelines + +- **Ask, don't assume:** When requirements are unclear, ask specific questions rather than making assumptions +- **Reference existing context:** If a codebase-context document exists, reference it for architectural alignment +- **Short, focused questions:** Ask 3-5 questions per round, not long questionnaires +- **Provide options:** Use letter/number lists for easy selection +- **Explicit unknowns:** Flag areas needing clarification rather than guessing +- **Evidence-based:** When suggesting technical approaches, cite existing patterns from the codebase + ## Process -1. **Receive Initial Prompt:** The user provides a brief description or request for a new feature or functionality. -2. **Ask Clarifying Questions:** Before writing the Spec, the AI *must* ask clarifying questions to gather sufficient detail. The goal is to understand the "what" and "why" of the feature, not necessarily the "how" (which the developer will figure out). Make sure to provide options in letter/number lists so I can respond easily with my selections. -3. **Generate Spec:** Based on the initial prompt and the user's answers to the clarifying questions, generate a Spec using the structure outlined below. -4. **Save Spec:** Save the generated document as `[n]-spec-[feature-name].md` inside the `/tasks` directory. (Where `n` is a zero-padded 4-digit sequence starting from 0001, e.g., `0001-spec-user-authentication.md`.) 
+### Phase 1: Initial Analysis (Optional - If Codebase Context Available) + +**If** a codebase-context document exists in `/tasks/`, read it to understand: + +- Existing architectural patterns +- Technology stack and conventions +- Integration points and dependencies +- Common patterns for similar features + +**Tool Usage:** Read (for context document), Grep (to find related existing features) + +### Phase 2: Clarifying Questions (Mandatory) + +Before writing the Spec, the AI **must** ask clarifying questions to gather sufficient detail. + +**Focus on:** + +- **WHAT** needs to be built (functionality, features) +- **WHY** it's needed (user value, business goals) +- **Constraints** (technical, scope, timeline) + +**Do NOT ask about:** + +- Specific implementation details (HOW) - let developers decide +- Low-level technical choices - unless there are architectural constraints + +**Guidelines:** + +- Ask 3-5 focused questions per round +- Provide multiple-choice options (A/B/C) when possible +- Wait for answers before proceeding + +#### ⛔ STOP - Wait for user answers before proceeding to Phase 3 + +### Phase 3: Draft Specification + +Based on initial prompt + user answers + codebase context (if available), generate a Spec using the structure outlined below. + +**Tool Usage:** Write (to create spec file), Read (to reference existing specs/docs) + +### Phase 4: Review & Refinement + +Present the spec to the user for review. Ask if they: + +- Are satisfied with the level of detail +- Have additional questions or clarifications +- Want to adjust scope or requirements + +#### ⛔ STOP - Wait for user feedback before finalizing + +### Phase 5: Finalize + +Save the completed Spec to `/tasks/[n]-spec-[feature-name].md` + +**⛔ STOP - Workflow complete. 
Do NOT proceed to implementation.** ## Clarifying Questions (Examples) The AI should adapt its questions based on the prompt, but here are some common areas to explore: -* **Problem/Goal:** "What problem does this feature solve for the user?" or "What is the main goal we want to achieve with this feature?" -* **Target User:** "Who is the primary user of this feature?" -* **Core Functionality:** "Can you describe the key actions a user should be able to perform with this feature?" -* **User Stories:** "Could you provide a few user stories? (e.g., As a [type of user], I want to [perform an action] so that [benefit].)" -* **Acceptance Criteria:** "How will we know when this feature is successfully implemented? What are the key success criteria?" -* **Scope/Boundaries:** "Are there any specific things this feature *should not* do (non-goals)?" -* **Data Requirements:** "What kind of data does this feature need to display or manipulate?" -* **Design/UI:** "Are there any existing design mockups or UI guidelines to follow?" or "Can you describe the desired look and feel?" -* **Edge Cases:** "Are there any potential edge cases or error conditions we should consider?" -* **Unit of Work:** "What is the smallest end-to-end slice we can ship that a user or stakeholder can experience, test, or demonstrate?" -* **Demoability:** "For each stage, how will we show working value (e.g., URL, CLI output, screenshot, test run, short demo script)?" +- **Problem/Goal:** "What problem does this feature solve for the user?" or "What is the main goal we want to achieve with this feature?" +- **Target User:** "Who is the primary user of this feature?" +- **Core Functionality:** "Can you describe the key actions a user should be able to perform with this feature?" +- **User Stories:** "Could you provide a few user stories? (e.g., As a [type of user], I want to [perform an action] so that [benefit].)" +- **Acceptance Criteria:** "How will we know when this feature is successfully implemented? 
What are the key success criteria?" +- **Scope/Boundaries:** "Are there any specific things this feature *should not* do (non-goals)?" +- **Data Requirements:** "What kind of data does this feature need to display or manipulate?" +- **Design/UI:** "Are there any existing design mockups or UI guidelines to follow?" or "Can you describe the desired look and feel?" +- **Edge Cases:** "Are there any potential edge cases or error conditions we should consider?" +- **Unit of Work:** "What is the smallest end-to-end slice we can ship that a user or stakeholder can experience, test, or demonstrate?" +- **Demoability:** "For each stage, how will we show working value (e.g., URL, CLI output, screenshot, test run, short demo script)?" ## Spec Structure The generated Spec should include the following sections: 1. **Introduction/Overview:** Briefly describe the feature and the problem it solves. State the goal. + 2. **Goals:** List the specific, measurable objectives for this feature. + 3. **User Stories:** Detail the user narratives describing feature usage and benefits. + 4. **Demoable Units of Work:** Define small, end-to-end vertical slices. For each slice capture: Purpose and users; Demo Criteria (what will be shown to verify value); Proof Artifact(s) (tangible evidence such as a URL, CLI command & expected output, test names, or screenshot). + 5. **Functional Requirements:** List the specific functionalities the feature must have. Use clear, concise language (e.g., "The system must allow users to upload a profile picture."). Number these requirements. + 6. **Non-Goals (Out of Scope):** Clearly state what this feature will *not* include to manage scope. -7. **Design Considerations (Optional):** Link to mockups, describe UI/UX requirements, or mention relevant components/styles if applicable. -8. **Technical Considerations (Optional):** Mention any known technical constraints, dependencies, or suggestions (e.g., "Should integrate with the existing Auth module"). -9. 
**Success Metrics:** How will the success of this feature be measured? (e.g., "Increase user engagement by 10%", "Reduce support tickets related to X"). -10. **Open Questions:** List any remaining questions or areas needing further clarification. -## Target Audience +7. **Architectural Alignment (If codebase-context available):** + - Reference existing patterns this feature should follow + - Identify integration points with existing systems + - Note any deviations from established conventions (with justification) + - **Format:** "Authentication will follow existing JWT pattern (src/auth/AuthService.ts:23-45 per codebase-context)" -Assume the primary reader of the Spec is a **junior developer**. Therefore, requirements should be explicit, unambiguous, and avoid jargon where possible. Provide enough detail for them to understand the feature's purpose and core logic. +8. **Technical Feasibility Assessment:** + - **🟢 High Confidence:** Requirements that align with existing capabilities and patterns + - **🟡 Medium Confidence:** Requirements that may need research or new dependencies + - **🔴 Low Confidence:** Requirements with unknown feasibility or significant technical risk + - Include evidence: reference similar features, existing code, or docs that support feasibility + +9. **Design Considerations (Optional):** Link to mockups, describe UI/UX requirements, or mention relevant components/styles if applicable. -## Output +10. **Technical Considerations (Optional):** Mention any known technical constraints, dependencies, or suggestions (e.g., "Should integrate with the existing Auth module"). -* **Format:** Markdown (`.md`) -* **Location:** `/tasks/` -* **Filename:** `[n]-spec-[feature-name].md` +11. **Success Metrics:** How will the success of this feature be measured? (e.g., "Increase user engagement by 10%", "Reduce support tickets related to X"). -## Final instructions +12. **Open Questions:** List any remaining questions or areas needing further clarification. 
Include confidence level for each unknown. + +## Target Audience + +Assume the primary reader of the Spec is a **junior developer**. Therefore, requirements should be explicit, unambiguous, and avoid jargon where possible. Provide enough detail for them to understand the feature's purpose and core logic. -1. Do NOT start implementing the Spec -2. Make sure to ask the user clarifying questions -3. Take the user's answers to the clarifying questions and improve the Spec -4. Save the completed Spec to `/tasks/[n]-spec-[feature-name].md` -5. Ask the user if they are satisfied with it and if they have any additional questions or clarifications -6. Once the user is satisfied with the Spec, this workflow is complete and you should stop working +## Output Format + +- **Format:** Markdown (`.md`) +- **Location:** `/tasks/` +- **Filename:** `[n]-spec-[feature-name].md` (Where `n` is a zero-padded 4-digit sequence starting from 0001) +- **Example:** `/tasks/0001-spec-user-authentication.md` + +**Header Format:** + +```markdown +# Spec: [Feature Name] + +**Status:** Draft | Under Review | Approved +**Created:** YYYY-MM-DD +**Last Updated:** YYYY-MM-DD +**Author:** AI Assistant (with user input) +**Codebase Context:** [Reference to context doc if used, or "N/A"] +``` + +## Execution Workflow + +**Phase 1 (Optional):** Check for codebase-context document +↓ +**Phase 2 (Mandatory):** Ask 3-5 clarifying questions → ⛔ WAIT FOR ANSWERS +↓ +**Phase 3:** Draft specification using provided structure +↓ +**Phase 4:** Present to user for review → ⛔ WAIT FOR FEEDBACK +↓ +**Phase 5:** Finalize and save → ⛔ STOP (Do NOT implement) + +## Critical Rules + +1. **Never skip Phase 2:** Clarifying questions are mandatory, even if prompt seems clear +2. **Do NOT implement:** This workflow creates the spec only, not the code +3. **Reference context:** Always check for and reference codebase-context if available +4. **Evidence-based:** When suggesting technical approaches, cite existing patterns +5. 
**Explicit unknowns:** Flag gaps in knowledge rather than guessing +6. **Stop when complete:** Once spec is approved, workflow is done + +## What NOT to Do + +**Explicitly forbidden actions:** + +1. **❌ Do NOT start implementing the spec** + - This prompt creates specifications only + - Implementation happens in a separate workflow + - Stop after Phase 5 - do not write code + +2. **❌ Do NOT skip clarifying questions** + - Even if the request seems clear, ask questions + - Phase 2 is mandatory, not optional + - Better to over-clarify than make assumptions + +3. **❌ Do NOT make technical decisions without evidence** + - Don't suggest technologies without checking codebase-context + - Don't recommend patterns that don't exist in the codebase + - Always cite existing code or docs when suggesting approaches + +4. **❌ Do NOT write specs in isolation** + - Check for codebase-context document first + - Check for related existing specs + - Ask user about integration with existing features + +5. **❌ Do NOT proceed without user validation** + - Stop at every ⛔ checkpoint + - Wait for user answers before continuing + - Don't batch all questions at once + +6. **❌ Do NOT include implementation details (HOW)** + - Focus on WHAT (features) and WHY (value) + - Leave HOW (implementation) to developers + - Exception: When architectural constraints exist + +7. **❌ Do NOT assume requirements** + - If something is unclear, ask + - Flag unknowns explicitly in "Open Questions" + - Mark confidence levels honestly + +8. **❌ Do NOT continue after spec is approved** + - Once user says "approved", workflow ends + - Do not start task breakdown + - Do not begin implementation + +## Quality Checklist + +Before finalizing the spec, verify: + +- [ ] All clarifying questions answered +- [ ] User stories include "As a... I want... so that..." 
+- [ ] Functional requirements are numbered and specific +- [ ] Non-goals explicitly stated +- [ ] Technical feasibility assessed with confidence levels +- [ ] Codebase-context referenced (if available) +- [ ] Open questions documented with confidence levels +- [ ] Output saved to correct location with correct filename format diff --git a/tasks/0001-spec-sdd-mcp-poc.md b/tasks/0001-spec-sdd-mcp-poc.md index f0c652b..e032b7d 100644 --- a/tasks/0001-spec-sdd-mcp-poc.md +++ b/tasks/0001-spec-sdd-mcp-poc.md @@ -38,21 +38,21 @@ Deliver a FastMCP-based server that exposes the existing Spec Driven Development - **Purpose & Users:** Validate consumption from a secondary MCP-aware client. - **Demo Criteria:** Configure an external MCP client (e.g., Claude Desktop, VS Code MCP plugin) to reach the server over HTTP and successfully invoke prompts. - **Proof Artifact(s):** Connection configuration snippet; client-side screenshot/log showing prompt execution. -- **Status:** HTTP transport hardening deferred to [issue #3](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/3) for focused implementation (CORS, host/port configuration). +- **Status:** HTTP transport hardening deferred to [issue #3](https://github.com/liatrio-labs/spec-driven-workflow/issues/3) for focused implementation (CORS, host/port configuration). ### Slice 4 – Deployable packaging - **Purpose & Users:** Provide operational packaging for platform engineers. - **Demo Criteria:** Build container image locally, apply Kustomize overlay to deploy in a test cluster, and confirm `/mcp` endpoint readiness probe succeeds. - **Proof Artifact(s):** Docker build log, Kubernetes deployment manifest, `kubectl` output validating pod readiness. -- **Status:** Packaging and Kubernetes deployment deferred to [issue #4](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/4). 
+- **Status:** Packaging and Kubernetes deployment deferred to [issue #4](https://github.com/liatrio-labs/spec-driven-workflow/issues/4). ### Slice 5 – Protocol extensions showcase - **Purpose & Users:** Demonstrate FastMCP-specific protocol capabilities that enrich the SDD workflow for engineers and AI assistants. - **Demo Criteria:** Trigger a sampling request from the server (e.g., prompt the client LLM to draft a spec summary) and emit a notification when new artifacts land in `/tasks/`; verify both in the Inspector or alternate client. - **Proof Artifact(s):** Recorded interaction showing sampling exchange, notification payload captured via client logs. -- **Status:** Protocol extensions and observability deferred to [issue #5](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/5). +- **Status:** Protocol extensions and observability deferred to [issue #5](https://github.com/liatrio-labs/spec-driven-workflow/issues/5). ## Functional Requirements diff --git a/tasks/tasks-0001-spec-sdd-mcp-poc.md b/tasks/tasks-0001-spec-sdd-mcp-poc.md index 5b8f938..679e288 100644 --- a/tasks/tasks-0001-spec-sdd-mcp-poc.md +++ b/tasks/tasks-0001-spec-sdd-mcp-poc.md @@ -62,7 +62,7 @@ - this has been fully tested with multiple clients (Claude Code, Windsurf, VS Code, Codex, Gemini CLI, etc.) - Proof Artifact(s): Connection configuration snippet and client-side screenshot/log showing prompt execution results. - [!] 3.1 Harden HTTP transport configuration (CORS headers, host/port envs) in `fastmcp.json` and `mcp_server/config.py`. - - Deferred to [issue #3](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/3) + - Deferred to [issue #3](https://github.com/liatrio-labs/spec-driven-workflow/issues/3) - [x] 3.2 Draft client onboarding instructions in `docs/operations.md` for FastMCP Inspector, Claude Desktop, and VS Code MCP plugin. - [x] 3.3 Record validated client session (screenshots/logs) invoking prompts/resources via HTTP endpoint. 
- [x] 3.4 Add integration test (async) using `fastmcp.Client` to call prompts over HTTP within pytest suite. @@ -70,7 +70,7 @@ - [!] 4.0 Package and deploy for Kubernetes - Demo Criteria: Build Docker image, apply Kustomize overlay to deploy in a test cluster, and verify `/mcp/health` readiness plus metrics endpoints. - Proof Artifact(s): Docker build log, rendered Kubernetes manifest, and `kubectl` output confirming pod readiness. - - Deferred to [issue #4](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/4) + - Deferred to [issue #4](https://github.com/liatrio-labs/spec-driven-workflow/issues/4) - [!] 4.1 Author Dockerfile leveraging `uv` for dependency sync and multi-stage build with non-root runtime user. - [!] 4.2 Provide container entrypoints/scripts (`uvx fastmcp run`) supporting both STDIO and HTTP configurations. - [!] 4.3 Create base and overlay Kustomize manifests defining config maps, secrets placeholders, volume mounts, and readiness probes. @@ -80,7 +80,7 @@ - [!] 5.0 Showcase protocol extensions and observability - Demo Criteria: Trigger helper tools, emit notifications on new artifacts, exercise sampling request flow, and capture structured logs/metrics. - Proof Artifact(s): Test run outputs covering tools/notifications/sampling; log excerpts illustrating structured events and metrics export. - - Deferred to [issue #5](https://github.com/liatrio-labs/spec-driven-workflow-mcp/issues/5) + - Deferred to [issue #5](https://github.com/liatrio-labs/spec-driven-workflow/issues/5) - [!] 5.1 Implement `mcp_server/tools.py` helper tools (list artifacts, create spec stub, summarize diff) with corresponding FastMCP decorators. - [!] 5.2 Build notification broadcaster (`mcp_server/notifications.py`) emitting events on workspace file creation with hooks into FastMCP emitter. - [!] 5.3 Implement sampling orchestrator (`mcp_server/sampling.py`) requesting client-generated summaries and handling responses.