From b3bd88a466c1e9e58b3f1738c2d13dfd9c13788a Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Mon, 20 Apr 2026 11:13:05 +0300
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20Milestone=202=20=E2=80=94=20narrati?=
 =?UTF-8?q?ve=20layer=20(NarrativeSpec,=20WorldSpec,=20dataset=20card)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- narrative/spec.py: frozen dataclasses for NarrativeSpec hierarchy
  (CompanySpec, ProductSpec, MarketSpec, GtmMotionSpec, PersonaSpec,
  FunnelStageSpec) with validated from_dict() classmethods
- narrative/dataset_card.py: render_dataset_card() produces Markdown
  dataset card from WorldSpec (header, narrative summary, task, stubs
  for table inventory and feature categories, use cases, caveats)
- core/models.py: WorldSpec.narrative field (NarrativeSpec | None)
- api/generator.py: world_spec property; from_recipe() resolves the
  recipe's narrative.yaml into a NarrativeSpec and populates WorldSpec
- 51 new tests covering spec validation, card rendering, and Generator
  integration (110 total); ruff + mypy clean

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                       |  46 +++--
 leadforge/api/generator.py           |  26 ++-
 leadforge/core/models.py             |  10 +-
 leadforge/narrative/dataset_card.py  | 157 +++++++++++++++
 leadforge/narrative/spec.py          | 285 +++++++++++++++++++++++++++
 tests/narrative/__init__.py          |   0
 tests/narrative/test_dataset_card.py | 112 +++++++++++
 tests/narrative/test_spec.py         | 196 ++++++++++++++++++
 8 files changed, 803 insertions(+), 29 deletions(-)
 create mode 100644 leadforge/narrative/dataset_card.py
 create mode 100644 leadforge/narrative/spec.py
 create mode 100644 tests/narrative/__init__.py
 create mode 100644 tests/narrative/test_dataset_card.py
 create mode 100644 tests/narrative/test_spec.py

diff --git a/.agent-plan.md b/.agent-plan.md
index a049bd0..ae236dc 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -6,44 +6,50 @@
 
 ## Current System State
 
-**v0.2.0 in progress.** Typed `Recipe` model, `GenerationConfig` with full validation, config
-precedence system, `RNGRoot` with deterministic substreams, `Generator.from_recipe()` fully
-implemented, `core/hashing.py`, `core/serialization.py`, and recipe narrative/difficulty-profile
-assets for `b2b_saas_procurement_v1`. 59 tests passing.
+**v0.2.0 in progress — Milestone 2 complete (PR open).** Typed `NarrativeSpec` hierarchy, `WorldSpec`
+with narrative field, `Generator.from_recipe()` populates `world_spec`, dataset card renderer, and
+full test coverage. 110 tests passing.
 
 ---
 
-## Active Task Breakdown — Milestone 2: Narrative Layer (v0.2.0 cont.)
+## Active Task Breakdown — Milestone 3: Schema Layer (v0.2.0 cont.)
 
-Goal: Build the concrete company/product/market story objects that anchor all later simulation.
+Goal: Define the relational entity schema (accounts, contacts, leads, etc.) and feature dictionary.
 
-- [ ] **1. Narrative models**
-  - Implement typed dataclasses in `narrative/`: `CompanySpec`, `ProductSpec`, `MarketSpec`,
-    `PersonaSpec`, `FunnelSpec`
-  - Loader: parse `narrative.yaml` into these models with validation
+- [ ] **1. Entity schema**
+  - Implement `schema/entities.py`: typed dataclasses for `Account`, `Contact`, `Lead`
+  - Implement `schema/events.py`: `Touch`, `SalesActivity`, `Opportunity` etc.
 
-- [ ] **2. WorldSpec population**
-  - Flesh out `WorldSpec` to hold a resolved `NarrativeSpec`
-  - Wire into `Generator.from_recipe()` so `gen.world_spec` is populated after construction
+- [ ] **2. Feature dictionary**
+  - Implement `schema/features.py` + `schema/dictionaries.py`
+  - Generate `feature_dictionary.csv` stub
 
-- [ ] **3. Dataset card generation**
-  - Implement `narrative/dataset_card.py`: render a Markdown dataset card from `WorldSpec`
-  - Tests: round-trip model → YAML → model, dataset-card text contains expected fields
+- [ ] **3. Task schema**
+  - Implement `schema/tasks.py`: `converted_within_90_days` task manifest structure
 
 ---
 
 ## Context Pointers
 
-- Milestone 2 scope: `docs/leadforge_implementation_plan.md` §5 "Milestone 2"
+- Milestone 3 scope: `docs/leadforge_implementation_plan.md` §6 "Milestone 3"
 - Full milestone dependency graph: `docs/leadforge_implementation_plan.md` §6
-- Narrative spec: `docs/leadforge_architecture_spec.md` §7
-- Recipe assets: `leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml`
+- Schema spec: `docs/leadforge_architecture_spec.md` §8
+- Recipe assets: `leadforge/recipes/b2b_saas_procurement_v1/`
 
 ---
 
 ## Completed Phases
 
-### Milestone 1 — Canonical Config, Recipe & Model Objects ✓ (v0.2.0 in PR)
+### Milestone 2 — Narrative Layer ✓ (v0.2.0 in PR)
+- `leadforge/narrative/spec.py`: frozen dataclasses `NarrativeSpec`, `CompanySpec`, `ProductSpec`,
+  `MarketSpec`, `GtmMotionSpec`, `PersonaSpec`, `FunnelStageSpec` — all with validated `from_dict()`
+- `leadforge/narrative/dataset_card.py`: `render_dataset_card(world_spec)` — Markdown card
+- `leadforge/core/models.py`: `WorldSpec` gets `narrative: NarrativeSpec | None` field
+- `leadforge/api/generator.py`: `world_spec` property; `from_recipe()` resolves narrative into
+  `WorldSpec`
+- 51 new tests (spec validation, dataset card, Generator integration); total 110 passing
+
+### Milestone 1 — Canonical Config, Recipe & Model Objects ✓ (v0.2.0 merged)
 - `leadforge/core/rng.py`: `RNGRoot` with SHA-256-derived named substreams
 - `leadforge/core/hashing.py`: `hash_config()` — stable SHA-256 digest of `GenerationConfig`
 - `leadforge/core/serialization.py`: `load_yaml`, `load_json`, `dump_json`
diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py
index 6f76f36..76acd26 100644
--- a/leadforge/api/generator.py
+++ b/leadforge/api/generator.py
@@ -5,7 +5,7 @@
 from typing import Any
 
 from leadforge.core.enums import DifficultyProfile, ExposureMode
-from leadforge.core.models import GenerationConfig, WorldBundle
+from leadforge.core.models import GenerationConfig, WorldBundle, WorldSpec
 from leadforge.core.rng import RNGRoot
 from leadforge.core.sentinels import _MISSING
 
@@ -23,18 +23,24 @@ class Generator:
         bundle = gen.generate(n_leads=5000, difficulty="intermediate")
         bundle.save("./out/demo_bundle")
 
-    ``from_recipe`` is implemented in Milestone 1. Full generation
-    (``generate``) is implemented across Milestones 2–9.
+    ``from_recipe`` is implemented in Milestones 1–2. Full generation
+    (``generate``) is implemented across Milestones 3–9.
     """
 
-    def __init__(self, config: GenerationConfig) -> None:
+    def __init__(self, config: GenerationConfig, world_spec: WorldSpec) -> None:
         self._config = config
+        self._world_spec = world_spec
         self._rng = RNGRoot(config.seed)
 
     @property
     def config(self) -> GenerationConfig:
         return self._config
 
+    @property
+    def world_spec(self) -> WorldSpec:
+        """The resolved world specification, including narrative."""
+        return self._world_spec
+
     @classmethod
     def from_recipe(
         cls,
@@ -69,8 +75,8 @@ def from_recipe(
                 Applied after recipe defaults but before explicit kwargs.
 
         Returns:
-            A configured :class:`Generator` instance ready to call
-            :meth:`generate` on.
+            A configured :class:`Generator` with a populated
+            :attr:`world_spec` (narrative resolved from the recipe).
 
         Raises:
             :class:`~leadforge.core.exceptions.InvalidRecipeError`: if the
@@ -78,6 +84,7 @@ def from_recipe(
                 exposure mode / difficulty is not supported.
         """
         from leadforge.api.recipes import Recipe
+        from leadforge.narrative.spec import NarrativeSpec
         from leadforge.recipes.registry import load_recipe
 
         raw = load_recipe(recipe_id)
@@ -93,7 +100,12 @@ def from_recipe(
             output_path=output_path,
             override=override,
         )
-        return cls(config)
+
+        narrative_data = recipe.load_narrative()
+        narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None
+        world_spec = WorldSpec(config=config, narrative=narrative)
+
+        return cls(config, world_spec)
 
     def generate(
         self,
diff --git a/leadforge/core/models.py b/leadforge/core/models.py
index a7c0c73..1aec86f 100644
--- a/leadforge/core/models.py
+++ b/leadforge/core/models.py
@@ -3,12 +3,15 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from leadforge.core.enums import DifficultyProfile, ExposureMode
 from leadforge.core.exceptions import InvalidConfigError
 from leadforge.version import __version__
 
+if TYPE_CHECKING:
+    from leadforge.narrative.spec import NarrativeSpec
+
 
 def _require_positive_int(value: Any, name: str) -> None:
     """Raise ``InvalidConfigError`` unless *value* is a positive plain ``int``.
@@ -74,10 +77,13 @@ def __post_init__(self) -> None:
 class WorldSpec:
     """Fully instantiated hidden world specification (post-sampling, pre-simulation).
 
-    Populated in Milestone 2 (narrative/schema) through Milestone 6 (mechanisms).
+    Populated incrementally across milestones:
+    - M2: config + narrative
+    - M3–M6: schema, structure, mechanisms
     """
 
     config: GenerationConfig = field(default_factory=GenerationConfig)
+    narrative: NarrativeSpec | None = None
 
 
 @dataclass
diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py
new file mode 100644
index 0000000..8d82b1c
--- /dev/null
+++ b/leadforge/narrative/dataset_card.py
@@ -0,0 +1,157 @@
+"""Dataset card renderer.
+
+Produces the ``dataset_card.md`` artifact from a :class:`WorldSpec`.
+The card follows the structure required by the architecture spec (§14.3).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from leadforge.core.models import WorldSpec
+
+
+def render_dataset_card(world_spec: WorldSpec) -> str:
+    """Return a Markdown dataset card string for *world_spec*.
+
+    Sections present at all milestones:
+    - Header (recipe id, package version, seed, exposure mode, difficulty, horizon)
+    - Narrative summary (company, product, market, GTM)
+    - Primary task and label definition
+    - Suggested use cases
+    - Caveats
+
+    Sections populated in later milestones (rendered as stubs here):
+    - Table inventory
+    - Feature categories
+    """
+    cfg = world_spec.config
+    narrative = world_spec.narrative
+
+    lines: list[str] = []
+
+    # ------------------------------------------------------------------
+    # Header
+    # ------------------------------------------------------------------
+    lines += [
+        "# leadforge dataset card",
+        "",
+        "| Field | Value |",
+        "|---|---|",
+        f"| Recipe | `{cfg.recipe_id}` |",
+        f"| Package version | `{cfg.package_version}` |",
+        f"| Seed | `{cfg.seed}` |",
+        f"| Exposure mode | `{cfg.exposure_mode}` |",
+        f"| Difficulty | `{cfg.difficulty}` |",
+        f"| Horizon | {cfg.horizon_days} days |",
+        "",
+    ]
+
+    # ------------------------------------------------------------------
+    # Narrative summary
+    # ------------------------------------------------------------------
+    lines.append("## Narrative summary")
+    lines.append("")
+    if narrative is not None:
+        c = narrative.company
+        p = narrative.product
+        m = narrative.market
+        gtm = narrative.gtm_motion
+        lines += [
+            f"**Vendor:** {c.name} ({c.stage}, founded {c.founded_year},"
+            f" {c.hq_city}, {c.hq_country})",
+            "",
+            f"**Product:** {p.name} — {p.category}. "
+            f"Deployment: {p.deployment}. "
+            f"Pricing: {p.pricing_model}. "
+            f"ACV range: ${p.acv_range_usd[0]:,}–${p.acv_range_usd[1]:,}.",
+            "",
+            f"**Target market:** {m.icp_employee_range[0]}–{m.icp_employee_range[1]}-employee"
+            f" firms in {', '.join(m.geographies)}. "
+            f"Key industries: {', '.join(m.icp_industries)}. "
+            f"Average deal size: ${m.avg_deal_size_usd:,}. "
+            f"Average sales cycle: {m.avg_sales_cycle_days} days.",
+            "",
+            f"**GTM motion:** {', '.join(gtm.channels)} "
+            f"({gtm.inbound_share:.0%} inbound / "
+            f"{gtm.outbound_share:.0%} outbound / "
+            f"{gtm.partner_share:.0%} partner).",
+            "",
+            "**Buyer personas:**",
+            "",
+        ]
+        for persona in narrative.personas:
+            ellipsis = "…" if len(persona.title_variants) > 2 else ""
+            lines.append(
+                f"- **{persona.role}** ({persona.decision_authority}) — "
+                f"{', '.join(persona.title_variants[:2])}{ellipsis}"
+            )
+        lines.append("")
+    else:
+        lines += ["*Narrative not available for this exposure mode.*", ""]
+
+    # ------------------------------------------------------------------
+    # Primary task
+    # ------------------------------------------------------------------
+    lines += [
+        "## Primary task",
+        "",
+        "**Task:** `converted_within_90_days`",
+        "",
+        "**Label definition:** A lead is considered converted if a `closed_won` event "
+        "is recorded within 90 days of the lead's snapshot anchor date. "
+        "The label is derived from simulated events — it is never sampled directly.",
+        "",
+    ]
+
+    # ------------------------------------------------------------------
+    # Table inventory (stub — populated in later milestones)
+    # ------------------------------------------------------------------
+    lines += [
+        "## Table inventory",
+        "",
+        "*Table counts will appear here once the simulation layer is implemented (v0.3.0+).*",
+        "",
+    ]
+
+    # ------------------------------------------------------------------
+    # Feature categories (stub)
+    # ------------------------------------------------------------------
+    lines += [
+        "## Feature categories",
+        "",
+        "*Feature dictionary will appear here once the schema layer is implemented (v0.3.0+).*",
+        "",
+    ]
+
+    # ------------------------------------------------------------------
+    # Suggested use cases
+    # ------------------------------------------------------------------
+    lines += [
+        "## Suggested use cases",
+        "",
+        "- Teaching binary classification on realistic CRM data",
+        "- Portfolio projects demonstrating end-to-end ML pipelines",
+        "- Benchmarking lead-scoring models under controlled signal/noise conditions",
+        "- Research on causal structure in funnel conversion data",
+        "",
+    ]
+
+    # ------------------------------------------------------------------
+    # Caveats
+    # ------------------------------------------------------------------
+    lines += [
+        "## Caveats",
+        "",
+        "- This is **synthetic** data. It does not represent any real company, product, or market.",
+        "- The hidden world structure varies by motif family and stochastic rewiring; "
+        "no two seeds produce the same DGP.",
+        "- Features are anchored at the snapshot date. No post-anchor data is "
+        "included (leakage-free by construction).",
+        "- In `student_public` mode, the latent world graph, mechanism summary, "
+        "and full world spec are withheld.",
+        "",
+    ]
+
+    return "\n".join(lines)
diff --git a/leadforge/narrative/spec.py b/leadforge/narrative/spec.py
new file mode 100644
index 0000000..71815c2
--- /dev/null
+++ b/leadforge/narrative/spec.py
@@ -0,0 +1,285 @@
+"""Typed narrative specification models.
+
+A ``NarrativeSpec`` is the fully parsed, validated in-memory representation of a
+recipe's ``narrative.yaml``.  Every downstream layer (schema, simulation,
+rendering) anchors to these objects rather than raw YAML dicts.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from leadforge.core.exceptions import InvalidRecipeError
+
+# ---------------------------------------------------------------------------
+# Leaf dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class CompanySpec:
+    """The simulated vendor company."""
+
+    name: str
+    founded_year: int
+    hq_city: str
+    hq_country: str
+    stage: str
+    employee_range: tuple[int, int]
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> CompanySpec:
+        _require_keys(
+            data,
+            {"name", "founded_year", "hq_city", "hq_country", "stage", "employee_range"},
+            "company",
+        )
+        er = data["employee_range"]
+        if not (
+            isinstance(er, (list, tuple))
+            and len(er) == 2
+            and all(isinstance(v, int) and not isinstance(v, bool) for v in er)
+        ):
+            raise InvalidRecipeError(
+                f"company.employee_range must be a [min, max] int pair, got {er!r}"
+            )
+        return cls(
+            name=str(data["name"]),
+            founded_year=_pos_int(data["founded_year"], "company.founded_year"),
+            hq_city=str(data["hq_city"]),
+            hq_country=str(data["hq_country"]),
+            stage=str(data["stage"]),
+            employee_range=(int(er[0]), int(er[1])),
+        )
+
+
+@dataclass(frozen=True)
+class ProductSpec:
+    """The simulated product being sold."""
+
+    name: str
+    category: str
+    deployment: str
+    pricing_model: str
+    acv_range_usd: tuple[int, int]
+    contract_terms_months: tuple[int, ...]
+    free_trial_available: bool
+    demo_available: bool
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> ProductSpec:
+        _require_keys(
+            data,
+            {
+                "name",
+                "category",
+                "deployment",
+                "pricing_model",
+                "acv_range_usd",
+                "contract_terms_months",
+                "free_trial_available",
+                "demo_available",
+            },
+            "product",
+        )
+        acv = data["acv_range_usd"]
+        if not (
+            isinstance(acv, (list, tuple))
+            and len(acv) == 2
+            and all(isinstance(v, int) and not isinstance(v, bool) for v in acv)
+        ):
+            raise InvalidRecipeError(
+                f"product.acv_range_usd must be a [min, max] int pair, got {acv!r}"
+            )
+        terms = data["contract_terms_months"]
+        if not isinstance(terms, (list, tuple)) or not all(
+            isinstance(v, int) and not isinstance(v, bool) for v in terms
+        ):
+            raise InvalidRecipeError(
+                f"product.contract_terms_months must be a list of ints, got {terms!r}"
+            )
+        return cls(
+            name=str(data["name"]),
+            category=str(data["category"]),
+            deployment=str(data["deployment"]),
+            pricing_model=str(data["pricing_model"]),
+            acv_range_usd=(int(acv[0]), int(acv[1])),
+            contract_terms_months=tuple(int(t) for t in terms),
+            free_trial_available=bool(data["free_trial_available"]),
+            demo_available=bool(data["demo_available"]),
+        )
+
+
+@dataclass(frozen=True)
+class MarketSpec:
+    """The target market definition."""
+
+    icp_employee_range: tuple[int, int]
+    icp_industries: tuple[str, ...]
+    geographies: tuple[str, ...]
+    avg_deal_size_usd: int
+    avg_sales_cycle_days: int
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> MarketSpec:
+        _require_keys(
+            data,
+            {
+                "icp_employee_range",
+                "icp_industries",
+                "geographies",
+                "avg_deal_size_usd",
+                "avg_sales_cycle_days",
+            },
+            "market",
+        )
+        er = data["icp_employee_range"]
+        if not (
+            isinstance(er, (list, tuple))
+            and len(er) == 2
+            and all(isinstance(v, int) and not isinstance(v, bool) for v in er)
+        ):
+            raise InvalidRecipeError(
+                f"market.icp_employee_range must be a [min, max] int pair, got {er!r}"
+            )
+        return cls(
+            icp_employee_range=(int(er[0]), int(er[1])),
+            icp_industries=tuple(str(i) for i in data["icp_industries"]),
+            geographies=tuple(str(g) for g in data["geographies"]),
+            avg_deal_size_usd=_pos_int(data["avg_deal_size_usd"], "market.avg_deal_size_usd"),
+            avg_sales_cycle_days=_pos_int(
+                data["avg_sales_cycle_days"], "market.avg_sales_cycle_days"
+            ),
+        )
+
+
+@dataclass(frozen=True)
+class GtmMotionSpec:
+    """Go-to-market channels and approximate share mix."""
+
+    channels: tuple[str, ...]
+    inbound_share: float
+    outbound_share: float
+    partner_share: float
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> GtmMotionSpec:
+        _require_keys(
+            data,
+            {"channels", "inbound_share", "outbound_share", "partner_share"},
+            "gtm_motion",
+        )
+        return cls(
+            channels=tuple(str(c) for c in data["channels"]),
+            inbound_share=float(data["inbound_share"]),
+            outbound_share=float(data["outbound_share"]),
+            partner_share=float(data["partner_share"]),
+        )
+
+
+@dataclass(frozen=True)
+class PersonaSpec:
+    """A buyer persona present in the simulated market."""
+
+    role: str
+    title_variants: tuple[str, ...]
+    decision_authority: str
+    typical_involvement: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> PersonaSpec:
+        _require_keys(
+            data,
+            {"role", "title_variants", "decision_authority", "typical_involvement"},
+            "personas[]",
+        )
+        return cls(
+            role=str(data["role"]),
+            title_variants=tuple(str(t) for t in data["title_variants"]),
+            decision_authority=str(data["decision_authority"]),
+            typical_involvement=str(data["typical_involvement"]),
+        )
+
+
+@dataclass(frozen=True)
+class FunnelStageSpec:
+    """A single named stage in the sales funnel."""
+
+    name: str
+    label: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> FunnelStageSpec:
+        _require_keys(data, {"name", "label"}, "funnel_stages[]")
+        return cls(name=str(data["name"]), label=str(data["label"]))
+
+
+# ---------------------------------------------------------------------------
+# Root spec
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class NarrativeSpec:
+    """Complete parsed narrative for one generation run."""
+
+    company: CompanySpec
+    product: ProductSpec
+    market: MarketSpec
+    gtm_motion: GtmMotionSpec
+    personas: tuple[PersonaSpec, ...]
+    funnel_stages: tuple[FunnelStageSpec, ...]
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec:
+        """Parse and validate a raw narrative YAML payload.
+
+        Raises:
+            InvalidRecipeError: on missing keys, wrong types, or invalid values.
+        """
+        _require_keys(
+            data,
+            {"company", "product", "market", "gtm_motion", "personas", "funnel_stages"},
+            "narrative",
+        )
+        personas_raw = data["personas"]
+        if not isinstance(personas_raw, list):
+            raise InvalidRecipeError(
+                f"narrative.personas must be a list, got {type(personas_raw).__name__!r}"
+            )
+        funnel_raw = data["funnel_stages"]
+        if not isinstance(funnel_raw, list):
+            raise InvalidRecipeError(
+                f"narrative.funnel_stages must be a list, got {type(funnel_raw).__name__!r}"
+            )
+
+        return cls(
+            company=CompanySpec.from_dict(data["company"]),
+            product=ProductSpec.from_dict(data["product"]),
+            market=MarketSpec.from_dict(data["market"]),
+            gtm_motion=GtmMotionSpec.from_dict(data["gtm_motion"]),
+            personas=tuple(PersonaSpec.from_dict(p) for p in personas_raw),
+            funnel_stages=tuple(FunnelStageSpec.from_dict(s) for s in funnel_raw),
+        )
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _require_keys(data: dict[str, Any], required: set[str], context: str) -> None:
+    missing = required - data.keys()
+    if missing:
+        raise InvalidRecipeError(
+            f"Narrative section '{context}' is missing required keys: {sorted(missing)}"
+        )
+
+
+def _pos_int(value: Any, name: str) -> int:
+    if isinstance(value, bool) or not isinstance(value, int):
+        raise InvalidRecipeError(f"'{name}' must be a positive int, got {type(value).__name__!r}")
+    if value <= 0:
+        raise InvalidRecipeError(f"'{name}' must be positive, got {value}")
+    return int(value)
diff --git a/tests/narrative/__init__.py b/tests/narrative/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py
new file mode 100644
index 0000000..fc1995f
--- /dev/null
+++ b/tests/narrative/test_dataset_card.py
@@ -0,0 +1,112 @@
+"""Tests for leadforge.narrative.dataset_card."""
+
+from leadforge.api.generator import Generator
+from leadforge.core.models import GenerationConfig, WorldSpec
+from leadforge.narrative.dataset_card import render_dataset_card
+
+
+def _make_world_spec(**kwargs: object) -> WorldSpec:
+    return WorldSpec(config=GenerationConfig(**kwargs))  # type: ignore[arg-type]
+
+
+# ---------------------------------------------------------------------------
+# Rendering without narrative (stub mode)
+# ---------------------------------------------------------------------------
+
+
+def test_card_returns_string() -> None:
+    spec = _make_world_spec()
+    card = render_dataset_card(spec)
+    assert isinstance(card, str)
+    assert len(card) > 0
+
+
+def test_card_contains_recipe_id() -> None:
+    spec = _make_world_spec(recipe_id="b2b_saas_procurement_v1")
+    assert "b2b_saas_procurement_v1" in render_dataset_card(spec)
+
+
+def test_card_contains_seed() -> None:
+    spec = _make_world_spec(seed=99)
+    assert "99" in render_dataset_card(spec)
+
+
+def test_card_contains_exposure_mode() -> None:
+    spec = _make_world_spec()
+    assert "student_public" in render_dataset_card(spec)
+
+
+def test_card_contains_primary_task() -> None:
+    assert "converted_within_90_days" in render_dataset_card(_make_world_spec())
+
+
+def test_card_contains_label_definition() -> None:
+    card = render_dataset_card(_make_world_spec())
+    assert "closed_won" in card
+    assert "90 days" in card
+
+
+def test_card_contains_use_cases() -> None:
+    card = render_dataset_card(_make_world_spec())
+    assert "use cases" in card.lower()
+
+
+def test_card_contains_caveats() -> None:
+    card = render_dataset_card(_make_world_spec())
+    assert "synthetic" in card.lower()
+
+
+def test_card_no_narrative_shows_stub() -> None:
+    spec = WorldSpec(config=GenerationConfig(), narrative=None)
+    assert "not available" in render_dataset_card(spec).lower()
+
+
+# ---------------------------------------------------------------------------
+# Rendering with narrative (full mode)
+# ---------------------------------------------------------------------------
+
+
+def test_card_with_narrative_contains_company_name() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42)
+    card = render_dataset_card(gen.world_spec)
+    assert "Veridian Technologies" in card
+
+
+def test_card_with_narrative_contains_product_name() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1")
+    card = render_dataset_card(gen.world_spec)
+    assert "Veridian Procure" in card
+
+
+def test_card_with_narrative_contains_geographies() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1")
+    card = render_dataset_card(gen.world_spec)
+    assert "US" in card
+
+
+def test_card_with_narrative_contains_personas() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1")
+    card = render_dataset_card(gen.world_spec)
+    assert "vp_finance" in card
+
+
+# ---------------------------------------------------------------------------
+# Generator integration
+# ---------------------------------------------------------------------------
+
+
+def test_generator_world_spec_has_narrative() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42)
+    assert gen.world_spec.narrative is not None
+
+
+def test_generator_world_spec_config_matches() -> None:
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=7)
+    assert gen.world_spec.config is gen.config
+
+
+def test_generator_world_spec_is_world_spec() -> None:
+    from leadforge.core.models import WorldSpec
+
+    gen = Generator.from_recipe("b2b_saas_procurement_v1")
+    assert isinstance(gen.world_spec, WorldSpec)
diff --git a/tests/narrative/test_spec.py b/tests/narrative/test_spec.py
new file mode 100644
index 0000000..adf16a6
--- /dev/null
+++ b/tests/narrative/test_spec.py
@@ -0,0 +1,196 @@
+"""Tests for leadforge.narrative.spec — NarrativeSpec and sub-models."""
+
+import dataclasses
+
+import pytest
+
+from leadforge.core.exceptions import InvalidRecipeError
+from leadforge.narrative.spec import (
+    CompanySpec,
+    MarketSpec,
+    NarrativeSpec,
+    ProductSpec,
+)
+
+# ---------------------------------------------------------------------------
+# Minimal valid payloads
+# ---------------------------------------------------------------------------
+
+COMPANY = {
+    "name": "Acme Corp",
+    "founded_year": 2015,
+    "hq_city": "Boston",
+    "hq_country": "US",
+    "stage": "Series A",
+    "employee_range": [50, 120],
+}
+
+PRODUCT = {
+    "name": "Acme Product",
+    "category": "AP Automation",
+    "deployment": "cloud_saas",
+    "pricing_model": "per_seat_annual",
+    "acv_range_usd": [10000, 80000],
+    "contract_terms_months": [12, 24],
+    "free_trial_available": True,
+    "demo_available": True,
+}
+
+MARKET = {
+    "icp_employee_range": [100, 1000],
+    "icp_industries": ["manufacturing", "logistics"],
+    "geographies": ["US"],
+    "avg_deal_size_usd": 30000,
+    "avg_sales_cycle_days": 40,
+}
+
+GTM = {
+    "channels": ["inbound_marketing", "sdr_outbound"],
+    "inbound_share": 0.6,
+    "outbound_share": 0.3,
+    "partner_share": 0.1,
+}
+
+PERSONA = {
+    "role": "vp_finance",
+    "title_variants": ["VP Finance", "CFO"],
+    "decision_authority": "economic_buyer",
+    "typical_involvement": "late_stage",
+}
+
+FUNNEL_STAGE = {"name": "mql", "label": "Marketing Qualified Lead"}
+
+VALID_NARRATIVE = {
+    "company": COMPANY,
+    "product": PRODUCT,
+    "market": MARKET,
+    "gtm_motion": GTM,
+    "personas": [PERSONA],
+    "funnel_stages": [FUNNEL_STAGE],
+}
+
+
+# ---------------------------------------------------------------------------
+# NarrativeSpec.from_dict — happy path
+# ---------------------------------------------------------------------------
+
+
+def test_narrative_spec_roundtrip() -> None:
+    spec = NarrativeSpec.from_dict(VALID_NARRATIVE)
+    assert spec.company.name == "Acme Corp"
+    assert spec.product.name == "Acme Product"
+    assert spec.market.avg_deal_size_usd == 30000
+    assert spec.gtm_motion.inbound_share == pytest.approx(0.6)
+    assert len(spec.personas) == 1
+    assert spec.personas[0].role == "vp_finance"
+    assert len(spec.funnel_stages) == 1
+    assert spec.funnel_stages[0].name == "mql"
+
+
+def test_narrative_spec_frozen() -> None:
+    spec = NarrativeSpec.from_dict(VALID_NARRATIVE)
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        spec.company = None  # type: ignore[misc]
+
+
+# ---------------------------------------------------------------------------
+# NarrativeSpec.from_dict — validation errors
+# ---------------------------------------------------------------------------
+
+
+def test_narrative_missing_key_raises() -> None:
+    bad = {k: v for k, v in VALID_NARRATIVE.items() if k != "company"}
+    with pytest.raises(InvalidRecipeError, match="missing required keys"):
+        NarrativeSpec.from_dict(bad)
+
+
+def test_narrative_personas_not_list_raises() -> None:
+    bad = {**VALID_NARRATIVE, "personas": "not_a_list"}
+    with pytest.raises(InvalidRecipeError, match="personas"):
+        NarrativeSpec.from_dict(bad)
+
+
+def test_narrative_funnel_not_list_raises() -> None:
+    bad = {**VALID_NARRATIVE, "funnel_stages": {"name": "mql"}}
+    with pytest.raises(InvalidRecipeError, match="funnel_stages"):
+        NarrativeSpec.from_dict(bad)
+
+
+# ---------------------------------------------------------------------------
+# CompanySpec
+# ---------------------------------------------------------------------------
+
+
+def test_company_bool_founded_year_raises() -> None:
+    bad = {**COMPANY, "founded_year": True}
+    with pytest.raises(InvalidRecipeError, match="founded_year"):
+        CompanySpec.from_dict(bad)
+
+
+def test_company_bad_employee_range_raises() -> None:
+    bad = {**COMPANY, "employee_range": [50]}  # wrong length
+    with pytest.raises(InvalidRecipeError, match="employee_range"):
+        CompanySpec.from_dict(bad)
+
+
+# ---------------------------------------------------------------------------
+# ProductSpec
+# ---------------------------------------------------------------------------
+
+
+def test_product_bad_acv_range_raises() -> None:
+    bad = {**PRODUCT, "acv_range_usd": "10000-80000"}
+    with pytest.raises(InvalidRecipeError, match="acv_range_usd"):
+        ProductSpec.from_dict(bad)
+
+
+def test_product_bad_contract_terms_raises() -> None:
+    bad = {**PRODUCT, "contract_terms_months": [12, "twenty-four"]}
+    with pytest.raises(InvalidRecipeError, match="contract_terms_months"):
+        ProductSpec.from_dict(bad)
+
+
+# ---------------------------------------------------------------------------
+# MarketSpec
+# ---------------------------------------------------------------------------
+
+
+def test_market_bool_avg_deal_size_raises() -> None:
+    bad = {**MARKET, "avg_deal_size_usd": True}
+    with pytest.raises(InvalidRecipeError, match="avg_deal_size_usd"):
+        MarketSpec.from_dict(bad)
+
+
+def test_market_zero_sales_cycle_raises() -> None:
+    bad = {**MARKET, "avg_sales_cycle_days": 0}
+    with pytest.raises(InvalidRecipeError, match="avg_sales_cycle_days"):
+        MarketSpec.from_dict(bad)
+
+
+# ---------------------------------------------------------------------------
+# Real recipe round-trip
+# ---------------------------------------------------------------------------
+
+
+def test_real_narrative_yaml_parses() -> None:
+    """The shipped narrative.yaml must parse without errors."""
+    from leadforge.api.recipes import Recipe
+    from leadforge.recipes.registry import load_recipe
+
+    recipe = Recipe.from_dict(load_recipe("b2b_saas_procurement_v1"))
+    data = recipe.load_narrative()
+    spec = NarrativeSpec.from_dict(data)
+    assert spec.company.name == "Veridian Technologies"
+    assert len(spec.personas) >= 1
+    assert len(spec.funnel_stages) >= 1
+
+
+def test_real_narrative_tuples_are_immutable() -> None:
+    from leadforge.api.recipes import Recipe
+    from leadforge.recipes.registry import load_recipe
+
+    recipe = Recipe.from_dict(load_recipe("b2b_saas_procurement_v1"))
+    spec = NarrativeSpec.from_dict(recipe.load_narrative())
+    assert isinstance(spec.personas, tuple)
+    assert isinstance(spec.funnel_stages, tuple)
+    assert isinstance(spec.market.icp_industries, tuple)

From 8c933bd13c1849adaac375625a605016fd2b0c9a Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Tue, 21 Apr 2026 09:40:43 +0300
Subject: [PATCH 2/2] fix: address Copilot review comments on Milestone 2 PR

- spec.py: _require_keys now guards against non-dict input (COPILOT-3)
- spec.py: NarrativeSpec.from_dict validates each personas/funnel_stages
  element is a dict before passing to sub-from_dict (COPILOT-3)
- spec.py: GtmMotionSpec.from_dict validates channels is a list of
  strings, rejects bools and non-numeric values for the share fields,
  and enforces the [0, 1] range on each share (COPILOT-1)
- spec.py: PersonaSpec.from_dict validates title_variants is a list of
  strings instead of silently splitting a bare string into single
  characters (COPILOT-2)
- spec.py: ProductSpec.from_dict requires free_trial_available /
  demo_available to be actual bools; rejects int/str coercion (COPILOT-6)
- spec.py: MarketSpec.from_dict validates icp_industries and geographies
  are lists of strings (COPILOT-7)
- generator.py: Generator.__init__ takes only world_spec; config
  property derives from world_spec.config (single source of truth)
  (COPILOT-4)
- dataset_card.py: stub text changed to "Narrative unavailable for this
  dataset." (COPILOT-5); test updated to match

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 leadforge/api/generator.py           |  9 ++--
 leadforge/narrative/dataset_card.py  |  2 +-
 leadforge/narrative/spec.py          | 72 +++++++++++++++++++++++++---
 tests/narrative/test_dataset_card.py |  2 +-
 4 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py
index 76acd26..0b1cfc8 100644
--- a/leadforge/api/generator.py
+++ b/leadforge/api/generator.py
@@ -27,14 +27,13 @@ class Generator:
     (``generate``) is implemented across Milestones 3–9.
     """
 
-    def __init__(self, config: GenerationConfig, world_spec: WorldSpec) -> None:
-        self._config = config
+    def __init__(self, world_spec: WorldSpec) -> None:
         self._world_spec = world_spec
-        self._rng = RNGRoot(config.seed)
+        self._rng = RNGRoot(world_spec.config.seed)
 
     @property
     def config(self) -> GenerationConfig:
-        return self._config
+        return self._world_spec.config
 
     @property
     def world_spec(self) -> WorldSpec:
@@ -105,7 +104,7 @@ def from_recipe(
         narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None
         world_spec = WorldSpec(config=config, narrative=narrative)
 
-        return cls(config, world_spec)
+        return cls(world_spec)
 
     def generate(
         self,
diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py
index 8d82b1c..aa61ffe 100644
--- a/leadforge/narrative/dataset_card.py
+++ b/leadforge/narrative/dataset_card.py
@@ -89,7 +89,7 @@ def render_dataset_card(world_spec: WorldSpec) -> str:
             )
         lines.append("")
     else:
-        lines += ["*Narrative not available for this exposure mode.*", ""]
+        lines += ["*Narrative unavailable for this dataset.*", ""]
 
     # ------------------------------------------------------------------
     # Primary task
diff --git a/leadforge/narrative/spec.py b/leadforge/narrative/spec.py
index 71815c2..d873b03 100644
--- a/leadforge/narrative/spec.py
+++ b/leadforge/narrative/spec.py
@@ -99,6 +99,11 @@ def from_dict(cls, data: dict[str, Any]) -> ProductSpec:
             raise InvalidRecipeError(
                 f"product.contract_terms_months must be a list of ints, got {terms!r}"
             )
+        for field_name in ("free_trial_available", "demo_available"):
+            if not isinstance(data[field_name], bool):
+                raise InvalidRecipeError(
+                    f"product.{field_name} must be a bool, got {type(data[field_name]).__name__!r}"
+                )
         return cls(
             name=str(data["name"]),
             category=str(data["category"]),
@@ -106,8 +111,8 @@ def from_dict(cls, data: dict[str, Any]) -> ProductSpec:
             pricing_model=str(data["pricing_model"]),
             acv_range_usd=(int(acv[0]), int(acv[1])),
             contract_terms_months=tuple(int(t) for t in terms),
-            free_trial_available=bool(data["free_trial_available"]),
-            demo_available=bool(data["demo_available"]),
+            free_trial_available=data["free_trial_available"],
+            demo_available=data["demo_available"],
         )
 
 
@@ -143,10 +148,28 @@ def from_dict(cls, data: dict[str, Any]) -> MarketSpec:
             raise InvalidRecipeError(
                 f"market.icp_employee_range must be a [min, max] int pair, got {er!r}"
             )
+        industries = data["icp_industries"]
+        if not isinstance(industries, (list, tuple)):
+            raise InvalidRecipeError(
+                f"market.icp_industries must be a list of strings, got {industries!r}"
+            )
+        if not all(isinstance(i, str) for i in industries):
+            raise InvalidRecipeError(
+                f"market.icp_industries must contain only strings, got {industries!r}"
+            )
+        geographies = data["geographies"]
+        if not isinstance(geographies, (list, tuple)):
+            raise InvalidRecipeError(
+                f"market.geographies must be a list of strings, got {geographies!r}"
+            )
+        if not all(isinstance(g, str) for g in geographies):
+            raise InvalidRecipeError(
+                f"market.geographies must contain only strings, got {geographies!r}"
+            )
         return cls(
             icp_employee_range=(int(er[0]), int(er[1])),
-            icp_industries=tuple(str(i) for i in data["icp_industries"]),
-            geographies=tuple(str(g) for g in data["geographies"]),
+            icp_industries=tuple(industries),
+            geographies=tuple(geographies),
             avg_deal_size_usd=_pos_int(data["avg_deal_size_usd"], "market.avg_deal_size_usd"),
             avg_sales_cycle_days=_pos_int(
                 data["avg_sales_cycle_days"], "market.avg_sales_cycle_days"
@@ -170,8 +193,21 @@ def from_dict(cls, data: dict[str, Any]) -> GtmMotionSpec:
             {"channels", "inbound_share", "outbound_share", "partner_share"},
             "gtm_motion",
         )
+        channels = data["channels"]
+        if not isinstance(channels, (list, tuple)) or not all(isinstance(c, str) for c in channels):
+            raise InvalidRecipeError(
+                f"gtm_motion.channels must be a list of strings, got {channels!r}"
+            )
+        for share_name in ("inbound_share", "outbound_share", "partner_share"):
+            v = data[share_name]
+            if isinstance(v, bool) or not isinstance(v, (int, float)):
+                raise InvalidRecipeError(
+                    f"gtm_motion.{share_name} must be a float in [0, 1], got {type(v).__name__!r}"
+                )
+            if not (0.0 <= float(v) <= 1.0):
+                raise InvalidRecipeError(f"gtm_motion.{share_name} must be in [0, 1], got {v!r}")
         return cls(
-            channels=tuple(str(c) for c in data["channels"]),
+            channels=tuple(channels),
             inbound_share=float(data["inbound_share"]),
             outbound_share=float(data["outbound_share"]),
             partner_share=float(data["partner_share"]),
@@ -194,9 +230,17 @@ def from_dict(cls, data: dict[str, Any]) -> PersonaSpec:
             {"role", "title_variants", "decision_authority", "typical_involvement"},
             "personas[]",
         )
+        title_variants = data["title_variants"]
+        if not (
+            isinstance(title_variants, (list, tuple))
+            and all(isinstance(t, str) for t in title_variants)
+        ):
+            raise InvalidRecipeError(
+                f"personas[].title_variants must be a list of strings, got {title_variants!r}"
+            )
         return cls(
             role=str(data["role"]),
-            title_variants=tuple(str(t) for t in data["title_variants"]),
+            title_variants=tuple(title_variants),
             decision_authority=str(data["decision_authority"]),
             typical_involvement=str(data["typical_involvement"]),
         )
@@ -248,11 +292,21 @@ def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec:
             raise InvalidRecipeError(
                 f"narrative.personas must be a list, got {type(personas_raw).__name__!r}"
             )
+        for i, item in enumerate(personas_raw):
+            if not isinstance(item, dict):
+                raise InvalidRecipeError(
+                    f"narrative.personas[{i}] must be a mapping, got {type(item).__name__!r}"
+                )
         funnel_raw = data["funnel_stages"]
         if not isinstance(funnel_raw, list):
             raise InvalidRecipeError(
                 f"narrative.funnel_stages must be a list, got {type(funnel_raw).__name__!r}"
             )
+        for i, item in enumerate(funnel_raw):
+            if not isinstance(item, dict):
+                raise InvalidRecipeError(
+                    f"narrative.funnel_stages[{i}] must be a mapping, got {type(item).__name__!r}"
+                )
 
         return cls(
             company=CompanySpec.from_dict(data["company"]),
@@ -269,7 +323,11 @@ def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec:
 # ---------------------------------------------------------------------------
 
 
-def _require_keys(data: dict[str, Any], required: set[str], context: str) -> None:
+def _require_keys(data: Any, required: set[str], context: str) -> None:
+    if not isinstance(data, dict):
+        raise InvalidRecipeError(
+            f"Narrative section '{context}' must be a mapping, got {type(data).__name__!r}"
+        )
     missing = required - data.keys()
     if missing:
         raise InvalidRecipeError(
diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py
index fc1995f..104e46e 100644
--- a/tests/narrative/test_dataset_card.py
+++ b/tests/narrative/test_dataset_card.py
@@ -58,7 +58,7 @@ def test_card_contains_caveats() -> None:
 
 def test_card_no_narrative_shows_stub() -> None:
     spec = WorldSpec(config=GenerationConfig(), narrative=None)
-    assert "not available" in render_dataset_card(spec).lower()
+    assert "unavailable" in render_dataset_card(spec).lower()
 
 
 # ---------------------------------------------------------------------------