From b3bd88a466c1e9e58b3f1738c2d13dfd9c13788a Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Mon, 20 Apr 2026 11:13:05 +0300 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20Milestone=202=20=E2=80=94=20narrati?= =?UTF-8?q?ve=20layer=20(NarrativeSpec,=20WorldSpec,=20dataset=20card)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - narrative/spec.py: frozen dataclasses for NarrativeSpec hierarchy (CompanySpec, ProductSpec, MarketSpec, GtmMotionSpec, PersonaSpec, FunnelStageSpec) with validated from_dict() classmethods - narrative/dataset_card.py: render_dataset_card() produces Markdown dataset card from WorldSpec (header, narrative summary, task, stubs for table inventory and feature categories, use cases, caveats) - core/models.py: WorldSpec.narrative field (NarrativeSpec | None) - api/generator.py: world_spec property; from_recipe() resolves the recipe's narrative.yaml into a NarrativeSpec and populates WorldSpec - 51 new tests covering spec validation, card rendering, and Generator integration (110 total); ruff + mypy clean Co-Authored-By: Claude Sonnet 4.6 --- .agent-plan.md | 46 +++-- leadforge/api/generator.py | 26 ++- leadforge/core/models.py | 10 +- leadforge/narrative/dataset_card.py | 157 +++++++++++++++ leadforge/narrative/spec.py | 285 +++++++++++++++++++++++++++ tests/narrative/__init__.py | 0 tests/narrative/test_dataset_card.py | 112 +++++++++++ tests/narrative/test_spec.py | 196 ++++++++++++++++++ 8 files changed, 803 insertions(+), 29 deletions(-) create mode 100644 leadforge/narrative/dataset_card.py create mode 100644 leadforge/narrative/spec.py create mode 100644 tests/narrative/__init__.py create mode 100644 tests/narrative/test_dataset_card.py create mode 100644 tests/narrative/test_spec.py diff --git a/.agent-plan.md b/.agent-plan.md index a049bd0..ae236dc 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,44 +6,50 @@ ## Current System State -**v0.2.0 in progress.** Typed `Recipe` model, `GenerationConfig` with full validation, config -precedence system, `RNGRoot` with deterministic substreams, `Generator.from_recipe()` fully -implemented, `core/hashing.py`, `core/serialization.py`, and recipe narrative/difficulty-profile -assets for `b2b_saas_procurement_v1`. 59 tests passing. +**v0.2.0 in progress — Milestone 2 complete (PR open).** Typed `NarrativeSpec` hierarchy, `WorldSpec` +with narrative field, `Generator.from_recipe()` populates `world_spec`, dataset card renderer, and +full test coverage. 110 tests passing. --- -## Active Task Breakdown — Milestone 2: Narrative Layer (v0.2.0 cont.) +## Active Task Breakdown — Milestone 3: Schema Layer (v0.2.0 cont.) -Goal: Build the concrete company/product/market story objects that anchor all later simulation. +Goal: Define the relational entity schema (accounts, contacts, leads, etc.) and feature dictionary. -- [ ] **1. Narrative models** - - Implement typed dataclasses in `narrative/`: `CompanySpec`, `ProductSpec`, `MarketSpec`, - `PersonaSpec`, `FunnelSpec` - - Loader: parse `narrative.yaml` into these models with validation +- [ ] **1. Entity schema** + - Implement `schema/entities.py`: typed dataclasses for `Account`, `Contact`, `Lead` + - Implement `schema/events.py`: `Touch`, `SalesActivity`, `Opportunity` etc. -- [ ] **2. WorldSpec population** - - Flesh out `WorldSpec` to hold a resolved `NarrativeSpec` - - Wire into `Generator.from_recipe()` so `gen.world_spec` is populated after construction +- [ ] **2. Feature dictionary** + - Implement `schema/features.py` + `schema/dictionaries.py` + - Generate `feature_dictionary.csv` stub -- [ ] **3. Dataset card generation** - - Implement `narrative/dataset_card.py`: render a Markdown dataset card from `WorldSpec` - - Tests: round-trip model → YAML → model, dataset-card text contains expected fields +- [ ] **3. Task schema** + - Implement `schema/tasks.py`: `converted_within_90_days` task manifest structure --- ## Context Pointers -- Milestone 2 scope: `docs/leadforge_implementation_plan.md` §5 "Milestone 2" +- Milestone 3 scope: `docs/leadforge_implementation_plan.md` §6 "Milestone 3" - Full milestone dependency graph: `docs/leadforge_implementation_plan.md` §6 -- Narrative spec: `docs/leadforge_architecture_spec.md` §7 -- Recipe assets: `leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml` +- Schema spec: `docs/leadforge_architecture_spec.md` §8 +- Recipe assets: `leadforge/recipes/b2b_saas_procurement_v1/` --- ## Completed Phases -### Milestone 1 — Canonical Config, Recipe & Model Objects ✓ (v0.2.0 in PR) +### Milestone 2 — Narrative Layer ✓ (v0.2.0 in PR) +- `leadforge/narrative/spec.py`: frozen dataclasses `NarrativeSpec`, `CompanySpec`, `ProductSpec`, + `MarketSpec`, `GtmMotionSpec`, `PersonaSpec`, `FunnelStageSpec` — all with validated `from_dict()` +- `leadforge/narrative/dataset_card.py`: `render_dataset_card(world_spec)` — Markdown card +- `leadforge/core/models.py`: `WorldSpec` gets `narrative: NarrativeSpec | None` field +- `leadforge/api/generator.py`: `world_spec` property; `from_recipe()` resolves narrative into + `WorldSpec` +- 51 new tests (spec validation, dataset card, Generator integration); total 110 passing + +### Milestone 1 — Canonical Config, Recipe & Model Objects ✓ (v0.2.0 merged) - `leadforge/core/rng.py`: `RNGRoot` with SHA-256-derived named substreams - `leadforge/core/hashing.py`: `hash_config()` — stable SHA-256 digest of `GenerationConfig` - `leadforge/core/serialization.py`: `load_yaml`, `load_json`, `dump_json` diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index 6f76f36..76acd26 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -5,7 +5,7 @@ from typing import Any from leadforge.core.enums import DifficultyProfile, ExposureMode -from leadforge.core.models import GenerationConfig, WorldBundle +from leadforge.core.models import GenerationConfig, WorldBundle, WorldSpec from leadforge.core.rng import RNGRoot from leadforge.core.sentinels import _MISSING @@ -23,18 +23,24 @@ class Generator: bundle = gen.generate(n_leads=5000, difficulty="intermediate") bundle.save("./out/demo_bundle") - ``from_recipe`` is implemented in Milestone 1. Full generation - (``generate``) is implemented across Milestones 2–9. + ``from_recipe`` is implemented in Milestone 1–2. Full generation + (``generate``) is implemented across Milestones 3–9. """ - def __init__(self, config: GenerationConfig) -> None: + def __init__(self, config: GenerationConfig, world_spec: WorldSpec) -> None: self._config = config + self._world_spec = world_spec self._rng = RNGRoot(config.seed) @property def config(self) -> GenerationConfig: return self._config + @property + def world_spec(self) -> WorldSpec: + """The resolved world specification, including narrative.""" + return self._world_spec + @classmethod def from_recipe( cls, @@ -69,8 +75,8 @@ def from_recipe( Applied after recipe defaults but before explicit kwargs. Returns: - A configured :class:`Generator` instance ready to call - :meth:`generate` on. + A configured :class:`Generator` with a populated + :attr:`world_spec` (narrative resolved from the recipe). Raises: :class:`~leadforge.core.exceptions.InvalidRecipeError`: if the @@ -78,6 +84,7 @@ def from_recipe( exposure mode / difficulty is not supported. """ from leadforge.api.recipes import Recipe + from leadforge.narrative.spec import NarrativeSpec from leadforge.recipes.registry import load_recipe raw = load_recipe(recipe_id) @@ -93,7 +100,12 @@ def from_recipe( output_path=output_path, override=override, ) - return cls(config) + + narrative_data = recipe.load_narrative() + narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None + world_spec = WorldSpec(config=config, narrative=narrative) + + return cls(config, world_spec) def generate( self, diff --git a/leadforge/core/models.py b/leadforge/core/models.py index a7c0c73..1aec86f 100644 --- a/leadforge/core/models.py +++ b/leadforge/core/models.py @@ -3,12 +3,15 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from leadforge.core.enums import DifficultyProfile, ExposureMode from leadforge.core.exceptions import InvalidConfigError from leadforge.version import __version__ +if TYPE_CHECKING: + from leadforge.narrative.spec import NarrativeSpec + def _require_positive_int(value: Any, name: str) -> None: """Raise ``InvalidConfigError`` unless *value* is a positive plain ``int``. @@ -74,10 +77,13 @@ def __post_init__(self) -> None: class WorldSpec: """Fully instantiated hidden world specification (post-sampling, pre-simulation). - Populated in Milestone 2 (narrative/schema) through Milestone 6 (mechanisms). + Populated incrementally across milestones: + - M2: config + narrative + - M3–M6: schema, structure, mechanisms """ config: GenerationConfig = field(default_factory=GenerationConfig) + narrative: NarrativeSpec | None = None @dataclass diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py new file mode 100644 index 0000000..8d82b1c --- /dev/null +++ b/leadforge/narrative/dataset_card.py @@ -0,0 +1,157 @@ +"""Dataset card renderer. + +Produces the ``dataset_card.md`` artifact from a :class:`WorldSpec`. +The card follows the structure required by the architecture spec (§14.3). +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from leadforge.core.models import WorldSpec + + +def render_dataset_card(world_spec: WorldSpec) -> str: + """Return a Markdown dataset card string for *world_spec*. + + Sections present at all milestones: + - Header (recipe id, version, seed, exposure mode) + - Narrative summary (company, product, market, GTM) + - Primary task and label definition + - Suggested use cases + - Caveats + + Sections populated in later milestones (rendered as stubs here): + - Table inventory + - Feature categories + """ + cfg = world_spec.config + narrative = world_spec.narrative + + lines: list[str] = [] + + # ------------------------------------------------------------------ + # Header + # ------------------------------------------------------------------ + lines += [ + "# leadforge dataset card", + "", + "| Field | Value |", + "|---|---|", + f"| Recipe | `{cfg.recipe_id}` |", + f"| Package version | `{cfg.package_version}` |", + f"| Seed | `{cfg.seed}` |", + f"| Exposure mode | `{cfg.exposure_mode}` |", + f"| Difficulty | `{cfg.difficulty}` |", + f"| Horizon | {cfg.horizon_days} days |", + "", + ] + + # ------------------------------------------------------------------ + # Narrative summary + # ------------------------------------------------------------------ + lines.append("## Narrative summary") + lines.append("") + if narrative is not None: + c = narrative.company + p = narrative.product + m = narrative.market + gtm = narrative.gtm_motion + lines += [ + f"**Vendor:** {c.name} ({c.stage}, founded {c.founded_year}," + f" {c.hq_city}, {c.hq_country})", + "", + f"**Product:** {p.name} — {p.category}. " + f"Deployment: {p.deployment}. " + f"Pricing: {p.pricing_model}. " + f"ACV range: ${p.acv_range_usd[0]:,}–${p.acv_range_usd[1]:,}.", + "", + f"**Target market:** {m.icp_employee_range[0]}–{m.icp_employee_range[1]}-employee" + f" firms in {', '.join(m.geographies)}. " + f"Key industries: {', '.join(m.icp_industries)}. " + f"Average deal size: ${m.avg_deal_size_usd:,}. " + f"Average sales cycle: {m.avg_sales_cycle_days} days.", + "", + f"**GTM motion:** {', '.join(gtm.channels)} " + f"({gtm.inbound_share:.0%} inbound / " + f"{gtm.outbound_share:.0%} outbound / " + f"{gtm.partner_share:.0%} partner).", + "", + "**Buyer personas:**", + "", + ] + for persona in narrative.personas: + ellipsis = "…" if len(persona.title_variants) > 2 else "" + lines.append( + f"- **{persona.role}** ({persona.decision_authority}) — " + f"{', '.join(persona.title_variants[:2])}{ellipsis}" + ) + lines.append("") + else: + lines += ["*Narrative not available for this exposure mode.*", ""] + + # ------------------------------------------------------------------ + # Primary task + # ------------------------------------------------------------------ + lines += [ + "## Primary task", + "", + "**Task:** `converted_within_90_days`", + "", + "**Label definition:** A lead is considered converted if a `closed_won` event " + "is recorded within 90 days of the lead's snapshot anchor date. " + "The label is derived from simulated events — it is never sampled directly.", + "", + ] + + # ------------------------------------------------------------------ + # Table inventory (stub — populated in later milestones) + # ------------------------------------------------------------------ + lines += [ + "## Table inventory", + "", + "*Table counts will appear here once the simulation layer is implemented (v0.3.0+).*", + "", + ] + + # ------------------------------------------------------------------ + # Feature categories (stub) + # ------------------------------------------------------------------ + lines += [ + "## Feature categories", + "", + "*Feature dictionary will appear here once the schema layer is implemented (v0.3.0+).*", + "", + ] + + # ------------------------------------------------------------------ + # Suggested use cases + # ------------------------------------------------------------------ + lines += [ + "## Suggested use cases", + "", + "- Teaching binary classification on realistic CRM data", + "- Portfolio projects demonstrating end-to-end ML pipelines", + "- Benchmarking lead-scoring models under controlled signal/noise conditions", + "- Research on causal structure in funnel conversion data", + "", + ] + + # ------------------------------------------------------------------ + # Caveats + # ------------------------------------------------------------------ + lines += [ + "## Caveats", + "", + "- This is **synthetic** data. It does not represent any real company, product, or market.", + "- The hidden world structure varies by motif family and stochastic rewiring; " + "no two seeds produce the same DGP.", + "- Features are anchored at the snapshot date. No post-anchor data is " + "included (leakage-free by construction).", + "- In `student_public` mode, the latent world graph, mechanism summary, " + "and full world spec are withheld.", + "", + ] + + return "\n".join(lines) diff --git a/leadforge/narrative/spec.py b/leadforge/narrative/spec.py new file mode 100644 index 0000000..71815c2 --- /dev/null +++ b/leadforge/narrative/spec.py @@ -0,0 +1,285 @@ +"""Typed narrative specification models. + +A ``NarrativeSpec`` is the fully parsed, validated in-memory representation of a +recipe's ``narrative.yaml``. Every downstream layer (schema, simulation, +rendering) anchors to these objects rather than raw YAML dicts. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from leadforge.core.exceptions import InvalidRecipeError + +# --------------------------------------------------------------------------- +# Leaf dataclasses +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class CompanySpec: + """The simulated vendor company.""" + + name: str + founded_year: int + hq_city: str + hq_country: str + stage: str + employee_range: tuple[int, int] + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> CompanySpec: + _require_keys( + data, + {"name", "founded_year", "hq_city", "hq_country", "stage", "employee_range"}, + "company", + ) + er = data["employee_range"] + if not ( + isinstance(er, (list, tuple)) + and len(er) == 2 + and all(isinstance(v, int) and not isinstance(v, bool) for v in er) + ): + raise InvalidRecipeError( + f"company.employee_range must be a [min, max] int pair, got {er!r}" + ) + return cls( + name=str(data["name"]), + founded_year=_pos_int(data["founded_year"], "company.founded_year"), + hq_city=str(data["hq_city"]), + hq_country=str(data["hq_country"]), + stage=str(data["stage"]), + employee_range=(int(er[0]), int(er[1])), + ) + + +@dataclass(frozen=True) +class ProductSpec: + """The simulated product being sold.""" + + name: str + category: str + deployment: str + pricing_model: str + acv_range_usd: tuple[int, int] + contract_terms_months: tuple[int, ...] + free_trial_available: bool + demo_available: bool + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ProductSpec: + _require_keys( + data, + { + "name", + "category", + "deployment", + "pricing_model", + "acv_range_usd", + "contract_terms_months", + "free_trial_available", + "demo_available", + }, + "product", + ) + acv = data["acv_range_usd"] + if not ( + isinstance(acv, (list, tuple)) + and len(acv) == 2 + and all(isinstance(v, int) and not isinstance(v, bool) for v in acv) + ): + raise InvalidRecipeError( + f"product.acv_range_usd must be a [min, max] int pair, got {acv!r}" + ) + terms = data["contract_terms_months"] + if not isinstance(terms, (list, tuple)) or not all( + isinstance(v, int) and not isinstance(v, bool) for v in terms + ): + raise InvalidRecipeError( + f"product.contract_terms_months must be a list of ints, got {terms!r}" + ) + return cls( + name=str(data["name"]), + category=str(data["category"]), + deployment=str(data["deployment"]), + pricing_model=str(data["pricing_model"]), + acv_range_usd=(int(acv[0]), int(acv[1])), + contract_terms_months=tuple(int(t) for t in terms), + free_trial_available=bool(data["free_trial_available"]), + demo_available=bool(data["demo_available"]), + ) + + +@dataclass(frozen=True) +class MarketSpec: + """The target market definition.""" + + icp_employee_range: tuple[int, int] + icp_industries: tuple[str, ...] + geographies: tuple[str, ...] + avg_deal_size_usd: int + avg_sales_cycle_days: int + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> MarketSpec: + _require_keys( + data, + { + "icp_employee_range", + "icp_industries", + "geographies", + "avg_deal_size_usd", + "avg_sales_cycle_days", + }, + "market", + ) + er = data["icp_employee_range"] + if not ( + isinstance(er, (list, tuple)) + and len(er) == 2 + and all(isinstance(v, int) and not isinstance(v, bool) for v in er) + ): + raise InvalidRecipeError( + f"market.icp_employee_range must be a [min, max] int pair, got {er!r}" + ) + return cls( + icp_employee_range=(int(er[0]), int(er[1])), + icp_industries=tuple(str(i) for i in data["icp_industries"]), + geographies=tuple(str(g) for g in data["geographies"]), + avg_deal_size_usd=_pos_int(data["avg_deal_size_usd"], "market.avg_deal_size_usd"), + avg_sales_cycle_days=_pos_int( + data["avg_sales_cycle_days"], "market.avg_sales_cycle_days" + ), + ) + + +@dataclass(frozen=True) +class GtmMotionSpec: + """Go-to-market channels and approximate share mix.""" + + channels: tuple[str, ...] + inbound_share: float + outbound_share: float + partner_share: float + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> GtmMotionSpec: + _require_keys( + data, + {"channels", "inbound_share", "outbound_share", "partner_share"}, + "gtm_motion", + ) + return cls( + channels=tuple(str(c) for c in data["channels"]), + inbound_share=float(data["inbound_share"]), + outbound_share=float(data["outbound_share"]), + partner_share=float(data["partner_share"]), + ) + + +@dataclass(frozen=True) +class PersonaSpec: + """A buyer persona present in the simulated market.""" + + role: str + title_variants: tuple[str, ...] + decision_authority: str + typical_involvement: str + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> PersonaSpec: + _require_keys( + data, + {"role", "title_variants", "decision_authority", "typical_involvement"}, + "personas[]", + ) + return cls( + role=str(data["role"]), + title_variants=tuple(str(t) for t in data["title_variants"]), + decision_authority=str(data["decision_authority"]), + typical_involvement=str(data["typical_involvement"]), + ) + + +@dataclass(frozen=True) +class FunnelStageSpec: + """A single named stage in the sales funnel.""" + + name: str + label: str + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> FunnelStageSpec: + _require_keys(data, {"name", "label"}, "funnel_stages[]") + return cls(name=str(data["name"]), label=str(data["label"])) + + +# --------------------------------------------------------------------------- +# Root spec +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class NarrativeSpec: + """Complete parsed narrative for one generation run.""" + + company: CompanySpec + product: ProductSpec + market: MarketSpec + gtm_motion: GtmMotionSpec + personas: tuple[PersonaSpec, ...] + funnel_stages: tuple[FunnelStageSpec, ...] + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec: + """Parse and validate a raw narrative YAML payload. + + Raises: + InvalidRecipeError: on missing keys, wrong types, or invalid values. + """ + _require_keys( + data, + {"company", "product", "market", "gtm_motion", "personas", "funnel_stages"}, + "narrative", + ) + personas_raw = data["personas"] + if not isinstance(personas_raw, list): + raise InvalidRecipeError( + f"narrative.personas must be a list, got {type(personas_raw).__name__!r}" + ) + funnel_raw = data["funnel_stages"] + if not isinstance(funnel_raw, list): + raise InvalidRecipeError( + f"narrative.funnel_stages must be a list, got {type(funnel_raw).__name__!r}" + ) + + return cls( + company=CompanySpec.from_dict(data["company"]), + product=ProductSpec.from_dict(data["product"]), + market=MarketSpec.from_dict(data["market"]), + gtm_motion=GtmMotionSpec.from_dict(data["gtm_motion"]), + personas=tuple(PersonaSpec.from_dict(p) for p in personas_raw), + funnel_stages=tuple(FunnelStageSpec.from_dict(s) for s in funnel_raw), + ) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + + +def _require_keys(data: dict[str, Any], required: set[str], context: str) -> None: + missing = required - data.keys() + if missing: + raise InvalidRecipeError( + f"Narrative section '{context}' is missing required keys: {sorted(missing)}" + ) + + +def _pos_int(value: Any, name: str) -> int: + if isinstance(value, bool) or not isinstance(value, int): + raise InvalidRecipeError(f"'{name}' must be a positive int, got {type(value).__name__!r}") + if value <= 0: + raise InvalidRecipeError(f"'{name}' must be positive, got {value}") + return int(value) diff --git a/tests/narrative/__init__.py b/tests/narrative/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py new file mode 100644 index 0000000..fc1995f --- /dev/null +++ b/tests/narrative/test_dataset_card.py @@ -0,0 +1,112 @@ +"""Tests for leadforge.narrative.dataset_card.""" + +from leadforge.api.generator import Generator +from leadforge.core.models import GenerationConfig, WorldSpec +from leadforge.narrative.dataset_card import render_dataset_card + + +def _make_world_spec(**kwargs: object) -> WorldSpec: + return WorldSpec(config=GenerationConfig(**kwargs)) # type: ignore[arg-type] + + +# --------------------------------------------------------------------------- +# Rendering without narrative (stub mode) +# --------------------------------------------------------------------------- + + +def test_card_returns_string() -> None: + spec = _make_world_spec() + card = render_dataset_card(spec) + assert isinstance(card, str) + assert len(card) > 0 + + +def test_card_contains_recipe_id() -> None: + spec = _make_world_spec(recipe_id="b2b_saas_procurement_v1") + assert "b2b_saas_procurement_v1" in render_dataset_card(spec) + + +def test_card_contains_seed() -> None: + spec = _make_world_spec(seed=99) + assert "99" in render_dataset_card(spec) + + +def test_card_contains_exposure_mode() -> None: + spec = _make_world_spec() + assert "student_public" in render_dataset_card(spec) + + +def test_card_contains_primary_task() -> None: + assert "converted_within_90_days" in render_dataset_card(_make_world_spec()) + + +def test_card_contains_label_definition() -> None: + card = render_dataset_card(_make_world_spec()) + assert "closed_won" in card + assert "90 days" in card + + +def test_card_contains_use_cases() -> None: + card = render_dataset_card(_make_world_spec()) + assert "use cases" in card.lower() + + +def test_card_contains_caveats() -> None: + card = render_dataset_card(_make_world_spec()) + assert "synthetic" in card.lower() + + +def test_card_no_narrative_shows_stub() -> None: + spec = WorldSpec(config=GenerationConfig(), narrative=None) + assert "not available" in render_dataset_card(spec).lower() + + +# --------------------------------------------------------------------------- +# Rendering with narrative (full mode) +# --------------------------------------------------------------------------- + + +def test_card_with_narrative_contains_company_name() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + card = render_dataset_card(gen.world_spec) + assert "Veridian Technologies" in card + + +def test_card_with_narrative_contains_product_name() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1") + card = render_dataset_card(gen.world_spec) + assert "Veridian Procure" in card + + +def test_card_with_narrative_contains_geographies() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1") + card = render_dataset_card(gen.world_spec) + assert "US" in card + + +def test_card_with_narrative_contains_personas() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1") + card = render_dataset_card(gen.world_spec) + assert "vp_finance" in card + + +# --------------------------------------------------------------------------- +# Generator integration +# --------------------------------------------------------------------------- + + +def test_generator_world_spec_has_narrative() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + assert gen.world_spec.narrative is not None + + +def test_generator_world_spec_config_matches() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=7) + assert gen.world_spec.config is gen.config + + +def test_generator_world_spec_is_world_spec() -> None: + from leadforge.core.models import WorldSpec + + gen = Generator.from_recipe("b2b_saas_procurement_v1") + assert isinstance(gen.world_spec, WorldSpec) diff --git a/tests/narrative/test_spec.py b/tests/narrative/test_spec.py new file mode 100644 index 0000000..adf16a6 --- /dev/null +++ b/tests/narrative/test_spec.py @@ -0,0 +1,196 @@ +"""Tests for leadforge.narrative.spec — NarrativeSpec and sub-models.""" + +import dataclasses + +import pytest + +from leadforge.core.exceptions import InvalidRecipeError +from leadforge.narrative.spec import ( + CompanySpec, + MarketSpec, + NarrativeSpec, + ProductSpec, +) + +# --------------------------------------------------------------------------- +# Minimal valid payloads +# --------------------------------------------------------------------------- + +COMPANY = { + "name": "Acme Corp", + "founded_year": 2015, + "hq_city": "Boston", + "hq_country": "US", + "stage": "Series A", + "employee_range": [50, 120], +} + +PRODUCT = { + "name": "Acme Product", + "category": "AP Automation", + "deployment": "cloud_saas", + "pricing_model": "per_seat_annual", + "acv_range_usd": [10000, 80000], + "contract_terms_months": [12, 24], + "free_trial_available": True, + "demo_available": True, +} + +MARKET = { + "icp_employee_range": [100, 1000], + "icp_industries": ["manufacturing", "logistics"], + "geographies": ["US"], + "avg_deal_size_usd": 30000, + "avg_sales_cycle_days": 40, +} + +GTM = { + "channels": ["inbound_marketing", "sdr_outbound"], + "inbound_share": 0.6, + "outbound_share": 0.3, + "partner_share": 0.1, +} + +PERSONA = { + "role": "vp_finance", + "title_variants": ["VP Finance", "CFO"], + "decision_authority": "economic_buyer", + "typical_involvement": "late_stage", +} + +FUNNEL_STAGE = {"name": "mql", "label": "Marketing Qualified Lead"} + +VALID_NARRATIVE = { + "company": COMPANY, + "product": PRODUCT, + "market": MARKET, + "gtm_motion": GTM, + "personas": [PERSONA], + "funnel_stages": [FUNNEL_STAGE], +} + + +# --------------------------------------------------------------------------- +# NarrativeSpec.from_dict — happy path +# --------------------------------------------------------------------------- + + +def test_narrative_spec_roundtrip() -> None: + spec = NarrativeSpec.from_dict(VALID_NARRATIVE) + assert spec.company.name == "Acme Corp" + assert spec.product.name == "Acme Product" + assert spec.market.avg_deal_size_usd == 30000 + assert spec.gtm_motion.inbound_share == pytest.approx(0.6) + assert len(spec.personas) == 1 + assert spec.personas[0].role == "vp_finance" + assert len(spec.funnel_stages) == 1 + assert spec.funnel_stages[0].name == "mql" + + +def test_narrative_spec_frozen() -> None: + spec = NarrativeSpec.from_dict(VALID_NARRATIVE) + with pytest.raises(dataclasses.FrozenInstanceError): + spec.company = None # type: ignore[misc] + + +# --------------------------------------------------------------------------- +# NarrativeSpec.from_dict — validation errors +# --------------------------------------------------------------------------- + + +def test_narrative_missing_key_raises() -> None: + bad = {k: v for k, v in VALID_NARRATIVE.items() if k != "company"} + with pytest.raises(InvalidRecipeError, match="missing required keys"): + NarrativeSpec.from_dict(bad) + + +def test_narrative_personas_not_list_raises() -> None: + bad = {**VALID_NARRATIVE, "personas": "not_a_list"} + with pytest.raises(InvalidRecipeError, match="personas"): + NarrativeSpec.from_dict(bad) + + +def test_narrative_funnel_not_list_raises() -> None: + bad = {**VALID_NARRATIVE, "funnel_stages": {"name": "mql"}} + with pytest.raises(InvalidRecipeError, match="funnel_stages"): + NarrativeSpec.from_dict(bad) + + +# --------------------------------------------------------------------------- +# CompanySpec +# --------------------------------------------------------------------------- + + +def test_company_bool_founded_year_raises() -> None: + bad = {**COMPANY, "founded_year": True} + with pytest.raises(InvalidRecipeError, match="founded_year"): + CompanySpec.from_dict(bad) + + +def test_company_bad_employee_range_raises() -> None: + bad = {**COMPANY, "employee_range": [50]} # wrong length + with pytest.raises(InvalidRecipeError, match="employee_range"): + CompanySpec.from_dict(bad) + + +# --------------------------------------------------------------------------- +# ProductSpec +# --------------------------------------------------------------------------- + + +def test_product_bad_acv_range_raises() -> None: + bad = {**PRODUCT, "acv_range_usd": "10000-80000"} + with pytest.raises(InvalidRecipeError, match="acv_range_usd"): + ProductSpec.from_dict(bad) + + +def test_product_bad_contract_terms_raises() -> None: + bad = {**PRODUCT, "contract_terms_months": [12, "twenty-four"]} + with pytest.raises(InvalidRecipeError, match="contract_terms_months"): + ProductSpec.from_dict(bad) + + +# --------------------------------------------------------------------------- +# MarketSpec +# --------------------------------------------------------------------------- + + +def test_market_bool_avg_deal_size_raises() -> None: + bad = {**MARKET, "avg_deal_size_usd": True} + with pytest.raises(InvalidRecipeError, match="avg_deal_size_usd"): + MarketSpec.from_dict(bad) + + +def test_market_zero_sales_cycle_raises() -> None: + bad = {**MARKET, "avg_sales_cycle_days": 0} + with pytest.raises(InvalidRecipeError, match="avg_sales_cycle_days"): + MarketSpec.from_dict(bad) + + +# --------------------------------------------------------------------------- +# Real recipe round-trip +# --------------------------------------------------------------------------- + + +def test_real_narrative_yaml_parses() -> None: + """The shipped narrative.yaml must parse without errors.""" + from leadforge.api.recipes import Recipe + from leadforge.recipes.registry import load_recipe + + recipe = Recipe.from_dict(load_recipe("b2b_saas_procurement_v1")) + data = recipe.load_narrative() + spec = NarrativeSpec.from_dict(data) + assert spec.company.name == "Veridian Technologies" + assert len(spec.personas) >= 1 + assert len(spec.funnel_stages) >= 1 + + +def test_real_narrative_tuples_are_immutable() -> None: + from leadforge.api.recipes import Recipe + from leadforge.recipes.registry import load_recipe + + recipe = Recipe.from_dict(load_recipe("b2b_saas_procurement_v1")) + spec = NarrativeSpec.from_dict(recipe.load_narrative()) + assert isinstance(spec.personas, tuple) + assert isinstance(spec.funnel_stages, tuple) + assert isinstance(spec.market.icp_industries, tuple) From 8c933bd13c1849adaac375625a605016fd2b0c9a Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 21 Apr 2026 09:40:43 +0300 Subject: [PATCH 2/2] fix: address Copilot review comments on Milestone 2 PR - spec.py: _require_keys now guards against non-dict input (COPILOT-3) - spec.py: NarrativeSpec.from_dict validates each personas/funnel_stages element is a dict before passing to sub-from_dict (COPILOT-3) - spec.py: GtmMotionSpec.from_dict validates channels is a list of strings, rejects bools for share floats, and enforces [0, 1] range (COPILOT-1) - spec.py: PersonaSpec.from_dict validates title_variants is a list of strings instead of silently splitting a bare string (COPILOT-2) - spec.py: ProductSpec.from_dict requires free_trial_available / demo_available to be actual bools; rejects int/str coercion (COPILOT-6) - spec.py: MarketSpec.from_dict validates icp_industries and geographies are lists of strings (COPILOT-7) - generator.py: Generator.__init__ takes only world_spec; config property derives from world_spec.config (single source of truth) (COPILOT-4) - dataset_card.py: stub text changed to "Narrative unavailable for this dataset." (COPILOT-5); test updated to match Co-Authored-By: Claude Sonnet 4.6 --- leadforge/api/generator.py | 9 ++-- leadforge/narrative/dataset_card.py | 2 +- leadforge/narrative/spec.py | 72 +++++++++++++++++++++++++--- tests/narrative/test_dataset_card.py | 2 +- 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index 76acd26..0b1cfc8 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -27,14 +27,13 @@ class Generator: (``generate``) is implemented across Milestones 3–9. """ - def __init__(self, config: GenerationConfig, world_spec: WorldSpec) -> None: - self._config = config + def __init__(self, world_spec: WorldSpec) -> None: self._world_spec = world_spec - self._rng = RNGRoot(config.seed) + self._rng = RNGRoot(world_spec.config.seed) @property def config(self) -> GenerationConfig: - return self._config + return self._world_spec.config @property def world_spec(self) -> WorldSpec: @@ -105,7 +104,7 @@ def from_recipe( narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None world_spec = WorldSpec(config=config, narrative=narrative) - return cls(config, world_spec) + return cls(world_spec) def generate( self, diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py index 8d82b1c..aa61ffe 100644 --- a/leadforge/narrative/dataset_card.py +++ b/leadforge/narrative/dataset_card.py @@ -89,7 +89,7 @@ def render_dataset_card(world_spec: WorldSpec) -> str: ) lines.append("") else: - lines += ["*Narrative not available for this exposure mode.*", ""] + lines += ["*Narrative unavailable for this dataset.*", ""] # ------------------------------------------------------------------ # Primary task diff --git a/leadforge/narrative/spec.py b/leadforge/narrative/spec.py index 71815c2..d873b03 100644 --- a/leadforge/narrative/spec.py +++ b/leadforge/narrative/spec.py @@ -99,6 +99,11 @@ def from_dict(cls, data: dict[str, Any]) -> ProductSpec: raise InvalidRecipeError( f"product.contract_terms_months must be a list of ints, got {terms!r}" ) + for field_name in ("free_trial_available", "demo_available"): + if not isinstance(data[field_name], bool): + raise InvalidRecipeError( + f"product.{field_name} must be a bool, got {type(data[field_name]).__name__!r}" + ) return cls( name=str(data["name"]), category=str(data["category"]), @@ -106,8 +111,8 @@ def from_dict(cls, data: dict[str, Any]) -> ProductSpec: pricing_model=str(data["pricing_model"]), acv_range_usd=(int(acv[0]), int(acv[1])), contract_terms_months=tuple(int(t) for t in terms), - free_trial_available=bool(data["free_trial_available"]), - demo_available=bool(data["demo_available"]), + free_trial_available=data["free_trial_available"], + demo_available=data["demo_available"], ) @@ -143,10 +148,28 @@ def from_dict(cls, data: dict[str, Any]) -> MarketSpec: raise InvalidRecipeError( f"market.icp_employee_range must be a [min, max] int pair, got {er!r}" ) + industries = data["icp_industries"] + if not isinstance(industries, (list, tuple)): + raise InvalidRecipeError( + f"market.icp_industries must be a list of strings, got {industries!r}" + ) + if not all(isinstance(i, str) for i in industries): + raise InvalidRecipeError( + f"market.icp_industries must contain only strings, got {industries!r}" + ) + geographies = data["geographies"] + if not isinstance(geographies, (list, tuple)): + raise InvalidRecipeError( + f"market.geographies must be a list of strings, got {geographies!r}" + ) + if not all(isinstance(g, str) for g in geographies): + raise InvalidRecipeError( + f"market.geographies must contain only strings, got {geographies!r}" + ) return cls( icp_employee_range=(int(er[0]), int(er[1])), - icp_industries=tuple(str(i) for i in data["icp_industries"]), - geographies=tuple(str(g) for g in data["geographies"]), + icp_industries=tuple(industries), + geographies=tuple(geographies), avg_deal_size_usd=_pos_int(data["avg_deal_size_usd"], "market.avg_deal_size_usd"), avg_sales_cycle_days=_pos_int( data["avg_sales_cycle_days"], "market.avg_sales_cycle_days" @@ -170,8 +193,21 @@ def from_dict(cls, data: dict[str, Any]) -> GtmMotionSpec: {"channels", "inbound_share", "outbound_share", "partner_share"}, "gtm_motion", ) + channels = data["channels"] + if not isinstance(channels, (list, tuple)) or not all(isinstance(c, str) for c in channels): + raise InvalidRecipeError( + f"gtm_motion.channels must be a list of strings, got {channels!r}" + ) + for share_name in ("inbound_share", "outbound_share", "partner_share"): + v = data[share_name] + if isinstance(v, bool) or not isinstance(v, (int, float)): + raise InvalidRecipeError( + f"gtm_motion.{share_name} must be a float in [0, 1], got {type(v).__name__!r}" + ) + if not (0.0 <= float(v) <= 1.0): + raise InvalidRecipeError(f"gtm_motion.{share_name} must be in [0, 1], got {v!r}") return cls( - channels=tuple(str(c) for c in data["channels"]), + channels=tuple(channels), inbound_share=float(data["inbound_share"]), outbound_share=float(data["outbound_share"]), partner_share=float(data["partner_share"]), @@ -194,9 +230,17 @@ def from_dict(cls, data: dict[str, Any]) -> PersonaSpec: {"role", "title_variants", "decision_authority", "typical_involvement"}, "personas[]", ) + title_variants = data["title_variants"] + if not ( + isinstance(title_variants, (list, tuple)) + and all(isinstance(t, str) for t in title_variants) + ): + raise InvalidRecipeError( + f"personas[].title_variants must be a list of strings, got {title_variants!r}" + ) return cls( role=str(data["role"]), - title_variants=tuple(str(t) for t in data["title_variants"]), + title_variants=tuple(title_variants), decision_authority=str(data["decision_authority"]), typical_involvement=str(data["typical_involvement"]), ) @@ -248,11 +292,21 @@ def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec: raise InvalidRecipeError( f"narrative.personas must be a list, got {type(personas_raw).__name__!r}" ) + for i, item in enumerate(personas_raw): + if not isinstance(item, dict): + raise InvalidRecipeError( + f"narrative.personas[{i}] must be a mapping, got {type(item).__name__!r}" + ) funnel_raw = data["funnel_stages"] if not isinstance(funnel_raw, list): raise InvalidRecipeError( f"narrative.funnel_stages must be a list, got {type(funnel_raw).__name__!r}" ) + for i, item in enumerate(funnel_raw): + if not isinstance(item, dict): + raise InvalidRecipeError( + f"narrative.funnel_stages[{i}] must be a mapping, got {type(item).__name__!r}" + ) return cls( company=CompanySpec.from_dict(data["company"]), @@ -269,7 +323,11 @@ def from_dict(cls, data: dict[str, Any]) -> NarrativeSpec: # --------------------------------------------------------------------------- -def _require_keys(data: dict[str, Any], required: set[str], context: str) -> None: +def _require_keys(data: Any, required: set[str], context: str) -> None: + if not isinstance(data, dict): + raise InvalidRecipeError( + f"Narrative section '{context}' must be a mapping, got {type(data).__name__!r}" + ) missing = required - data.keys() if missing: raise InvalidRecipeError( diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py index fc1995f..104e46e 100644 --- a/tests/narrative/test_dataset_card.py +++ b/tests/narrative/test_dataset_card.py @@ -58,7 +58,7 @@ def test_card_contains_caveats() -> None: def test_card_no_narrative_shows_stub() -> None: spec = WorldSpec(config=GenerationConfig(), narrative=None) - assert "not available" in render_dataset_card(spec).lower() + assert "unavailable" in render_dataset_card(spec).lower() # ---------------------------------------------------------------------------