leadforge-dev · shaypal5 · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -141,7 +141,14 @@ Documentation + CI:
 - [x] `leadforge/pipelines/build_v6.py` — same `label_column` kwarg on `rename_and_select()`
 - [x] 16 new tests: `task_manifest_for_config`, bundle layout, manifest keys, validation with custom task, pipeline rename with custom label column; total 773 passing
 
-### Generalize dataset card prose (PR #41, closes #38)
+### label_window_days affects simulation (PR #43, closes #41)
+
+- [x] `leadforge/simulation/engine.py` — `converted_within_90_days` label now gated by `config.label_window_days`: conversion must occur before day N (not just within full `horizon_days`); simulation still runs for full horizon to produce rich event histories
+- [x] `leadforge/schema/entities.py` — `LeadRow` docstring updated to document that `converted_within_90_days` uses `label_window_days` (field name retained for schema stability)
+- [x] `leadforge/render/snapshots.py` — added comment documenting the feature-label temporal mismatch when `label_window_days < horizon_days` (features aggregate over full horizon; label uses shorter window)
+- [x] 7 new tests: default-90 unchanged, shorter window fewer conversions, 1-day window zero conversions, late conversions excluded (with day-offset verification), conversion_timestamp still set outside window, event counts unchanged by window, bundle round-trip integration; total 788 passing
+
+### Generalize dataset card prose (PR #42, closes #38)
 
 - [x] `leadforge/schema/tasks.py` — `TaskManifest.description` rewritten for dataset-card use; `task_manifest_for_config()` produces task-specific prose (conversion-specific for default task, generic for others)
 - [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `description` for primary task prose when provided, task-agnostic fallback otherwise

diff --git a/leadforge/render/snapshots.py b/leadforge/render/snapshots.py
@@ -86,6 +86,12 @@ def build_snapshot(
         :data:`~leadforge.schema.features.LEAD_SNAPSHOT_FEATURES` and dtypes
         matching the feature spec.  Row order matches ``result.leads``.
     """
+    # Note: when label_window_days < horizon_days, the label
+    # (converted_within_90_days on LeadRow) is derived using the shorter
+    # window, but features here are aggregated over the full horizon.  This
+    # is intentional — the simulation produces rich event histories over the
+    # full horizon, and feature aggregation follows suit.  Callers that need
+    # windowed features should use the snapshot_day parameter.
     effective_window = snapshot_day if snapshot_day is not None else horizon_days
 
     # -------------------------------------------------------------------

diff --git a/leadforge/schema/entities.py b/leadforge/schema/entities.py
@@ -126,7 +126,19 @@ def empty_dataframe(cls) -> pd.DataFrame:
 
 @dataclass
 class LeadRow:
-    """One row in the ``leads`` table."""
+    """One row in the ``leads`` table.
+
+    .. note:: The ``converted_within_90_days`` field name is retained for
+       schema stability, but its value is derived using
+       ``GenerationConfig.label_window_days`` (which defaults to 90).  A
+       lead is marked ``True`` only if its conversion event occurred before
+       ``label_window_days`` from lead creation — **not** necessarily within
+       the full ``horizon_days`` simulation window.
+
+       Consequently, ``conversion_timestamp`` may be set (non-``None``)
+       while ``converted_within_90_days`` is ``False``, indicating the lead
+       converted after the label observation window closed.
+    """
 
     TABLE_NAME: ClassVar[str] = "leads"
     DTYPE_MAP: ClassVar[dict[str, str]] = {

diff --git a/leadforge/simulation/engine.py b/leadforge/simulation/engine.py
@@ -11,7 +11,10 @@
 - All randomness is derived from named substreams of ``RNGRoot(config.seed)``,
   making every run fully deterministic given ``(config, population, world_graph)``.
 - ``converted_within_90_days`` is **event-derived** — it becomes ``True`` only
-  when a lead's simulated trajectory reaches the ``closed_won`` terminal stage.
+  when a lead's simulated trajectory reaches the ``closed_won`` terminal stage
+  **within** ``config.label_window_days`` of the lead's creation.  The
+  simulation still runs for ``config.horizon_days`` to produce rich event
+  histories; only label derivation is gated by the label window.
 - Stage advancement is driven by :class:`~leadforge.mechanisms.transitions.HazardTransition`
   (mql → … → negotiation); final conversion is driven by
   :class:`~leadforge.mechanisms.hazards.ConversionHazard` (negotiation → closed_won).
@@ -336,6 +339,16 @@ def simulate_world(
         if state.converted and state.conversion_day is not None:
             conv_ts = (lead_date + timedelta(days=state.conversion_day)).isoformat()
 
+        # Label derivation: conversion must occur within the label window
+        # (label_window_days), not the full simulation horizon.  This allows
+        # the engine to produce rich event histories over horizon_days while
+        # only counting conversions in the configured observation window.
+        label = (
+            state.converted
+            and state.conversion_day is not None
+            and state.conversion_day < config.label_window_days
+        )
+
         updated_leads.append(
             LeadRow(
                 lead_id=lead.lead_id,
@@ -348,7 +361,7 @@ def simulate_world(
                 owner_rep_id=lead.owner_rep_id,
                 is_mql=True,  # all leads start at mql
                 is_sql=is_sql,
-                converted_within_90_days=state.converted,
+                converted_within_90_days=label,
                 conversion_timestamp=conv_ts,
             )
         )

diff --git a/tests/simulation/test_engine.py b/tests/simulation/test_engine.py
@@ -25,9 +25,17 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_config(seed: int = 42, n_leads: int = 50) -> GenerationConfig:
+def _make_config(
+    seed: int = 42, n_leads: int = 50, label_window_days: int = 90
+) -> GenerationConfig:
     """Return a small GenerationConfig suitable for unit tests."""
-    return GenerationConfig(seed=seed, n_accounts=20, n_contacts=60, n_leads=n_leads)
+    return GenerationConfig(
+        seed=seed,
+        n_accounts=20,
+        n_contacts=60,
+        n_leads=n_leads,
+        label_window_days=label_window_days,
+    )
 
 
 def _make_narrative():
@@ -383,3 +391,122 @@ def test_growth(self) -> None:
     def test_enterprise(self) -> None:
         assert _plan_from_acv(80_000) == "enterprise"
         assert _plan_from_acv(200_000) == "enterprise"
+
+
+# ---------------------------------------------------------------------------
+# label_window_days affects label derivation
+# ---------------------------------------------------------------------------
+
+
+class TestLabelWindowDays:
+    """Verify that label_window_days gates converted_within_90_days."""
+
+    def _run_with_window(
+        self, label_window_days: int, seed: int = 42, n_leads: int = 200
+    ) -> SimulationResult:
+        config = _make_config(seed=seed, n_leads=n_leads, label_window_days=label_window_days)
+        narrative = _make_narrative()
+        graph = sample_hidden_graph(RNGRoot(seed))
+        pop = build_population(config, narrative, graph)
+        return simulate_world(config, pop, graph)
+
+    def test_default_90_unchanged(self) -> None:
+        """label_window_days=90 (default) matches the old behavior exactly."""
+        r_default = _run_sim(seed=42, n_leads=200)
+        r_explicit = self._run_with_window(label_window_days=90, seed=42, n_leads=200)
+        labels_default = [row.converted_within_90_days for row in r_default.leads]
+        labels_explicit = [row.converted_within_90_days for row in r_explicit.leads]
+        assert labels_default == labels_explicit
+
+    def test_shorter_window_fewer_conversions(self) -> None:
+        """A 30-day window should produce fewer (or equal) conversions than 90."""
+        r90 = self._run_with_window(label_window_days=90, seed=42, n_leads=300)
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        conv90 = sum(row.converted_within_90_days for row in r90.leads)
+        conv30 = sum(row.converted_within_90_days for row in r30.leads)
+        assert conv30 <= conv90
+        # With 300 leads over 90 days, there should be *strictly* fewer at 30.
+        assert conv30 < conv90
+
+    def test_window_1_almost_no_conversions(self) -> None:
+        """With a 1-day window, nearly no leads can convert (need negotiation first)."""
+        r = self._run_with_window(label_window_days=1, seed=42, n_leads=200)
+        conv = sum(row.converted_within_90_days for row in r.leads)
+        # All leads start at mql; reaching negotiation + closed_won in <1 day
+        # is essentially impossible.
+        assert conv == 0
+
+    def test_late_conversions_excluded(self) -> None:
+        """Leads that convert after the label window should not be labeled positive."""
+        from datetime import date
+
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        for lead in r30.leads:
+            if lead.converted_within_90_days:
+                # If labeled positive, conversion day must be < label_window_days.
+                assert lead.conversion_timestamp is not None
+                created = date.fromisoformat(lead.lead_created_at)
+                converted = date.fromisoformat(lead.conversion_timestamp)
+                day_offset = (converted - created).days
+                assert day_offset < 30, (
+                    f"Lead {lead.lead_id} labeled positive but converted on day {day_offset}"
+                )
+
+    def test_conversion_timestamp_still_set_outside_window(self) -> None:
+        """conversion_timestamp is set for all actual conversions, even outside window."""
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        # Some leads should have conversion_timestamp but label=False
+        # (they converted after day 30 but before day 90).
+        late_conversions = [
+            lead
+            for lead in r30.leads
+            if lead.conversion_timestamp is not None and not lead.converted_within_90_days
+        ]
+        # With 300 leads, there should be at least some late conversions.
+        assert len(late_conversions) > 0
+
+    def test_event_count_unchanged_by_window(self) -> None:
+        """label_window_days does not affect event generation (only the label)."""
+        r90 = self._run_with_window(label_window_days=90, seed=42, n_leads=100)
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=100)
+        # Simulation runs identically; only label derivation differs.
+        assert len(r90.touches) == len(r30.touches)
+        assert len(r90.sessions) == len(r30.sessions)
+        assert len(r90.sales_activities) == len(r30.sales_activities)
+        assert len(r90.opportunities) == len(r30.opportunities)
+
+    def test_bundle_round_trip_respects_window(self, tmp_path) -> None:
+        """Full pipeline: generate → save → read task split reflects label_window_days."""
+        import pandas as pd
+
+        from leadforge.api.generator import Generator
+
+        # primary_task name stays the default; only label_window_days changes.
+        gen = Generator.from_recipe(
+            "b2b_saas_procurement_v1",
+            seed=42,
+            label_window_days=30,
+        )
+        bundle = gen.generate(n_accounts=20, n_contacts=60, n_leads=200)
+        out = str(tmp_path / "bundle")
+        bundle.save(out, generation_timestamp="2025-01-01T00:00:00Z")
+
+        task_dir = f"{out}/tasks/converted_within_90_days"
+        train = pd.read_parquet(f"{task_dir}/train.parquet")
+        conv_30 = int(train["converted_within_90_days"].sum())
+
+        # Compare with default 90-day window.
+        gen90 = Generator.from_recipe(
+            "b2b_saas_procurement_v1",
+            seed=42,
+            label_window_days=90,
+        )
+        bundle90 = gen90.generate(n_accounts=20, n_contacts=60, n_leads=200)
+        out90 = str(tmp_path / "bundle90")
+        bundle90.save(out90, generation_timestamp="2025-01-01T00:00:00Z")
+
+        task_dir90 = f"{out90}/tasks/converted_within_90_days"
+        train90 = pd.read_parquet(f"{task_dir90}/train.parquet")
+        conv_90 = int(train90["converted_within_90_days"].sum())
+
+        assert conv_30 <= conv_90