From 70eb2aade68f744736ec002b03f430526d6b645a Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Fri, 1 May 2026 23:45:12 +0300
Subject: [PATCH 1/3] feat: make label_window_days affect conversion label
 derivation (#41)

The simulation engine now gates `converted_within_90_days` by
`config.label_window_days` instead of using the full horizon.
Simulation still runs for `horizon_days` to produce rich event
histories; only label derivation respects the label window.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                  |  8 ++-
 leadforge/schema/entities.py    | 10 +++-
 leadforge/simulation/engine.py  | 17 ++++++-
 tests/simulation/test_engine.py | 87 ++++++++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/.agent-plan.md b/.agent-plan.md
index 7db8d38..28be09c 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -141,7 +141,13 @@ Documentation + CI:
 - [x] `leadforge/pipelines/build_v6.py` — same `label_column` kwarg on `rename_and_select()`
 - [x] 16 new tests: `task_manifest_for_config`, bundle layout, manifest keys, validation with custom task, pipeline rename with custom label column; total 773 passing
 
-### Generalize dataset card prose (PR #41, closes #38)
+### label_window_days affects simulation (PR #43, closes #41)
+
+- [x] `leadforge/simulation/engine.py` — `converted_within_90_days` label now gated by `config.label_window_days`: conversion must occur before day N (not just within full `horizon_days`); simulation still runs for full horizon to produce rich event histories
+- [x] `leadforge/schema/entities.py` — `LeadRow` docstring updated to document that `converted_within_90_days` uses `label_window_days` (field name retained for schema stability)
+- [x] 6 new tests: default-90 unchanged, shorter window fewer conversions, 1-day window zero conversions, late conversions excluded, conversion_timestamp still set outside window, event counts unchanged by window; total 787 passing
+
+### Generalize dataset card prose (PR #42, closes #38)
 
 - [x] `leadforge/schema/tasks.py` — `TaskManifest.description` rewritten for dataset-card use; `task_manifest_for_config()` produces task-specific prose (conversion-specific for default task, generic for others)
 - [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `description` for primary task prose when provided, task-agnostic fallback otherwise
diff --git a/leadforge/schema/entities.py b/leadforge/schema/entities.py
index d97230a..6367815 100644
--- a/leadforge/schema/entities.py
+++ b/leadforge/schema/entities.py
@@ -126,7 +126,15 @@ def empty_dataframe(cls) -> pd.DataFrame:
 
 @dataclass
 class LeadRow:
-    """One row in the ``leads`` table."""
+    """One row in the ``leads`` table.
+
+    .. note:: The ``converted_within_90_days`` field name is retained for
+       schema stability, but its value is derived using
+       ``GenerationConfig.label_window_days`` (which defaults to 90).  A
+       lead is marked ``True`` only if its conversion event occurred before
+       ``label_window_days`` from lead creation — **not** necessarily within
+       the full ``horizon_days`` simulation window.
+    """
 
     TABLE_NAME: ClassVar[str] = "leads"
     DTYPE_MAP: ClassVar[dict[str, str]] = {
diff --git a/leadforge/simulation/engine.py b/leadforge/simulation/engine.py
index 530b281..61e434c 100644
--- a/leadforge/simulation/engine.py
+++ b/leadforge/simulation/engine.py
@@ -11,7 +11,10 @@
 - All randomness is derived from named substreams of ``RNGRoot(config.seed)``,
   making every run fully deterministic given ``(config, population, world_graph)``.
 - ``converted_within_90_days`` is **event-derived** — it becomes ``True`` only
-  when a lead's simulated trajectory reaches the ``closed_won`` terminal stage.
+  when a lead's simulated trajectory reaches the ``closed_won`` terminal stage
+  **within** ``config.label_window_days`` of the lead's creation.  The
+  simulation still runs for ``config.horizon_days`` to produce rich event
+  histories; only label derivation is gated by the label window.
 - Stage advancement is driven by :class:`~leadforge.mechanisms.transitions.HazardTransition`
   (mql → … → negotiation); final conversion is driven by
   :class:`~leadforge.mechanisms.hazards.ConversionHazard` (negotiation → closed_won).
@@ -336,6 +339,16 @@ def simulate_world(
         if state.converted and state.conversion_day is not None:
             conv_ts = (lead_date + timedelta(days=state.conversion_day)).isoformat()
 
+        # Label derivation: conversion must occur within the label window
+        # (label_window_days), not the full simulation horizon.  This allows
+        # the engine to produce rich event histories over horizon_days while
+        # only counting conversions in the configured observation window.
+        label = (
+            state.converted
+            and state.conversion_day is not None
+            and state.conversion_day < config.label_window_days
+        )
+
         updated_leads.append(
             LeadRow(
                 lead_id=lead.lead_id,
@@ -348,7 +361,7 @@ def simulate_world(
                 owner_rep_id=lead.owner_rep_id,
                 is_mql=True,  # all leads start at mql
                 is_sql=is_sql,
-                converted_within_90_days=state.converted,
+                converted_within_90_days=label,
                 conversion_timestamp=conv_ts,
             )
         )
diff --git a/tests/simulation/test_engine.py b/tests/simulation/test_engine.py
index 4825169..c373053 100644
--- a/tests/simulation/test_engine.py
+++ b/tests/simulation/test_engine.py
@@ -25,9 +25,17 @@
 # ---------------------------------------------------------------------------
 
 
-def _make_config(seed: int = 42, n_leads: int = 50) -> GenerationConfig:
+def _make_config(
+    seed: int = 42, n_leads: int = 50, label_window_days: int = 90
+) -> GenerationConfig:
     """Return a small GenerationConfig suitable for unit tests."""
-    return GenerationConfig(seed=seed, n_accounts=20, n_contacts=60, n_leads=n_leads)
+    return GenerationConfig(
+        seed=seed,
+        n_accounts=20,
+        n_contacts=60,
+        n_leads=n_leads,
+        label_window_days=label_window_days,
+    )
 
 
 def _make_narrative():
@@ -383,3 +391,78 @@ def test_growth(self) -> None:
     def test_enterprise(self) -> None:
         assert _plan_from_acv(80_000) == "enterprise"
         assert _plan_from_acv(200_000) == "enterprise"
+
+
+# ---------------------------------------------------------------------------
+# label_window_days affects label derivation
+# ---------------------------------------------------------------------------
+
+
+class TestLabelWindowDays:
+    """Verify that label_window_days gates converted_within_90_days."""
+
+    def _run_with_window(
+        self, label_window_days: int, seed: int = 42, n_leads: int = 200
+    ) -> SimulationResult:
+        config = _make_config(seed=seed, n_leads=n_leads, label_window_days=label_window_days)
+        narrative = _make_narrative()
+        graph = sample_hidden_graph(RNGRoot(seed))
+        pop = build_population(config, narrative, graph)
+        return simulate_world(config, pop, graph)
+
+    def test_default_90_unchanged(self) -> None:
+        """label_window_days=90 (default) matches the old behavior exactly."""
+        r_default = _run_sim(seed=42, n_leads=200)
+        r_explicit = self._run_with_window(label_window_days=90, seed=42, n_leads=200)
+        labels_default = [row.converted_within_90_days for row in r_default.leads]
+        labels_explicit = [row.converted_within_90_days for row in r_explicit.leads]
+        assert labels_default == labels_explicit
+
+    def test_shorter_window_fewer_conversions(self) -> None:
+        """A 30-day window should produce fewer (or equal) conversions than 90."""
+        r90 = self._run_with_window(label_window_days=90, seed=42, n_leads=300)
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        conv90 = sum(row.converted_within_90_days for row in r90.leads)
+        conv30 = sum(row.converted_within_90_days for row in r30.leads)
+        assert conv30 <= conv90
+        # With 300 leads over 90 days, there should be *strictly* fewer at 30.
+        assert conv30 < conv90
+
+    def test_window_1_almost_no_conversions(self) -> None:
+        """With a 1-day window, nearly no leads can convert (need negotiation first)."""
+        r = self._run_with_window(label_window_days=1, seed=42, n_leads=200)
+        conv = sum(row.converted_within_90_days for row in r.leads)
+        # All leads start at mql; reaching negotiation + closed_won in <1 day
+        # is essentially impossible.
+        assert conv == 0
+
+    def test_late_conversions_excluded(self) -> None:
+        """Leads that convert after the label window should not be labeled positive."""
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        for lead in r30.leads:
+            if lead.converted_within_90_days:
+                # If labeled positive, conversion must have happened within 30 days.
+                assert lead.conversion_timestamp is not None
+
+    def test_conversion_timestamp_still_set_outside_window(self) -> None:
+        """conversion_timestamp is set for all actual conversions, even outside window."""
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
+        # Some leads should have conversion_timestamp but label=False
+        # (they converted after day 30 but before day 90).
+        late_conversions = [
+            lead
+            for lead in r30.leads
+            if lead.conversion_timestamp is not None and not lead.converted_within_90_days
+        ]
+        # With 300 leads, there should be at least some late conversions.
+        assert len(late_conversions) > 0
+
+    def test_event_count_unchanged_by_window(self) -> None:
+        """label_window_days does not affect event generation (only the label)."""
+        r90 = self._run_with_window(label_window_days=90, seed=42, n_leads=100)
+        r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=100)
+        # Simulation runs identically; only label derivation differs.
+        assert len(r90.touches) == len(r30.touches)
+        assert len(r90.sessions) == len(r30.sessions)
+        assert len(r90.sales_activities) == len(r30.sales_activities)
+        assert len(r90.opportunities) == len(r30.opportunities)

From ad51b6dcf4d06875d288761289b9394c4d063648 Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Fri, 1 May 2026 23:50:08 +0300
Subject: [PATCH 2/3] fix: strengthen tests and document feature-label window
 mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- test_late_conversions_excluded now verifies conversion day offset < 30
  (was a tautology — only checked timestamp not None)
- Add bundle round-trip integration test (Generator → save → read Parquet)
- Document feature-label temporal mismatch in build_snapshot when
  label_window_days < horizon_days

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                  |  3 ++-
 leadforge/render/snapshots.py   |  6 +++++
 tests/simulation/test_engine.py | 46 ++++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/.agent-plan.md b/.agent-plan.md
index 28be09c..b52a85e 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -145,7 +145,8 @@ Documentation + CI:
 
 - [x] `leadforge/simulation/engine.py` — `converted_within_90_days` label now gated by `config.label_window_days`: conversion must occur before day N (not just within full `horizon_days`); simulation still runs for full horizon to produce rich event histories
 - [x] `leadforge/schema/entities.py` — `LeadRow` docstring updated to document that `converted_within_90_days` uses `label_window_days` (field name retained for schema stability)
-- [x] 6 new tests: default-90 unchanged, shorter window fewer conversions, 1-day window zero conversions, late conversions excluded, conversion_timestamp still set outside window, event counts unchanged by window; total 787 passing
+- [x] `leadforge/render/snapshots.py` — added comment documenting the feature-label temporal mismatch when `label_window_days < horizon_days` (features aggregate over full horizon; label uses shorter window)
+- [x] 7 new tests: default-90 unchanged, shorter window fewer conversions, 1-day window zero conversions, late conversions excluded (with day-offset verification), conversion_timestamp still set outside window, event counts unchanged by window, bundle round-trip integration; total 788 passing
 
 ### Generalize dataset card prose (PR #42, closes #38)
 
diff --git a/leadforge/render/snapshots.py b/leadforge/render/snapshots.py
index 18baca3..d0158f4 100644
--- a/leadforge/render/snapshots.py
+++ b/leadforge/render/snapshots.py
@@ -86,6 +86,12 @@ def build_snapshot(
         :data:`~leadforge.schema.features.LEAD_SNAPSHOT_FEATURES` and dtypes
         matching the feature spec.  Row order matches ``result.leads``.
     """
+    # Note: when label_window_days < horizon_days, the label
+    # (converted_within_90_days on LeadRow) is derived using the shorter
+    # window, but features here are aggregated over the full horizon.  This
+    # is intentional — the simulation produces rich event histories over the
+    # full horizon, and feature aggregation follows suit.  Callers that need
+    # windowed features should use the snapshot_day parameter.
     effective_window = snapshot_day if snapshot_day is not None else horizon_days
 
     # -------------------------------------------------------------------
diff --git a/tests/simulation/test_engine.py b/tests/simulation/test_engine.py
index c373053..316dbb9 100644
--- a/tests/simulation/test_engine.py
+++ b/tests/simulation/test_engine.py
@@ -438,11 +438,19 @@ def test_window_1_almost_no_conversions(self) -> None:
 
     def test_late_conversions_excluded(self) -> None:
         """Leads that convert after the label window should not be labeled positive."""
+        from datetime import date
+
         r30 = self._run_with_window(label_window_days=30, seed=42, n_leads=300)
         for lead in r30.leads:
             if lead.converted_within_90_days:
-                # If labeled positive, conversion must have happened within 30 days.
+                # If labeled positive, conversion day must be < label_window_days.
                 assert lead.conversion_timestamp is not None
+                created = date.fromisoformat(lead.lead_created_at)
+                converted = date.fromisoformat(lead.conversion_timestamp)
+                day_offset = (converted - created).days
+                assert day_offset < 30, (
+                    f"Lead {lead.lead_id} labeled positive but converted on day {day_offset}"
+                )
 
     def test_conversion_timestamp_still_set_outside_window(self) -> None:
         """conversion_timestamp is set for all actual conversions, even outside window."""
@@ -466,3 +474,39 @@ def test_event_count_unchanged_by_window(self) -> None:
         assert len(r90.sessions) == len(r30.sessions)
         assert len(r90.sales_activities) == len(r30.sales_activities)
         assert len(r90.opportunities) == len(r30.opportunities)
+
+    def test_bundle_round_trip_respects_window(self, tmp_path) -> None:
+        """Full pipeline: generate → save → read task split reflects label_window_days."""
+        import pandas as pd
+
+        from leadforge.api.generator import Generator
+
+        # primary_task name stays the default; only label_window_days changes.
+        gen = Generator.from_recipe(
+            "b2b_saas_procurement_v1",
+            seed=42,
+            label_window_days=30,
+        )
+        bundle = gen.generate(n_accounts=20, n_contacts=60, n_leads=200)
+        out = str(tmp_path / "bundle")
+        bundle.save(out, generation_timestamp="2025-01-01T00:00:00Z")
+
+        task_dir = f"{out}/tasks/converted_within_90_days"
+        train = pd.read_parquet(f"{task_dir}/train.parquet")
+        conv_30 = int(train["converted_within_90_days"].sum())
+
+        # Compare with default 90-day window.
+        gen90 = Generator.from_recipe(
+            "b2b_saas_procurement_v1",
+            seed=42,
+            label_window_days=90,
+        )
+        bundle90 = gen90.generate(n_accounts=20, n_contacts=60, n_leads=200)
+        out90 = str(tmp_path / "bundle90")
+        bundle90.save(out90, generation_timestamp="2025-01-01T00:00:00Z")
+
+        task_dir90 = f"{out90}/tasks/converted_within_90_days"
+        train90 = pd.read_parquet(f"{task_dir90}/train.parquet")
+        conv_90 = int(train90["converted_within_90_days"].sum())
+
+        assert conv_30 <= conv_90

From 8c39c07b6197e5e7c9a1e89b18711baa1d9757f3 Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Fri, 1 May 2026 23:54:55 +0300
Subject: [PATCH 3/3] docs: note that conversion_timestamp can be set when
 label is False

Address Copilot review: document the intentional case where a lead
converts after the label window, producing a non-None
conversion_timestamp with converted_within_90_days=False.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 leadforge/schema/entities.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/leadforge/schema/entities.py b/leadforge/schema/entities.py
index 6367815..b20b8c7 100644
--- a/leadforge/schema/entities.py
+++ b/leadforge/schema/entities.py
@@ -134,6 +134,10 @@ class LeadRow:
        lead is marked ``True`` only if its conversion event occurred before
        ``label_window_days`` from lead creation — **not** necessarily within
        the full ``horizon_days`` simulation window.
+
+       Consequently, ``conversion_timestamp`` may be set (non-``None``)
+       while ``converted_within_90_days`` is ``False``, indicating the lead
+       converted after the label observation window closed.
     """
 
     TABLE_NAME: ClassVar[str] = "leads"