From cc33ba08b5bf90011f75b9f84796ba4441d7520f Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Tue, 19 May 2026 17:05:03 +0900
Subject: [PATCH 1/4] feat(explain): break down history boost into
 unigram/bigram/whole-path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `HistoryBoostBreakdown` so `explain` exposes which component of
history learning contributes to a path's cost: per-segment unigram sum,
per-segment bigram sum, and whole-path ×5 boost. `history_rerank` now
goes through the same helper, keeping the math in one place.

Aimed at diagnosing reports of weakened history learning — the breakdown
lets us tell whether bigram learning is silent, whole-path is over- or
under-firing, or per-segment normalization is washing things out.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/explain.rs  | 45 ++++++++++-
 .../crates/lex-core/src/converter/reranker.rs | 81 ++++++++++++++-----
 2 files changed, 102 insertions(+), 24 deletions(-)
diff --git a/engine/crates/lex-core/src/converter/explain.rs b/engine/crates/lex-core/src/converter/explain.rs
index db3bb1d..04c5acd 100644
--- a/engine/crates/lex-core/src/converter/explain.rs
+++ b/engine/crates/lex-core/src/converter/explain.rs
@@ -12,6 +12,7 @@ use super::cost::{conn_cost, script_cost, DefaultCostFunction};
 use super::features::{is_single_char_kanji_penalised, is_te_form_kanji_penalised};
 use super::lattice::{build_lattice, Lattice};
 use super::postprocess::{postprocess_observed, PostprocessContext, PostprocessObserver};
+use super::reranker::{compute_history_boost, HistoryBoostBreakdown};
 use super::viterbi::{viterbi_nbest, ScoredPath};
 
 /// Full diagnostic result for a single reading.
@@ -57,7 +58,9 @@ pub struct ExplainPath {
     pub viterbi_cost: i64,
     /// Cost delta from structure reranking.
     pub rerank_delta: i64,
-    /// Total history boost applied (negative = better).
+    /// Per-component history boost (raw sums + whole-path × 5).
+    pub history_breakdown: HistoryBoostBreakdown,
+    /// History boost actually subtracted from the cost (post-normalization).
     pub history_boost: i64,
     /// Final cost after all adjustments.
     pub final_cost: i64,
@@ -231,6 +234,7 @@ pub fn explain(
     };
     let final_paths = postprocess_observed(&mut raw_paths, &ctx, &mut observer);
 
+    let now = crate::user_history::now_epoch();
     let paths: Vec<ExplainPath> = final_paths
         .iter()
         .map(|scored| {
@@ -246,11 +250,15 @@ pub fn explain(
                 .copied()
                 .unwrap_or(original);
             let rerank_delta = post_rerank - original;
-            let history_boost = post_rerank - scored.viterbi_cost;
+            let history_breakdown = history
+                .map(|h| compute_history_boost(scored, h, now))
+                .unwrap_or_default();
+            let history_boost = history_breakdown.applied(scored.segments.len());
             ExplainPath {
                 segments: explain_segments(scored, conn, dict),
                 viterbi_cost: original,
                 rerank_delta,
+                history_breakdown,
                 history_boost,
                 final_cost: scored.viterbi_cost,
             }
@@ -364,6 +372,16 @@ pub fn format_text(result: &ExplainResult) -> String {
             "    viterbi={:<8} rerank={:<+8} history={:<+8} -> final={}\n",
             path.viterbi_cost, path.rerank_delta, -path.history_boost, path.final_cost,
         ));
+        let hb = &path.history_breakdown;
+        if hb.unigram_sum != 0 || hb.bigram_sum != 0 || hb.whole_path_boost != 0 {
+            out.push_str(&format!(
+                "      history: uni_sum={:<+7} bi_sum={:<+7} whole×5={:<+7} (/{} segs)\n",
+                -hb.unigram_sum,
+                -hb.bigram_sum,
+                -hb.whole_path_boost,
+                path.segments.len(),
+            ));
+        }
     }
 
     out
@@ -419,6 +437,29 @@ mod tests {
                 wh.final_cost < w.final_cost,
                 "final cost should be lower with history boost"
             );
+            // Single-segment きょう→京: the whole-path lookup uses the same
+            // reading+surface key as the per-segment unigram, so both
+            // whole_path_boost and unigram_sum are nonzero; bigram_sum is 0.
+            assert!(
+                wh.history_breakdown.whole_path_boost > 0,
+                "whole-path boost should fire for explicit full-input selection"
+            );
+            assert_eq!(
+                wh.history_breakdown.bigram_sum, 0,
+                "single-segment path has no bigram pairs"
+            );
+        }
+    }
+
+    #[test]
+    fn test_explain_history_breakdown_empty_without_history() {
+        let dict = test_dict();
+        let result = explain(&dict, None, None, "きょう", 5);
+        for path in &result.paths {
+            assert_eq!(path.history_boost, 0);
+            assert_eq!(path.history_breakdown.unigram_sum, 0);
+            assert_eq!(path.history_breakdown.bigram_sum, 0);
+            assert_eq!(path.history_breakdown.whole_path_boost, 0);
         }
     }
 
diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index 3304b7d..fa1d830 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -98,12 +98,69 @@ pub fn rerank(
     debug!(paths_out = paths.len());
 }
 
+/// Breakdown of the history boost contributions for a single path.
+///
+/// Per-segment unigram/bigram sums are raw (pre-normalization). The actually
+/// applied boost is `(unigram_sum + bigram_sum) / max(seg_count, 1) + whole_path_boost`,
+/// available via [`Self::applied`].
+#[derive(Debug, Default, Clone, Copy, serde::Serialize)]
+pub struct HistoryBoostBreakdown {
+    /// Sum of per-segment unigram boosts (before /seg_count normalization).
+    pub unigram_sum: i64,
+    /// Sum of per-pair bigram boosts (before /seg_count normalization).
+    pub bigram_sum: i64,
+    /// Whole-path unigram boost × 5 (not normalized).
+    pub whole_path_boost: i64,
+}
+
+impl HistoryBoostBreakdown {
+    /// Boost actually subtracted from viterbi_cost, given the path's segment count.
+    pub fn applied(&self, seg_count: usize) -> i64 {
+        let n = (seg_count.max(1)) as i64;
+        (self.unigram_sum + self.bigram_sum) / n + self.whole_path_boost
+    }
+}
+
+/// Compute the history boost breakdown for a single path without mutating it.
+///
+/// Mirrors the contribution logic used by [`history_rerank`] so callers
+/// (e.g. `explain`) can inspect each component.
+pub fn compute_history_boost(
+    path: &ScoredPath,
+    history: &UserHistory,
+    now: u64,
+) -> HistoryBoostBreakdown {
+    let mut unigram_sum: i64 = 0;
+    for seg in &path.segments {
+        unigram_sum += history.unigram_boost(&seg.reading, &seg.surface, now);
+    }
+    let mut bigram_sum: i64 = 0;
+    for pair in path.segments.windows(2) {
+        bigram_sum +=
+            history.bigram_boost(&pair[0].surface, &pair[1].reading, &pair[1].surface, now);
+    }
+    let whole_path_boost =
+        history.unigram_boost(&path.full_reading(), &path.surface_key(), now) * 5;
+    HistoryBoostBreakdown {
+        unigram_sum,
+        bigram_sum,
+        whole_path_boost,
+    }
+}
+
 /// Apply user-history boosts to N-best paths, then re-sort.
 ///
 /// Unigram and bigram boosts are subtracted from each path's cost so that
 /// learned candidates float to the top. Because this operates on complete
 /// paths (not individual lattice nodes), it cannot cause the fragmentation
 /// problems that in-Viterbi boosting could.
+///
+/// Per-segment boosts are normalized by segment count: fragmented paths
+/// (e.g. き→機 + が + し + ます) would otherwise accumulate boosts from common
+/// particles across ALL prior conversions, gaining a structural advantage over
+/// compound paths. The whole-path boost is the strongest signal and is not
+/// normalized — it only fires when the full reading→surface was explicitly
+/// selected.
 pub fn history_rerank(paths: &mut [ScoredPath], history: &UserHistory) {
     let _span = debug_span!("history_rerank", paths_count = paths.len()).entered();
     if paths.is_empty() {
@@ -111,28 +168,8 @@ pub fn history_rerank(paths: &mut [ScoredPath], history: &UserHistory) {
     }
     let now = crate::user_history::now_epoch();
     for path in paths.iter_mut() {
-        // Per-segment boosts normalized by segment count. Fragmented paths
-        // (e.g. き→機 + が + し + ます) accumulate boosts from common particles
-        // (が, し, は, etc.) across ALL prior conversions, giving them a structural
-        // advantage over compound paths. Dividing by segment count neutralizes this.
-        let seg_count = path.segments.len().max(1) as i64;
-        let mut seg_boost: i64 = 0;
-        for seg in &path.segments {
-            seg_boost += history.unigram_boost(&seg.reading, &seg.surface, now);
-        }
-        for pair in path.segments.windows(2) {
-            seg_boost +=
-                history.bigram_boost(&pair[0].surface, &pair[1].reading, &pair[1].surface, now);
-        }
-        let mut boost = seg_boost / seg_count;
-
-        // Whole-path boost (not normalized): reward paths whose full reading→surface
-        // has been explicitly selected. This is the strongest learning signal and is
-        // not subject to cross-reading contamination.
-        let full_reading = path.full_reading();
-        let full_surface = path.surface_key();
-        boost += history.unigram_boost(&full_reading, &full_surface, now) * 5;
-        path.viterbi_cost -= boost;
+        let breakdown = compute_history_boost(path, history, now);
+        path.viterbi_cost -= breakdown.applied(path.segments.len());
     }
     paths.sort_by_key(|p| p.viterbi_cost);
     debug!(best_cost = paths.first().map(|p| p.viterbi_cost));

From ba6b14a13c859ac5380bca18e15225e020daeada Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Tue, 19 May 2026 17:22:48 +0900
Subject: [PATCH 2/4] =?UTF-8?q?fix(explain):=20PR247=20Copilot=20R1=20?=
 =?UTF-8?q?=E2=80=94=202=20findings=20resolved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Re-export `HistoryBoostBreakdown` via `explain::HistoryBoostBreakdown`
  so downstream crates (lex-cli) can name the type behind the public
  `ExplainPath::history_breakdown` field. The definition stays in the
  crate-private `reranker` module.

- Capture history breakdown at the post-rerank / pre-history-rerank
  boundary via the observer, rather than recomputing on the final path.
  The recompute could disagree with the actual subtracted boost in two
  cases: paths whose segments were merged by `group_segments`, and
  rewriter-added candidates (numeric / katakana / kanji variants) that
  were synthesised after `history_rerank` ran. The snapshot is keyed by
  `surface_key()` (preserved through grouping) and absent for rewriter-
  added paths, which fall back to a zero breakdown.

- Add regression test `test_explain_unrelated_paths_have_zero_history_boost`
  asserting non-matching surfaces always report zero history boost.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/explain.rs  | 146 ++++++++++++++----
 1 file changed, 112 insertions(+), 34 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/explain.rs b/engine/crates/lex-core/src/converter/explain.rs
index 04c5acd..d499299 100644
--- a/engine/crates/lex-core/src/converter/explain.rs
+++ b/engine/crates/lex-core/src/converter/explain.rs
@@ -12,9 +12,14 @@ use super::cost::{conn_cost, script_cost, DefaultCostFunction};
 use super::features::{is_single_char_kanji_penalised, is_te_form_kanji_penalised};
 use super::lattice::{build_lattice, Lattice};
 use super::postprocess::{postprocess_observed, PostprocessContext, PostprocessObserver};
-use super::reranker::{compute_history_boost, HistoryBoostBreakdown};
+use super::reranker::compute_history_boost;
 use super::viterbi::{viterbi_nbest, ScoredPath};
 
+// Re-export so downstream crates (e.g. lex-cli) can name the type behind
+// `ExplainPath::history_breakdown` — the definition lives in the crate-private
+// `reranker` module.
+pub use super::reranker::HistoryBoostBreakdown;
+
 /// Full diagnostic result for a single reading.
 #[derive(Debug, Serialize)]
 pub struct ExplainResult {
@@ -94,38 +99,78 @@ pub struct ExplainSegment {
 // Observer that captures cost snapshots for explain diagnostics
 // ---------------------------------------------------------------------------
 
-/// Build a key string from a ScoredPath for cost tracking across pipeline stages.
-/// Uses ASCII control characters (US=\x1f, RS=\x1e) as delimiters to avoid
-/// collisions with any reading/surface content.
-fn path_key(path: &ScoredPath) -> String {
-    path.segments
-        .iter()
-        .map(|s| format!("{}\x1f{}", s.reading, s.surface))
-        .collect::<Vec<_>>()
-        .join("\x1e")
+/// Snapshot of one path's state at the post-rerank / pre-history-rerank stage.
+///
+/// Captured here (and not after history_rerank) so the recorded breakdown is
+/// computed on the same segments that history_rerank actually scored — the
+/// pipeline later runs rewriters that add new candidates and `group_segments`
+/// that merges adjacent segments, both of which would invalidate a recompute
+/// against the final path.
+#[derive(Default, Clone, Copy)]
+struct PreHistorySnapshot {
+    /// Cost after resegment + rerank, before any history adjustment.
+    cost: i64,
+    /// Per-component history boost (raw sums + whole-path × 5).
+    breakdown: HistoryBoostBreakdown,
+    /// Boost actually subtracted from `cost` by `history_rerank`.
+    applied_boost: i64,
 }
 
-#[derive(Default)]
-struct ExplainObserver {
+/// Diagnostic observer.
+///
+/// Keys are `ScoredPath::surface_key()` — i.e. the concatenated surface — so
+/// that lookups survive `group_segments` (which merges adjacent segments but
+/// preserves the overall surface). Paths that only appear after history_rerank
+/// (rewriter-added candidates: numeric, katakana, kanji variants) are absent
+/// from these maps and fall back to zero in the caller.
+struct ExplainObserver<'a> {
+    history: Option<&'a UserHistory>,
+    now: u64,
     /// viterbi_cost before resegment/rerank — the raw Viterbi output.
     original_costs: HashMap<String, i64>,
-    /// viterbi_cost after resegment + rerank (before history_rerank).
-    post_rerank_costs: HashMap<String, i64>,
+    /// State at the post-rerank / pre-history-rerank boundary.
+    pre_history: HashMap<String, PreHistorySnapshot>,
+}
+
+impl<'a> ExplainObserver<'a> {
+    fn new(history: Option<&'a UserHistory>, now: u64) -> Self {
+        Self {
+            history,
+            now,
+            original_costs: HashMap::new(),
+            pre_history: HashMap::new(),
+        }
+    }
 }
 
-impl PostprocessObserver for ExplainObserver {
+impl PostprocessObserver for ExplainObserver<'_> {
     fn after_viterbi(&mut self, paths: &[ScoredPath]) {
         self.original_costs = paths
             .iter()
-            .map(|p| (path_key(p), p.viterbi_cost))
+            .map(|p| (p.surface_key(), p.viterbi_cost))
             .collect();
     }
 
     fn after_rerank(&mut self, paths: &[ScoredPath]) {
-        self.post_rerank_costs = paths
-            .iter()
-            .map(|p| (path_key(p), p.viterbi_cost))
-            .collect();
+        self.pre_history.clear();
+        for p in paths {
+            let (breakdown, applied) = match self.history {
+                Some(h) => {
+                    let b = compute_history_boost(p, h, self.now);
+                    let a = b.applied(p.segments.len());
+                    (b, a)
+                }
+                None => (HistoryBoostBreakdown::default(), 0),
+            };
+            self.pre_history.insert(
+                p.surface_key(),
+                PreHistorySnapshot {
+                    cost: p.viterbi_cost,
+                    breakdown,
+                    applied_boost: applied,
+                },
+            );
+        }
     }
 }
 
@@ -223,7 +268,8 @@ pub fn explain(
     let oversample = (n * 3).max(50);
     let mut raw_paths = viterbi_nbest(&lattice, &cost_fn, oversample);
 
-    let mut observer = ExplainObserver::default();
+    let now = crate::user_history::now_epoch();
+    let mut observer = ExplainObserver::new(history, now);
     let ctx = PostprocessContext {
         lattice: &lattice,
         conn,
@@ -234,32 +280,34 @@ pub fn explain(
     };
     let final_paths = postprocess_observed(&mut raw_paths, &ctx, &mut observer);
 
-    let now = crate::user_history::now_epoch();
     let paths: Vec<ExplainPath> = final_paths
         .iter()
         .map(|scored| {
-            let key = path_key(scored);
+            let key = scored.surface_key();
+            // Look up snapshots by surface_key — preserved through group_segments.
+            // Rewriter-added candidates (numeric / katakana / kanji variants) are
+            // synthesised after history_rerank and have no snapshot, so they fall
+            // back to zero history boost and use the final cost for `viterbi_cost`.
             let original = observer
                 .original_costs
                 .get(&key)
                 .copied()
                 .unwrap_or(scored.viterbi_cost);
-            let post_rerank = observer
-                .post_rerank_costs
+            let snapshot = observer
+                .pre_history
                 .get(&key)
                 .copied()
-                .unwrap_or(original);
-            let rerank_delta = post_rerank - original;
-            let history_breakdown = history
-                .map(|h| compute_history_boost(scored, h, now))
-                .unwrap_or_default();
-            let history_boost = history_breakdown.applied(scored.segments.len());
+                .unwrap_or(PreHistorySnapshot {
+                    cost: original,
+                    breakdown: HistoryBoostBreakdown::default(),
+                    applied_boost: 0,
+                });
             ExplainPath {
                 segments: explain_segments(scored, conn, dict),
                 viterbi_cost: original,
-                rerank_delta,
-                history_breakdown,
-                history_boost,
+                rerank_delta: snapshot.cost - original,
+                history_breakdown: snapshot.breakdown,
+                history_boost: snapshot.applied_boost,
                 final_cost: scored.viterbi_cost,
             }
         })
@@ -463,6 +511,36 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_explain_unrelated_paths_have_zero_history_boost() {
+        // Paths whose surface does NOT match the recorded history must show a
+        // zero breakdown regardless of how they entered the final candidate set:
+        //   - Real Viterbi paths that simply don't match (lookup hit, zero score).
+        //   - Rewriter-added paths (katakana / kanji variants) that were
+        //     synthesised after history_rerank, so the observer never saw them.
+        //
+        // Regression for the PR #247 R1 review: previously the breakdown was
+        // recomputed against the final (post-grouping / post-rewriter) path,
+        // which could produce non-zero values for paths that never received
+        // an actual boost in `history_rerank`.
+        let dict = test_dict();
+        let mut h = UserHistory::new();
+        h.record(&[("きょう".into(), "京".into())]);
+
+        let result = explain(&dict, None, Some(&h), "きょう", 10);
+        for path in result.paths.iter().filter(|p| p.surface() != "京") {
+            assert_eq!(
+                path.history_boost,
+                0,
+                "non-matching surface {:?} must not receive a history boost",
+                path.surface(),
+            );
+            assert_eq!(path.history_breakdown.unigram_sum, 0);
+            assert_eq!(path.history_breakdown.bigram_sum, 0);
+            assert_eq!(path.history_breakdown.whole_path_boost, 0);
+        }
+    }
+
     #[test]
     fn test_explain_paths_sorted_by_final_cost() {
         let dict = test_dict();

From 9c6862ddc31907c97fa637f10f3a059e1a38016b Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Tue, 19 May 2026 17:32:00 +0900
Subject: [PATCH 3/4] =?UTF-8?q?fix(explain):=20PR247=20Copilot=20R2=20?=
 =?UTF-8?q?=E2=80=94=201=20finding=20resolved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Track the segment count `history_rerank` used as its normalization
denominator alongside the breakdown snapshot, and surface it on
`ExplainPath` as `history_segment_count`. The `format_text` "/N segs"
display now uses this value so the displayed denominator matches the
one applied during normalization even when `group_segments` later
merges adjacent segments.

Without the change the display could disagree with `history_boost` —
e.g. a 4-segment path showing "/2 segs" after grouping while the actual
boost was computed against 4 — which is exactly the kind of incoherence
the breakdown was added to dispel.

Regression test asserts `history_segment_count == segments.len()` and
`history_boost == applied(history_segment_count)` for the no-grouping
path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/explain.rs  | 44 +++++++++++++++++--
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/explain.rs b/engine/crates/lex-core/src/converter/explain.rs
index d499299..61e5a3e 100644
--- a/engine/crates/lex-core/src/converter/explain.rs
+++ b/engine/crates/lex-core/src/converter/explain.rs
@@ -67,6 +67,10 @@ pub struct ExplainPath {
     pub history_breakdown: HistoryBoostBreakdown,
     /// History boost actually subtracted from the cost (post-normalization).
     pub history_boost: i64,
+    /// Segment count `history_rerank` used as the normalization denominator.
+    /// May differ from `segments.len()` when `group_segments` later merged
+    /// adjacent segments — keep this value when reporting `/N segs`.
+    pub history_segment_count: usize,
     /// Final cost after all adjustments.
     pub final_cost: i64,
 }
@@ -114,6 +118,11 @@ struct PreHistorySnapshot {
     breakdown: HistoryBoostBreakdown,
     /// Boost actually subtracted from `cost` by `history_rerank`.
     applied_boost: i64,
+    /// Segment count at the moment `history_rerank` saw the path. May differ
+    /// from the final `segments.len()` after `group_segments` merges adjacent
+    /// segments — kept here so the displayed `/N segs` matches the denominator
+    /// actually used during normalization.
+    segment_count: usize,
 }
 
 /// Diagnostic observer.
@@ -168,6 +177,7 @@ impl PostprocessObserver for ExplainObserver<'_> {
                     cost: p.viterbi_cost,
                     breakdown,
                     applied_boost: applied,
+                    segment_count: p.segments.len(),
                 },
             );
         }
@@ -301,6 +311,7 @@ pub fn explain(
                     cost: original,
                     breakdown: HistoryBoostBreakdown::default(),
                     applied_boost: 0,
+                    segment_count: scored.segments.len(),
                 });
             ExplainPath {
                 segments: explain_segments(scored, conn, dict),
@@ -308,6 +319,7 @@ pub fn explain(
                 rerank_delta: snapshot.cost - original,
                 history_breakdown: snapshot.breakdown,
                 history_boost: snapshot.applied_boost,
+                history_segment_count: snapshot.segment_count,
                 final_cost: scored.viterbi_cost,
             }
         })
@@ -424,10 +436,7 @@ pub fn format_text(result: &ExplainResult) -> String {
         if hb.unigram_sum != 0 || hb.bigram_sum != 0 || hb.whole_path_boost != 0 {
             out.push_str(&format!(
                 "      history: uni_sum={:<+7} bi_sum={:<+7} whole×5={:<+7} (/{} segs)\n",
-                -hb.unigram_sum,
-                -hb.bigram_sum,
-                -hb.whole_path_boost,
-                path.segments.len(),
+                -hb.unigram_sum, -hb.bigram_sum, -hb.whole_path_boost, path.history_segment_count,
             ));
         }
     }
@@ -511,6 +520,33 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_explain_history_segment_count_consistent_with_boost() {
+        // The reported `history_segment_count` is the denominator that
+        // `history_rerank` used at normalization time. Without `group_segments`
+        // (no conn passed here) the pre-history and final segmentation match,
+        // so the field must equal `segments.len()` AND
+        // `history_breakdown.applied(history_segment_count)` must reproduce
+        // the displayed `history_boost`. Regression for PR #247 R2.
+        let dict = test_dict();
+        let mut h = UserHistory::new();
+        h.record(&[("きょう".into(), "京".into())]);
+
+        let result = explain(&dict, None, Some(&h), "きょう", 5);
+        for path in &result.paths {
+            assert_eq!(
+                path.history_segment_count,
+                path.segments.len(),
+                "without grouping, history_segment_count should equal segments.len()",
+            );
+            assert_eq!(
+                path.history_boost,
+                path.history_breakdown.applied(path.history_segment_count),
+                "history_boost must equal applied(history_segment_count)",
+            );
+        }
+    }
+
     #[test]
     fn test_explain_unrelated_paths_have_zero_history_boost() {
         // Paths whose surface does NOT match the recorded history must show a

From 62602bfa6ace02c3527fd67a6c540becdc3ffcc5 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Tue, 19 May 2026 17:43:44 +0900
Subject: [PATCH 4/4] =?UTF-8?q?fix(explain):=20PR247=20Copilot=20R3=20?=
 =?UTF-8?q?=E2=80=94=202=20findings=20resolved?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thread a single `now` through postprocess into `history_rerank_at` so the
observer's precomputed breakdown is guaranteed to match the boost
actually subtracted in the pipeline. Previously each side called
`now_epoch()` independently, so the two values could differ whenever
execution straddled a one-second boundary.

- reranker: rename `history_rerank` → `history_rerank_at(paths, history,
  now)`; the convenience wrapper that captured `now` internally is gone
  because the only callers were postprocess (which now pins the value)
  and tests (which switch to passing `now_epoch()` directly).
- postprocess: capture `now` in `PostprocessContext.now` and pass it to
  `history_rerank_at`. Production wrapper computes the value once.
- explain: the observer and the postprocess context share the same `now`
  value, so `compute_history_boost` and the pipeline see the identical
  decay input.
- Contract test asserts `history_rerank_at` subtracts exactly what
  `compute_history_boost(..., now).applied(seg_count)` reports for the
  same `now`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/explain.rs  |  1 +
 .../lex-core/src/converter/postprocess.rs     |  7 ++-
 .../crates/lex-core/src/converter/reranker.rs | 12 +++--
 .../lex-core/src/converter/tests/reranker.rs  | 46 ++++++++++++++++---
 4 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/explain.rs b/engine/crates/lex-core/src/converter/explain.rs
index 61e5a3e..2d520e9 100644
--- a/engine/crates/lex-core/src/converter/explain.rs
+++ b/engine/crates/lex-core/src/converter/explain.rs
@@ -287,6 +287,7 @@ pub fn explain(
         history,
         kana,
         n,
+        now,
     };
     let final_paths = postprocess_observed(&mut raw_paths, &ctx, &mut observer);
 
diff --git a/engine/crates/lex-core/src/converter/postprocess.rs b/engine/crates/lex-core/src/converter/postprocess.rs
index 5f45cf0..8401f78 100644
--- a/engine/crates/lex-core/src/converter/postprocess.rs
+++ b/engine/crates/lex-core/src/converter/postprocess.rs
@@ -42,6 +42,10 @@ pub(crate) struct PostprocessContext<'a> {
     pub history: Option<&'a UserHistory>,
     pub kana: &'a str,
     pub n: usize,
+    /// Timestamp passed to `history_rerank_at`. Pinning it here lets diagnostic
+    /// observers compute breakdowns against the exact value the pipeline will
+    /// use, so the two cannot disagree when execution crosses a second boundary.
+    pub now: u64,
 }
 
 // ---------------------------------------------------------------------------
@@ -65,6 +69,7 @@ pub(super) fn postprocess(
         history,
         kana,
         n,
+        now: crate::user_history::now_epoch(),
     };
     postprocess_observed(paths, &ctx, &mut NoopObserver)
         .into_iter()
@@ -112,7 +117,7 @@ pub(crate) fn postprocess_observed<O: PostprocessObserver>(
     };
 
     if let Some(h) = ctx.history {
-        reranker::history_rerank(paths, h);
+        reranker::history_rerank_at(paths, h, ctx.now);
     }
     let mut top: Vec<ScoredPath> = paths.drain(..ctx.n.min(paths.len())).collect();
 
diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index fa1d830..4c0848a 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -123,7 +123,7 @@ impl HistoryBoostBreakdown {
 
 /// Compute the history boost breakdown for a single path without mutating it.
 ///
-/// Mirrors the contribution logic used by [`history_rerank`] so callers
+/// Mirrors the contribution logic used by [`history_rerank_at`] so callers
 /// (e.g. `explain`) can inspect each component.
 pub fn compute_history_boost(
     path: &ScoredPath,
@@ -148,7 +148,12 @@ pub fn compute_history_boost(
     }
 }
 
-/// Apply user-history boosts to N-best paths, then re-sort.
+/// Apply user-history boosts to N-best paths using the given `now`, then re-sort.
+///
+/// Callers that also want to inspect the breakdown (e.g. `explain`) should pass
+/// the same `now` they used with [`compute_history_boost`]; otherwise the
+/// stored breakdown can drift from the boost actually subtracted here when
+/// execution crosses a second boundary.
 ///
 /// Unigram and bigram boosts are subtracted from each path's cost so that
 /// learned candidates float to the top. Because this operates on complete
@@ -161,12 +166,11 @@ pub fn compute_history_boost(
 /// compound paths. The whole-path boost is the strongest signal and is not
 /// normalized — it only fires when the full reading→surface was explicitly
 /// selected.
-pub fn history_rerank(paths: &mut [ScoredPath], history: &UserHistory) {
+pub fn history_rerank_at(paths: &mut [ScoredPath], history: &UserHistory, now: u64) {
     let _span = debug_span!("history_rerank", paths_count = paths.len()).entered();
     if paths.is_empty() {
         return;
     }
-    let now = crate::user_history::now_epoch();
     for path in paths.iter_mut() {
         let breakdown = compute_history_boost(path, history, now);
         path.viterbi_cost -= breakdown.applied(path.segments.len());
diff --git a/engine/crates/lex-core/src/converter/tests/reranker.rs b/engine/crates/lex-core/src/converter/tests/reranker.rs
index e43508f..a24b5a0 100644
--- a/engine/crates/lex-core/src/converter/tests/reranker.rs
+++ b/engine/crates/lex-core/src/converter/tests/reranker.rs
@@ -1,7 +1,7 @@
-use crate::converter::reranker::{history_rerank, rerank};
+use crate::converter::reranker::{history_rerank_at, rerank};
 use crate::converter::viterbi::{RichSegment, ScoredPath};
 use crate::dict::connection::ConnectionMatrix;
-use crate::user_history::UserHistory;
+use crate::user_history::{now_epoch, UserHistory};
 
 #[test]
 fn test_rerank_penalizes_fragmented_path() {
@@ -258,7 +258,7 @@ fn test_history_rerank_unigram_boost_reorders() {
         },
     ];
 
-    history_rerank(&mut paths, &h);
+    history_rerank_at(&mut paths, &h, now_epoch());
 
     // "京" should be boosted to first place
     assert_eq!(paths[0].segments[0].surface, "京");
@@ -312,7 +312,7 @@ fn test_history_rerank_bigram_boost() {
         },
     ];
 
-    history_rerank(&mut paths, &h);
+    history_rerank_at(&mut paths, &h, now_epoch());
 
     // "今日は" path should be boosted (both unigram + bigram) to first
     assert_eq!(paths[0].segments[0].surface, "今日");
@@ -345,7 +345,7 @@ fn test_history_rerank_empty_history_preserves_order() {
         },
     ];
 
-    history_rerank(&mut paths, &h);
+    history_rerank_at(&mut paths, &h, now_epoch());
 
     assert_eq!(paths[0].segments[0].surface, "亜");
     assert_eq!(paths[0].viterbi_cost, 1000);
@@ -357,10 +357,44 @@ fn test_history_rerank_empty_history_preserves_order() {
 fn test_history_rerank_empty_paths() {
     let h = UserHistory::new();
     let mut paths: Vec<ScoredPath> = Vec::new();
-    history_rerank(&mut paths, &h);
+    history_rerank_at(&mut paths, &h, now_epoch());
     assert!(paths.is_empty());
 }
 
+#[test]
+fn test_history_rerank_at_matches_compute_history_boost() {
+    // Contract: `history_rerank_at` must subtract exactly the value reported
+    // by `compute_history_boost(...).applied(seg_count)` when given the same
+    // `now`. The `explain` observer relies on this so its precomputed
+    // breakdown matches what the pipeline actually applied. Regression for
+    // PR #247 R3.
+    use crate::converter::reranker::compute_history_boost;
+
+    let mut h = UserHistory::new();
+    h.record(&[("きょう".into(), "京".into())]);
+    let now = 1_700_000_000;
+
+    let path_before = ScoredPath {
+        segments: vec![RichSegment {
+            reading: "きょう".into(),
+            surface: "京".into(),
+            left_id: 0,
+            right_id: 0,
+            word_cost: 0,
+        }],
+        viterbi_cost: 10_000,
+    };
+    let expected_applied =
+        compute_history_boost(&path_before, &h, now).applied(path_before.segments.len());
+
+    let initial_cost = path_before.viterbi_cost;
+    let mut paths = vec![path_before];
+    history_rerank_at(&mut paths, &h, now);
+    let actual_applied = initial_cost - paths[0].viterbi_cost;
+
+    assert_eq!(actual_applied, expected_applied);
+}
+
 /// Build a connection matrix where all transitions cost the given value.
 fn uniform_conn(cost: i16) -> ConnectionMatrix {
     let num_ids = 4;