From 3e970e8f2fcfad3e0fe21833549ea6a46f5e8866 Mon Sep 17 00:00:00 2001 From: Yuxuan Hu Date: Tue, 4 Nov 2025 16:17:06 -0800 Subject: [PATCH 1/2] Add metrics for sampled policy age in replay buffer This adds three new metrics to track the policy age of episodes that are actually sampled from the replay buffer: - buffer/sample/avg_sampled_policy_age: Average age of sampled episodes - buffer/sample/max_sampled_policy_age: Maximum age of sampled episodes - buffer/sample/min_sampled_policy_age: Minimum age of sampled episodes This is distinct from the existing buffer/evict/avg_policy_age metric which tracks the age of all episodes remaining in the buffer after eviction. The new metrics provide visibility into whether training is using fresh data (low ages) or stale data (high ages) at sampling time. Test Plan: - Ran existing unit tests: python -m pytest tests/unit_tests/test_replay_buffer.py -v - All 8 tests passed --- src/forge/actors/replay_buffer.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/forge/actors/replay_buffer.py b/src/forge/actors/replay_buffer.py index 77b4f6f8a..22aabe292 100644 --- a/src/forge/actors/replay_buffer.py +++ b/src/forge/actors/replay_buffer.py @@ -120,6 +120,27 @@ async def sample( entry.sample_count += 1 sampled_episodes.append(entry.data) + # Calculate and record policy age metrics for sampled episodes + sampled_policy_ages = [ + curr_policy_version - ep.policy_version for ep in sampled_episodes + ] + if sampled_policy_ages: + record_metric( + "buffer/sample/avg_sampled_policy_age", + sum(sampled_policy_ages) / len(sampled_policy_ages), + Reduce.MEAN, + ) + record_metric( + "buffer/sample/max_sampled_policy_age", + max(sampled_policy_ages), + Reduce.MAX, + ) + record_metric( + "buffer/sample/min_sampled_policy_age", + min(sampled_policy_ages), + Reduce.MIN, + ) + # Reshape into (dp_size, bsz, ...) 
reshaped_episodes = [ sampled_episodes[dp_idx * self.batch_size : (dp_idx + 1) * self.batch_size] From 0e7e56a5f4a734fce93432f8a3c0d5a0b8ec5f6b Mon Sep 17 00:00:00 2001 From: Yuxuan Hu Date: Tue, 11 Nov 2025 16:30:10 -0800 Subject: [PATCH 2/2] Remove policy age metrics from _evict() method Removed buffer/evict/avg_policy_age and buffer/evict/max_policy_age metrics from the _evict() method: they describe the episodes remaining in the buffer after eviction rather than the episodes actually sampled for training, which the buffer/sample/*_sampled_policy_age metrics added in the previous patch now report. Kept the buffer/evict/sum_episodes_evicted metric for tracking eviction counts. --- src/forge/actors/replay_buffer.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/forge/actors/replay_buffer.py b/src/forge/actors/replay_buffer.py index 22aabe292..37a1558d5 100644 --- a/src/forge/actors/replay_buffer.py +++ b/src/forge/actors/replay_buffer.py @@ -170,22 +170,6 @@ def _evict(self, curr_policy_version): ) self.buffer = deque(self._collect(indices)) - # Record evict metrics - policy_age = [ - curr_policy_version - ep.data.policy_version for ep in self.buffer - ] - if policy_age: - record_metric( - "buffer/evict/avg_policy_age", - sum(policy_age) / len(policy_age), - Reduce.MEAN, - ) - record_metric( - "buffer/evict/max_policy_age", - max(policy_age), - Reduce.MAX, - ) - evicted_count = buffer_len_before_evict - len(self.buffer) record_metric("buffer/evict/sum_episodes_evicted", evicted_count, Reduce.SUM)