From f8219b903dd9defe5569eaaf971a31b99ba824c2 Mon Sep 17 00:00:00 2001
From: "Jiyue (Jennifer) Wang"
Date: Thu, 20 Nov 2025 10:40:45 -0800
Subject: [PATCH] Bug fix for dropping episodes in the GRPO (#601)

Summary:

## Bug Description:

https://github.com/meta-pytorch/torchforge/pull/580 had incorrect indentation,
causing the input_ids and episodes variables to be deleted inside the
episode-building loop and making the program hang. A follow-up diff will
surface background-thread crashes to the main thread so that we know which
thread crashed and why.

Reviewed By: daniellepintz

Differential Revision: D87554570
---
 apps/grpo/main.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/apps/grpo/main.py b/apps/grpo/main.py
index ea37c5351..1c1c2bd4a 100644
--- a/apps/grpo/main.py
+++ b/apps/grpo/main.py
@@ -424,23 +424,21 @@ async def continuous_rollouts():
                 input_ids[i, :max_req_tokens] = episode.request_tensor
                 input_ids[i, max_req_tokens:] = episode.response_tensor
 
-                # drop episodes if
-                # 1> reward std-dev is very small (including all 0s and all 1s)
-                # 2> response is potentially truncated (response_len >= max_res_tokens)
-                rewards = [e.reward for e in episodes]
-                rewards_std = torch.std(torch.tensor(rewards))
-                max_response_len = max(
-                    e.completion.token_ids.shape[0] for e in episodes
-                )
-                drop = rewards_std < 1e-3 or max_response_len >= max_res_tokens
-                record_metric(
-                    "main/continuous_rollouts/dropped_episodes",
-                    1 if drop else 0,
-                    Reduce.SUM,
-                )
-                if drop:
-                    del input_ids, episodes
-                    continue
+            # drop episodes if
+            # 1> reward std-dev is very small (including all 0s and all 1s)
+            # 2> response is potentially truncated (response_len >= max_res_tokens)
+            rewards = [e.reward for e in episodes]
+            rewards_std = torch.std(torch.tensor(rewards))
+            max_response_len = max(e.completion.token_ids.shape[0] for e in episodes)
+            drop = rewards_std < 1e-3 or max_response_len >= max_res_tokens
+            record_metric(
+                "main/continuous_rollouts/dropped_episodes",
+                1 if drop else 0,
+                Reduce.SUM,
+            )
+            if drop:
+                del input_ids, episodes
+                continue
 
             t.step("reward_evaluation")
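
For readers outside apps/grpo/main.py, here is a minimal sketch (not part of the patch) of the failure mode the Summary describes: the drop check from #580 was nested inside the per-episode loop, so `del input_ids, episodes` ran mid-iteration and the next pass over the loop hit an unbound `input_ids`. The dict-based episodes, tensor shapes, and the `build_batch_buggy` helper are illustrative stand-ins; only `input_ids`, `episodes`, and the indentation pattern come from the patch.

```python
import torch


def build_batch_buggy(episodes, max_req_tokens=4, max_res_tokens=4):
    """Simplified stand-in for the episode-packing loop in continuous_rollouts()."""
    input_ids = torch.zeros(
        len(episodes), max_req_tokens + max_res_tokens, dtype=torch.long
    )
    for i, episode in enumerate(episodes):
        input_ids[i, :max_req_tokens] = episode["request"]
        input_ids[i, max_req_tokens:] = episode["response"]

        # BUG (pre-#601 indentation): this block belongs one level up, after the
        # loop finishes. Running it per-episode deletes input_ids on iteration 0...
        rewards = [e["reward"] for e in episodes]
        if torch.std(torch.tensor(rewards)) < 1e-3:
            del input_ids, episodes
            continue  # ...so iteration 1 raises UnboundLocalError on input_ids

    return input_ids


# All rewards equal -> reward std-dev is 0 -> the drop branch fires mid-loop.
episodes = [
    {
        "request": torch.ones(4, dtype=torch.long),
        "response": torch.ones(4, dtype=torch.long),
        "reward": 1.0,
    }
    for _ in range(2)
]
build_batch_buggy(episodes)  # UnboundLocalError: input_ids referenced after del
```

Per the Summary, the real loop runs in a background rollout task, so this crash is not surfaced to the main thread and the program appears to hang; the follow-up diff mentioned above is meant to propagate such background crashes.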