From 8e8a8cb2b74cc9ee337d64dc921aea559e80967a Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 07:46:25 -0700 Subject: [PATCH] fix 8b config --- apps/grpo/qwen3_8b.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml index fedf2f36a..1b817a1fb 100644 --- a/apps/grpo/qwen3_8b.yaml +++ b/apps/grpo/qwen3_8b.yaml @@ -53,7 +53,7 @@ trainer: lr_scheduler: warmup_steps: 1 training: - local_local_batch_size: ${local_batch_size} + local_batch_size: ${local_batch_size} seq_len: 2048 max_norm: 1.0 steps: 1000000 @@ -82,7 +82,7 @@ trainer: # Replay buffer configuration replay_buffer: - local_batch_size: ${local_batch_size} + batch_size: ${local_batch_size} max_policy_age: ${off_by_n} # This should match the dp_size of TorchTitan # Here it's set explicitly to 2, because we've set