From 8e8a8cb2b74cc9ee337d64dc921aea559e80967a Mon Sep 17 00:00:00 2001
From: Allen Wang <9057208+allenwang28@users.noreply.github.com>
Date: Mon, 13 Oct 2025 07:46:25 -0700
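Subject: [PATCH] Fix batch size key names in qwen3_8b GRPO config

Rename `local_local_batch_size` (duplicated `local_` prefix) to
`local_batch_size` under `trainer.training`, and rename the
`replay_buffer` key `local_batch_size` to `batch_size`. Both keys still
take their value from `${local_batch_size}`.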
---
 apps/grpo/qwen3_8b.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
index fedf2f36a..1b817a1fb 100644
--- a/apps/grpo/qwen3_8b.yaml
+++ b/apps/grpo/qwen3_8b.yaml
@@ -53,7 +53,7 @@ trainer:
   lr_scheduler:
     warmup_steps: 1
   training:
-    local_local_batch_size: ${local_batch_size}
+    local_batch_size: ${local_batch_size}
     seq_len: 2048
     max_norm: 1.0
     steps: 1000000
@@ -82,7 +82,7 @@ trainer:
 
 # Replay buffer configuration
 replay_buffer:
-  local_batch_size: ${local_batch_size}
+  batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
   # This should match the dp_size of TorchTitan
   # Here it's set explicitly to 2, because we've set