From 37b7a15f9b4de59492e8136378aebf691389d2cd Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 12:11:44 -0700 Subject: [PATCH 1/3] update config --- apps/grpo/qwen3_32b.yaml | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml index 593a2e1fb..5729517da 100644 --- a/apps/grpo/qwen3_32b.yaml +++ b/apps/grpo/qwen3_32b.yaml @@ -3,10 +3,10 @@ # NOTE - This has not been tested for correctness yet! All testing so far has been only for infrastructure stability # Global configuration -group_size: 2 -local_batch_size: 8 # per-device batch size -max_req_tokens: 512 -max_res_tokens: 512 +group_size: 16 +local_batch_size: 32 # per-device batch size +max_req_tokens: 1024 +max_res_tokens: 1024 model: "Qwen/Qwen3-32B" off_by_n: 1 # Off by one by default @@ -14,7 +14,7 @@ provisioner: launcher: slurm # Main loop configuration -rollout_threads: 1 # Recommended to set equal to policy.num_replicas +rollout_threads: 32 # Setting this to about 4x the number of policy replicas seems to work well (NOTE(review): 32 is 8x the 4 policy replicas configured under services.policy - confirm) # Observability configuration metric_logging: @@ -35,12 +35,12 @@ dataset: # Policy configuration policy: - engine_args: # https://docs.vllm.ai/en/v0.10.0/api/vllm/engine/arg_utils.html#vllm.engine.arg_utils.EngineArgs + engine_config: model: ${model} tensor_parallel_size: 4 pipeline_parallel_size: 1 enforce_eager: false - sampling_params: # https://docs.vllm.ai/en/v0.10.0/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams + sampling_config: n: ${group_size} max_tokens: ${max_res_tokens} temperature: 1.0 @@ -69,8 +69,8 @@ trainer: enable: false parallelism: data_parallel_replicate_degree: 1 - data_parallel_shard_degree: -1 - tensor_parallel_degree: 1 + data_parallel_shard_degree: 1 + tensor_parallel_degree: 8 pipeline_parallel_degree: 1 context_parallel_degree: 1 expert_parallel_degree: 1 @@ -90,7 +90,7 @@ replay_buffer: batch_size: 
${local_batch_size} max_policy_age: ${off_by_n} # dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree - dp_size: 8 + dp_size: 1 # Reference model configuration ref_model: @@ -118,37 +118,30 @@ ref_model: # All resource allocations services: policy: - procs: ${policy.engine_args.tensor_parallel_size} - num_replicas: 1 + procs: ${policy.engine_config.tensor_parallel_size} + num_replicas: 4 hosts: 1 with_gpus: true - mesh_name: policy ref_model: procs: ${ref_model.parallelism.tensor_parallel_degree} num_replicas: 1 with_gpus: true - mesh_name: ref_model reward_actor: procs: 1 num_replicas: 1 with_gpus: false - mesh_name: reward_actor actors: dataset: procs: 1 with_gpus: false - mesh_name: dataset trainer: procs: 8 hosts: 1 with_gpus: true - mesh_name: trainer replay_buffer: procs: 1 with_gpus: false - mesh_name: replay_buffer compute_advantages: procs: 1 with_gpus: false - mesh_name: compute_advantages From 56abc3cad34c39a5ae819d1c2a7b75a594cfffac Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 12:14:41 -0700 Subject: [PATCH 2/3] fix --- apps/grpo/qwen3_32b.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml index 5729517da..e49b0ca93 100644 --- a/apps/grpo/qwen3_32b.yaml +++ b/apps/grpo/qwen3_32b.yaml @@ -35,12 +35,12 @@ dataset: # Policy configuration policy: - engine_config: + engine_args: # https://docs.vllm.ai/en/v0.10.0/api/vllm/engine/arg_utils.html#vllm.engine.arg_utils.EngineArgs model: ${model} tensor_parallel_size: 4 pipeline_parallel_size: 1 enforce_eager: false - sampling_config: + sampling_params: # https://docs.vllm.ai/en/v0.10.0/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams n: ${group_size} max_tokens: ${max_res_tokens} temperature: 1.0 @@ -118,30 +118,37 @@ ref_model: # All resource allocations services: policy: - procs: 
${policy.engine_config.tensor_parallel_size} + procs: ${policy.engine_args.tensor_parallel_size} num_replicas: 4 hosts: 1 with_gpus: true + mesh_name: policy ref_model: procs: ${ref_model.parallelism.tensor_parallel_degree} num_replicas: 1 with_gpus: true + mesh_name: ref_model reward_actor: procs: 1 num_replicas: 1 with_gpus: false + mesh_name: reward_actor actors: dataset: procs: 1 with_gpus: false + mesh_name: dataset trainer: procs: 8 hosts: 1 with_gpus: true + mesh_name: trainer replay_buffer: procs: 1 with_gpus: false + mesh_name: replay_buffer compute_advantages: procs: 1 with_gpus: false + mesh_name: compute_advantages From 7decb30d55a5e0a0c2837359618ccda323405ba5 Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 12:47:32 -0700 Subject: [PATCH 3/3] fix spacing --- apps/grpo/qwen3_32b.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml index e49b0ca93..e7a0cf509 100644 --- a/apps/grpo/qwen3_32b.yaml +++ b/apps/grpo/qwen3_32b.yaml @@ -35,12 +35,12 @@ dataset: # Policy configuration policy: - engine_args: # https://docs.vllm.ai/en/v0.10.0/api/vllm/engine/arg_utils.html#vllm.engine.arg_utils.EngineArgs + engine_args: # https://docs.vllm.ai/en/v0.10.0/api/vllm/engine/arg_utils.html#vllm.engine.arg_utils.EngineArgs model: ${model} tensor_parallel_size: 4 pipeline_parallel_size: 1 enforce_eager: false - sampling_params: # https://docs.vllm.ai/en/v0.10.0/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams + sampling_params: # https://docs.vllm.ai/en/v0.10.0/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams n: ${group_size} max_tokens: ${max_res_tokens} temperature: 1.0