diff --git a/examples/math_tool/train_math_with_tool.sh b/examples/math_tool/train_math_with_tool.sh index 580c3fa19..961b5b77b 100644 --- a/examples/math_tool/train_math_with_tool.sh +++ b/examples/math_tool/train_math_with_tool.sh @@ -9,7 +9,7 @@ export VLLM_ENGINE_ITERATION_TIMEOUT_S=100000000000 # Find the directory where rllm package is located RLLM_DIR=$(python3 -c "import rllm; import os; print(os.path.dirname(os.path.dirname(rllm.__file__)))") -python3 -m examples.math_tool.train_math_with_tool \ +python3 -m train_math_with_tool \ algorithm.adv_estimator=grpo \ data.train_batch_size=32 \ data.val_batch_size=500 \ @@ -66,4 +66,4 @@ python3 -m examples.math_tool.train_math_with_tool \ agent.async_engine=True \ agent.use_stepwise_advantage=False \ agent.stepwise_advantage_mode="mc_return" \ - trainer.total_epochs=100 \ No newline at end of file + trainer.total_epochs=100