From bd0f5c80642bf1871e3fed9f1ab347690feb9b21 Mon Sep 17 00:00:00 2001
From: Daniel Vega-Myhre
Date: Fri, 3 Oct 2025 09:12:13 -0700
Subject: [PATCH] [moe training] generic bench script for torchtitan MoEs

---
 .../float8/training/{llama4.sh => bench.sh} | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)
 rename benchmarks/float8/training/{llama4.sh => bench.sh} (58%)

diff --git a/benchmarks/float8/training/llama4.sh b/benchmarks/float8/training/bench.sh
similarity index 58%
rename from benchmarks/float8/training/llama4.sh
rename to benchmarks/float8/training/bench.sh
index 216d1f918a..5d6feb79b6 100755
--- a/benchmarks/float8/training/llama4.sh
+++ b/benchmarks/float8/training/bench.sh
@@ -7,17 +7,20 @@
 # This script can be used to launch a torchtitan float8 training run
 # with the given parameters,
 
-# script arguments
-LOCAL_BATCH_SIZE=${LOCAL_BATCH_SIZE:-1}
-STEPS=${STEPS:-100}
-
 # temporary log file which is deleted after performance data is parsed out and metrics are calculated.
-LOG_FILE="/tmp/float8_training_log.txt"
+LOG_FILE="/tmp/torchtitan_logs.txt"
 
-# validate user has specified torchtitan root directory
+# validate user has specified required args
 if [ -z "${TORCHTITAN_ROOT}" ]; then
-    echo "Error: TORCHTITAN environment variable is not set. Please set it before running this script."
-    echo "Usage: TORCHTITAN_ROOT= ./torchtitan_llama4.sh"
+    echo "Error: TORCHTITAN_ROOT environment variable is not set. Please set it before running this script."
+    echo "Usage: TORCHTITAN_ROOT= CONFIG_FILE= ./moe.sh"
+    echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
+    exit 1
+fi
+
+if [ -z "${CONFIG_FILE}" ]; then
+    echo "Error: CONFIG_FILE environment variable is not set. Please set it before running this script."
+    echo "Usage: TORCHTITAN_ROOT= CONFIG_FILE= ./moe.sh"
     echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
     exit 1
 fi
@@ -29,7 +32,7 @@ original_dir=$(pwd)
 cd ${TORCHTITAN_ROOT}
 
 # run the command with the specified arguments
-CONFIG_FILE="./torchtitan/experiments/llama4/train_configs/debug_model.toml" ${TORCHTITAN_ROOT}/run_train.sh ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
+${TORCHTITAN_ROOT}/run_train.sh ${EXTRA_ARGS} 2>&1 | tee ${LOG_FILE}
 
 # return to original working directory
 cd $original_dir