Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,20 @@
# This script can be used to launch a torchtitan float8 training run
# with the given parameters.

# script arguments: each may be overridden through the environment;
# an unset or empty value falls back to the default shown here.
: "${LOCAL_BATCH_SIZE:=1}"
: "${STEPS:=100}"

# temporary log file which is deleted after performance data is parsed out and metrics are calculated.
# (A single assignment: the earlier /tmp/float8_training_log.txt value was dead,
# since it was overwritten on the very next line.)
LOG_FILE="/tmp/torchtitan_logs.txt"

# validate user has specified required args
# TORCHTITAN_ROOT must be non-empty; otherwise print an error plus usage and exit 1.
if [ -z "${TORCHTITAN_ROOT}" ]; then
  # Diagnostics are sent to stderr so they do not mix with regular output.
  {
    echo "Error: TORCHTITAN_ROOT environment variable is not set. Please set it before running this script."
    echo "Usage: TORCHTITAN_ROOT=<directory> CONFIG_FILE=<model toml> ./moe.sh"
    echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
  } >&2
  exit 1
fi

# CONFIG_FILE must be non-empty; otherwise print an error plus usage and exit 1.
if [ -z "${CONFIG_FILE}" ]; then
  # Diagnostics are sent to stderr so they do not mix with regular output.
  {
    echo "Error: CONFIG_FILE environment variable is not set. Please set it before running this script."
    echo "Usage: TORCHTITAN_ROOT=<directory> CONFIG_FILE=<model toml> ./moe.sh"
    echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
  } >&2
  exit 1
fi
Expand All @@ -29,7 +32,7 @@ original_dir=$(pwd)
# Enter the torchtitan checkout. Quoted so paths containing spaces work, and
# abort if the directory cannot be entered rather than running from the wrong place.
cd "${TORCHTITAN_ROOT}" || exit 1

# run the command with the specified arguments
# stdout and stderr are merged and copied to LOG_FILE via tee while still being
# shown on the terminal. Only one invocation is kept: the earlier one hard-coded
# the llama4 debug_model.toml config and caused a second, redundant run.
# NOTE(review): run_train.sh presumably reads CONFIG_FILE from the environment —
# confirm it is exported by the caller.
# EXTRA_ARGS is intentionally unquoted so it word-splits into separate arguments.
# shellcheck disable=SC2086
"${TORCHTITAN_ROOT}/run_train.sh" ${EXTRA_ARGS} 2>&1 | tee "${LOG_FILE}"

# return to original working directory
# Quoted so paths containing spaces work; a failure is reported on stderr but
# does not abort the script.
cd "${original_dir}" || echo "Warning: could not return to ${original_dir}" >&2
Expand Down
Loading