From 17325b8e1aaace6525c03d80abdb4ba602df2865 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Sat, 22 Mar 2025 18:37:43 -0700 Subject: [PATCH] use -m option to run scripts as modules --- .github/workflows/integration_test_8gpu.yaml | 3 --- README.md | 1 - docs/checkpoint.md | 2 +- run_train.sh | 2 +- scripts/estimate/run_memory_estimation.sh | 2 +- scripts/generate/README.md | 2 +- scripts/generate/run_llama_generate.sh | 2 +- 7 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml index ec3f8eaf90..4280eddcb5 100644 --- a/.github/workflows/integration_test_8gpu.yaml +++ b/.github/workflows/integration_test_8gpu.yaml @@ -38,8 +38,5 @@ jobs: python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 - # install torchtitan to test the files in ./scripts - python -m pip install -e . - mkdir artifacts-to-be-uploaded python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 8 diff --git a/README.md b/README.md index 4f0e79fb32..811df459c8 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,6 @@ cd torchtitan pip install -r requirements.txt pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 --force-reinstall [For AMD GPU] pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.3 --force-reinstall -pip install -e . ``` ### Downloading a tokenizer diff --git a/docs/checkpoint.md b/docs/checkpoint.md index 198412c2b3..d427134c15 100644 --- a/docs/checkpoint.md +++ b/docs/checkpoint.md @@ -5,7 +5,7 @@ An example script for converting the original Llama3 checkpoints into the expect The script expects a path to the original checkpoint files, and a path to an output directory: ```bash -python3 scripts/convert_llama_to_dcp.py +python -m scripts.convert_llama_to_dcp ``` diff --git a/run_train.sh b/run_train.sh index 64570316b9..d1c51a05a2 100755 --- a/run_train.sh +++ b/run_train.sh @@ -25,4 +25,4 @@ PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True" \ TORCHFT_LIGHTHOUSE=${TORCHFT_LIGHTHOUSE} \ torchrun --nproc_per_node=${NGPU} --rdzv_backend c10d --rdzv_endpoint="localhost:0" \ --local-ranks-filter ${LOG_RANK} --role rank --tee 3 \ -torchtitan/train.py --job.config_file ${CONFIG_FILE} $overrides +-m torchtitan.train --job.config_file ${CONFIG_FILE} $overrides diff --git a/scripts/estimate/run_memory_estimation.sh b/scripts/estimate/run_memory_estimation.sh index 00aa53a8ac..00078a2665 100755 --- a/scripts/estimate/run_memory_estimation.sh +++ b/scripts/estimate/run_memory_estimation.sh @@ -23,4 +23,4 @@ fi # Export WORLD_SIZE and LOCAL_RANK export WORLD_SIZE=$((NGPU * NNODES)) export LOCAL_RANK=0 -python scripts/estimate/estimation.py --job.config_file ${CONFIG_FILE} --memory_estimation.enabled $overrides +python -m scripts.estimate.estimation --job.config_file ${CONFIG_FILE} --memory_estimation.enabled $overrides diff --git a/scripts/generate/README.md b/scripts/generate/README.md index 116c6b88d4..6d7f3e50c1 100644 --- a/scripts/generate/README.md +++ b/scripts/generate/README.md @@ -33,5 +33,5 @@ PROMPT="What is the meaning of life?" \ #### View Available Arguments ```bash -> python ./scripts/generate/test_generate.py --help +> python -m scripts.generate.test_generate --help ``` diff --git a/scripts/generate/run_llama_generate.sh b/scripts/generate/run_llama_generate.sh index e7c2524e4e..1169698953 100755 --- a/scripts/generate/run_llama_generate.sh +++ b/scripts/generate/run_llama_generate.sh @@ -37,7 +37,7 @@ set -x torchrun --standalone \ --nproc_per_node="${NGPU}" \ --local-ranks-filter="${LOG_RANK}" \ - scripts/generate/test_generate.py \ + -m scripts.generate.test_generate \ --config="${CONFIG_FILE}" \ --checkpoint="${CHECKPOINT_DIR}" \ --prompt="${PROMPT}" \