Merged
5 changes: 3 additions & 2 deletions mast/.torchxconfig
@@ -3,13 +3,14 @@ conda_path_in_fbpkg = conda
activate_conda = False
fbpkg_ids = fb-py-spy:prod
hpcIdentity = pytorch_distributed
- rmAttribution = pytorch4all_clients_approved
+ rmAttribution = msl_infra_pytorch_dev
workspace_fbpkg_name = torchtitan_workspace
conda_pack_ignore_missing_files = True
git = False
hpcJobOncall = meta_conda
modelTypeName = gen_ai_conda
- hpcClusterUuid = MastProdCluster
+ hpcClusterUuid = MastGenAICluster
localityConstraints = region;gtn
forceSingleRegion = False
use_caf = False

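For a quick check that the new attribution and cluster settings landed, here is a minimal Python sketch (not part of this PR) that scans mast/.torchxconfig for the two keys changed above. The enclosing [section] header is not visible in this hunk, so the sketch walks every section instead of assuming a name, and it assumes the file is ordinary INI that configparser can read.

import configparser

# Sketch only: inspect the keys touched by this change. We avoid hard-coding
# a section name because the hunk above starts at line 3 and the enclosing
# [section] header is not shown in the diff.
cp = configparser.ConfigParser()
cp.optionxform = str  # preserve camelCase keys such as hpcClusterUuid
cp.read("mast/.torchxconfig")

for section in cp.sections():
    for key in ("rmAttribution", "hpcClusterUuid"):
        if key in cp[section]:
            # Expected values after this change:
            #   rmAttribution  -> msl_infra_pytorch_dev
            #   hpcClusterUuid -> MastGenAICluster
            print(f"[{section}] {key} = {cp[section][key]}")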
1 change: 0 additions & 1 deletion mast/run_torchtitan.sh
@@ -78,5 +78,4 @@ python torchtitan/train.py \
--validation.dataset_path "${dataset_path}" \
--metrics.save_tb_folder "${save_tb_folder}" \
--metrics.disable_color_printing \
- --job.print_args \
$overrides
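With --job.print_args dropped, train.py no longer prints its resolved arguments at startup; the trailing $overrides remains the hook for passing extra torchtitan flags. A hypothetical usage sketch (in Python, to match sweep.py) follows, assuming run_torchtitan.sh forwards its own command-line arguments into $overrides; that wiring sits above line 78 and is not visible in this hunk.

import subprocess

# Hypothetical launch sketch, not part of this PR. It assumes the wrapper
# script passes its CLI arguments through to the $overrides expansion at the
# end of the train.py command shown above.
overrides = [
    "--training.steps=50",   # standard torchtitan override
    "--metrics.log_freq=10",
]
subprocess.run(["bash", "mast/run_torchtitan.sh", *overrides], check=True)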
12 changes: 6 additions & 6 deletions mast/sweep.py
@@ -104,12 +104,12 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]:
],
"llama3_autop_1d_compile": llama3_1d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
],
"llama3_autop_1d_compile_bucket_reorder": llama3_1d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
"--experimental.bucket_all_gathers_fx=fsdp",
"--experimental.bucket_reduce_scatters_fx=fsdp",
@@ -125,12 +125,12 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]:
],
"llama3_autop_2d_compile": llama3_2d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
],
"llama3_autop_2d_compile_bucket_reorder": llama3_2d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
"--experimental.bucket_all_gathers_fx=fsdp",
"--experimental.bucket_reduce_scatters_fx=fsdp",
@@ -153,13 +153,13 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]:
| {
"llama3_autop_1d_compile_ruisi_bucket_reorder": llama3_1d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
"--experimental.enable_simplefsdp_passes",
],
"llama3_autop_2d_compile_ruisi_bucket_reorder": llama3_2d_common_opts
+ [
"--model.name=llama3_auto_parallel",
"--model.name=auto_parallel.llama3",
"--compile.enable",
"--experimental.enable_simplefsdp_passes",
],
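To make the rename concrete, below is a small, self-contained sketch of the sweep-table pattern these hunks edit: each sweep name maps to a shared option list for its parallelism layout plus run-specific flags, and the model is now selected with --model.name=auto_parallel.llama3. The common option lists here are placeholders, not the real llama3_1d_common_opts / llama3_2d_common_opts defined earlier in sweep.py.

# Illustrative sketch of the mapping edited above; only the per-run flags
# mirror the diff, the "common" lists are stand-ins.
llama3_1d_common_opts = [
    "--parallelism.data_parallel_shard_degree=8",  # placeholder value
]
llama3_2d_common_opts = llama3_1d_common_opts + [
    "--parallelism.tensor_parallel_degree=2",  # placeholder value
]

sweeps = {
    "llama3_autop_1d_compile": llama3_1d_common_opts
    + [
        "--model.name=auto_parallel.llama3",  # renamed from llama3_auto_parallel
        "--compile.enable",
    ],
    "llama3_autop_2d_compile_ruisi_bucket_reorder": llama3_2d_common_opts
    + [
        "--model.name=auto_parallel.llama3",
        "--compile.enable",
        "--experimental.enable_simplefsdp_passes",
    ],
}

if __name__ == "__main__":
    for name, opts in sweeps.items():
        print(f"{name}: {' '.join(opts)}")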