From adc49e6e9abc866c398f5be9e853da25a86d6aba Mon Sep 17 00:00:00 2001 From: Will Constable Date: Tue, 4 Nov 2025 10:32:45 -0800 Subject: [PATCH 1/3] Update mast launcher info Following https://www.internalfb.com/diff/D84945310 --- mast/.torchxconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mast/.torchxconfig b/mast/.torchxconfig index 67379f2f..ee01fbee 100644 --- a/mast/.torchxconfig +++ b/mast/.torchxconfig @@ -3,13 +3,14 @@ conda_path_in_fbpkg = conda activate_conda = False fbpkg_ids = fb-py-spy:prod hpcIdentity = pytorch_distributed -rmAttribution = pytorch4all_clients_approved +rmAttribution = msl_infra_pytorch_dev workspace_fbpkg_name = torchtitan_workspace conda_pack_ignore_missing_files = True git = False hpcJobOncall = meta_conda modelTypeName = gen_ai_conda -hpcClusterUuid = MastProdCluster +hpcClusterUuid = MastGenAICluster +localityConstraints = region;gtn forceSingleRegion = False use_caf = False From c3857918c982d1e9397c1d0788c9fb723ddcc1ba Mon Sep 17 00:00:00 2001 From: Will Constable Date: Tue, 4 Nov 2025 16:55:58 -0800 Subject: [PATCH 2/3] remove print_args option deleted from upstream torchtitan --- mast/run_torchtitan.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/mast/run_torchtitan.sh b/mast/run_torchtitan.sh index bd10e244..ad341f8e 100755 --- a/mast/run_torchtitan.sh +++ b/mast/run_torchtitan.sh @@ -78,5 +78,4 @@ python torchtitan/train.py \ --validation.dataset_path "${dataset_path}" \ --metrics.save_tb_folder "${save_tb_folder}" \ --metrics.disable_color_printing \ ---job.print_args \ $overrides From ad9c0033364300b4de99df8759af12e90a78136d Mon Sep 17 00:00:00 2001 From: Will Constable Date: Tue, 4 Nov 2025 17:04:29 -0800 Subject: [PATCH 3/3] Fix llama3_auto_parallel name to match upstream torchtitan/autoparallel --- mast/sweep.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mast/sweep.py b/mast/sweep.py index e867bd43..3ab22ed2 100644 --- a/mast/sweep.py +++ b/mast/sweep.py @@ -104,12 +104,12 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]: ], "llama3_autop_1d_compile": llama3_1d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", ], "llama3_autop_1d_compile_bucket_reorder": llama3_1d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", "--experimental.bucket_all_gathers_fx=fsdp", "--experimental.bucket_reduce_scatters_fx=fsdp", @@ -125,12 +125,12 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]: ], "llama3_autop_2d_compile": llama3_2d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", ], "llama3_autop_2d_compile_bucket_reorder": llama3_2d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", "--experimental.bucket_all_gathers_fx=fsdp", "--experimental.bucket_reduce_scatters_fx=fsdp", @@ -153,13 +153,13 @@ def maybe_find_pulp(maybe_path: Optional[str] = None) -> Optional[str]: | { "llama3_autop_1d_compile_ruisi_bucket_reorder": llama3_1d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", "--experimental.enable_simplefsdp_passes", ], "llama3_autop_2d_compile_ruisi_bucket_reorder": llama3_2d_common_opts + [ - "--model.name=llama3_auto_parallel", + "--model.name=auto_parallel.llama3", "--compile.enable", "--experimental.enable_simplefsdp_passes", ],