From cc858af6616e57d636f83e926c26298c93dc3ff5 Mon Sep 17 00:00:00 2001 From: Chien-Chin Huang Date: Mon, 10 Nov 2025 14:47:35 -0800 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- torchtitan/distributed/tensor_parallel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torchtitan/distributed/tensor_parallel.py b/torchtitan/distributed/tensor_parallel.py index a2749f4c11..9377f44dcf 100644 --- a/torchtitan/distributed/tensor_parallel.py +++ b/torchtitan/distributed/tensor_parallel.py @@ -17,7 +17,9 @@ def maybe_enable_async_tp(job_config: JobConfig, tp_mesh: DeviceMesh): return if not (job_config.compile.enable and "model" in job_config.compile.components): - raise RuntimeError("Async TP requires --training.compile") + raise RuntimeError( + "Async TP requires 'model' in --compile.components and --comile.enable" + ) from torch.distributed._symmetric_memory import enable_symm_mem_for_group From f352b6918f25d32a8c2a17af2806c713dc117dc4 Mon Sep 17 00:00:00 2001 From: Chien-Chin Huang Date: Mon, 10 Nov 2025 14:47:35 -0800 Subject: [PATCH 2/3] Update (base update) [ghstack-poisoned] From 68dc6d409d481dcb51bd4738091acd2d8a01b2e8 Mon Sep 17 00:00:00 2001 From: Chien-Chin Huang Date: Mon, 10 Nov 2025 16:59:42 -0800 Subject: [PATCH 3/3] Update [ghstack-poisoned] --- torchtitan/distributed/tensor_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchtitan/distributed/tensor_parallel.py b/torchtitan/distributed/tensor_parallel.py index 9377f44dcf..04e4e36c3a 100644 --- a/torchtitan/distributed/tensor_parallel.py +++ b/torchtitan/distributed/tensor_parallel.py @@ -18,7 +18,7 @@ def maybe_enable_async_tp(job_config: JobConfig, tp_mesh: DeviceMesh): if not (job_config.compile.enable and "model" in job_config.compile.components): raise RuntimeError( - "Async TP requires 'model' in --compile.components and --comile.enable" + "Async TP requires 'model' in --compile.components and --compile.enable" ) from torch.distributed._symmetric_memory import enable_symm_mem_for_group