import torch
import torch_xla
from torch_xla import runtime as xr
import torch_xla.core.xla_model as xm
import torch_xla.debug.metrics as met


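# Test payload: all_reduce-sums the per-replica input, adds a constant, then
# all_gathers the result so every replica ends up with the reduced value.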
def dummy_collective_fn(input):
  res_tensor = xm.all_reduce(xm.REDUCE_SUM, input)
  res_tensor += 3.0
  res_tensor = xm.all_gather(res_tensor, dim=0)
  return res_tensor


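# Per-process entry point: runs the collective through torch.compile with the
# openxla backend and checks both the numerics and the recorded XLA metrics.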
def _mp_fn(index):
  device = xm.xla_device()
  world_size = xr.world_size()
  if xm.xla_device_hw(device) not in ('TPU', 'CUDA', 'NEURON'):
    print(f'skip this test for hw {xm.xla_device_hw(device)}')
    return
  ordinal_tensor = torch.tensor([index], dtype=torch.float).to(device)
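  # Compile the collective graph with the Dynamo openxla backend, once with
  # dynamic shapes enabled and once without.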
  for dynamic in [True, False]:
    met.clear_all()
    compiled_collective = torch.compile(
        dummy_collective_fn, backend="openxla", dynamic=dynamic)
    res_tensor = compiled_collective(ordinal_tensor)
    # Each replica contributes its ordinal, so the reduced value is the sum
    # 0 + 1 + ... + (world_size - 1), plus the 3.0 added in the function.
    expected_tensor = torch.tensor(
        [world_size * (world_size - 1) / 2] * world_size,
        dtype=torch.float) + 3.0
    torch_xla.sync()
    assert torch.allclose(res_tensor.cpu(), expected_tensor)
    # A single ExecuteTime sample means one compiled XLA graph execution.
    assert met.metric_data("ExecuteTime")[0] == 1


if __name__ == '__main__':
  torch_xla.launch(_mp_fn, args=())