88 changes: 0 additions & 88 deletions configuration.yaml
@@ -1,93 +1,5 @@
---
variables:
xrt_variables:
XRT_MESH_SERVICE_ADDRESS:
description:
- The mesh service address used to create the XRT mesh client.
type: string
default_value: ""
XRT_GRPC_COMPRESSION:
description:
- Configures compression for rpc options on the XRT client.
type: string
default_value: ""
XRT_TPU_CONFIG:
description:
- Addresses for the TPU services to be used by the XRT client, as a
";"-separated list of addresses, e.g. localservice;0;localhost:51011.
type: string
XRT_WORKERS:
description:
- Addresses for the XRT workers to be used by the XRT client, for
example localservice:0;grpc://localhost:51011
type: string
XRT_DEVICE_MAP:
description:
- Maps each device to metadata about the job, replica, and task that the
device is responsible for, e.g.
CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0
type: string
XRT_GRPC_MULTISTREAM:
description:
- Used to disable session connection sharing for XRT.
type: bool
default_value: true
XRT_START_LOCAL_SERVER:
description:
- Whether or not XRT should start the local service. If true, and if
the devices are CPU or GPU, XRT will try to start the local server.
type: bool
default_value: false
XRT_SHARD_LOCAL_ORDINAL:
description:
- Ordinal to be appended to the paths used by this thread of the XLA
client.
type: int
XRT_GRPC_COMPRESSION_LEVEL:
description:
- Configures compression level for rpc options on the XRT client.
type: int
default_value: 3
XRT_MESH_MAX_MSGSIZE:
description:
- Max message size for the XRT mesh service, typically written as a
product of dimensions, e.g. 1024 * 1024 * 1024.
type: int
default_value: 1073741824 # (1024^3)
XRT_MESH_CONNECT_WAIT:
description:
- Number of seconds to wait for a connection to the XRT mesh service,
particularly the client mesh master.
type: int
default_value: 300
XRT_LOCAL_WORKER:
description:
- Local service address for XRT local worker, e.g. localhost:8000.
type: string
default_value: ""
XRT_SHARD_WORLD_SIZE:
description:
- Total number of XRT shards to consider in this client instance. Does
not have a default because there's special behavior when the flag is
not set.
type: int
XRT_MULTI_PROCESSING_DEVICE:
description:
- Service address of the XRT device to be used as a multiprocessing
device.
type: string
default_value: ""
XRT_HOST_ORDINAL:
description:
- Sets the host ordinal for the XRT computation client, used to identify
the rank of the current device. Does not have a default because
there's special behavior when the flag is not set.
type: int
XRT_SHARD_ORDINAL:
description:
- Sets the shard ordinal for the XRT computation client.
type: int
default_value: -1
pjrt_variables:
PJRT_DEVICE:
description:
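The XRT variables deleted above all encode structured values in flat strings. As a reference for anyone migrating off these settings, here is a minimal, illustrative Python sketch of the ";"-separated XRT_TPU_CONFIG format; the parsing helper is hypothetical, and the client's real parser is not part of this diff.

# Illustrative only: split the XRT_TPU_CONFIG format described above,
# e.g. "localservice;0;localhost:51011", into its job/task/address parts.
import os

def parse_xrt_tpu_config(value: str) -> dict:
    job, task, address = value.split(";")
    return {"job": job, "task": int(task), "address": address}

config = os.environ.get("XRT_TPU_CONFIG", "localservice;0;localhost:51011")
print(parse_xrt_tpu_config(config))  # {'job': 'localservice', 'task': 0, 'address': 'localhost:51011'}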
6 changes: 0 additions & 6 deletions setup.py
@@ -40,9 +40,6 @@
# TPUVM_MODE=0
# whether to build for TPU
#
# DISABLE_XRT=0
# whether to exclude XRT from the build
#
# SILO_NAME=""
# name of the remote build cache silo
#
@@ -253,9 +250,6 @@ def bazel_build(self, ext):
if _check_env_flag('TPUVM_MODE'):
bazel_argv.append('--config=tpu')

if _check_env_flag('DISABLE_XRT'):
bazel_argv.append('--config=disable_xrt')

# Remote cache authentication.
if _check_env_flag('BAZEL_REMOTE_CACHE'):
bazel_argv.append('--config=remote_cache')
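For context on the build change: the body of _check_env_flag is outside this hunk, so the following is a plausible sketch of such a helper rather than the file's verified contents.

# Hypothetical sketch of the _check_env_flag helper referenced above;
# the real body is not shown in this diff. Accepts common truthy spellings.
import os

def _check_env_flag(name, default=''):
    return os.getenv(name, default).upper() in ['ON', '1', 'YES', 'TRUE', 'Y']

Under a helper like this, the deleted branch meant a build invoked with DISABLE_XRT=1 appended --config=disable_xrt to the bazel command line; after this change the flag should no longer have any effect on the build.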
56 changes: 0 additions & 56 deletions test/allreduce_torchrun.py

This file was deleted.

10 changes: 0 additions & 10 deletions test/cpp/BUILD
@@ -70,16 +70,6 @@ ptxla_cc_test(
],
)

ptxla_cc_test(
name = "test_op_by_op_executor",
srcs = ["test_op_by_op_executor.cpp"],
deps = [
":cpp_test_util",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest_main",
],
)

ptxla_cc_test(
name = "test_replication",
srcs = ["test_replication.cpp"],
85 changes: 0 additions & 85 deletions test/cpp/test_op_by_op_executor.cpp

This file was deleted.

12 changes: 4 additions & 8 deletions test/pjrt/test_runtime.py
@@ -55,19 +55,15 @@ def test_xla_device_error(self):
}, False), ('pjrt_cpu', {
'PJRT_DEVICE': 'CPU',
'PJRT_SELECT_DEFAULT_DEVICE': '0'
}, True), ('xrt_tpu', {
'XRT_TPU_CONFIG': 'localservice;0;localhost:51011'
}, False), ('pjrt_tpu_precedence', {
}, True), ('pjrt_tpu_precedence', {
'PJRT_DEVICE': 'TPU',
'XRT_TPU_CONFIG': 'localservice;0;localhost:51011',
}, True), ('xrt_gpu', {
}, True), ('gpu_num_devices', {
'GPU_NUM_DEVICES': '4'
}, False), ('pjrt_gpu', {
}, True), ('pjrt_gpu', {
'PJRT_DEVICE': 'GPU',
'GPU_NUM_DEVICES': '4'
}, True), ('xla_dist_worker', {
'XRT_LOCAL_WORKER': 'c_localservice:2'
}, False))
}, True))
def test_pjrt_default_device(self, env_vars, expect_using_pjrt):
with mock.patch.dict(os.environ, env_vars, clear=True):
# Print a warning if we had to select a default runtime
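The pattern in test_pjrt_default_device above — named parameterized cases that patch os.environ and assert which runtime is selected — reduces to the following self-contained sketch; the using_pjrt check is a stand-in, not the module's actual API.

# Self-contained sketch of the env-driven test pattern above. The runtime
# check is a stand-in for the real selection logic in torch_xla.
import os
from unittest import mock

from absl.testing import parameterized


class ExampleDeviceTest(parameterized.TestCase):

  @parameterized.named_parameters(
      ('pjrt_cpu', {'PJRT_DEVICE': 'CPU'}, True),
      ('no_device_set', {}, False),
  )
  def test_default_device(self, env_vars, expect_using_pjrt):
    with mock.patch.dict(os.environ, env_vars, clear=True):
      using_pjrt = 'PJRT_DEVICE' in os.environ  # stand-in check
      self.assertEqual(using_pjrt, expect_using_pjrt)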
40 changes: 0 additions & 40 deletions test/run_tests.sh
@@ -108,42 +108,6 @@ function run_xla_backend_mp {
MASTER_ADDR=localhost MASTER_PORT=6000 run_test "$@"
}

function run_xrt {
if [ -x "$(command -v nvidia-smi)" ] && [ "$XLA_CUDA" != "0" ]; then
GPU_NUM_DEVICES=2 run_coverage "$@"
else
XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0" XRT_WORKERS="localservice:0;grpc://localhost:$(shuf -i 40701-40999 -n 1)" run_coverage "$@"
fi
}

function run_opbyop {
echo "Running in OpByOp mode: $@"
XLA_GET_TENSORS_OPBYOP=1 XLA_SYNC_TENSORS_OPBYOP=1 run_xrt "$@"
}

function run_async_scalar {
echo "Running in Async Scalar Upload mode: $@"
XLA_TRANSFER_SCALAR_ASYNC=1 run_xrt "$@"
}

function run_torchrun {
echo "Running tests spawned by torchrun"
if [ -x "$(command -v nvidia-smi)" ]; then
run_xrt "$@"
else
echo "the tests need atleast two XLA workers to validate"
fi
}

function run_xrt_tests {
# For features not supported in PJRT
echo "Running XRT tests"
run_xrt "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
run_opbyop "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
run_async_scalar "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
run_torchrun "$CDIR/test_allreduce_torchrun.py"
}

function run_torch_op_tests {
run_dynamic "$CDIR/../../test/test_view_ops.py" "$@" -v TestViewOpsXLA
run_test_without_functionalization "$CDIR/../../test/test_view_ops.py" "$@" -v TestViewOpsXLA
@@ -171,7 +135,6 @@ function run_xla_op_tests {
run_test_without_functionalization "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
run_test "$CDIR/test_async_closures.py"
run_test "$CDIR/test_autocast.py"
run_test "$CDIR/test_xla_dist.py"
run_test "$CDIR/test_profiler.py"
run_test "$CDIR/test_ops.py"
run_test "$CDIR/test_metrics.py"
@@ -235,9 +198,6 @@ function run_tests {
if [[ "$XLA_SKIP_MP_OP_TESTS" != "1" ]]; then
run_mp_op_tests
fi
if [[ "$XLA_SKIP_XRT_TESTS" != "1" ]]; then
run_xrt_tests
fi
}

if [ "$LOGFILE" != "" ]; then
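For anyone who needs to reproduce the environment the deleted run_xrt helper set up (for example, to run the tests from an older release), here is a rough Python equivalent of the shell function's CPU branch; the port range mirrors the shuf call above, and the test path is only an example.

# Approximate Python rendering of the deleted run_xrt shell helper (CPU
# branch), for reference against older releases. Not part of this repo.
import os
import random
import subprocess

port = random.randint(40701, 40999)  # mirrors: shuf -i 40701-40999 -n 1
env = dict(
    os.environ,
    XRT_DEVICE_MAP='CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0',
    XRT_WORKERS=f'localservice:0;grpc://localhost:{port}',
)
subprocess.run(['python', 'test/test_operations.py'], env=env, check=True)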
26 changes: 0 additions & 26 deletions test/test_allreduce_torchrun.py

This file was deleted.
