Merged: changes from all commits
25 changes: 3 additions & 22 deletions .circleci/common.sh
```diff
@@ -158,26 +158,12 @@ function run_torch_xla_cpp_tests() {
   fi

   if [ "$USE_COVERAGE" != "0" ]; then
-    if [ -x "$(command -v nvidia-smi)" ]; then
-      PJRT_DEVICE=CUDA test/cpp/run_tests.sh $EXTRA_ARGS -L""
-      cp $XLA_DIR/bazel-out/_coverage/_coverage_report.dat /tmp/cov1.dat
-      PJRT_DEVICE=CUDA test/cpp/run_tests.sh -X early_sync -F AtenXlaTensorTest.TestEarlySyncLiveTensors -L"" $EXTRA_ARGS
-      cp $XLA_DIR/bazel-out/_coverage/_coverage_report.dat /tmp/cov2.dat
-      lcov --add-tracefile /tmp/cov1.dat -a /tmp/cov2.dat -o /tmp/merged.dat
-    else
-      PJRT_DEVICE=CPU test/cpp/run_tests.sh $EXTRA_ARGS -L""
-      cp $XLA_DIR/bazel-out/_coverage/_coverage_report.dat /tmp/merged.dat
-    fi
+    PJRT_DEVICE=CPU test/cpp/run_tests.sh $EXTRA_ARGS -L""
+    cp $XLA_DIR/bazel-out/_coverage/_coverage_report.dat /tmp/merged.dat
     genhtml /tmp/merged.dat -o ~/htmlcov/cpp/cpp_lcov.info
     mv /tmp/merged.dat ~/htmlcov/cpp_lcov.info
   else
-    # Shard GPU testing
-    if [ -x "$(command -v nvidia-smi)" ]; then
-      PJRT_DEVICE=CUDA test/cpp/run_tests.sh $EXTRA_ARGS -L""
-      PJRT_DEVICE=CUDA test/cpp/run_tests.sh -X early_sync -F AtenXlaTensorTest.TestEarlySyncLiveTensors -L"" $EXTRA_ARGS
-    else
-      PJRT_DEVICE=CPU test/cpp/run_tests.sh $EXTRA_ARGS -L""
-    fi
+    PJRT_DEVICE=CPU test/cpp/run_tests.sh $EXTRA_ARGS -L""
   fi
   popd
 }
@@ -196,11 +182,6 @@ function run_torch_xla_tests() {
   RUN_CPP="${RUN_CPP_TESTS:0}"
   RUN_PYTHON="${RUN_PYTHON_TESTS:0}"

-  if [ -x "$(command -v nvidia-smi)" ]; then
-    num_devices=$(nvidia-smi --list-gpus | wc -l)
-    echo "Found $num_devices GPU devices..."
-    export GPU_NUM_DEVICES=$num_devices
-  fi
   export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
   export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
```
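The surviving coverage path above is compact enough to trace end to end: run the C++ tests, copy Bazel's combined LCOV tracefile, render HTML. A standalone sketch of that flow, assuming `lcov`/`genhtml` are installed, coverage was enabled for the run, and `XLA_DIR` points at the checkout (the output directory here is illustrative):

```bash
#!/usr/bin/env bash
set -euo pipefail

# Run the C++ suite on CPU; coverage instrumentation is assumed to be
# enabled for this invocation (the USE_COVERAGE gating lives in common.sh).
PJRT_DEVICE=CPU test/cpp/run_tests.sh

# Bazel writes a combined LCOV tracefile under bazel-out.
cp "$XLA_DIR/bazel-out/_coverage/_coverage_report.dat" /tmp/merged.dat

# Render the tracefile into a browsable HTML report.
genhtml /tmp/merged.dat -o ~/htmlcov/cpp
```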
30 changes: 0 additions & 30 deletions .devcontainer/gpu-internal/devcontainer.json

This file was deleted.

2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE.md
```diff
@@ -13,5 +13,5 @@ Error messages and stack traces are also helpful.

 ## System Info

-- reproducible on XLA backend [CPU/TPU/CUDA]:
+- reproducible on XLA backend [CPU/TPU]:
 - torch_xla version:
```
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.md
```diff
@@ -46,7 +46,7 @@ Steps to reproduce the behavior:

 ## Environment

-- Reproducible on XLA backend [CPU/TPU/CUDA]:
+- Reproducible on XLA backend [CPU/TPU]:
 - torch_xla version:

```
37 changes: 12 additions & 25 deletions .github/ci.md
```diff
@@ -44,20 +44,20 @@ fail. Steps for fixing and merging such breaking PyTorch change is as following:

 ### Running TPU tests on PRs

-The `build_and_test.yml` workflow runs tests on the TPU in addition to CPU and
-GPU. The set of tests run on the TPU is defined in `test/tpu/run_tests.sh`.
+The `build_and_test.yml` workflow runs tests on the TPU in addition to CPU.
+The set of tests run on the TPU is defined in `test/tpu/run_tests.sh`.

 ## CI Environment

 Before the CI in this repository runs, we build a base dev image. These are the
 same images we recommend in our VSCode `.devcontainer` setup and nightly build
-to ensure consistency between environments. We produce variants with and without
-CUDA, configured in `infra/ansible` (build config) and
-`infra/tpu-pytorch-releases/dev_images.tf` (build triggers).
+to ensure consistency between environments. We produce variants configured in
+`infra/ansible` (build config) and `infra/tpu-pytorch-releases/dev_images.tf`
+(build triggers).

 The CI runs in two environments:

-1. Organization self-hosted runners for CPU and GPU: used for almost every step
+1. Organization self-hosted runners for CPU: used for almost every step
    of the CI. These runners are managed by PyTorch and have access to the shared
    ECR repository.
 1. TPU self-hosted runners: these are managed by us and are only available in
@@ -68,48 +68,35 @@ The CI runs in two environments:

 We have two build paths for each CI run:

-- `torch_xla`: we build the main package to support both TPU and GPU[^1], along
+- `torch_xla`: we build the main package to support TPU, along
   with a CPU build of `torch` from HEAD. This build step exports the
   `torch-xla-wheels` artifact for downstream use in tests.
   - Some CI tests also require `torchvision`. To reduce flakiness, we compile
     `torchvision` from [`torch`'s CI pin][pytorch-vision-pin].
   - C++ tests are piggybacked onto the same build and uploaded in the
     `cpp-test-bin` artifact.
-- `torch_xla_cuda_plugin`: the XLA CUDA runtime can be built independently of
-  either `torch` or `torch_xla` -- it depends only on our pinned OpenXLA. Thus,
-  this build should be almost entirely cached, unless your PR changes the XLA
-  pin or adds a patch.

-Both the main package build and plugin build are configured with ansible at
-`infra/ansible`, although they run in separate stages (`stage=build_srcs` vs
-`stage=build_plugin`). This is the same configuration we use for our nightly and
-release builds.
+The main package build is configured with ansible at `infra/ansible`. This is
+the same configuration we use for our nightly and release builds.

-The CPU and GPU test configs are defined in the same file, `_test.yml`. Since
+The CPU test config is defined in the file `_test.yml`. Since
 some of the tests come from the upstream PyTorch repository, we check out
 PyTorch at the same git rev as the `build` step (taken from
 `torch_xla.version.__torch_gitrev__`). The tests are split up into multiple
 groups that run in parallel; the `matrix` section of `_test.yml` corresponds to
 the test groups in `.github/scripts/run_tests.sh`.

 CPU tests run immediately after the `torch_xla` build completes. This will
-likely be the first test feedback on your commit. GPU tests will launch when
-both the `torch_xla` and `torch_xla_cuda_plugin` complete. GPU compilation is
-much slower due to the number of possible optimizations, and the GPU chips
-themselves are quite outdated, so these tests will take longer to run than the
-CPU tests.
+likely be the first test feedback on your commit.

 ![CPU tests launch when `torch_xla` is
 complete](../docs/assets/ci_test_dependency.png)

-![GPU tests also depend on CUDA
-plugin](../docs/assets/ci_test_dependency_gpu.png)
-
 For the C++ test groups in either case, the test binaries are pre-built during
 the build phase and packaged in `cpp-test-bin`. This will only be downloaded if
 necessary.

-[^1]: Note: both GPU and TPU support require their respective plugins to be
+[^1]: Note: TPU support requires its respective plugin to be
 installed. This package will _not_ work on either out of the box.

 ### TPU CI
```
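Since `ci.md` now leans on the `matrix` section of `_test.yml` to split tests into parallel groups, a sketch of the dispatch pattern such a matrix feeds may help; the variable and group names below are illustrative assumptions, not the repository's exact values:

```bash
#!/usr/bin/env bash
set -euo pipefail

# RUN_TESTS_GROUP would be injected from a `matrix` entry in _test.yml;
# the group names here are placeholders for illustration.
case "${RUN_TESTS_GROUP:-all}" in
  python_tests)
    ./test/run_tests.sh
    ;;
  cpp_tests)
    ./test/cpp/run_tests.sh
    ;;
  all)
    ./test/run_tests.sh
    ./test/cpp/run_tests.sh
    ;;
  *)
    echo "Unknown test group: ${RUN_TESTS_GROUP}" >&2
    exit 1
    ;;
esac
```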
5 changes: 0 additions & 5 deletions .github/scripts/run_tests.sh
```diff
@@ -77,11 +77,6 @@
 PYTORCH_DIR=$1
 XLA_DIR=$2
 USE_COVERAGE="${3:-0}"

-if [ -x "$(command -v nvidia-smi)" ]; then
-  num_devices=$(nvidia-smi --list-gpus | wc -l)
-  echo "Found $num_devices GPU devices..."
-  export GPU_NUM_DEVICES=$num_devices
-fi
 export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
 export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
```
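The `CXX_ABI` export retained at the end of this hunk queries which libstdc++ ABI the installed `torch` was built with, so locally compiled C++ code can match it. A quick standalone check (the `g++` line and `my_extension.cpp` are illustrative, not from the repo):

```bash
# Ask torch which C++ ABI it was compiled with; prints 0 or 1.
CXX_ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
echo "torch was built with _GLIBCXX_USE_CXX11_ABI=${CXX_ABI}"

# Illustrative: a C++ extension built against this torch should use the same flag.
g++ -c -D_GLIBCXX_USE_CXX11_ABI="${CXX_ABI}" my_extension.cpp -o my_extension.o
```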
53 changes: 16 additions & 37 deletions .github/workflows/_test.yml
```diff
@@ -23,11 +23,6 @@ on:
       description: |
         Set the maximum (in minutes) how long the workflow should take to finish
         timeout-minutes:
-    install-cuda-plugin:
-      required: false
-      type: boolean
-      default: false
-      description: Whether to install CUDA plugin package
     torch-commit:
       required: true
       type: string
@@ -46,7 +41,7 @@ jobs:
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.dev-image }}
-      options: "${{ inputs.install-cuda-plugin == true && '--gpus all' || '' }} --shm-size 16g"
+      options: "--shm-size 16g"
     strategy:
       fail-fast: false
       matrix:
@@ -95,9 +90,7 @@ jobs:
         uses: ./.actions/.github/workflows/setup
         with:
           torch-commit: ${{ inputs.torch-commit }}
-          cuda: ${{ inputs.install-cuda-plugin && true || false }}
           wheels-artifact: torch-xla-wheels
-          cuda-plugin-artifact: ${{ inputs.install-cuda-plugin && 'cuda-plugin' || null }}
       - name: Fetch CPP test binaries
         if: inputs.has_code_changes == 'true' && matrix.run_cpp_tests
         uses: actions/download-artifact@v4
@@ -111,9 +104,6 @@ jobs:
         run: |
           chmod +x /tmp/test/bin/*
           ls -l /tmp/test/bin
-      - name: Check GPU
-        if: inputs.has_code_changes == 'true' && inputs.install-cuda-plugin
-        run: nvidia-smi
       - name: Install test deps
         if: inputs.has_code_changes == 'true'
         shell: bash
@@ -164,35 +154,24 @@ jobs:
             exit 0
           fi
           docker cp "${pid}":/home/jenkins/htmlcov "${GITHUB_WORKSPACE}"
-          if [ -n "${GPU_FLAG:-}" ]; then
-            if [ -n "${PYTHON_TEST_NAME}" ]; then
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_python_coverage_${PYTHON_TEST_NAME}.out
-            fi
-            if [ -n "${CPP_TEST_NAME}" ]; then
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/gpu_cpp_coverage_${CPP_TEST_NAME}.out
-            fi
-          else
-            if [ -n "${PYTHON_TEST_NAME}" ]; then
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out
-            fi
+          if [ -n "${PYTHON_TEST_NAME}" ]; then
+            gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out
+            gsutil cp ${GITHUB_WORKSPACE}/htmlcov/lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_python_coverage_${PYTHON_TEST_NAME}.out
+          fi

-            if [ -n "${CPP_TEST_NAME}" ]; then
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out
-              gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out
-            fi
+          if [ -n "${CPP_TEST_NAME}" ]; then
+            gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out
+            gsutil cp ${GITHUB_WORKSPACE}/htmlcov/cpp_lcov.info gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/cpu_cpp_coverage_${CPP_TEST_NAME}.out
+          fi

-            if [ "${CPP_TEST_NAME}" == "cpp_tests" ]; then
-              ABS_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "commit_id": '\"${GITHUB_SHA}\"', "ref": "HEAD", "source": "https://github.com/pytorch/xla", "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}'
-              echo $ABS_METADATA > abs_metadata.json
-              gsutil cp abs_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json
+          if [ "${CPP_TEST_NAME}" == "cpp_tests" ]; then
+            ABS_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "commit_id": '\"${GITHUB_SHA}\"', "ref": "HEAD", "source": "https://github.com/pytorch/xla", "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}'
+            echo $ABS_METADATA > abs_metadata.json
+            gsutil cp abs_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json

-              INC_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "patchset_num": 1, "change_id": '${CIRCLE_BUILD_NUM}', "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}'
-              echo $INC_METADATA > inc_metadata.json
-              gsutil cp inc_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json
-            fi
+            INC_METADATA='{"host": "github", "project": "pytorchxla", "trace_type": "LCOV", "patchset_num": 1, "change_id": '${CIRCLE_BUILD_NUM}', "owner": "cloud-tpu-pt-dev", "bug_component": "587012"}'
+            echo $INC_METADATA > inc_metadata.json
+            gsutil cp inc_metadata.json gs://ng3-metrics/ng3-pytorchxla-coverage/incremental/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json
           fi
       - name: Report no code changes
         if: inputs.has_code_changes == 'false'
```
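The metadata JSON in the step above is assembled by shell string concatenation with escaped quotes, which is easy to get wrong when editing. A sketch of an equivalent heredoc construction, reusing the same bucket path and environment variables (a readability suggestion, not what the workflow actually does):

```bash
# Build abs_metadata.json with a heredoc instead of inline concatenation.
# Assumes the same GITHUB_SHA / CIRCLE_WORKFLOW_ID environment as the step above.
cat > abs_metadata.json <<EOF
{
  "host": "github",
  "project": "pytorchxla",
  "trace_type": "LCOV",
  "commit_id": "${GITHUB_SHA}",
  "ref": "HEAD",
  "source": "https://github.com/pytorch/xla",
  "owner": "cloud-tpu-pt-dev",
  "bug_component": "587012"
}
EOF
gsutil cp abs_metadata.json "gs://ng3-metrics/ng3-pytorchxla-coverage/absolute/pytorchxla/${CIRCLE_WORKFLOW_ID}/metadata.json"
```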
33 changes: 0 additions & 33 deletions .github/workflows/setup/action.yml
```diff
@@ -3,20 +3,10 @@ inputs:
   torch-commit:
     type: string
     description: PyTorch commit to check out, if provided
-  cuda:
-    type: boolean
-    description: Whether to set up CUDA library paths
-    default: false
   wheels-artifact:
     type: string
     description: |
      Artifact containing `torch` (cpu) and `torch-xla` wheels to install
-  cuda-plugin-artifact:
-    type: string
-    description: Artifact containing `torch-xla-cuda-plugin` to install
-  cuda-torch-artifact:
-    type: string
-    description: Artifact containing CUDA build of `torch`
 runs:
   using: "composite"
   steps:
@@ -26,12 +16,6 @@ runs:
       run: |
         ls -la
         rm -rvf ${GITHUB_WORKSPACE}/*
-    - name: Setup CUDA environment
-      shell: bash
-      run: |
-        echo "PATH=$PATH:/usr/local/cuda-12.3/bin" >> $GITHUB_ENV
-        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.3/lib64" >> $GITHUB_ENV
-      if: ${{ inputs.cuda }}
    - name: Setup gcloud
      shell: bash
      run: |
@@ -59,23 +43,6 @@ runs:
        name: ${{ inputs.wheels-artifact }}
        path: /tmp/wheels/
      if: ${{ inputs.wheels-artifact }}
-    - name: Fetch CUDA plugin
-      uses: actions/download-artifact@v4
-      with:
-        name: ${{ inputs.cuda-plugin-artifact }}
-        path: /tmp/wheels/
-      if: ${{ inputs.cuda-plugin-artifact }}
-    - name: Remove CPU `torch` build
-      shell: bash
-      run: |
-        rm -rf /tmp/wheels/torch-*
-      if: ${{ inputs.cuda-torch-artifact }}
-    - name: Fetch CUDA `torch` build
-      uses: actions/download-artifact@v4
-      with:
-        name: ${{ inputs.cuda-torch-artifact }}
-        path: /tmp/wheels/
-      if: ${{ inputs.cuda-torch-artifact }}
    - name: Install wheels
      shell: bash
      run: |
```
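The `Install wheels` step is truncated by the diff view; what such a step typically does with the artifact fetched into `/tmp/wheels/` is a single pip invocation. A sketch (the exact wheel names and pip flags are assumptions, not copied from the file):

```bash
# Install the CPU `torch` wheel and the `torch_xla` wheel fetched by the
# steps above; the glob patterns are illustrative.
pip install /tmp/wheels/torch-*.whl /tmp/wheels/torch_xla-*.whl
```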
4 changes: 0 additions & 4 deletions CONTRIBUTING.md
````diff
@@ -238,10 +238,6 @@ first time, you may need to build everything again, for example, after a
 python setup.py develop
 ```

-### Additional steps for GPU
-
-Please refer to this [guide](https://github.com/pytorch/xla/blob/master/plugins/cuda/README.md).
-
 ## Before Creating a Pull Request

 In `pytorch/xla` repo we enforce coding style for both C++ and Python files.
````
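The truncated paragraph notes that coding style is enforced for both C++ and Python. A sketch of what a pre-PR formatting pass can look like; the specific tools (`yapf`, `clang-format`) and file paths are assumptions here, so defer to the repository's lint scripts for the canonical commands:

```bash
# Format a Python file in place (assumed tool: yapf).
yapf -i torch_xla/example.py

# Format a C++ file in place using the repo's .clang-format (assumed tool).
clang-format -i -style=file torch_xla/csrc/example.cpp
```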