From 8f9fc9a6a14be077ac89a111a9306ccf5c7d59ce Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 15:47:44 -0700
Subject: [PATCH 01/20] rebase to latest main

---
 install_requirements.py | 161 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 153 insertions(+), 8 deletions(-)

diff --git a/install_requirements.py b/install_requirements.py
index cbae175e276..844ada0c7da 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -59,8 +59,16 @@ def python_is_compatible():
 
 
 # The pip repository that hosts nightly torch packages.
-TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu"
+# This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled.
+TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly"
 
+# Supported CUDA versions - modify this to add/remove supported versions
+# Format: tuple of (major, minor) version numbers
+SUPPORTED_CUDA_VERSIONS = [
+    (12, 6),
+    (12, 8),
+    (12, 9),
+]
 
 # Since ExecuTorch often uses main-branch features of pytorch, only the nightly
 # pip versions will have the required features.
@@ -71,7 +79,137 @@ def python_is_compatible():
 #
 # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt
 # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/
-NIGHTLY_VERSION = "dev20250906"
+#
+# NOTE: If you're changing, also update the supported CUDA versions in
+# SUPPORTED_CUDA_VERSIONS above if needed.
+NIGHTLY_VERSION = "dev20250915"
+
+
+def _check_cuda_enabled():
+    """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable."""
+    cmake_args = os.environ.get("CMAKE_ARGS", "")
+    return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args
+
+
+def _cuda_version_to_pytorch_suffix(major, minor):
+    """
+    Generate PyTorch CUDA wheel suffix from CUDA version numbers.
+
+    Args:
+        major: CUDA major version (e.g., 12)
+        minor: CUDA minor version (e.g., 6)
+
+    Returns:
+        PyTorch wheel suffix string (e.g., "cu126")
+    """
+    return f"cu{major}{minor}"
+
+
+def _get_cuda_version():
+    """
+    Get the CUDA version installed on the system using nvcc command.
+    Returns a tuple (major, minor).
+
+    Raises:
+        RuntimeError: if nvcc is not found or version cannot be parsed
+    """
+    try:
+        # Get CUDA version from nvcc (CUDA compiler)
+        nvcc_result = subprocess.run(
+            ["nvcc", "--version"], capture_output=True, text=True, check=True
+        )
+        # Parse nvcc output for CUDA version
+        # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68"
+        match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout)
+        if match:
+            major, minor = int(match.group(1)), int(match.group(2))
+
+            # Check if the detected version is supported
+            if (major, minor) not in SUPPORTED_CUDA_VERSIONS:
+                available_versions = ", ".join(
+                    [f"{maj}.{min}" for maj, min in SUPPORTED_CUDA_VERSIONS]
+                )
+                raise RuntimeError(
+                    f"Detected CUDA version {major}.{minor} is not supported. "
+                    f"Only the following CUDA versions are supported: {available_versions}. "
+                    f"Please install a supported CUDA version or try on CPU-only delegates."
+                )
+
+            return (major, minor)
+        else:
+            raise RuntimeError(
+                "CUDA delegate is enabled but could not parse CUDA version from nvcc output. "
+                "Please ensure CUDA is properly installed or try on CPU-only delegates."
+            )
+    except FileNotFoundError:
+        raise RuntimeError(
+            "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. "
+            "Please install CUDA toolkit or try on CPU-only delegates."
+        )
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(
+            f"CUDA delegate is enabled but nvcc command failed with error: {e}. "
+            "Please ensure CUDA is properly installed or try on CPU-only delegates."
+        )
+
+
+def _get_pytorch_cuda_url(cuda_version):
+    """
+    Get the appropriate PyTorch CUDA URL for the given CUDA version.
+
+    Args:
+        cuda_version: tuple of (major, minor) version numbers
+
+    Returns:
+        URL string for PyTorch CUDA packages
+    """
+    major, minor = cuda_version
+    # Generate CUDA suffix (version validation is already done in _get_cuda_version)
+    cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor)
+
+    return f"{TORCH_NIGHTLY_URL_BASE}/{cuda_suffix}"
+
+
+# url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url().
+# please do not directly rely on it, but use _determine_torch_url() instead.
+_torch_url = None
+
+
+def _determine_torch_url():
+    """
+    Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.
+    Uses caching to avoid redundant CUDA detection and print statements.
+
+    Returns:
+        URL string for PyTorch packages
+    """
+    global _torch_url
+
+    # Return cached URL if already determined
+    if _torch_url is not None:
+        return _torch_url
+
+    # Check if CUDA delegate is enabled
+    if not _check_cuda_enabled():
+        print("CUDA delegate not enabled, using CPU-only PyTorch")
+        _torch_url = f"{TORCH_NIGHTLY_URL_BASE}/cpu"
+        return _torch_url
+
+    print("CUDA delegate enabled, detecting CUDA version...")
+
+    # Get CUDA version
+    cuda_version = _get_cuda_version()
+
+    major, minor = cuda_version
+    print(f"Detected CUDA version: {major}.{minor}")
+
+    # Get appropriate PyTorch CUDA URL
+    torch_url = _get_pytorch_cuda_url(cuda_version)
+    print(f"Using PyTorch URL: {torch_url}")
+
+    # Cache the result
+    _torch_url = torch_url
+    return torch_url
 
 
 def install_requirements(use_pytorch_nightly):
@@ -84,12 +222,16 @@ def install_requirements(use_pytorch_nightly):
         )
         sys.exit(1)
 
+    # Determine the appropriate PyTorch URL based on CUDA delegate status
+    torch_url = _determine_torch_url()
+
     # pip packages needed by exir.
     TORCH_PACKAGE = [
         # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note
         # that we don't need to set any version number there because they have already
         # been installed on CI before this step, so pip won't reinstall them
-        f"torch==2.9.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch",
+        f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch",
+        f"torchao==0.14.0{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchao",
     ]
 
     # Install the requirements for core ExecuTorch package.
@@ -105,13 +247,13 @@ def install_requirements(use_pytorch_nightly):
             "requirements-dev.txt",
             *TORCH_PACKAGE,
             "--extra-index-url",
-            TORCH_NIGHTLY_URL,
+            torch_url,
         ],
         check=True,
     )
 
     LOCAL_REQUIREMENTS = [
-        "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
+        # "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
     ] + (
         [
             "extension/llm/tokenizers",  # TODO(larryliu0820): Setup a pypi package for this.
@@ -147,10 +289,13 @@ def install_requirements(use_pytorch_nightly):
 
 
 def install_optional_example_requirements(use_pytorch_nightly):
+    # Determine the appropriate PyTorch URL based on CUDA delegate status
+    torch_url = _determine_torch_url()
+
     print("Installing torch domain libraries")
     DOMAIN_LIBRARIES = [
         (
-            f"torchvision==0.24.0.{NIGHTLY_VERSION}"
+            f"torchvision==0.25.0.{NIGHTLY_VERSION}"
             if use_pytorch_nightly
             else "torchvision"
         ),
@@ -165,7 +310,7 @@ def install_optional_example_requirements(use_pytorch_nightly):
             "install",
             *DOMAIN_LIBRARIES,
             "--extra-index-url",
-            TORCH_NIGHTLY_URL,
+            torch_url,
         ],
         check=True,
     )
@@ -180,7 +325,7 @@ def install_optional_example_requirements(use_pytorch_nightly):
             "-r",
             "requirements-examples.txt",
             "--extra-index-url",
-            TORCH_NIGHTLY_URL,
+            torch_url,
             "--upgrade-strategy",
             "only-if-needed",
         ],

From dbe31b51064b4737d9f645091185de3e1dbdfb54 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 16:41:16 -0700
Subject: [PATCH 02/20] add github ci for gpu pt install check

---
 .github/workflows/test-cuda-builds.yml | 68 ++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 .github/workflows/test-cuda-builds.yml

diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
new file mode 100644
index 00000000000..eef3287a920
--- /dev/null
+++ b/.github/workflows/test-cuda-builds.yml
@@ -0,0 +1,68 @@
+# Test ExecuTorch CUDA Build Compatibility
+# This workflow tests whether ExecuTorch can be successfully built with CUDA support
+# across different CUDA versions (12.6, 12.8, 12.9) using the command:
+# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+#
+# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
+# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
+
+name: Test CUDA Builds
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - release/*
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  test-cuda-builds:
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda-version: ["12.6", "12.8", "12.9"]
+
+    name: test-executorch-cuda-build-${{ matrix.cuda-version }}
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: ${{ matrix.cuda-version }}
+      submodules: recursive
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        if [ -n "$CONDA_ENV" ]; then
+          conda activate "${CONDA_ENV}"
+        fi
+
+        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
+        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
+
+  # This job will fail if any of the CUDA versions fail
+  check-all-cuda-builds:
+    needs: test-cuda-builds
+    runs-on: ubuntu-latest
+    if: always()
+    steps:
+      - name: Check if all CUDA builds succeeded
+        run: |
+          if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
+            echo "ERROR: One or more ExecutorTorch CUDA builds failed!"
+            echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
+            exit 1
+          else
+            echo "SUCCESS: All ExecutorTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
+          fi

From 11104349874d0b7776dc36bbcfd453dc9229bcec Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 16:41:39 -0700
Subject: [PATCH 03/20] add github ci for gpu pt install check

---
 .ci/scripts/test-cuda-build.sh | 84 ++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100755 .ci/scripts/test-cuda-build.sh

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
new file mode 100755
index 00000000000..8a9fedc4d7a
--- /dev/null
+++ b/.ci/scripts/test-cuda-build.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+CUDA_VERSION=${1:-"12.6"}
+
+echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ==="
+
+# Function to build and test ExecuTorch with CUDA support
+test_executorch_cuda_build() {
+    local cuda_version=$1
+
+    echo "Building ExecutorTorch with CUDA ${cuda_version} support..."
+    echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel"
+
+    # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically
+    export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+
+    # Install ExecutorTorch with CUDA support - this will automatically:
+    # 1. Detect CUDA version using nvcc
+    # 2. Install appropriate PyTorch wheel for the detected CUDA version
+    # 3. Build ExecutorTorch with CUDA support
+    ./install_executorch.sh
+
+    echo "SUCCESS: ExecutorTorch CUDA build completed"
+
+    # Verify the installation
+    echo "=== Verifying ExecutorTorch CUDA Installation ==="
+
+    # Test that ExecuTorch was built successfully
+    python -c "
+import executorch
+print('SUCCESS: ExecutorTorch imported successfully')
+"
+
+    # Test CUDA availability and show details
+    python -c "
+try:
+    import torch
+    print('INFO: PyTorch version:', torch.__version__)
+    print('INFO: CUDA available:', torch.cuda.is_available())
+
+    if torch.cuda.is_available():
+        print('SUCCESS: CUDA is available for ExecutorTorch')
+        print('INFO: CUDA version:', torch.version.cuda)
+        print('INFO: GPU device count:', torch.cuda.device_count())
+        print('INFO: Current GPU device:', torch.cuda.current_device())
+        print('INFO: GPU device name:', torch.cuda.get_device_name())
+
+        # Test basic CUDA tensor operation
+        device = torch.device('cuda')
+        x = torch.randn(10, 10).to(device)
+        y = torch.randn(10, 10).to(device)
+        z = torch.mm(x, y)
+        print('SUCCESS: CUDA tensor operation completed on device:', z.device)
+        print('INFO: Result tensor shape:', z.shape)
+
+        print('SUCCESS: ExecutorTorch CUDA integration verified')
+    else:
+        print('WARNING: CUDA not detected, but ExecutorTorch built successfully')
+        exit(1)
+except Exception as e:
+    print('ERROR: ExecutorTorch CUDA test failed:', e)
+    exit(1)
+"
+
+    echo "SUCCESS: ExecutorTorch CUDA ${cuda_version} build and verification completed successfully"
+}
+
+# Main execution
+echo "Current working directory: $(pwd)"
+echo "Directory contents:"
+ls -la
+
+# Run the CUDA build test
+test_executorch_cuda_build "${CUDA_VERSION}"

From 0621550f0cae09d915f5129c7e3b133324e7814c Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 16:58:32 -0700
Subject: [PATCH 04/20] recover torchao

---
 .ci/docker/ci_commit_pins/pytorch.txt | 2 +-
 install_requirements.py               | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index 8c9330d6f2c..e3a53c8bcb5 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e
+53a2908a10f414a2f85caa06703a26a40e873869
diff --git a/install_requirements.py b/install_requirements.py
index 844ada0c7da..32303f80842 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -231,7 +231,6 @@ def install_requirements(use_pytorch_nightly):
         # that we don't need to set any version number there because they have already
         # been installed on CI before this step, so pip won't reinstall them
         f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch",
-        f"torchao==0.14.0{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchao",
     ]
 
     # Install the requirements for core ExecuTorch package.
@@ -253,7 +252,7 @@ def install_requirements(use_pytorch_nightly):
     )
 
     LOCAL_REQUIREMENTS = [
-        # "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
+        "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
     ] + (
         [
             "extension/llm/tokenizers",  # TODO(larryliu0820): Setup a pypi package for this.

From 3ef491b3540e06c2a33eae682c282737024bd771 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 21:38:34 -0700
Subject: [PATCH 05/20] solve lint issue

---
 .ci/scripts/test-cuda-build.sh         | 24 +++++++++++++++++++-----
 .github/workflows/test-cuda-builds.yml |  2 +-
 install_requirements.py                |  4 ++--
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index 8a9fedc4d7a..a9f8e7ec14f 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -21,14 +21,28 @@ test_executorch_cuda_build() {
     echo "Building ExecutorTorch with CUDA ${cuda_version} support..."
     echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel"
 
+    # Check available resources before starting
+    echo "=== System Information ==="
+    echo "Available memory: $(free -h | grep Mem | awk '{print $2}')"
+    echo "Available disk space: $(df -h . | tail -1 | awk '{print $4}')"
+    echo "CPU cores: $(nproc)"
+    echo "CUDA version check:"
+    nvcc --version || echo "nvcc not found"
+    nvidia-smi || echo "nvidia-smi not found"
+
     # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically
     export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
 
-    # Install ExecutorTorch with CUDA support - this will automatically:
-    # 1. Detect CUDA version using nvcc
-    # 2. Install appropriate PyTorch wheel for the detected CUDA version
-    # 3. Build ExecutorTorch with CUDA support
-    ./install_executorch.sh
+    echo "=== Starting ExecutorTorch Installation ==="
+    # Install ExecuTorch with CUDA support with timeout and error handling
+    timeout 5400 ./install_executorch.sh || {
+        local exit_code=$?
+        echo "ERROR: install_executorch.sh failed with exit code: $exit_code"
+        if [ $exit_code -eq 124 ]; then
+            echo "ERROR: Installation timed out after 90 minutes"
+        fi
+        exit $exit_code
+    }
 
     echo "SUCCESS: ExecutorTorch CUDA build completed"
 
diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
index eef3287a920..eff26e72c67 100644
--- a/.github/workflows/test-cuda-builds.yml
+++ b/.github/workflows/test-cuda-builds.yml
@@ -17,7 +17,7 @@ on:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
-  cancel-in-progress: true
+  cancel-in-progress: false
 
 jobs:
   test-cuda-builds:
diff --git a/install_requirements.py b/install_requirements.py
index 32303f80842..e5a7c29c482 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -172,7 +172,7 @@ def _get_pytorch_cuda_url(cuda_version):
 
 # url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url().
 # please do not directly rely on it, but use _determine_torch_url() instead.
-_torch_url = None
+_torch_url = ""
 
 
 def _determine_torch_url():
@@ -186,7 +186,7 @@ def _determine_torch_url():
     global _torch_url
 
     # Return cached URL if already determined
-    if _torch_url is not None:
+    if _torch_url:
         return _torch_url
 
     # Check if CUDA delegate is enabled

From 9792c99066550f1784bab43864deb7fb66dcf4d9 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Wed, 17 Sep 2025 12:45:25 -0700
Subject: [PATCH 06/20] create install_utils.py for better structure

---
 install_requirements.py | 190 +------------------------------------
 install_utils.py        | 201 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+), 187 deletions(-)
 create mode 100644 install_utils.py

diff --git a/install_requirements.py b/install_requirements.py
index e5a7c29c482..409ed083970 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -7,56 +7,10 @@
 
 import argparse
 import os
-import platform
-import re
 import subprocess
 import sys
 
-
-def python_is_compatible():
-    # Scrape the version range from pyproject.toml, which should be in the current directory.
-    version_specifier = None
-    with open("pyproject.toml", "r") as file:
-        for line in file:
-            if line.startswith("requires-python"):
-                match = re.search(r'"([^"]*)"', line)
-                if match:
-                    version_specifier = match.group(1)
-                    break
-
-    if not version_specifier:
-        print(
-            "WARNING: Skipping python version check: version range not found",
-            file=sys.stderr,
-        )
-        return False
-
-    # Install the packaging module if necessary.
-    try:
-        import packaging
-    except ImportError:
-        subprocess.run(
-            [sys.executable, "-m", "pip", "install", "packaging"], check=True
-        )
-    # Compare the current python version to the range in version_specifier. Exits
-    # with status 1 if the version is not compatible, or with status 0 if the
-    # version is compatible or the logic itself fails.
-    try:
-        import packaging.specifiers
-        import packaging.version
-
-        python_version = packaging.version.parse(platform.python_version())
-        version_range = packaging.specifiers.SpecifierSet(version_specifier)
-        if python_version not in version_range:
-            print(
-                f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"',
-                file=sys.stderr,
-            )
-            return False
-    except Exception as e:
-        print(f"WARNING: Skipping python version check: {e}", file=sys.stderr)
-    return True
-
+from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible
 
 # The pip repository that hosts nightly torch packages.
 # This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled.
@@ -85,133 +39,6 @@ def python_is_compatible():
 NIGHTLY_VERSION = "dev20250915"
 
 
-def _check_cuda_enabled():
-    """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable."""
-    cmake_args = os.environ.get("CMAKE_ARGS", "")
-    return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args
-
-
-def _cuda_version_to_pytorch_suffix(major, minor):
-    """
-    Generate PyTorch CUDA wheel suffix from CUDA version numbers.
-
-    Args:
-        major: CUDA major version (e.g., 12)
-        minor: CUDA minor version (e.g., 6)
-
-    Returns:
-        PyTorch wheel suffix string (e.g., "cu126")
-    """
-    return f"cu{major}{minor}"
-
-
-def _get_cuda_version():
-    """
-    Get the CUDA version installed on the system using nvcc command.
-    Returns a tuple (major, minor).
-
-    Raises:
-        RuntimeError: if nvcc is not found or version cannot be parsed
-    """
-    try:
-        # Get CUDA version from nvcc (CUDA compiler)
-        nvcc_result = subprocess.run(
-            ["nvcc", "--version"], capture_output=True, text=True, check=True
-        )
-        # Parse nvcc output for CUDA version
-        # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68"
-        match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout)
-        if match:
-            major, minor = int(match.group(1)), int(match.group(2))
-
-            # Check if the detected version is supported
-            if (major, minor) not in SUPPORTED_CUDA_VERSIONS:
-                available_versions = ", ".join(
-                    [f"{maj}.{min}" for maj, min in SUPPORTED_CUDA_VERSIONS]
-                )
-                raise RuntimeError(
-                    f"Detected CUDA version {major}.{minor} is not supported. "
-                    f"Only the following CUDA versions are supported: {available_versions}. "
-                    f"Please install a supported CUDA version or try on CPU-only delegates."
-                )
-
-            return (major, minor)
-        else:
-            raise RuntimeError(
-                "CUDA delegate is enabled but could not parse CUDA version from nvcc output. "
-                "Please ensure CUDA is properly installed or try on CPU-only delegates."
-            )
-    except FileNotFoundError:
-        raise RuntimeError(
-            "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. "
-            "Please install CUDA toolkit or try on CPU-only delegates."
-        )
-    except subprocess.CalledProcessError as e:
-        raise RuntimeError(
-            f"CUDA delegate is enabled but nvcc command failed with error: {e}. "
-            "Please ensure CUDA is properly installed or try on CPU-only delegates."
-        )
-
-
-def _get_pytorch_cuda_url(cuda_version):
-    """
-    Get the appropriate PyTorch CUDA URL for the given CUDA version.
-
-    Args:
-        cuda_version: tuple of (major, minor) version numbers
-
-    Returns:
-        URL string for PyTorch CUDA packages
-    """
-    major, minor = cuda_version
-    # Generate CUDA suffix (version validation is already done in _get_cuda_version)
-    cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor)
-
-    return f"{TORCH_NIGHTLY_URL_BASE}/{cuda_suffix}"
-
-
-# url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url().
-# please do not directly rely on it, but use _determine_torch_url() instead.
-_torch_url = ""
-
-
-def _determine_torch_url():
-    """
-    Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.
-    Uses caching to avoid redundant CUDA detection and print statements.
-
-    Returns:
-        URL string for PyTorch packages
-    """
-    global _torch_url
-
-    # Return cached URL if already determined
-    if _torch_url:
-        return _torch_url
-
-    # Check if CUDA delegate is enabled
-    if not _check_cuda_enabled():
-        print("CUDA delegate not enabled, using CPU-only PyTorch")
-        _torch_url = f"{TORCH_NIGHTLY_URL_BASE}/cpu"
-        return _torch_url
-
-    print("CUDA delegate enabled, detecting CUDA version...")
-
-    # Get CUDA version
-    cuda_version = _get_cuda_version()
-
-    major, minor = cuda_version
-    print(f"Detected CUDA version: {major}.{minor}")
-
-    # Get appropriate PyTorch CUDA URL
-    torch_url = _get_pytorch_cuda_url(cuda_version)
-    print(f"Using PyTorch URL: {torch_url}")
-
-    # Cache the result
-    _torch_url = torch_url
-    return torch_url
-
-
 def install_requirements(use_pytorch_nightly):
     # Skip pip install on Intel macOS if using nightly.
     if use_pytorch_nightly and is_intel_mac_os():
@@ -223,7 +50,7 @@ def install_requirements(use_pytorch_nightly):
         sys.exit(1)
 
     # Determine the appropriate PyTorch URL based on CUDA delegate status
-    torch_url = _determine_torch_url()
+    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
 
     # pip packages needed by exir.
     TORCH_PACKAGE = [
@@ -289,7 +116,7 @@ def install_requirements(use_pytorch_nightly):
 
 def install_optional_example_requirements(use_pytorch_nightly):
     # Determine the appropriate PyTorch URL based on CUDA delegate status
-    torch_url = _determine_torch_url()
+    torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS)
 
     print("Installing torch domain libraries")
     DOMAIN_LIBRARIES = [
@@ -332,17 +159,6 @@ def install_optional_example_requirements(use_pytorch_nightly):
     )
 
 
-# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source.
-# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024).
-def is_intel_mac_os():
-    # Returns True if running on Intel macOS.
-    return platform.system().lower() == "darwin" and platform.machine().lower() in (
-        "x86",
-        "x86_64",
-        "i386",
-    )
-
-
 def main(args):
     parser = argparse.ArgumentParser()
     parser.add_argument(
diff --git a/install_utils.py b/install_utils.py
new file mode 100644
index 00000000000..19da1b2193b
--- /dev/null
+++ b/install_utils.py
@@ -0,0 +1,201 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-25 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import platform
+import re
+import subprocess
+
+
+def _is_cuda_enabled():
+    """Report whether the CMAKE_ARGS environment variable contains -DEXECUTORCH_BUILD_CUDA=ON."""
+    cuda_flag = "-DEXECUTORCH_BUILD_CUDA=ON"
+    return cuda_flag in os.environ.get("CMAKE_ARGS", "")
+
+
+def _cuda_version_to_pytorch_suffix(major, minor):
+    """
+    Build the CUDA suffix used in PyTorch wheel index URLs.
+
+    Args:
+        major: CUDA major version (e.g., 12)
+        minor: CUDA minor version (e.g., 6)
+
+    Returns: the wheel suffix string, "cu" + major + minor (e.g., "cu126")
+    """
+    version_digits = f"{major}{minor}"
+    return f"cu{version_digits}"
+
+
+def _get_cuda_version(supported_cuda_versions):
+    """
+    Detect the installed CUDA toolkit version by parsing `nvcc --version`.
+    Returns a tuple (major, minor) of ints.
+
+    Args:
+        supported_cuda_versions: Collection of supported (major, minor) tuples
+
+    Raises:
+        RuntimeError: if nvcc is missing or fails, or the version is unparsable/unsupported
+    """
+    try:
+        # Get CUDA version from nvcc (CUDA compiler)
+        nvcc_result = subprocess.run(
+            ["nvcc", "--version"], capture_output=True, text=True, check=True
+        )
+        # Parse nvcc output for CUDA version
+        # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68"
+        match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout)
+        if match:
+            major, minor = int(match.group(1)), int(match.group(2))
+
+            # Check if the detected version is supported
+            if (major, minor) not in supported_cuda_versions:
+                available_versions = ", ".join(
+                    f"{maj}.{mnr}" for maj, mnr in supported_cuda_versions
+                )
+                raise RuntimeError(
+                    f"Detected CUDA version {major}.{minor} is not supported. "
+                    f"Only the following CUDA versions are supported: {available_versions}. "
+                    f"Please install a supported CUDA version or try on CPU-only delegates."
+                )
+
+            return (major, minor)
+        else:
+            raise RuntimeError(
+                "CUDA delegate is enabled but could not parse CUDA version from nvcc output. "
+                "Please ensure CUDA is properly installed or try on CPU-only delegates."
+            )
+    except FileNotFoundError as e:
+        raise RuntimeError(
+            "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. "
+            "Please install CUDA toolkit or try on CPU-only delegates."
+        ) from e
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(
+            f"CUDA delegate is enabled but nvcc command failed with error: {e}. "
+            "Please ensure CUDA is properly installed or try on CPU-only delegates."
+        ) from e
+
+
+def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
+    """
+    Build the PyTorch package index URL for the given CUDA version.
+
+    Args:
+        cuda_version: tuple of (major, minor) version numbers
+        torch_nightly_url_base: Base URL for PyTorch nightly packages
+
+    Returns:
+        URL string of the form "<torch_nightly_url_base>/cu<major><minor>"
+    """
+    major, minor = cuda_version
+    # No validation happens here; the version is joined to the base URL as-is.
+    cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor)
+
+    return f"{torch_nightly_url_base}/{cuda_suffix}"
+
+
+# Global variable for caching torch URL
+_torch_url_cache = ""
+
+
+def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
+    """
+    Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.
+    Uses caching to avoid redundant CUDA detection and print statements.
+
+    Args:
+        torch_nightly_url_base: Base URL for PyTorch nightly packages
+        supported_cuda_versions: Supported CUDA versions as (major, minor) tuples; both args are ignored once a URL is cached
+
+    Returns:
+        URL string for PyTorch packages
+    """
+    global _torch_url_cache
+
+    # Return cached URL if already determined
+    if _torch_url_cache:
+        return _torch_url_cache
+
+    # Check if CUDA delegate is enabled
+    if not _is_cuda_enabled():
+        print("CUDA delegate not enabled, using CPU-only PyTorch")
+        _torch_url_cache = f"{torch_nightly_url_base}/cpu"
+        return _torch_url_cache
+
+    print("CUDA delegate enabled, detecting CUDA version...")
+
+    # Get CUDA version (raises RuntimeError if nvcc is unavailable or the version is unsupported)
+    cuda_version = _get_cuda_version(supported_cuda_versions)
+
+    major, minor = cuda_version
+    print(f"Detected CUDA version: {major}.{minor}")
+
+    # Get appropriate PyTorch CUDA URL
+    torch_url = _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base)
+    print(f"Using PyTorch URL: {torch_url}")
+
+    # Cache the result
+    _torch_url_cache = torch_url
+    return torch_url
+
+
+# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source.
+# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024).
+def is_intel_mac_os():
+    """Return True when running on Intel-based (x86 family) macOS."""
+    if platform.system().lower() != "darwin":
+        return False
+    # The machine type is only consulted once we know this is macOS.
+    machine_arch = platform.machine().lower()
+    return machine_arch in ("x86", "x86_64", "i386")
+
+
+def python_is_compatible():
+    """Check the running Python against the requires-python range scraped from ./pyproject.toml."""
+    version_specifier = None
+    with open("pyproject.toml", "r") as file:
+        for line in file:
+            if line.startswith("requires-python"):
+                match = re.search(r'"([^"]*)"', line)
+                if match:
+                    version_specifier = match.group(1)
+                    break
+
+    if not version_specifier:
+        print(
+            "WARNING: Skipping python version check: version range not found",
+            file=sys.stderr,
+        )
+        return False
+
+    # Install the packaging module if necessary.
+    try:
+        import packaging
+    except ImportError:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "packaging"], check=True
+        )
+    # Compare the current python version to the range in version_specifier.
+    # Returns False if the version is not compatible; returns True if it is
+    # compatible or if the comparison itself fails (a warning is printed).
+    try:
+        import packaging.specifiers
+        import packaging.version
+
+        python_version = packaging.version.parse(platform.python_version())
+        version_range = packaging.specifiers.SpecifierSet(version_specifier)
+        if python_version not in version_range:
+            print(
+                f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"',
+                file=sys.stderr,
+            )
+            return False
+    except Exception as e:
+        print(f"WARNING: Skipping python version check: {e}", file=sys.stderr)
+    return True

From a18cd15ec853a35370958b94183e61e997cb4f35 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 08:36:40 -0700
Subject: [PATCH 07/20] set use-custom-docker-registry as false

---
 .github/workflows/test-cuda-builds.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
index eff26e72c67..ed6018bddf7 100644
--- a/.github/workflows/test-cuda-builds.yml
+++ b/.github/workflows/test-cuda-builds.yml
@@ -36,6 +36,7 @@ jobs:
       runner: linux.g5.4xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda-version }}
+      use-custom-docker-registry: false
       submodules: recursive
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |

From 5b430f46f811cf2a7e038bbb0774a88ebf812308 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 15:47:44 -0700
Subject: [PATCH 08/20] rebase to latest main

---
 install_requirements.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install_requirements.py b/install_requirements.py
index 409ed083970..bfe214f444b 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -79,7 +79,7 @@ def install_requirements(use_pytorch_nightly):
     )
 
     LOCAL_REQUIREMENTS = [
-        "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
+        # "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
     ] + (
         [
             "extension/llm/tokenizers",  # TODO(larryliu0820): Setup a pypi package for this.

From 95c2536d52cf5f97b5566295bf617258cc36bf23 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Tue, 16 Sep 2025 16:58:32 -0700
Subject: [PATCH 09/20] recover torchao

---
 install_requirements.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install_requirements.py b/install_requirements.py
index bfe214f444b..409ed083970 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -79,7 +79,7 @@ def install_requirements(use_pytorch_nightly):
     )
 
     LOCAL_REQUIREMENTS = [
-        # "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
+        "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
     ] + (
         [
             "extension/llm/tokenizers",  # TODO(larryliu0820): Setup a pypi package for this.

From b00bc1436f2854d829d7f613738f4006154cebb2 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 09:39:44 -0700
Subject: [PATCH 10/20] solve platform import issue

---
 install_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/install_utils.py b/install_utils.py
index 19da1b2193b..2b2c6ffc51c 100644
--- a/install_utils.py
+++ b/install_utils.py
@@ -6,7 +6,6 @@
 # LICENSE file in the root directory of this source tree.
 
 import os
-import platform
 import re
 import subprocess
 

From ae52b29b0e30b59ef2c92f053f859081db8c0cd8 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 10:58:26 -0700
Subject: [PATCH 11/20] introduce missed sys

---
 install_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/install_utils.py b/install_utils.py
index 2b2c6ffc51c..159da75cee3 100644
--- a/install_utils.py
+++ b/install_utils.py
@@ -8,6 +8,7 @@
 import os
 import re
 import subprocess
+import sys
 
 
 def _is_cuda_enabled():

From 57ebb63f887955dc316148257ac09be9ebabdd54 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 11:05:46 -0700
Subject: [PATCH 12/20] introduce missed platform

---
 install_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/install_utils.py b/install_utils.py
index 159da75cee3..fdd6c4bd93c 100644
--- a/install_utils.py
+++ b/install_utils.py
@@ -6,6 +6,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import os
+import platform
 import re
 import subprocess
 import sys

From 43d164f00cc2d7fee7a63c4c4a6f0233592f203a Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 12:00:50 -0700
Subject: [PATCH 13/20] update cuda ci script

---
 .ci/scripts/test-cuda-build.sh         | 8 ++++----
 .github/workflows/test-cuda-builds.yml | 9 +++------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index a9f8e7ec14f..a1539139019 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -7,8 +7,8 @@
 
 set -exu
 
-# shellcheck source=/dev/null
-source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+bash .ci/scripts/setup-conda.sh
+eval "$(conda shell.bash hook)"
 
 CUDA_VERSION=${1:-"12.6"}
 
@@ -50,13 +50,13 @@ test_executorch_cuda_build() {
     echo "=== Verifying ExecutorTorch CUDA Installation ==="
 
     # Test that ExecutorTorch was built successfully
-    python -c "
+    ${CONDA_RUN} python -c "
 import executorch
 print('SUCCESS: ExecutorTorch imported successfully')
 "
 
     # Test CUDA availability and show details
-    python -c "
+    ${CONDA_RUN} python -c "
 try:
     import torch
     print('INFO: PyTorch version:', torch.__version__)
diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
index ed6018bddf7..ec94d6721fb 100644
--- a/.github/workflows/test-cuda-builds.yml
+++ b/.github/workflows/test-cuda-builds.yml
@@ -42,15 +42,12 @@ jobs:
       script: |
         set -eux
 
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        if [ -n "$CONDA_ENV" ]; then
-          conda activate "${CONDA_ENV}"
-        fi
+        # This is needed to get the prebuilt PyTorch wheel from S3
+        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
 
         # Test ExecutorTorch CUDA build - ExecutorTorch will automatically detect CUDA version
         # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
+        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
 
   # This job will fail if any of the CUDA versions fail
   check-all-cuda-builds:

From d892e3f637d95fd7b86f1b4d7dbd625affe3d01b Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 12:52:38 -0700
Subject: [PATCH 14/20] try ci with specific docker-image

---
 .ci/scripts/test-cuda-build.sh         | 5 +++++
 .github/workflows/test-cuda-builds.yml | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index a1539139019..9981eb7ec87 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -7,9 +7,14 @@
 
 set -exu
 
+# Source the conda setup
 bash .ci/scripts/setup-conda.sh
 eval "$(conda shell.bash hook)"
 
+# Set up CONDA_RUN variable if not already set
+# This is needed for compatibility with pytorch/test-infra workflows
+export CONDA_RUN="${CONDA_RUN:-conda run --no-capture-output -p ${CONDA_PREFIX:-$HOME/miniconda3/envs/ci}}"
+
 CUDA_VERSION=${1:-"12.6"}
 
 echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ==="
diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
index ec94d6721fb..13fbd427310 100644
--- a/.github/workflows/test-cuda-builds.yml
+++ b/.github/workflows/test-cuda-builds.yml
@@ -36,7 +36,7 @@ jobs:
       runner: linux.g5.4xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda-version }}
-      use-custom-docker-registry: false
+      docker-image: nvidia/cuda:${{ matrix.cuda-version }}.0-devel-ubuntu22.04
       submodules: recursive
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |

From 2e878864a01ced717ea59c5d0cbe0ff8a514e6a9 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 15:16:48 -0700
Subject: [PATCH 15/20] no conda run in yml

---
 .ci/scripts/test-cuda-build.sh         | 13 +++++--------
 .github/workflows/test-cuda-builds.yml | 17 +++++++----------
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index 9981eb7ec87..577d89ef057 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -7,13 +7,10 @@
 
 set -exu
 
-# Source the conda setup
-bash .ci/scripts/setup-conda.sh
+# The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
-
-# Set up CONDA_RUN variable if not already set
-# This is needed for compatibility with pytorch/test-infra workflows
-export CONDA_RUN="${CONDA_RUN:-conda run --no-capture-output -p ${CONDA_PREFIX:-$HOME/miniconda3/envs/ci}}"
+CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+conda activate "${CONDA_ENV}"
 
 CUDA_VERSION=${1:-"12.6"}
 
@@ -55,13 +52,13 @@ test_executorch_cuda_build() {
     echo "=== Verifying ExecutorTorch CUDA Installation ==="
 
     # Test that ExecutorTorch was built successfully
-    ${CONDA_RUN} python -c "
+    python -c "
 import executorch
 print('SUCCESS: ExecutorTorch imported successfully')
 "
 
     # Test CUDA availability and show details
-    ${CONDA_RUN} python -c "
+    python -c "
 try:
     import torch
     print('INFO: PyTorch version:', torch.__version__)
diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml
index 13fbd427310..0930c6524e3 100644
--- a/.github/workflows/test-cuda-builds.yml
+++ b/.github/workflows/test-cuda-builds.yml
@@ -1,9 +1,9 @@
-# Test ExecutorTorch CUDA Build Compatibility
-# This workflow tests whether ExecutorTorch can be successfully built with CUDA support
+# Test ExecuTorch CUDA Build Compatibility
+# This workflow tests whether ExecuTorch can be successfully built with CUDA support
 # across different CUDA versions (12.6, 12.8, 12.9) using the command:
 # CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
 #
-# Note: ExecutorTorch automatically detects the system CUDA version using nvcc and
+# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
 # installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
 
 name: Test CUDA Builds
@@ -36,16 +36,13 @@ jobs:
       runner: linux.g5.4xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda-version }}
-      docker-image: nvidia/cuda:${{ matrix.cuda-version }}.0-devel-ubuntu22.04
+      use-custom-docker-registry: false
       submodules: recursive
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
         set -eux
 
-        # This is needed to get the prebuilt PyTorch wheel from S3
-        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
-
-        # Test ExecutorTorch CUDA build - ExecutorTorch will automatically detect CUDA version
+        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
         # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
         source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
 
@@ -58,9 +55,9 @@ jobs:
       - name: Check if all CUDA builds succeeded
         run: |
           if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
-            echo "ERROR: One or more ExecutorTorch CUDA builds failed!"
+            echo "ERROR: One or more ExecuTorch CUDA builds failed!"
             echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
             exit 1
           else
-            echo "SUCCESS: All ExecutorTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
+            echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!"
           fi

From 5a5e829de93dbda2d31136ae7510a5972e9bac94 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 16:09:52 -0700
Subject: [PATCH 16/20] remove unsupported jq

---
 .ci/scripts/test-cuda-build.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index 577d89ef057..8cb0cf4b698 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -8,9 +8,9 @@
 set -exu
 
 # The generic Linux job chooses to use base env, not the one setup by the image
-eval "$(conda shell.bash hook)"
-CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-conda activate "${CONDA_ENV}"
+# eval "$(conda shell.bash hook)"
+# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+# conda activate "${CONDA_ENV}"
 
 CUDA_VERSION=${1:-"12.6"}
 

From 6e7884fb605c8b52c2b83742947f539a4a9b4a74 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 17:06:57 -0700
Subject: [PATCH 17/20] use lru cache to replace global cache variable

---
 .ci/scripts/test-cuda-build.sh |  6 +++---
 install_utils.py               | 19 ++++---------------
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index 8cb0cf4b698..13fa1e9fe9c 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -8,9 +8,9 @@
 set -exu
 
 # The generic Linux job chooses to use base env, not the one setup by the image
-# eval "$(conda shell.bash hook)"
-# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-# conda activate "${CONDA_ENV}"
+eval "$(conda shell.bash hook)"
+CONDA_ENV=$(conda info --envs | awk '/base/ {print $2}')
+conda activate "${CONDA_ENV}"
 
 CUDA_VERSION=${1:-"12.6"}
 
diff --git a/install_utils.py b/install_utils.py
index fdd6c4bd93c..113005ba1e4 100644
--- a/install_utils.py
+++ b/install_utils.py
@@ -5,6 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import functools
 import os
 import platform
 import re
@@ -101,14 +102,11 @@ def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
     return f"{torch_nightly_url_base}/{cuda_suffix}"
 
 
-# Global variable for caching torch URL
-_torch_url_cache = ""
-
-
+@functools.lru_cache(maxsize=1)
 def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
     """
     Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.
-    Uses caching to avoid redundant CUDA detection and print statements.
+    Uses @functools.lru_cache to avoid redundant CUDA detection and print statements.
 
     Args:
         torch_nightly_url_base: Base URL for PyTorch nightly packages
@@ -117,17 +115,10 @@ def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
     Returns:
         URL string for PyTorch packages
     """
-    global _torch_url_cache
-
-    # Return cached URL if already determined
-    if _torch_url_cache:
-        return _torch_url_cache
-
     # Check if CUDA delegate is enabled
     if not _is_cuda_enabled():
         print("CUDA delegate not enabled, using CPU-only PyTorch")
-        _torch_url_cache = f"{torch_nightly_url_base}/cpu"
-        return _torch_url_cache
+        return f"{torch_nightly_url_base}/cpu"
 
     print("CUDA delegate enabled, detecting CUDA version...")
 
@@ -141,8 +132,6 @@ def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
     torch_url = _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base)
     print(f"Using PyTorch URL: {torch_url}")
 
-    # Cache the result
-    _torch_url_cache = torch_url
     return torch_url
 
 

From bd24c4be407c603355635308d2b49f2f36bea2ba Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 17:12:23 -0700
Subject: [PATCH 18/20] make SUPPORTED_CUDA_VERSIONS a tuple so it is hashable

---
 install_requirements.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/install_requirements.py b/install_requirements.py
index 409ed083970..4cc8858086b 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -18,11 +18,11 @@
 
 # Supported CUDA versions - modify this to add/remove supported versions
 # Format: tuple of (major, minor) version numbers
-SUPPORTED_CUDA_VERSIONS = [
+SUPPORTED_CUDA_VERSIONS = (
     (12, 6),
     (12, 8),
     (12, 9),
-]
+)
 
 # Since ExecuTorch often uses main-branch features of pytorch, only the nightly
 # pip versions will have the required features.

From d1c596c7193af7e4afe8c81002021850bb9a6d1d Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 17:35:14 -0700
Subject: [PATCH 19/20] use default conda env

---
 .ci/scripts/test_backend_linux.sh | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
index d230860875d..a5233d15559 100755
--- a/.ci/scripts/test_backend_linux.sh
+++ b/.ci/scripts/test_backend_linux.sh
@@ -15,11 +15,6 @@ REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
-# The generic Linux job chooses to use base env, not the one setup by the image
-eval "$(conda shell.bash hook)"
-CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-conda activate "${CONDA_ENV}"
-
 export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.

From 19c2fb22e284ceecd39e4dfa2aa1cbcb45d322d5 Mon Sep 17 00:00:00 2001
From: gasoonjia <gasoonjia@icloud.com>
Date: Fri, 19 Sep 2025 17:47:11 -0700
Subject: [PATCH 20/20] remove conda env selection in cuda-build.sh

---
 .ci/scripts/test-cuda-build.sh    | 5 -----
 .ci/scripts/test_backend_linux.sh | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh
index 13fa1e9fe9c..52c2f21dbd2 100755
--- a/.ci/scripts/test-cuda-build.sh
+++ b/.ci/scripts/test-cuda-build.sh
@@ -7,11 +7,6 @@
 
 set -exu
 
-# The generic Linux job chooses to use base env, not the one setup by the image
-eval "$(conda shell.bash hook)"
-CONDA_ENV=$(conda info --envs | awk '/base/ {print $2}')
-conda activate "${CONDA_ENV}"
-
 CUDA_VERSION=${1:-"12.6"}
 
 echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ==="
diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
index a5233d15559..d230860875d 100755
--- a/.ci/scripts/test_backend_linux.sh
+++ b/.ci/scripts/test_backend_linux.sh
@@ -15,6 +15,11 @@ REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
+# The generic Linux job chooses to use base env, not the one setup by the image
+eval "$(conda shell.bash hook)"
+CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+conda activate "${CONDA_ENV}"
+
 export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.