From 8f9fc9a6a14be077ac89a111a9306ccf5c7d59ce Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 15:47:44 -0700 Subject: [PATCH 01/20] rebase to latest main --- install_requirements.py | 161 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) diff --git a/install_requirements.py b/install_requirements.py index cbae175e276..844ada0c7da 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -59,8 +59,16 @@ def python_is_compatible(): # The pip repository that hosts nightly torch packages. -TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu" +# This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled. +TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly" +# Supported CUDA versions - modify this to add/remove supported versions +# Format: tuple of (major, minor) version numbers +SUPPORTED_CUDA_VERSIONS = [ + (12, 6), + (12, 8), + (12, 9), +] # Since ExecuTorch often uses main-branch features of pytorch, only the nightly # pip versions will have the required features. @@ -71,7 +79,137 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250906" +# +# NOTE: If you're changing, make the corresponding supported CUDA versions in +# SUPPORTED_CUDA_VERSIONS above if needed. +NIGHTLY_VERSION = "dev20250915" + + +def _check_cuda_enabled(): + """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable.""" + cmake_args = os.environ.get("CMAKE_ARGS", "") + return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args + + +def _cuda_version_to_pytorch_suffix(major, minor): + """ + Generate PyTorch CUDA wheel suffix from CUDA version numbers. + + Args: + major: CUDA major version (e.g., 12) + minor: CUDA minor version (e.g., 6) + + Returns: + PyTorch wheel suffix string (e.g., "cu126") + """ + return f"cu{major}{minor}" + + +def _get_cuda_version(): + """ + Get the CUDA version installed on the system using nvcc command. + Returns a tuple (major, minor). + + Raises: + RuntimeError: if nvcc is not found or version cannot be parsed + """ + try: + # Get CUDA version from nvcc (CUDA compiler) + nvcc_result = subprocess.run( + ["nvcc", "--version"], capture_output=True, text=True, check=True + ) + # Parse nvcc output for CUDA version + # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68" + match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout) + if match: + major, minor = int(match.group(1)), int(match.group(2)) + + # Check if the detected version is supported + if (major, minor) not in SUPPORTED_CUDA_VERSIONS: + available_versions = ", ".join( + [f"{maj}.{min}" for maj, min in SUPPORTED_CUDA_VERSIONS] + ) + raise RuntimeError( + f"Detected CUDA version {major}.{minor} is not supported. " + f"Only the following CUDA versions are supported: {available_versions}. " + f"Please install a supported CUDA version or try on CPU-only delegates." + ) + + return (major, minor) + else: + raise RuntimeError( + "CUDA delegate is enabled but could not parse CUDA version from nvcc output. " + "Please ensure CUDA is properly installed or try on CPU-only delegates." + ) + except FileNotFoundError: + raise RuntimeError( + "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. " + "Please install CUDA toolkit or try on CPU-only delegates." + ) + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"CUDA delegate is enabled but nvcc command failed with error: {e}. " + "Please ensure CUDA is properly installed or try on CPU-only delegates." + ) + + +def _get_pytorch_cuda_url(cuda_version): + """ + Get the appropriate PyTorch CUDA URL for the given CUDA version. + + Args: + cuda_version: tuple of (major, minor) version numbers + + Returns: + URL string for PyTorch CUDA packages + """ + major, minor = cuda_version + # Generate CUDA suffix (version validation is already done in _get_cuda_version) + cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor) + + return f"{TORCH_NIGHTLY_URL_BASE}/{cuda_suffix}" + + +# url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url(). +# please do not directly rely on it, but use _determine_torch_url() instead. +_torch_url = None + + +def _determine_torch_url(): + """ + Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS. + Uses caching to avoid redundant CUDA detection and print statements. + + Returns: + URL string for PyTorch packages + """ + global _torch_url + + # Return cached URL if already determined + if _torch_url is not None: + return _torch_url + + # Check if CUDA delegate is enabled + if not _check_cuda_enabled(): + print("CUDA delegate not enabled, using CPU-only PyTorch") + _torch_url = f"{TORCH_NIGHTLY_URL_BASE}/cpu" + return _torch_url + + print("CUDA delegate enabled, detecting CUDA version...") + + # Get CUDA version + cuda_version = _get_cuda_version() + + major, minor = cuda_version + print(f"Detected CUDA version: {major}.{minor}") + + # Get appropriate PyTorch CUDA URL + torch_url = _get_pytorch_cuda_url(cuda_version) + print(f"Using PyTorch URL: {torch_url}") + + # Cache the result + _torch_url = torch_url + return torch_url def install_requirements(use_pytorch_nightly): @@ -84,12 +222,16 @@ def install_requirements(use_pytorch_nightly): ) sys.exit(1) + # Determine the appropriate PyTorch URL based on CUDA delegate status + torch_url = _determine_torch_url() + # pip packages needed by exir. TORCH_PACKAGE = [ # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - f"torch==2.9.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", + f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", + f"torchao==0.14.0{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchao", ] # Install the requirements for core ExecuTorch package. @@ -105,13 +247,13 @@ def install_requirements(use_pytorch_nightly): "requirements-dev.txt", *TORCH_PACKAGE, "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, ], check=True, ) LOCAL_REQUIREMENTS = [ - "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. + # "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. ] + ( [ "extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this. @@ -147,10 +289,13 @@ def install_requirements(use_pytorch_nightly): def install_optional_example_requirements(use_pytorch_nightly): + # Determine the appropriate PyTorch URL based on CUDA delegate status + torch_url = _determine_torch_url() + print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ ( - f"torchvision==0.24.0.{NIGHTLY_VERSION}" + f"torchvision==0.25.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchvision" ), @@ -165,7 +310,7 @@ def install_optional_example_requirements(use_pytorch_nightly): "install", *DOMAIN_LIBRARIES, "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, ], check=True, ) @@ -180,7 +325,7 @@ def install_optional_example_requirements(use_pytorch_nightly): "-r", "requirements-examples.txt", "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, "--upgrade-strategy", "only-if-needed", ], From dbe31b51064b4737d9f645091185de3e1dbdfb54 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 16:41:16 -0700 Subject: [PATCH 02/20] add github ci for gpu pt install check --- .github/workflows/test-cuda-builds.yml | 68 ++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/test-cuda-builds.yml diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml new file mode 100644 index 00000000000..eef3287a920 --- /dev/null +++ b/.github/workflows/test-cuda-builds.yml @@ -0,0 +1,68 @@ +# Test ExecutorTorch CUDA Build Compatibility +# This workflow tests whether ExecutorTorch can be successfully built with CUDA support +# across different CUDA versions (12.6, 12.8, 12.9) using the command: +# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh +# +# Note: ExecutorTorch automatically detects the system CUDA version using nvcc and +# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed. + +name: Test CUDA Builds + +on: + pull_request: + push: + branches: + - main + - release/* + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} + cancel-in-progress: true + +jobs: + test-cuda-builds: + strategy: + fail-fast: false + matrix: + cuda-version: ["12.6", "12.8", "12.9"] + + name: test-executorch-cuda-build-${{ matrix.cuda-version }} + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + timeout: 90 + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: ${{ matrix.cuda-version }} + submodules: recursive + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + if [ -n "$CONDA_ENV" ]; then + conda activate "${CONDA_ENV}" + fi + + # Test ExecutorTorch CUDA build - ExecutorTorch will automatically detect CUDA version + # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" + PYTHON_EXECUTABLE=python bash .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}" + + # This job will fail if any of the CUDA versions fail + check-all-cuda-builds: + needs: test-cuda-builds + runs-on: ubuntu-latest + if: always() + steps: + - name: Check if all CUDA builds succeeded + run: | + if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then + echo "ERROR: One or more ExecutorTorch CUDA builds failed!" + echo "CUDA build results: ${{ needs.test-cuda-builds.result }}" + exit 1 + else + echo "SUCCESS: All ExecutorTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!" + fi From 11104349874d0b7776dc36bbcfd453dc9229bcec Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 16:41:39 -0700 Subject: [PATCH 03/20] add github ci for gpu pt install check --- .ci/scripts/test-cuda-build.sh | 84 ++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 .ci/scripts/test-cuda-build.sh diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh new file mode 100755 index 00000000000..8a9fedc4d7a --- /dev/null +++ b/.ci/scripts/test-cuda-build.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -exu + +# shellcheck source=/dev/null +source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" + +CUDA_VERSION=${1:-"12.6"} + +echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ===" + +# Function to build and test ExecutorTorch with CUDA support +test_executorch_cuda_build() { + local cuda_version=$1 + + echo "Building ExecutorTorch with CUDA ${cuda_version} support..." + echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel" + + # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically + export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" + + # Install ExecutorTorch with CUDA support - this will automatically: + # 1. Detect CUDA version using nvcc + # 2. Install appropriate PyTorch wheel for the detected CUDA version + # 3. Build ExecutorTorch with CUDA support + ./install_executorch.sh + + echo "SUCCESS: ExecutorTorch CUDA build completed" + + # Verify the installation + echo "=== Verifying ExecutorTorch CUDA Installation ===" + + # Test that ExecutorTorch was built successfully + python -c " +import executorch +print('SUCCESS: ExecutorTorch imported successfully') +" + + # Test CUDA availability and show details + python -c " +try: + import torch + print('INFO: PyTorch version:', torch.__version__) + print('INFO: CUDA available:', torch.cuda.is_available()) + + if torch.cuda.is_available(): + print('SUCCESS: CUDA is available for ExecutorTorch') + print('INFO: CUDA version:', torch.version.cuda) + print('INFO: GPU device count:', torch.cuda.device_count()) + print('INFO: Current GPU device:', torch.cuda.current_device()) + print('INFO: GPU device name:', torch.cuda.get_device_name()) + + # Test basic CUDA tensor operation + device = torch.device('cuda') + x = torch.randn(10, 10).to(device) + y = torch.randn(10, 10).to(device) + z = torch.mm(x, y) + print('SUCCESS: CUDA tensor operation completed on device:', z.device) + print('INFO: Result tensor shape:', z.shape) + + print('SUCCESS: ExecutorTorch CUDA integration verified') + else: + print('WARNING: CUDA not detected, but ExecutorTorch built successfully') + exit(1) +except Exception as e: + print('ERROR: ExecutorTorch CUDA test failed:', e) + exit(1) +" + + echo "SUCCESS: ExecutorTorch CUDA ${cuda_version} build and verification completed successfully" +} + +# Main execution +echo "Current working directory: $(pwd)" +echo "Directory contents:" +ls -la + +# Run the CUDA build test +test_executorch_cuda_build "${CUDA_VERSION}" From 0621550f0cae09d915f5129c7e3b133324e7814c Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 16:58:32 -0700 Subject: [PATCH 04/20] recover torchao --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 8c9330d6f2c..e3a53c8bcb5 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e +53a2908a10f414a2f85caa06703a26a40e873869 diff --git a/install_requirements.py b/install_requirements.py index 844ada0c7da..32303f80842 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -231,7 +231,6 @@ def install_requirements(use_pytorch_nightly): # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", - f"torchao==0.14.0{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchao", ] # Install the requirements for core ExecuTorch package. @@ -253,7 +252,7 @@ def install_requirements(use_pytorch_nightly): ) LOCAL_REQUIREMENTS = [ - # "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. + "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. ] + ( [ "extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this. From 3ef491b3540e06c2a33eae682c282737024bd771 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 21:38:34 -0700 Subject: [PATCH 05/20] solve lint issue --- .ci/scripts/test-cuda-build.sh | 24 +++++++++++++++++++----- .github/workflows/test-cuda-builds.yml | 2 +- install_requirements.py | 4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index 8a9fedc4d7a..a9f8e7ec14f 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -21,14 +21,28 @@ test_executorch_cuda_build() { echo "Building ExecutorTorch with CUDA ${cuda_version} support..." echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel" + # Check available resources before starting + echo "=== System Information ===" + echo "Available memory: $(free -h | grep Mem | awk '{print $2}')" + echo "Available disk space: $(df -h . | tail -1 | awk '{print $4}')" + echo "CPU cores: $(nproc)" + echo "CUDA version check:" + nvcc --version || echo "nvcc not found" + nvidia-smi || echo "nvidia-smi not found" + # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" - # Install ExecutorTorch with CUDA support - this will automatically: - # 1. Detect CUDA version using nvcc - # 2. Install appropriate PyTorch wheel for the detected CUDA version - # 3. Build ExecutorTorch with CUDA support - ./install_executorch.sh + echo "=== Starting ExecutorTorch Installation ===" + # Install ExecutorTorch with CUDA support with timeout and error handling + timeout 5400 ./install_executorch.sh || { + local exit_code=$? + echo "ERROR: install_executorch.sh failed with exit code: $exit_code" + if [ $exit_code -eq 124 ]; then + echo "ERROR: Installation timed out after 90 minutes" + fi + exit $exit_code + } echo "SUCCESS: ExecutorTorch CUDA build completed" diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml index eef3287a920..eff26e72c67 100644 --- a/.github/workflows/test-cuda-builds.yml +++ b/.github/workflows/test-cuda-builds.yml @@ -17,7 +17,7 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true + cancel-in-progress: false jobs: test-cuda-builds: diff --git a/install_requirements.py b/install_requirements.py index 32303f80842..e5a7c29c482 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -172,7 +172,7 @@ def _get_pytorch_cuda_url(cuda_version): # url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url(). # please do not directly rely on it, but use _determine_torch_url() instead. -_torch_url = None +_torch_url = "" def _determine_torch_url(): @@ -186,7 +186,7 @@ def _determine_torch_url(): global _torch_url # Return cached URL if already determined - if _torch_url is not None: + if _torch_url: return _torch_url # Check if CUDA delegate is enabled From 9792c99066550f1784bab43864deb7fb66dcf4d9 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Wed, 17 Sep 2025 12:45:25 -0700 Subject: [PATCH 06/20] create install_utils.py for better structure --- install_requirements.py | 190 +------------------------------------ install_utils.py | 201 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 187 deletions(-) create mode 100644 install_utils.py diff --git a/install_requirements.py b/install_requirements.py index e5a7c29c482..409ed083970 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -7,56 +7,10 @@ import argparse import os -import platform -import re import subprocess import sys - -def python_is_compatible(): - # Scrape the version range from pyproject.toml, which should be in the current directory. - version_specifier = None - with open("pyproject.toml", "r") as file: - for line in file: - if line.startswith("requires-python"): - match = re.search(r'"([^"]*)"', line) - if match: - version_specifier = match.group(1) - break - - if not version_specifier: - print( - "WARNING: Skipping python version check: version range not found", - file=sys.stderr, - ) - return False - - # Install the packaging module if necessary. - try: - import packaging - except ImportError: - subprocess.run( - [sys.executable, "-m", "pip", "install", "packaging"], check=True - ) - # Compare the current python version to the range in version_specifier. Exits - # with status 1 if the version is not compatible, or with status 0 if the - # version is compatible or the logic itself fails. - try: - import packaging.specifiers - import packaging.version - - python_version = packaging.version.parse(platform.python_version()) - version_range = packaging.specifiers.SpecifierSet(version_specifier) - if python_version not in version_range: - print( - f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"', - file=sys.stderr, - ) - return False - except Exception as e: - print(f"WARNING: Skipping python version check: {e}", file=sys.stderr) - return True - +from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible # The pip repository that hosts nightly torch packages. # This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled. @@ -85,133 +39,6 @@ def python_is_compatible(): NIGHTLY_VERSION = "dev20250915" -def _check_cuda_enabled(): - """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable.""" - cmake_args = os.environ.get("CMAKE_ARGS", "") - return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args - - -def _cuda_version_to_pytorch_suffix(major, minor): - """ - Generate PyTorch CUDA wheel suffix from CUDA version numbers. - - Args: - major: CUDA major version (e.g., 12) - minor: CUDA minor version (e.g., 6) - - Returns: - PyTorch wheel suffix string (e.g., "cu126") - """ - return f"cu{major}{minor}" - - -def _get_cuda_version(): - """ - Get the CUDA version installed on the system using nvcc command. - Returns a tuple (major, minor). - - Raises: - RuntimeError: if nvcc is not found or version cannot be parsed - """ - try: - # Get CUDA version from nvcc (CUDA compiler) - nvcc_result = subprocess.run( - ["nvcc", "--version"], capture_output=True, text=True, check=True - ) - # Parse nvcc output for CUDA version - # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68" - match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout) - if match: - major, minor = int(match.group(1)), int(match.group(2)) - - # Check if the detected version is supported - if (major, minor) not in SUPPORTED_CUDA_VERSIONS: - available_versions = ", ".join( - [f"{maj}.{min}" for maj, min in SUPPORTED_CUDA_VERSIONS] - ) - raise RuntimeError( - f"Detected CUDA version {major}.{minor} is not supported. " - f"Only the following CUDA versions are supported: {available_versions}. " - f"Please install a supported CUDA version or try on CPU-only delegates." - ) - - return (major, minor) - else: - raise RuntimeError( - "CUDA delegate is enabled but could not parse CUDA version from nvcc output. " - "Please ensure CUDA is properly installed or try on CPU-only delegates." - ) - except FileNotFoundError: - raise RuntimeError( - "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. " - "Please install CUDA toolkit or try on CPU-only delegates." - ) - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"CUDA delegate is enabled but nvcc command failed with error: {e}. " - "Please ensure CUDA is properly installed or try on CPU-only delegates." - ) - - -def _get_pytorch_cuda_url(cuda_version): - """ - Get the appropriate PyTorch CUDA URL for the given CUDA version. - - Args: - cuda_version: tuple of (major, minor) version numbers - - Returns: - URL string for PyTorch CUDA packages - """ - major, minor = cuda_version - # Generate CUDA suffix (version validation is already done in _get_cuda_version) - cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor) - - return f"{TORCH_NIGHTLY_URL_BASE}/{cuda_suffix}" - - -# url for the PyTorch ExecuTorch depending on, which will be set by _determine_torch_url(). -# please do not directly rely on it, but use _determine_torch_url() instead. -_torch_url = "" - - -def _determine_torch_url(): - """ - Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS. - Uses caching to avoid redundant CUDA detection and print statements. - - Returns: - URL string for PyTorch packages - """ - global _torch_url - - # Return cached URL if already determined - if _torch_url: - return _torch_url - - # Check if CUDA delegate is enabled - if not _check_cuda_enabled(): - print("CUDA delegate not enabled, using CPU-only PyTorch") - _torch_url = f"{TORCH_NIGHTLY_URL_BASE}/cpu" - return _torch_url - - print("CUDA delegate enabled, detecting CUDA version...") - - # Get CUDA version - cuda_version = _get_cuda_version() - - major, minor = cuda_version - print(f"Detected CUDA version: {major}.{minor}") - - # Get appropriate PyTorch CUDA URL - torch_url = _get_pytorch_cuda_url(cuda_version) - print(f"Using PyTorch URL: {torch_url}") - - # Cache the result - _torch_url = torch_url - return torch_url - - def install_requirements(use_pytorch_nightly): # Skip pip install on Intel macOS if using nightly. if use_pytorch_nightly and is_intel_mac_os(): @@ -223,7 +50,7 @@ def install_requirements(use_pytorch_nightly): sys.exit(1) # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = _determine_torch_url() + torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS) # pip packages needed by exir. TORCH_PACKAGE = [ @@ -289,7 +116,7 @@ def install_requirements(use_pytorch_nightly): def install_optional_example_requirements(use_pytorch_nightly): # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = _determine_torch_url() + torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS) print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ @@ -332,17 +159,6 @@ def install_optional_example_requirements(use_pytorch_nightly): ) -# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source. -# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024). -def is_intel_mac_os(): - # Returns True if running on Intel macOS. - return platform.system().lower() == "darwin" and platform.machine().lower() in ( - "x86", - "x86_64", - "i386", - ) - - def main(args): parser = argparse.ArgumentParser() parser.add_argument( diff --git a/install_utils.py b/install_utils.py new file mode 100644 index 00000000000..19da1b2193b --- /dev/null +++ b/install_utils.py @@ -0,0 +1,201 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024-25 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import os +import platform +import re +import subprocess + + +def _is_cuda_enabled(): + """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable.""" + cmake_args = os.environ.get("CMAKE_ARGS", "") + return "-DEXECUTORCH_BUILD_CUDA=ON" in cmake_args + + +def _cuda_version_to_pytorch_suffix(major, minor): + """ + Generate PyTorch CUDA wheel suffix from CUDA version numbers. + + Args: + major: CUDA major version (e.g., 12) + minor: CUDA minor version (e.g., 6) + + Returns: + PyTorch wheel suffix string (e.g., "cu126") + """ + return f"cu{major}{minor}" + + +def _get_cuda_version(supported_cuda_versions): + """ + Get the CUDA version installed on the system using nvcc command. + Returns a tuple (major, minor). + + Args: + supported_cuda_versions: List of supported CUDA versions as tuples + + Raises: + RuntimeError: if nvcc is not found or version cannot be parsed + """ + try: + # Get CUDA version from nvcc (CUDA compiler) + nvcc_result = subprocess.run( + ["nvcc", "--version"], capture_output=True, text=True, check=True + ) + # Parse nvcc output for CUDA version + # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68" + match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout) + if match: + major, minor = int(match.group(1)), int(match.group(2)) + + # Check if the detected version is supported + if (major, minor) not in supported_cuda_versions: + available_versions = ", ".join( + [f"{maj}.{min}" for maj, min in supported_cuda_versions] + ) + raise RuntimeError( + f"Detected CUDA version {major}.{minor} is not supported. " + f"Only the following CUDA versions are supported: {available_versions}. " + f"Please install a supported CUDA version or try on CPU-only delegates." + ) + + return (major, minor) + else: + raise RuntimeError( + "CUDA delegate is enabled but could not parse CUDA version from nvcc output. " + "Please ensure CUDA is properly installed or try on CPU-only delegates." + ) + except FileNotFoundError: + raise RuntimeError( + "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. " + "Please install CUDA toolkit or try on CPU-only delegates." + ) + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"CUDA delegate is enabled but nvcc command failed with error: {e}. " + "Please ensure CUDA is properly installed or try on CPU-only delegates." + ) + + +def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base): + """ + Get the appropriate PyTorch CUDA URL for the given CUDA version. + + Args: + cuda_version: tuple of (major, minor) version numbers + torch_nightly_url_base: Base URL for PyTorch nightly packages + + Returns: + URL string for PyTorch CUDA packages + """ + major, minor = cuda_version + # Generate CUDA suffix (version validation is already done in _get_cuda_version) + cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor) + + return f"{torch_nightly_url_base}/{cuda_suffix}" + + +# Global variable for caching torch URL +_torch_url_cache = "" + + +def determine_torch_url(torch_nightly_url_base, supported_cuda_versions): + """ + Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS. + Uses caching to avoid redundant CUDA detection and print statements. + + Args: + torch_nightly_url_base: Base URL for PyTorch nightly packages + supported_cuda_versions: List of supported CUDA versions as tuples + + Returns: + URL string for PyTorch packages + """ + global _torch_url_cache + + # Return cached URL if already determined + if _torch_url_cache: + return _torch_url_cache + + # Check if CUDA delegate is enabled + if not _is_cuda_enabled(): + print("CUDA delegate not enabled, using CPU-only PyTorch") + _torch_url_cache = f"{torch_nightly_url_base}/cpu" + return _torch_url_cache + + print("CUDA delegate enabled, detecting CUDA version...") + + # Get CUDA version + cuda_version = _get_cuda_version(supported_cuda_versions) + + major, minor = cuda_version + print(f"Detected CUDA version: {major}.{minor}") + + # Get appropriate PyTorch CUDA URL + torch_url = _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base) + print(f"Using PyTorch URL: {torch_url}") + + # Cache the result + _torch_url_cache = torch_url + return torch_url + + +# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source. +# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024). +def is_intel_mac_os(): + # Returns True if running on Intel macOS. + return platform.system().lower() == "darwin" and platform.machine().lower() in ( + "x86", + "x86_64", + "i386", + ) + + +def python_is_compatible(): + # Scrape the version range from pyproject.toml, which should be in the current directory. + version_specifier = None + with open("pyproject.toml", "r") as file: + for line in file: + if line.startswith("requires-python"): + match = re.search(r'"([^"]*)"', line) + if match: + version_specifier = match.group(1) + break + + if not version_specifier: + print( + "WARNING: Skipping python version check: version range not found", + file=sys.stderr, + ) + return False + + # Install the packaging module if necessary. + try: + import packaging + except ImportError: + subprocess.run( + [sys.executable, "-m", "pip", "install", "packaging"], check=True + ) + # Compare the current python version to the range in version_specifier. Exits + # with status 1 if the version is not compatible, or with status 0 if the + # version is compatible or the logic itself fails. + try: + import packaging.specifiers + import packaging.version + + python_version = packaging.version.parse(platform.python_version()) + version_range = packaging.specifiers.SpecifierSet(version_specifier) + if python_version not in version_range: + print( + f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"', + file=sys.stderr, + ) + return False + except Exception as e: + print(f"WARNING: Skipping python version check: {e}", file=sys.stderr) + return True From a18cd15ec853a35370958b94183e61e997cb4f35 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 08:36:40 -0700 Subject: [PATCH 07/20] set use-custom-docker-registry as false --- .github/workflows/test-cuda-builds.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml index eff26e72c67..ed6018bddf7 100644 --- a/.github/workflows/test-cuda-builds.yml +++ b/.github/workflows/test-cuda-builds.yml @@ -36,6 +36,7 @@ jobs: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: ${{ matrix.cuda-version }} + use-custom-docker-registry: false submodules: recursive ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | From 5b430f46f811cf2a7e038bbb0774a88ebf812308 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 15:47:44 -0700 Subject: [PATCH 08/20] rebase to latest main --- install_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_requirements.py b/install_requirements.py index 409ed083970..bfe214f444b 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -79,7 +79,7 @@ def install_requirements(use_pytorch_nightly): ) LOCAL_REQUIREMENTS = [ - "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. + # "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. ] + ( [ "extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this. From 95c2536d52cf5f97b5566295bf617258cc36bf23 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Tue, 16 Sep 2025 16:58:32 -0700 Subject: [PATCH 09/20] recover torchao --- install_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_requirements.py b/install_requirements.py index bfe214f444b..409ed083970 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -79,7 +79,7 @@ def install_requirements(use_pytorch_nightly): ) LOCAL_REQUIREMENTS = [ - # "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. + "third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi. ] + ( [ "extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this. From b00bc1436f2854d829d7f613738f4006154cebb2 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 09:39:44 -0700 Subject: [PATCH 10/20] solve platform import issue --- install_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/install_utils.py b/install_utils.py index 19da1b2193b..2b2c6ffc51c 100644 --- a/install_utils.py +++ b/install_utils.py @@ -6,7 +6,6 @@ # LICENSE file in the root directory of this source tree. import os -import platform import re import subprocess From ae52b29b0e30b59ef2c92f053f859081db8c0cd8 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 10:58:26 -0700 Subject: [PATCH 11/20] introduce missed sys --- install_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/install_utils.py b/install_utils.py index 2b2c6ffc51c..159da75cee3 100644 --- a/install_utils.py +++ b/install_utils.py @@ -8,6 +8,7 @@ import os import re import subprocess +import sys def _is_cuda_enabled(): From 57ebb63f887955dc316148257ac09be9ebabdd54 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 11:05:46 -0700 Subject: [PATCH 12/20] introduce missed platform --- install_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/install_utils.py b/install_utils.py index 159da75cee3..fdd6c4bd93c 100644 --- a/install_utils.py +++ b/install_utils.py @@ -6,6 +6,7 @@ # LICENSE file in the root directory of this source tree. import os +import platform import re import subprocess import sys From 43d164f00cc2d7fee7a63c4c4a6f0233592f203a Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 12:00:50 -0700 Subject: [PATCH 13/20] update cuda ci script --- .ci/scripts/test-cuda-build.sh | 8 ++++---- .github/workflows/test-cuda-builds.yml | 9 +++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index a9f8e7ec14f..a1539139019 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -7,8 +7,8 @@ set -exu -# shellcheck source=/dev/null -source "$(dirname "${BASH_SOURCE[0]}")/utils.sh" +bash .ci/scripts/setup-conda.sh +eval "$(conda shell.bash hook)" CUDA_VERSION=${1:-"12.6"} @@ -50,13 +50,13 @@ test_executorch_cuda_build() { echo "=== Verifying ExecutorTorch CUDA Installation ===" # Test that ExecutorTorch was built successfully - python -c " + ${CONDA_RUN} python -c " import executorch print('SUCCESS: ExecutorTorch imported successfully') " # Test CUDA availability and show details - python -c " + ${CONDA_RUN} python -c " try: import torch print('INFO: PyTorch version:', torch.__version__) diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml index ed6018bddf7..ec94d6721fb 100644 --- a/.github/workflows/test-cuda-builds.yml +++ b/.github/workflows/test-cuda-builds.yml @@ -42,15 +42,12 @@ jobs: script: | set -eux - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - if [ -n "$CONDA_ENV" ]; then - conda activate "${CONDA_ENV}" - fi + # This is needed to get the prebuilt PyTorch wheel from S3 + ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 # Test ExecutorTorch CUDA build - ExecutorTorch will automatically detect CUDA version # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" - PYTHON_EXECUTABLE=python bash .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}" + source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}" # This job will fail if any of the CUDA versions fail check-all-cuda-builds: From d892e3f637d95fd7b86f1b4d7dbd625affe3d01b Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 12:52:38 -0700 Subject: [PATCH 14/20] try ci with specific docker-image --- .ci/scripts/test-cuda-build.sh | 5 +++++ .github/workflows/test-cuda-builds.yml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index a1539139019..9981eb7ec87 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -7,9 +7,14 @@ set -exu +# Source the conda setup bash .ci/scripts/setup-conda.sh eval "$(conda shell.bash hook)" +# Set up CONDA_RUN variable if not already set +# This is needed for compatibility with pytorch/test-infra workflows +export CONDA_RUN="${CONDA_RUN:-conda run --no-capture-output -p ${CONDA_PREFIX:-$HOME/miniconda3/envs/ci}}" + CUDA_VERSION=${1:-"12.6"} echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ===" diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml index ec94d6721fb..13fbd427310 100644 --- a/.github/workflows/test-cuda-builds.yml +++ b/.github/workflows/test-cuda-builds.yml @@ -36,7 +36,7 @@ jobs: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: ${{ matrix.cuda-version }} - use-custom-docker-registry: false + docker-image: nvidia/cuda:${{ matrix.cuda-version }}.0-devel-ubuntu22.04 submodules: recursive ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | From 2e878864a01ced717ea59c5d0cbe0ff8a514e6a9 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 15:16:48 -0700 Subject: [PATCH 15/20] no conda run n yml --- .ci/scripts/test-cuda-build.sh | 13 +++++-------- .github/workflows/test-cuda-builds.yml | 17 +++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index 9981eb7ec87..577d89ef057 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -7,13 +7,10 @@ set -exu -# Source the conda setup -bash .ci/scripts/setup-conda.sh +# The generic Linux job chooses to use base env, not the one setup by the image eval "$(conda shell.bash hook)" - -# Set up CONDA_RUN variable if not already set -# This is needed for compatibility with pytorch/test-infra workflows -export CONDA_RUN="${CONDA_RUN:-conda run --no-capture-output -p ${CONDA_PREFIX:-$HOME/miniconda3/envs/ci}}" +CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") +conda activate "${CONDA_ENV}" CUDA_VERSION=${1:-"12.6"} @@ -55,13 +52,13 @@ test_executorch_cuda_build() { echo "=== Verifying ExecutorTorch CUDA Installation ===" # Test that ExecutorTorch was built successfully - ${CONDA_RUN} python -c " + python -c " import executorch print('SUCCESS: ExecutorTorch imported successfully') " # Test CUDA availability and show details - ${CONDA_RUN} python -c " + python -c " try: import torch print('INFO: PyTorch version:', torch.__version__) diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml index 13fbd427310..0930c6524e3 100644 --- a/.github/workflows/test-cuda-builds.yml +++ b/.github/workflows/test-cuda-builds.yml @@ -1,9 +1,9 @@ -# Test ExecutorTorch CUDA Build Compatibility -# This workflow tests whether ExecutorTorch can be successfully built with CUDA support +# Test ExecuTorch CUDA Build Compatibility +# This workflow tests whether ExecuTorch can be successfully built with CUDA support # across different CUDA versions (12.6, 12.8, 12.9) using the command: # CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh # -# Note: ExecutorTorch automatically detects the system CUDA version using nvcc and +# Note: ExecuTorch automatically detects the system CUDA version using nvcc and # installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed. name: Test CUDA Builds @@ -36,16 +36,13 @@ jobs: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda gpu-arch-version: ${{ matrix.cuda-version }} - docker-image: nvidia/cuda:${{ matrix.cuda-version }}.0-devel-ubuntu22.04 + use-custom-docker-registry: false submodules: recursive ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | set -eux - # This is needed to get the prebuilt PyTorch wheel from S3 - ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 - - # Test ExecutorTorch CUDA build - ExecutorTorch will automatically detect CUDA version + # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}" @@ -58,9 +55,9 @@ jobs: - name: Check if all CUDA builds succeeded run: | if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then - echo "ERROR: One or more ExecutorTorch CUDA builds failed!" + echo "ERROR: One or more ExecuTorch CUDA builds failed!" echo "CUDA build results: ${{ needs.test-cuda-builds.result }}" exit 1 else - echo "SUCCESS: All ExecutorTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!" + echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!" fi From 5a5e829de93dbda2d31136ae7510a5972e9bac94 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 16:09:52 -0700 Subject: [PATCH 16/20] remove unsupported jq --- .ci/scripts/test-cuda-build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index 577d89ef057..8cb0cf4b698 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -8,9 +8,9 @@ set -exu # The generic Linux job chooses to use base env, not the one setup by the image -eval "$(conda shell.bash hook)" -CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") -conda activate "${CONDA_ENV}" +# eval "$(conda shell.bash hook)" +# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") +# conda activate "${CONDA_ENV}" CUDA_VERSION=${1:-"12.6"} From 6e7884fb605c8b52c2b83742947f539a4a9b4a74 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 17:06:57 -0700 Subject: [PATCH 17/20] use lru cache to replace global cache variable --- .ci/scripts/test-cuda-build.sh | 6 +++--- install_utils.py | 19 ++++--------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index 8cb0cf4b698..13fa1e9fe9c 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -8,9 +8,9 @@ set -exu # The generic Linux job chooses to use base env, not the one setup by the image -# eval "$(conda shell.bash hook)" -# CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") -# conda activate "${CONDA_ENV}" +eval "$(conda shell.bash hook)" +CONDA_ENV=$(conda info --envs | awk '/base/ {print $2}') +conda activate "${CONDA_ENV}" CUDA_VERSION=${1:-"12.6"} diff --git a/install_utils.py b/install_utils.py index fdd6c4bd93c..113005ba1e4 100644 --- a/install_utils.py +++ b/install_utils.py @@ -5,6 +5,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import functools import os import platform import re @@ -101,14 +102,11 @@ def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base): return f"{torch_nightly_url_base}/{cuda_suffix}" -# Global variable for caching torch URL -_torch_url_cache = "" - - +@functools.lru_cache(maxsize=1) def determine_torch_url(torch_nightly_url_base, supported_cuda_versions): """ Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS. - Uses caching to avoid redundant CUDA detection and print statements. + Uses @functools.lru_cache to avoid redundant CUDA detection and print statements. Args: torch_nightly_url_base: Base URL for PyTorch nightly packages @@ -117,17 +115,10 @@ def determine_torch_url(torch_nightly_url_base, supported_cuda_versions): Returns: URL string for PyTorch packages """ - global _torch_url_cache - - # Return cached URL if already determined - if _torch_url_cache: - return _torch_url_cache - # Check if CUDA delegate is enabled if not _is_cuda_enabled(): print("CUDA delegate not enabled, using CPU-only PyTorch") - _torch_url_cache = f"{torch_nightly_url_base}/cpu" - return _torch_url_cache + return f"{torch_nightly_url_base}/cpu" print("CUDA delegate enabled, detecting CUDA version...") @@ -141,8 +132,6 @@ def determine_torch_url(torch_nightly_url_base, supported_cuda_versions): torch_url = _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base) print(f"Using PyTorch URL: {torch_url}") - # Cache the result - _torch_url_cache = torch_url return torch_url From bd24c4be407c603355635308d2b49f2f36bea2ba Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 17:12:23 -0700 Subject: [PATCH 18/20] make SUPPORTED_CUDA_VERSIONS as tuple for hashable --- install_requirements.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install_requirements.py b/install_requirements.py index 409ed083970..4cc8858086b 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -18,11 +18,11 @@ # Supported CUDA versions - modify this to add/remove supported versions # Format: tuple of (major, minor) version numbers -SUPPORTED_CUDA_VERSIONS = [ +SUPPORTED_CUDA_VERSIONS = ( (12, 6), (12, 8), (12, 9), -] +) # Since ExecuTorch often uses main-branch features of pytorch, only the nightly # pip versions will have the required features. From d1c596c7193af7e4afe8c81002021850bb9a6d1d Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 17:35:14 -0700 Subject: [PATCH 19/20] use default conda env --- .ci/scripts/test_backend_linux.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh index d230860875d..a5233d15559 100755 --- a/.ci/scripts/test_backend_linux.sh +++ b/.ci/scripts/test_backend_linux.sh @@ -15,11 +15,6 @@ REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv" echo "Running backend test job for suite $SUITE, flow $FLOW." echo "Saving job artifacts to $ARTIFACT_DIR." -# The generic Linux job chooses to use base env, not the one setup by the image -eval "$(conda shell.bash hook)" -CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") -conda activate "${CONDA_ENV}" - export PYTHON_EXECUTABLE=python # CMake options to use, in addition to the defaults. From 19c2fb22e284ceecd39e4dfa2aa1cbcb45d322d5 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Fri, 19 Sep 2025 17:47:11 -0700 Subject: [PATCH 20/20] remove conda env selection in cuda-build.sh --- .ci/scripts/test-cuda-build.sh | 5 ----- .ci/scripts/test_backend_linux.sh | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh index 13fa1e9fe9c..52c2f21dbd2 100755 --- a/.ci/scripts/test-cuda-build.sh +++ b/.ci/scripts/test-cuda-build.sh @@ -7,11 +7,6 @@ set -exu -# The generic Linux job chooses to use base env, not the one setup by the image -eval "$(conda shell.bash hook)" -CONDA_ENV=$(conda info --envs | awk '/base/ {print $2}') -conda activate "${CONDA_ENV}" - CUDA_VERSION=${1:-"12.6"} echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ===" diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh index a5233d15559..d230860875d 100755 --- a/.ci/scripts/test_backend_linux.sh +++ b/.ci/scripts/test_backend_linux.sh @@ -15,6 +15,11 @@ REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv" echo "Running backend test job for suite $SUITE, flow $FLOW." echo "Saving job artifacts to $ARTIFACT_DIR." +# The generic Linux job chooses to use base env, not the one setup by the image +eval "$(conda shell.bash hook)" +CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") +conda activate "${CONDA_ENV}" + export PYTHON_EXECUTABLE=python # CMake options to use, in addition to the defaults.