diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 8c9330d6f2c..e3a53c8bcb5 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e +53a2908a10f414a2f85caa06703a26a40e873869 diff --git a/.ci/scripts/test-cuda-build.sh b/.ci/scripts/test-cuda-build.sh new file mode 100755 index 00000000000..52c2f21dbd2 --- /dev/null +++ b/.ci/scripts/test-cuda-build.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -exu + +CUDA_VERSION=${1:-"12.6"} + +echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ===" + +# Function to build and test ExecutorTorch with CUDA support +test_executorch_cuda_build() { + local cuda_version=$1 + + echo "Building ExecutorTorch with CUDA ${cuda_version} support..." + echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel" + + # Check available resources before starting + echo "=== System Information ===" + echo "Available memory: $(free -h | grep Mem | awk '{print $2}')" + echo "Available disk space: $(df -h . | tail -1 | awk '{print $4}')" + echo "CPU cores: $(nproc)" + echo "CUDA version check:" + nvcc --version || echo "nvcc not found" + nvidia-smi || echo "nvidia-smi not found" + + # Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically + export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" + + echo "=== Starting ExecutorTorch Installation ===" + # Install ExecutorTorch with CUDA support with timeout and error handling + timeout 5400 ./install_executorch.sh || { + local exit_code=$? 
+ echo "ERROR: install_executorch.sh failed with exit code: $exit_code" + if [ $exit_code -eq 124 ]; then + echo "ERROR: Installation timed out after 90 minutes" + fi + exit $exit_code + } + + echo "SUCCESS: ExecutorTorch CUDA build completed" + + # Verify the installation + echo "=== Verifying ExecutorTorch CUDA Installation ===" + + # Test that ExecutorTorch was built successfully + python -c " +import executorch +print('SUCCESS: ExecutorTorch imported successfully') +" + + # Test CUDA availability and show details + python -c " +try: + import torch + print('INFO: PyTorch version:', torch.__version__) + print('INFO: CUDA available:', torch.cuda.is_available()) + + if torch.cuda.is_available(): + print('SUCCESS: CUDA is available for ExecutorTorch') + print('INFO: CUDA version:', torch.version.cuda) + print('INFO: GPU device count:', torch.cuda.device_count()) + print('INFO: Current GPU device:', torch.cuda.current_device()) + print('INFO: GPU device name:', torch.cuda.get_device_name()) + + # Test basic CUDA tensor operation + device = torch.device('cuda') + x = torch.randn(10, 10).to(device) + y = torch.randn(10, 10).to(device) + z = torch.mm(x, y) + print('SUCCESS: CUDA tensor operation completed on device:', z.device) + print('INFO: Result tensor shape:', z.shape) + + print('SUCCESS: ExecutorTorch CUDA integration verified') + else: + print('WARNING: CUDA not detected, but ExecutorTorch built successfully') + exit(1) +except Exception as e: + print('ERROR: ExecutorTorch CUDA test failed:', e) + exit(1) +" + + echo "SUCCESS: ExecutorTorch CUDA ${cuda_version} build and verification completed successfully" +} + +# Main execution +echo "Current working directory: $(pwd)" +echo "Directory contents:" +ls -la + +# Run the CUDA build test +test_executorch_cuda_build "${CUDA_VERSION}" diff --git a/.github/workflows/test-cuda-builds.yml b/.github/workflows/test-cuda-builds.yml new file mode 100644 index 00000000000..0930c6524e3 --- /dev/null +++ 
b/.github/workflows/test-cuda-builds.yml @@ -0,0 +1,63 @@ +# Test ExecuTorch CUDA Build Compatibility +# This workflow tests whether ExecuTorch can be successfully built with CUDA support +# across different CUDA versions (12.6, 12.8, 12.9) using the command: +# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh +# +# Note: ExecuTorch automatically detects the system CUDA version using nvcc and +# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed. + +name: Test CUDA Builds + +on: + pull_request: + push: + branches: + - main + - release/* + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} + cancel-in-progress: false + +jobs: + test-cuda-builds: + strategy: + fail-fast: false + matrix: + cuda-version: ["12.6", "12.8", "12.9"] + + name: test-executorch-cuda-build-${{ matrix.cuda-version }} + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + timeout: 90 + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: ${{ matrix.cuda-version }} + use-custom-docker-registry: false + submodules: recursive + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + script: | + set -eux + + # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version + # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" + source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}" + + # This job will fail if any of the CUDA versions fail + check-all-cuda-builds: + needs: test-cuda-builds + runs-on: ubuntu-latest + if: always() + steps: + - name: Check if all CUDA builds succeeded + run: | + if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then + echo "ERROR: One or more ExecuTorch CUDA builds failed!" 
+ echo "CUDA build results: ${{ needs.test-cuda-builds.result }}" + exit 1 + else + echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9) completed successfully!" + fi diff --git a/install_requirements.py b/install_requirements.py index cbae175e276..4cc8858086b 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -7,60 +7,22 @@ import argparse import os -import platform -import re import subprocess import sys - -def python_is_compatible(): - # Scrape the version range from pyproject.toml, which should be in the current directory. - version_specifier = None - with open("pyproject.toml", "r") as file: - for line in file: - if line.startswith("requires-python"): - match = re.search(r'"([^"]*)"', line) - if match: - version_specifier = match.group(1) - break - - if not version_specifier: - print( - "WARNING: Skipping python version check: version range not found", - file=sys.stderr, - ) - return False - - # Install the packaging module if necessary. - try: - import packaging - except ImportError: - subprocess.run( - [sys.executable, "-m", "pip", "install", "packaging"], check=True - ) - # Compare the current python version to the range in version_specifier. Exits - # with status 1 if the version is not compatible, or with status 0 if the - # version is compatible or the logic itself fails. - try: - import packaging.specifiers - import packaging.version - - python_version = packaging.version.parse(platform.python_version()) - version_range = packaging.specifiers.SpecifierSet(version_specifier) - if python_version not in version_range: - print( - f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"', - file=sys.stderr, - ) - return False - except Exception as e: - print(f"WARNING: Skipping python version check: {e}", file=sys.stderr) - return True - +from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible # The pip repository that hosts nightly torch packages. 
-TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu" +# This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled. +TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly" +# Supported CUDA versions - modify this to add/remove supported versions +# Format: tuple of (major, minor) version numbers +SUPPORTED_CUDA_VERSIONS = ( + (12, 6), + (12, 8), + (12, 9), +) # Since ExecuTorch often uses main-branch features of pytorch, only the nightly # pip versions will have the required features. @@ -71,7 +33,10 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250906" +# +# NOTE: If you're changing, make the corresponding supported CUDA versions in +# SUPPORTED_CUDA_VERSIONS above if needed. +NIGHTLY_VERSION = "dev20250915" def install_requirements(use_pytorch_nightly): @@ -84,12 +49,15 @@ def install_requirements(use_pytorch_nightly): ) sys.exit(1) + # Determine the appropriate PyTorch URL based on CUDA delegate status + torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS) + # pip packages needed by exir. TORCH_PACKAGE = [ # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - f"torch==2.9.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", + f"torch==2.10.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", ] # Install the requirements for core ExecuTorch package. 
@@ -105,7 +73,7 @@ def install_requirements(use_pytorch_nightly): "requirements-dev.txt", *TORCH_PACKAGE, "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, ], check=True, ) @@ -147,10 +115,13 @@ def install_requirements(use_pytorch_nightly): def install_optional_example_requirements(use_pytorch_nightly): + # Determine the appropriate PyTorch URL based on CUDA delegate status + torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE, SUPPORTED_CUDA_VERSIONS) + print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ ( - f"torchvision==0.24.0.{NIGHTLY_VERSION}" + f"torchvision==0.25.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchvision" ), @@ -165,7 +136,7 @@ def install_optional_example_requirements(use_pytorch_nightly): "install", *DOMAIN_LIBRARIES, "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, ], check=True, ) @@ -180,7 +151,7 @@ def install_optional_example_requirements(use_pytorch_nightly): "-r", "requirements-examples.txt", "--extra-index-url", - TORCH_NIGHTLY_URL, + torch_url, "--upgrade-strategy", "only-if-needed", ], @@ -188,17 +159,6 @@ def install_optional_example_requirements(use_pytorch_nightly): ) -# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source. -# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024). -def is_intel_mac_os(): - # Returns True if running on Intel macOS. - return platform.system().lower() == "darwin" and platform.machine().lower() in ( - "x86", - "x86_64", - "i386", - ) - - def main(args): parser = argparse.ArgumentParser() parser.add_argument( diff --git a/install_utils.py b/install_utils.py new file mode 100644 index 00000000000..113005ba1e4 --- /dev/null +++ b/install_utils.py @@ -0,0 +1,191 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024-25 Arm Limited and/or its affiliates. +# All rights reserved. 
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import functools
import os
import platform
import re
import subprocess
import sys

# Constants that CMake evaluates as boolean true (compared case-insensitively).
_CMAKE_TRUE_VALUES = frozenset({"1", "ON", "YES", "TRUE", "Y"})

# Matches "-DEXECUTORCH_BUILD_CUDA=<value>" as well as the typed form
# "-DEXECUTORCH_BUILD_CUDA:BOOL=<value>"; the value stops at whitespace or a quote.
_CUDA_FLAG_PATTERN = re.compile(r"""-DEXECUTORCH_BUILD_CUDA(?::\w+)?=([^\s"']*)""")


def _is_cuda_enabled():
    """Check if CUDA delegate is enabled via CMAKE_ARGS environment variable.

    The flag counts as enabled when the *last* ``-DEXECUTORCH_BUILD_CUDA=...``
    definition found in ``CMAKE_ARGS`` carries a CMake-true constant
    (``ON``, ``1``, ``YES``, ``TRUE``, ``Y``; any letter case). Taking the last
    definition lets a later ``=OFF`` override an earlier ``=ON``.

    Returns:
        True if the CUDA build flag is set to a true value, False otherwise
        (including when ``CMAKE_ARGS`` is unset or does not mention the flag).
    """
    cmake_args = os.environ.get("CMAKE_ARGS", "")
    flag_values = _CUDA_FLAG_PATTERN.findall(cmake_args)
    if not flag_values:
        return False
    return flag_values[-1].upper() in _CMAKE_TRUE_VALUES


def _cuda_version_to_pytorch_suffix(major, minor):
    """
    Generate PyTorch CUDA wheel suffix from CUDA version numbers.

    Args:
        major: CUDA major version (e.g., 12)
        minor: CUDA minor version (e.g., 6)

    Returns:
        PyTorch wheel suffix string (e.g., "cu126")
    """
    return f"cu{major}{minor}"


def _get_cuda_version(supported_cuda_versions):
    """
    Get the CUDA version installed on the system using nvcc command.
    Returns a tuple (major, minor).

    Args:
        supported_cuda_versions: Iterable of supported CUDA versions, each a
            (major, minor) pair

    Raises:
        RuntimeError: if nvcc is not found, nvcc exits with an error, the
            version cannot be parsed, or the detected version is not supported
    """
    # Only the subprocess call can raise the two exceptions translated below,
    # so keep it alone inside the try block.
    try:
        nvcc_result = subprocess.run(
            ["nvcc", "--version"], capture_output=True, text=True, check=True
        )
    except FileNotFoundError as err:
        raise RuntimeError(
            "CUDA delegate is enabled but nvcc (CUDA compiler) is not found in PATH. "
            "Please install CUDA toolkit or try on CPU-only delegates."
        ) from err
    except subprocess.CalledProcessError as err:
        raise RuntimeError(
            f"CUDA delegate is enabled but nvcc command failed with error: {err}. "
            "Please ensure CUDA is properly installed or try on CPU-only delegates."
        ) from err

    # Output contains line like "Cuda compilation tools, release 12.6, V12.6.68"
    match = re.search(r"release (\d+)\.(\d+)", nvcc_result.stdout)
    if not match:
        raise RuntimeError(
            "CUDA delegate is enabled but could not parse CUDA version from nvcc output. "
            "Please ensure CUDA is properly installed or try on CPU-only delegates."
        )

    major, minor = int(match.group(1)), int(match.group(2))

    # Normalize entries to tuples so list-style pairs compare equal as well.
    supported = [tuple(version) for version in supported_cuda_versions]
    if (major, minor) not in supported:
        available_versions = ", ".join(
            f"{sup_major}.{sup_minor}" for sup_major, sup_minor in supported
        )
        raise RuntimeError(
            f"Detected CUDA version {major}.{minor} is not supported. "
            f"Only the following CUDA versions are supported: {available_versions}. "
            f"Please install a supported CUDA version or try on CPU-only delegates."
        )

    return (major, minor)


def _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base):
    """
    Get the appropriate PyTorch CUDA URL for the given CUDA version.

    Args:
        cuda_version: tuple of (major, minor) version numbers
        torch_nightly_url_base: Base URL for PyTorch nightly packages

    Returns:
        URL string for PyTorch CUDA packages
    """
    major, minor = cuda_version
    # Generate CUDA suffix (version validation is already done in _get_cuda_version)
    cuda_suffix = _cuda_version_to_pytorch_suffix(major, minor)

    return f"{torch_nightly_url_base}/{cuda_suffix}"


@functools.lru_cache(maxsize=1)
def _determine_torch_url_cached(torch_nightly_url_base, supported_cuda_versions):
    """Cached worker for determine_torch_url; arguments must be hashable."""
    # Check if CUDA delegate is enabled
    if not _is_cuda_enabled():
        print("CUDA delegate not enabled, using CPU-only PyTorch")
        return f"{torch_nightly_url_base}/cpu"

    print("CUDA delegate enabled, detecting CUDA version...")

    # Get CUDA version
    cuda_version = _get_cuda_version(supported_cuda_versions)

    major, minor = cuda_version
    print(f"Detected CUDA version: {major}.{minor}")

    # Get appropriate PyTorch CUDA URL
    torch_url = _get_pytorch_cuda_url(cuda_version, torch_nightly_url_base)
    print(f"Using PyTorch URL: {torch_url}")

    return torch_url


def determine_torch_url(torch_nightly_url_base, supported_cuda_versions):
    """
    Determine the appropriate PyTorch installation URL based on CUDA availability and CMAKE_ARGS.

    The result is memoized (most recent argument pair only) to avoid redundant
    CUDA detection and print statements. The cache key is the argument pair, so
    a change to the CMAKE_ARGS environment variable between two calls with
    identical arguments is not observed.

    Args:
        torch_nightly_url_base: Base URL for PyTorch nightly packages
        supported_cuda_versions: List or tuple of supported CUDA versions,
            each a (major, minor) pair

    Returns:
        URL string for PyTorch packages

    Raises:
        RuntimeError: if the CUDA delegate is enabled but no supported CUDA
            version can be detected through nvcc
    """
    # lru_cache needs hashable arguments; converting here lets callers pass a
    # plain list (as the parameter description advertises) without a TypeError.
    hashable_versions = tuple(tuple(version) for version in supported_cuda_versions)
    return _determine_torch_url_cached(torch_nightly_url_base, hashable_versions)


# Keep the cache-management helpers reachable under the public name.
determine_torch_url.cache_clear = _determine_torch_url_cached.cache_clear
determine_torch_url.cache_info = _determine_torch_url_cached.cache_info


# Prebuilt binaries for Intel-based macOS are no longer available on PyPI; users must compile from source.
# PyTorch stopped building macOS x86_64 binaries since version 2.3.0 (January 2024).
def is_intel_mac_os():
    """Return True if running on Intel macOS (Darwin on x86 / x86_64 / i386)."""
    return platform.system().lower() == "darwin" and platform.machine().lower() in (
        "x86",
        "x86_64",
        "i386",
    )


def python_is_compatible():
    """Check the running interpreter against ``requires-python`` in pyproject.toml.

    The version range is scraped from ``pyproject.toml`` in the current working
    directory. The check is best-effort: when the range cannot be found or the
    comparison itself fails, a warning is printed to stderr and the interpreter
    is treated as compatible.

    Returns:
        False only when the interpreter version is known to fall outside the
        declared range; True otherwise.

    Raises:
        OSError: if ``pyproject.toml`` cannot be opened.
        subprocess.CalledProcessError: if ``packaging`` is missing and
            installing it with pip fails.
    """
    # Scrape the version range from pyproject.toml, which should be in the current directory.
    version_specifier = None
    with open("pyproject.toml", "r", encoding="utf-8") as file:
        for line in file:
            if line.startswith("requires-python"):
                # Accept both TOML basic ("...") and literal ('...') strings.
                match = re.search(r"""["']([^"']*)["']""", line)
                if match:
                    version_specifier = match.group(1)
                break

    if not version_specifier:
        print(
            "WARNING: Skipping python version check: version range not found",
            file=sys.stderr,
        )
        # The check is skipped, not failed: report "compatible" so the caller
        # proceeds, as the warning text promises.
        return True

    # Install the packaging module if necessary.
    try:
        import packaging  # noqa: F401  (availability probe only)
    except ImportError:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "packaging"], check=True
        )

    # Compare the current python version to the range in version_specifier.
    # Returns False if the version is not compatible, and True if the version
    # is compatible or the comparison logic itself fails.
    try:
        import packaging.specifiers
        import packaging.version

        python_version = packaging.version.parse(platform.python_version())
        version_range = packaging.specifiers.SpecifierSet(version_specifier)
        if python_version not in version_range:
            print(
                f'ERROR: ExecuTorch does not support python version {python_version}: must satisfy "{version_specifier}"',
                file=sys.stderr,
            )
            return False
    except Exception as e:
        print(f"WARNING: Skipping python version check: {e}", file=sys.stderr)
    return True