From 29e8b7f21d3c5a2c0480fce95585511414e884c8 Mon Sep 17 00:00:00 2001
From: Abhinay Kukkadapu
Date: Mon, 23 Mar 2026 10:39:12 -0700
Subject: [PATCH] [QNN] Skip QNN auto-download when server is unreachable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Gracefully skip the QNN backend when the download server is unreachable
(e.g. devvms without a proxy) and harden the SDK download against flaky
connections.

- Add a --check flag to download_qnn_sdk.py that probes server reachability
  via requests.head() (proxy-aware)
- Skip QNN in GitHub Actions CI unless QNN_SDK_ROOT is set
- Increase download retries (3→5) with exponential backoff and timeouts
- Validate archive integrity after download to catch truncation
- Verify that the downloaded size matches Content-Length to trigger retries
  on incomplete downloads

Test Plan:
Tested on a devvm without a proxy (QNN skipped), with a proxy (QNN
downloaded), with QNN_SDK_ROOT set (QNN enabled), and with
-DEXECUTORCH_BUILD_QNN=ON.
--- backends/qualcomm/scripts/download_qnn_sdk.py | 68 ++++++++++++++++++- tools/cmake/preset/pybind.cmake | 31 ++++++++- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/backends/qualcomm/scripts/download_qnn_sdk.py b/backends/qualcomm/scripts/download_qnn_sdk.py index 896d96b0cb7..1ad67514a42 100644 --- a/backends/qualcomm/scripts/download_qnn_sdk.py +++ b/backends/qualcomm/scripts/download_qnn_sdk.py @@ -10,6 +10,7 @@ import sys import tarfile import tempfile +import time import urllib.request import zipfile from typing import Dict, List, Optional, Tuple @@ -203,7 +204,7 @@ def _stream_to_file( downloaded = archive_path.stat().st_size if archive_path.exists() else 0 headers = {"Range": f"bytes={downloaded}-"} if downloaded > 0 else {} - with session.get(url, stream=True, headers=headers) as r: + with session.get(url, stream=True, headers=headers, timeout=(30, 60)) as r: if r.status_code == 200 and downloaded > 0: downloaded = 0 # Server doesn't support Range — restart r.raise_for_status() @@ -235,12 +236,17 @@ def _stream_to_file( if total: _progress_newline() + if total > 0 and downloaded < total: + raise requests.exceptions.ConnectionError( + f"Incomplete download: {downloaded}/{total} bytes" + ) + logger.info("[QNN] Download complete.") return True def _download_archive( - url: str, archive_path: pathlib.Path, max_retries: int = 3 + url: str, archive_path: pathlib.Path, max_retries: int = 5 ) -> bool: """Streaming download with retry + resume on mid-stream failures.""" logger.debug("Archive will be saved to: %s", archive_path) @@ -265,10 +271,12 @@ def _download_archive( ) as e: _progress_newline() if attempt < max_retries: + backoff = min(2 ** (attempt - 1), 30) logger.warning( f"[QNN] Download interrupted: {type(e).__name__}. " - f"Retrying ({attempt}/{max_retries})..." + f"Retrying in {backoff}s ({attempt}/{max_retries})..." 
) + time.sleep(backoff) else: logger.error(f"[QNN] Download failed after {max_retries} attempts: {e}") return False @@ -281,6 +289,20 @@ def _download_archive( logger.error("[QNN] Downloaded file is empty or missing!") return False + # Validate archive integrity — catches truncation and corruption that + # size checks alone would miss (e.g. no Content-Length, or bit flips). + try: + if url.endswith(".zip"): + with zipfile.ZipFile(archive_path, "r"): + pass # Reading central directory is enough to detect truncation + elif url.endswith((".tar.gz", ".tgz")): + with tarfile.open(archive_path, "r:gz"): + pass + except (zipfile.BadZipFile, tarfile.TarError) as e: + logger.error(f"[QNN] Downloaded archive is corrupt: {e}") + archive_path.unlink(missing_ok=True) + return False + return True @@ -748,6 +770,37 @@ def install_qnn_sdk() -> bool: return _ensure_libcxx_stack() and _ensure_qnn_sdk_lib() +def _check_sdk_available() -> int: + """Return 0 if the SDK is cached or the download server is reachable, 1 otherwise. + + Uses requests.head() so HTTPS_PROXY env vars are respected — devvms behind + a proxy will succeed when the proxy is configured, and gracefully fail when + it is not. + """ + if not is_linux_x86(): + return 1 + + try: + sdk_dir = _get_sdk_dir() + if sdk_dir.exists() and any(sdk_dir.iterdir()): + return 0 + except Exception: + pass + + try: + r = requests.head( + "https://softwarecenter.qualcomm.com", + timeout=5, + allow_redirects=True, + ) + if r.status_code < 500: + return 0 + except requests.exceptions.RequestException: + pass + + return 1 + + def main(argv: Optional[List[str]] = None) -> int: parser = argparse.ArgumentParser( description="Helper utility for Qualcomm SDK staging." 
@@ -768,8 +821,17 @@ def main(argv: Optional[List[str]] = None) -> int: action="store_true", help="Ensure the SDK and runtime libraries are staged and loaded.", ) + parser.add_argument( + "--check", + action="store_true", + help="Exit 0 if the SDK is cached or the download host is reachable, " + "1 otherwise. Does not download anything.", + ) args = parser.parse_args(argv) + if args.check: + return _check_sdk_available() + # When --print-sdk-path is used, stdout must contain ONLY the SDK path. # Redirect all logger and progress output to stderr. if args.print_sdk_path: diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake index a0d06d74d17..1a7e08a9d60 100644 --- a/tools/cmake/preset/pybind.cmake +++ b/tools/cmake/preset/pybind.cmake @@ -37,7 +37,36 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER ON) set_overridable_option(EXECUTORCH_BUILD_EXTENSION_LLM ON) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64|i.86)$") - set_overridable_option(EXECUTORCH_BUILD_QNN ON) + # Auto-enable QNN on Linux x86 when the SDK is available. - QNN_SDK_ROOT set + # explicitly → always enable - GitHub Actions CI → skip (avoids flaky 1.3GB + # downloads) - Otherwise → probe the download server; skip gracefully when + # unreachable (e.g. devvms without proxy configured) + if(DEFINED QNN_SDK_ROOT OR DEFINED ENV{QNN_SDK_ROOT}) + set_overridable_option(EXECUTORCH_BUILD_QNN ON) + elseif("$ENV{GITHUB_ACTIONS}" STREQUAL "true") + message(STATUS "GitHub Actions CI detected: skipping QNN auto-download. " + "Set QNN_SDK_ROOT or -DEXECUTORCH_BUILD_QNN=ON to enable." 
+ ) + else() + execute_process( + COMMAND + ${PYTHON_EXECUTABLE} + ${CMAKE_CURRENT_LIST_DIR}/../../../backends/qualcomm/scripts/download_qnn_sdk.py + --check + RESULT_VARIABLE _qnn_available + OUTPUT_QUIET ERROR_QUIET + TIMEOUT 10 + ) + if(_qnn_available EQUAL 0) + set_overridable_option(EXECUTORCH_BUILD_QNN ON) + else() + message( + STATUS "QNN SDK not cached and download server unreachable. " + "Skipping QNN backend. Set QNN_SDK_ROOT or use " + "-DEXECUTORCH_BUILD_QNN=ON with network access to enable." + ) + endif() + endif() endif() elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "WIN32"