From a72fba4e3bcac8c587a2873b0ff4707e5f78aeb8 Mon Sep 17 00:00:00 2001 From: cccclai Date: Wed, 1 Oct 2025 12:43:30 -0700 Subject: [PATCH] Try to get nightly wheel build work with qnn (#14633) Our current nightly/release wheel package is built following https://github.com/pytorch/test-infra/wiki/Using-Nova-Reusable-Build-Workflows. As described by https://github.com/pytorch/test-infra/blob/5398e1a00c39939f43251f29031c37e6d0c84647/.github/workflows/build_wheels_linux.yml#L4, the docker image the infra team uses to release the nightly/release package is from https://github.com/pypa/manylinux, and it's currently using https://github.com/pypa/manylinux?tab=readme-ov-file#manylinux_2_28-almalinux-8-based. This means the glibc version is 2.28 and GCC is 14. The issue is that QNN .so files are not compatible with glibc 2.28. The minimum version is 2.34 (I first tried 2.29, because the initial failure asked for 2.29, but it still fails). In this PR, instead of checking glibc and failing directly when the minimum version isn't met, we download glibc 2.34 to /tmp. Unlike the strategy used for libc++, we don't load these libraries into the running process, because the Python process itself starts with the system glibc 2.28. 
We need to re-execute the process with the new glibc (cherry picked from commit 19be2a3ccbfb26f20cce1cc83a1f07e6e8c909be) --- backends/qualcomm/__init__.py | 14 +- backends/qualcomm/scripts/download_qnn_sdk.py | 280 ++++++++++++++---- setup.py | 3 +- 3 files changed, 218 insertions(+), 79 deletions(-) diff --git a/backends/qualcomm/__init__.py b/backends/qualcomm/__init__.py index 04ba5fcf24b..5770dfb0fcd 100644 --- a/backends/qualcomm/__init__.py +++ b/backends/qualcomm/__init__.py @@ -1,23 +1,13 @@ import os -from .scripts.download_qnn_sdk import ( - check_glibc_exist_and_validate, - install_qnn_sdk, - is_linux_x86, -) +from .scripts.download_qnn_sdk import install_qnn_sdk, is_linux_x86 env_flag = os.getenv("EXECUTORCH_BUILDING_WHEEL", "0").lower() # If users have preinstalled QNN_SDK_ROOT, we will use it. qnn_sdk_root_flag = os.getenv("QNN_SDK_ROOT", None) -if ( - env_flag not in ("1", "true", "yes") - and not qnn_sdk_root_flag - and is_linux_x86() - and check_glibc_exist_and_validate() -): +if env_flag not in ("1", "true", "yes") and not qnn_sdk_root_flag and is_linux_x86(): ok = install_qnn_sdk() - if not ok: raise RuntimeError("Failed to install QNN SDK. 
Please check the logs above.") diff --git a/backends/qualcomm/scripts/download_qnn_sdk.py b/backends/qualcomm/scripts/download_qnn_sdk.py index 35006a41433..747524a0e5b 100644 --- a/backends/qualcomm/scripts/download_qnn_sdk.py +++ b/backends/qualcomm/scripts/download_qnn_sdk.py @@ -6,12 +6,15 @@ import platform import re import shutil +import subprocess +import sys import tarfile import tempfile import urllib.request import zipfile from typing import Dict, List, Optional, Tuple + logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -34,68 +37,81 @@ def is_linux_x86() -> bool: ) -import subprocess +######################### +# Cache directory helper +######################### -MINIMUM_LIBC_VERSION = 2.29 +APP_NAMESPACE = ["executorch", "qnn"] -REQUIRED_LIBC_LIBS = [ - "/lib/x86_64-linux-gnu/libc.so.6", - "/lib64/libc.so.6", - "/lib/libc.so.6", -] +def _get_staging_dir(*parts: str) -> pathlib.Path: + r""" + Return a cross-platform staging directory for staging SDKs/libraries. + + - On Linux: + ~/.cache/executorch/qnn/ + (falls back to $HOME/.cache if $XDG_CACHE_HOME is unset) -def check_glibc_exist_and_validate() -> bool: + - On Windows (not supported yet, but as placeholder): + %LOCALAPPDATA%\executorch\qnn\ + (falls back to $HOME/AppData/Local if %LOCALAPPDATA% is unset) + + - Override: + If QNN_STAGING_DIR is set in the environment, that path is used instead. + + Args: + parts (str): Subdirectories to append under the root staging dir. + + Returns: + pathlib.Path: Fully qualified staging path. """ - Check if users have glibc installed. 
+ # Environment override wins + base = os.environ.get("QNN_STAGING_DIR") + if base: + return pathlib.Path(base).joinpath(*parts) + + system = platform.system().lower() + if system == "windows": + # On Windows, prefer %LOCALAPPDATA%, fallback to ~/AppData/Local + base = pathlib.Path( + os.environ.get("LOCALAPPDATA", pathlib.Path.home() / "AppData" / "Local") + ) + elif is_linux_x86(): + # On Linux/Unix, prefer $XDG_CACHE_HOME, fallback to ~/.cache + base = pathlib.Path( + os.environ.get("XDG_CACHE_HOME", pathlib.Path.home() / ".cache") + ) + else: + raise ValueError(f"Unsupported platform: {system}") + + return base.joinpath(*APP_NAMESPACE, *parts) + + +def _atomic_download(url: str, dest: pathlib.Path): """ - exists = False - for path in REQUIRED_LIBC_LIBS: - try: - output = subprocess.check_output( - [path, "--version"], stderr=subprocess.STDOUT - ) - output = output.decode().split("\n")[0] - logger.debug(f"[QNN] glibc version for path {path} is: {output}") - match = re.search(r"version (\d+\.\d+)", output) - if match: - version = match.group(1) - if float(version) >= MINIMUM_LIBC_VERSION: - logger.debug(f"[QNN] glibc version is {version}.") - exists = True - return True - else: - logger.error( - f"[QNN] glibc version is too low. The minimum libc version is {MINIMUM_LIBC_VERSION} Please install glibc following the commands below." - ) - else: - logger.error("[QNN] glibc version not found.") + Download URL into dest atomically: + - Write to a temp file in the same dir + - Move into place if successful + """ + dest.parent.mkdir(parents=True, exist_ok=True) - except Exception: - continue + # Temp file in same dir (guarantees atomic rename) + with tempfile.NamedTemporaryFile(dir=dest.parent, delete=False) as tmp: + tmp_path = pathlib.Path(tmp.name) - if not exists: - logger.error( - r"""" - [QNN] glibc not found or the version is too low. Please install glibc following the commands below. 
- Ubuntu/Debian: - sudo apt update - sudo apt install libc6 - - Fedora/Red Hat: - sudo dnf install glibc - - Arch Linux: - sudo pacman -S glibc - - Also please make sure the glibc version is >= MINIMUM_LIBC_VERSION. You can verify the glibc version by running the following command: - Option 1: - ldd --version - Option 2: - /path/to/libc.so.6 --version - """ - ) - return exists + try: + urllib.request.urlretrieve(url, tmp_path) + tmp_path.replace(dest) # atomic rename + except Exception: + # Clean up partial file on failure + if tmp_path.exists(): + tmp_path.unlink(missing_ok=True) + raise + + +#################### +# qnn sdk download management +#################### def _download_archive(url: str, archive_path: pathlib.Path) -> bool: @@ -178,9 +194,6 @@ def _download_qnn_sdk(dst_folder=SDK_DIR) -> Optional[pathlib.Path]: if not is_linux_x86(): logger.info("[QNN] Skipping Qualcomm SDK (only supported on Linux x86).") return None - elif not check_glibc_exist_and_validate(): - logger.info("[QNN] Skipping Qualcomm SDK (glibc not found or version too old).") - return None else: logger.info("[QNN] Downloading Qualcomm SDK for Linux x86") @@ -241,6 +254,136 @@ def _extract_tar(archive_path: pathlib.Path, prefix: str, target_dir: pathlib.Pa dst.write(src.read()) +#################### +# libc management +#################### + +GLIBC_VERSION = "2.34" +GLIBC_REEXEC_GUARD = "QNN_GLIBC_REEXEC" +MINIMUM_LIBC_VERSION = GLIBC_VERSION + + +def _get_glibc_libdir() -> pathlib.Path: + glibc_root = _get_staging_dir(f"glibc-{GLIBC_VERSION}") + return glibc_root / "lib" + + +def _parse_version(v: str) -> tuple[int, int]: + """Turn '2.34' → (2,34) so it can be compared.""" + parts = v.split(".") + return int(parts[0]), int(parts[1]) if len(parts) > 1 else 0 + + +def _current_glibc_version() -> str: + """Return system glibc version string (via ctypes).""" + try: + libc = ctypes.CDLL("libc.so.6") + func = libc.gnu_get_libc_version + func.restype = ctypes.c_char_p + return func().decode() + 
except Exception as e: + return f"error:{e}" + + +def _resolve_glibc_loader() -> pathlib.Path | None: + """Return staged ld.so path if available.""" + for p in [ + _get_glibc_libdir() / f"ld-{GLIBC_VERSION}.so", + _get_glibc_libdir() / "ld-linux-x86-64.so.2", + ]: + if p.exists(): + return p + return None + + +def _stage_prebuilt_glibc(): + """Download + extract Fedora 35 glibc RPM into /tmp.""" + logger.info(">>> Staging prebuilt glibc-%s from Fedora 35 RPM", GLIBC_VERSION) + _get_glibc_libdir().mkdir(parents=True, exist_ok=True) + rpm_path = _get_staging_dir("glibc") / "glibc.rpm" + work_dir = _get_staging_dir("glibc") / "extracted" + rpm_url = ( + "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/35/" + "Everything/x86_64/os/Packages/g/glibc-2.34-7.fc35.x86_64.rpm" + ) + + rpm_path.parent.mkdir(parents=True, exist_ok=True) + logger.info("[glibc] Downloading %s -> %s", rpm_url, rpm_path) + try: + urllib.request.urlretrieve(rpm_url, rpm_path) + except Exception as e: + logger.error("[glibc] Failed to download %s: %s", rpm_url, e) + raise + + # Extract + if work_dir.exists(): + shutil.rmtree(work_dir) + work_dir.mkdir(parents=True) + subprocess.check_call(["bsdtar", "-C", str(work_dir), "-xf", str(rpm_path)]) + + # Copy runtime libs + staged = [ + "ld-linux-x86-64.so.2", + "libc.so.6", + "libdl.so.2", + "libpthread.so.0", + "librt.so.1", + "libm.so.6", + "libutil.so.1", + ] + for lib in staged: + src = work_dir / "lib64" / lib + if src.exists(): + shutil.copy2(src, _get_glibc_libdir() / lib) + logger.info("[glibc] Staged %s", lib) + else: + logger.warning("[glibc] Missing %s in RPM", lib) + + +def ensure_glibc_minimum(min_version: str = GLIBC_VERSION): + """ + Ensure process runs under glibc >= min_version. + - If system glibc is new enough → skip. + - Else → stage Fedora RPM and re-exec under staged loader. 
+ """ + current = _current_glibc_version() + logger.info("[glibc] Current loaded glibc: %s", current) + + # If system glibc already sufficient → skip everything + m = re.match(r"(\d+\.\d+)", current) + if m and _parse_version(m.group(1)) >= _parse_version(min_version): + logger.info("[glibc] System glibc >= %s, no staging needed.", min_version) + return + + # Avoid infinite loop + if os.environ.get(GLIBC_REEXEC_GUARD) == "1": + logger.info("[glibc] Already re-exec'd once, continuing.") + return + + # Stage prebuilt if not already staged + if not (_get_glibc_libdir() / "libc.so.6").exists(): + _stage_prebuilt_glibc() + + loader = _resolve_glibc_loader() + if not loader: + logger.error("[glibc] Loader not found in %s", _get_glibc_libdir()) + return + + logger.info( + "[glibc] Re-execing under loader %s with libdir %s", loader, _get_glibc_libdir() + ) + os.environ[GLIBC_REEXEC_GUARD] = "1" + os.execv( + str(loader), + [str(loader), "--library-path", str(_get_glibc_libdir()), sys.executable] + + sys.argv, + ) + + +#################### +# libc++ management +#################### + LLVM_VERSION = "14.0.0" LIBCXX_BASE_NAME = f"clang+llvm-{LLVM_VERSION}-x86_64-linux-gnu-ubuntu-18.04" LLVM_URL = f"https://github.com/llvm/llvm-project/releases/download/llvmorg-{LLVM_VERSION}/{LIBCXX_BASE_NAME}.tar.xz" @@ -258,12 +401,17 @@ def _stage_libcxx(target_dir: pathlib.Path): logger.info("[libcxx] Already staged at %s, skipping download", target_dir) return - temp_tar = pathlib.Path("/tmp") / f"{LIBCXX_BASE_NAME}.tar.xz" - temp_extract = pathlib.Path("/tmp") / LIBCXX_BASE_NAME + libcxx_stage = _get_staging_dir(f"libcxx-{LLVM_VERSION}") + temp_tar = libcxx_stage / f"{LIBCXX_BASE_NAME}.tar.xz" + temp_extract = libcxx_stage / LIBCXX_BASE_NAME if not temp_tar.exists(): logger.info("[libcxx] Downloading %s", LLVM_URL) - urllib.request.urlretrieve(LLVM_URL, temp_tar) + _atomic_download(LLVM_URL, temp_tar) + + # Sanity check before extracting + if not temp_tar.exists() or 
temp_tar.stat().st_size == 0: + raise FileNotFoundError(f"[libcxx] Tarball missing or empty: {temp_tar}") logger.info("[libcxx] Extracting %s", temp_tar) with tarfile.open(temp_tar, "r:xz") as tar: @@ -437,8 +585,10 @@ def install_qnn_sdk() -> bool: Returns: True if both steps succeeded (or were already satisfied), else False. """ - if check_glibc_exist_and_validate(): - if _ensure_libcxx_stack(): - if _ensure_qnn_sdk_lib(): - return True - return False + logger.info("[QNN] Starting SDK installation") + + # Make sure we’re running under >= 2.34 + ensure_glibc_minimum(GLIBC_VERSION) + + # libc++ and QNN SDK setup + return _ensure_libcxx_stack() and _ensure_qnn_sdk_lib() diff --git a/setup.py b/setup.py index def9b996be0..005274c1540 100644 --- a/setup.py +++ b/setup.py @@ -467,11 +467,10 @@ def run(self): # Following code is for building the Qualcomm backend. from backends.qualcomm.scripts.download_qnn_sdk import ( _download_qnn_sdk, - check_glibc_exist_and_validate, is_linux_x86, ) - if is_linux_x86() and check_glibc_exist_and_validate(): + if is_linux_x86(): os.environ["EXECUTORCH_BUILDING_WHEEL"] = "1" with tempfile.TemporaryDirectory() as tmpdir: