Changes from all commits (2834 commits)
f20a266
[easy] Update test/dynamo/test_utils.py (#151599)
masnesral Apr 17, 2025
1b267a5
Revert "[export] allow partially specifying keys for dynamic shapes d…
pytorchmergebot Apr 18, 2025
7ffa900
Replace perf-nightly-macos with inductor-perf-nightly-macos (#151698)
huydhn Apr 18, 2025
88b0553
[AMD] Remove fbcode limit for uuid (#151652)
xw285cornell Apr 18, 2025
b0f26e8
Use reusable binary docker build action for libtorch (#151488)
clee2000 Apr 18, 2025
a6e46fa
Use reusable binary docker build action for manywheel (#151489)
clee2000 Apr 18, 2025
6e7b6e8
[c10d][fr] Fix a bug when first rank is not zero in the script (#151683)
fduwjj Apr 18, 2025
b74be52
[CUDA][NVTX] Move nvtx3 code from cmake/public/cuda.cmake to cmake/De…
nWEIdia Apr 18, 2025
02dd096
[invoke_subgraph][fake tensor] Add finalizer on subgraph instead of t…
anijain2305 Apr 18, 2025
56d318b
[ONNX][Eazy] Update onnx program doc formatting and improve robustnes…
justinchuby Apr 18, 2025
bd77c3e
[easy] Update test/dynamo/test_structured_trace.py (#151606)
masnesral Apr 17, 2025
97d97ae
Revert "[dynamic shapes] guard_or_false for _reshape_view_helper, uti…
pytorchmergebot Apr 18, 2025
fc7d493
Overload Library::def rather than templating it (#151626)
swolchok Apr 18, 2025
704a504
Reserve vectors in FunctionSchema::cloneWithRealTypes (#151627)
swolchok Apr 18, 2025
313ceb4
Reserve vector in StringCordView ctor (#151628)
swolchok Apr 18, 2025
cac8d35
Use fmt::format for debug strings in Library init (#151629)
swolchok Apr 18, 2025
e48189c
Don't eagerly create AliasInfo in parseAliasDeclaration (#151630)
swolchok Apr 18, 2025
359e1d5
[Profiler] Remove Decref From Python Context (#151625)
sraikund16 Apr 18, 2025
adf5f38
Don't specialize min/max (#151347)
tugsbayasgalan Apr 18, 2025
cfc4d74
inductor.config.descriptive_names = False is not actually supported (…
exclamaforte Apr 19, 2025
843e4d1
[Benchmarking] Enable HF_GPT2 benchmarking on Metal (#151721)
malfet Apr 19, 2025
6261db7
Revert "inductor.config.descriptive_names = False is not actually sup…
pytorchmergebot Apr 19, 2025
414ce71
[Testing] Make test_add_complex3 run on different devices (#151732)
malfet Apr 19, 2025
2673ea4
Add api to enable/disable NaN detector per-PG (#151723)
wconstab Apr 18, 2025
f6c1cf0
[ROCm][TunableOp] Support submatrices in offline tuning (#151138)
naromero77amd Apr 19, 2025
92d0c40
Revert "Cache the value of torch_key in subproc (#151057)"
pytorchmergebot Apr 19, 2025
8e5fefe
[Easy] The event_id of torch.cuda.Event and torch.xpu.Event always is…
FFFrog Apr 19, 2025
92baeec
[Easy] Fix the function signature of torch.Event (#151221)
FFFrog Apr 19, 2025
1e1d0a4
[Easy][torch.Event] Fix and improve the docs of torch.Event (#151411)
FFFrog Apr 19, 2025
68f748a
Revert "[Testing] Make test_add_complex3 run on different devices (#1…
pytorchmergebot Apr 19, 2025
483e61b
[BE][Easy]: Simplify reversed call in graph matcher (#151674)
Skylion007 Apr 19, 2025
ed511cd
[Testing] Make test_add_complex3 run on different devices (#151732)
malfet Apr 19, 2025
9b74ea2
[Benchmarking] Run MPS benchmarks for [b]float16 (#151747)
malfet Apr 19, 2025
c448256
Revert "[Easy][torch.Event] Fix and improve the docs of torch.Event (…
pytorchmergebot Apr 19, 2025
48761e9
Revert "[Easy] Fix the function signature of torch.Event (#151221)"
pytorchmergebot Apr 19, 2025
a40e876
Support fp8 dtypes in assert_close (#150002)
exclamaforte Apr 20, 2025
6b45b6e
run lintrunner for Export d68846308 (#151725)
Camyll Apr 20, 2025
c3a7278
Use more efficient row/col computation (#151474)
aartbik Apr 20, 2025
470132c
[MPS] Add support for hermite_polynomial_he (inductor/eager). (#151754)
dcci Apr 20, 2025
fc2dd6d
[Inductor] Update should_decompose_mm condition for CPU (#151730)
hl475 Apr 21, 2025
9c2ac2b
[pytorch][triton] Enable warp spec for FlexAttention kernel (#150470)
Apr 21, 2025
8eb21df
consolidate ATen/test/dispatch_key_set_test.cpp with rest of Dispatch…
swolchok Apr 20, 2025
f7ddc51
[Easy] Fix the compilation warning of BlasKernel. (#151736)
FFFrog Apr 20, 2025
2a9afda
[Benchmarking] Add sam and stable_diffusion to MPS benchmarked models…
malfet Apr 21, 2025
bf28d1c
Expose bicubic mode for torch::nn::functional::grid_sample in LibTorc…
inventshah Apr 21, 2025
2eacdb9
Add OIDC permissions to xpu workflow (#151455)
zxiiro Apr 21, 2025
515a0f6
[ez] fix typo in comment (#151755)
bobrenjc93 Apr 20, 2025
33808f0
Revert "[Easy] The event_id of torch.cuda.Event and torch.xpu.Event a…
pytorchmergebot Apr 21, 2025
9374064
Revert "[Easy] Add more check for elapsedTime of torch.xxx.Event and …
pytorchmergebot Apr 21, 2025
cea43f7
[Testing] Unskip expm1 log1p for MPS (#151790)
malfet Apr 21, 2025
287998b
Run standalone compile tests on cpu/gpu (#151768)
oulgen Apr 21, 2025
67c2869
Unpack the output code in the standalone_compile (#151609)
oulgen Apr 21, 2025
0f8613b
Introduce unsafe way to mark functions as cacheable (#151603)
oulgen Apr 21, 2025
e2b1c06
[cutlass] Define GELU_taylor<float> only if CUTLASS version is <= 380…
henrylhtsang Apr 21, 2025
f37e138
[MPS] Enable log1p and sigmoid for int64 (#151791)
malfet Apr 21, 2025
fd04c79
Revert "[aot autograd][logging] Profile large missing gaps in compile…
pytorchmergebot Apr 21, 2025
d79144d
[BE] Move aarch64 docker build to larger node (#151808)
malfet Apr 21, 2025
9680016
[MergeBot] Update PullRequestResolved Regex (#151814)
malfet Apr 21, 2025
b7c7000
Ensure runners have the required prefix (#151815)
ZainRizvi Apr 21, 2025
2fb1326
Add dates to pages (#151602)
svekars Apr 21, 2025
1a6effc
[torch] Expose PCI info from CUDA device (#151672)
efiks Apr 21, 2025
191b023
Added to docs for out_dtype arg in torch gemms (#151704)
PaulZhang12 Apr 18, 2025
02cecd1
[inductor][test] Skip triton tests for MPS as well, also change reaso…
henrylhtsang Apr 18, 2025
1f0d764
stage 2 of depreate silent fallback of tuning gemm (#148622)
henrylhtsang Apr 18, 2025
352019b
[BE]: Better cleanup optimized code from #151474 (#151794)
Skylion007 Apr 21, 2025
25a1185
[symmem] Add some code comments to rendezvous code (#151716)
fduwjj Apr 21, 2025
c312d8c
[Dynamo] Clean up old torch function flag (#149711)
mlazos Apr 21, 2025
cd1317f
[export] suggest dynamic re-export in input constraints hook (#151624)
pianpwk Apr 21, 2025
6ea2e6a
Do not do proper const fold during tensorify_python_scalars (#151494)
laithsakka Apr 21, 2025
79a9447
FlexAttention add decorator for large test cases (#151459)
drisspg Apr 21, 2025
efdcc98
Back out "Do not propagate real tensor in extern kernel" (#151813)
yushangdi Apr 21, 2025
01f1cc4
Rename register_fake_profile to unsafe_generate_fake_kernels (#151797)
angelayi Apr 21, 2025
4d78e19
reroute index to fast implementation for indexing on 0th dimension (#…
ngimel Apr 21, 2025
99aeee2
[Inductor] Add Additional Configs for persistent+TMA version of Trito…
NikhilAPatel Apr 21, 2025
a35e73b
[c10] add #pragma once to leftright (#151710)
dolpm Apr 21, 2025
b3b1616
Add explict type info in the try-catch for dynamo logging (#151733)
houseroad Apr 21, 2025
80a3877
[easy] Fix test_dynamo_timed (#151816)
masnesral Apr 21, 2025
a02eae8
[dynamic shapes] guard_or_false for _reshape_view_helper, utils._infe…
pianpwk Apr 22, 2025
40cf49d
Revert "[Intel GPU] Allow XPU backend in Depthwise_conv2d&3d operator…
pytorchmergebot Apr 22, 2025
a4fdae5
Lift guard checking logic to AOTAutogradCache (#151563)
jamesjwu Apr 21, 2025
14e3ffb
Deprecate host allocator legacy APIs (#151437)
guangyey Apr 18, 2025
b7a7741
Non-deterministic alert in histc_cuda for floating types only (#151701)
amjames Apr 21, 2025
edba20b
[logging] Fix duration logging for dynamo_compile (#151749)
masnesral Apr 19, 2025
529f698
[logging] Put "everything" WaitCounters in dynamo_timed (#151757)
masnesral Apr 20, 2025
29811f6
[Inductor][FlexAttention] fix `vars_and_sizes` divisor error (#151634)
BoyuanFeng Apr 22, 2025
6f32712
[MKLDNN] Check that strides are positive (#151848)
malfet Apr 21, 2025
95abc0f
[c10d][fr] Fix another bug when we should continue when the op list i…
fduwjj Apr 22, 2025
0ff302e
Revert "reroute index to fast implementation for indexing on 0th dime…
pytorchmergebot Apr 22, 2025
e76c0b1
Revert "[dynamic shapes] guard_or_false for _reshape_view_helper, uti…
pytorchmergebot Apr 22, 2025
4a643af
[Hierarchical Compile] Fix small bug (#151293)
mlazos Apr 21, 2025
283884b
[Hierarchical Compile] Handle autocast ctx manager (#151294)
mlazos Apr 21, 2025
a09a3f4
[Hierarchical compile] Ensure output nodes are sorted last (#151295)
mlazos Apr 21, 2025
dfdf731
Do not generate long log messaged for suppressed data dependent error…
laithsakka Apr 21, 2025
73d9589
Do not log exception when recording is disabled or already recording …
laithsakka Apr 21, 2025
ccd0035
Log information about suppressed data dependent errors (#151041)
laithsakka Apr 21, 2025
3aeeb77
[Dynamo][Easy] Remove unreachable code (#151739)
shink Apr 22, 2025
159e2f9
[dynamo][ci] Fix recently broken test (#151877)
anijain2305 Apr 22, 2025
d778c92
[Metal][BE] Move atomic ops to c10/metal/atomic.h (#151868)
malfet Apr 22, 2025
c729f7d
[provenance_tracking][reland] Fix UT error and re-land `ExternKernel`…
YUNQIUGUO Apr 22, 2025
ed0d2eb
Revert "Non-deterministic alert in histc_cuda for floating types only…
pytorchmergebot Apr 22, 2025
f072bf2
Revert "faster gather implementation (#151490)"
pytorchmergebot Apr 22, 2025
4504910
Revert "[ez] Make relaxed constraint error message more user friendly…
pytorchmergebot Apr 22, 2025
3804aed
Revert "[Inductor] Add Additional Configs for persistent+TMA version …
pytorchmergebot Apr 22, 2025
5d316ce
Add device check for inputs (#151828)
yushangdi Apr 22, 2025
5fc1eb8
Add OIDC permissions to bazel workflow (#151456)
zxiiro Apr 22, 2025
06a3c3c
[Optimus][Observability] Improve tlparse logging (#151635)
mengluy0125 Apr 22, 2025
264e8fb
More fix for aot_export_module name collision during unlifting (#151684)
yushangdi Apr 22, 2025
2c27597
Infra for handling builtin ops (min, max, math.pow) (#151348)
tugsbayasgalan Apr 18, 2025
834a017
Optimize register_full_backward_hook description when all input no gr…
zeshengzong Apr 22, 2025
4bf0956
[EZ/Profiler] Update Submodule (#151843)
sraikund16 Apr 22, 2025
fa0f13b
Fix doc requirements install error (#151787)
zeshengzong Apr 22, 2025
982062d
Cache the value of torch_key in subproc (#151057)
oulgen Apr 11, 2025
fbd2952
[MPS] Move ops modifiers to testing utils so other tests can reuse (#…
qqaatw Apr 22, 2025
337caac
Use more efficient mask to index computation (#151372)
aartbik Apr 22, 2025
69ee6a9
[Sana][HybridCache] Fix bug in detect_attr_assignment (#151824)
tugsbayasgalan Apr 22, 2025
6cd1741
[ONNX] Update decomposition logic to loop over onnx registry (#151826)
titaiwangms Apr 22, 2025
8ca7953
[cutlass backend] delay construction of cutlass presets to when calle…
henrylhtsang Apr 22, 2025
d0d4e99
[associative_scan] Fixes for assoc_scan testcases (#149988)
bohnstingl Apr 22, 2025
0bb9b89
Revert "[compile][compile time traces] Add more dynamo traces (#151357)"
pytorchmergebot Apr 22, 2025
7e4b89a
fix spammy library deinit errors when user passes an invalid TORCH_LO…
bdhirsh Apr 22, 2025
596296f
[standalone_compile] Dynamic shape handling (#151788)
zou3519 Apr 22, 2025
a48ccf0
[Inductor] move alignment tests to a separate file (#151841)
shunting314 Apr 21, 2025
3380a46
Fix DTensorTestBase to barrier with device ids (#150896)
wanchaol Apr 21, 2025
2f74cff
Remove `reinterpret_cast`s with undefined behavior from stable/librar…
swolchok Apr 22, 2025
aaf71a4
Revert "Log information about suppressed data dependent errors (#1510…
pytorchmergebot Apr 22, 2025
459c62e
Revert "Do not log exception when recording is disabled or already re…
pytorchmergebot Apr 22, 2025
bc6c0bc
Revert "Do not generate long log messaged for suppressed data depende…
pytorchmergebot Apr 22, 2025
835413b
Revert "[Optimus][Observability] Improve tlparse logging (#151635)"
pytorchmergebot Apr 22, 2025
017a6bd
add min/max_seqlen to non_differentiable (#151750)
sumantro93 Apr 22, 2025
e05ac9b
Use folder tagged docker images for binary builds (#151706)
clee2000 Apr 22, 2025
b8f4dc5
[ROCm] opportunistic fastatomics for ReduceAdd operations for MI300 G…
pragupta Apr 22, 2025
3aecf2d
[MPS] Extend index_put to half precision floats (#151869)
malfet Apr 22, 2025
2f851ac
[MPSInductor] Implement `atomic_add` store mode (#151871)
malfet Apr 22, 2025
c0b70f9
[Testing] Enable `test_mutations_loop_fusion_mps` (#151872)
malfet Apr 22, 2025
43de9b7
Remove mention of magma-cuda in readme.md, refactor magma_conda insta…
atalman Apr 22, 2025
6a1b820
[export] Enable symint inputs for AdditionalInputs and ShapesCollecti…
angelayi Apr 22, 2025
a7ccd96
logging start of torch elastic workers. (#150849)
aschhabra Apr 22, 2025
f4ac9a1
[fx] Filter stacktrace (#151029)
angelayi Apr 22, 2025
c98340e
[autodeps2] Replace third-party/pyyaml with third-party/pypi/pyyaml (…
kkolur76 Apr 22, 2025
4f8adde
Speed up OperatorEntry construction by avoiding updateDispatchTableFu…
swolchok Apr 22, 2025
cd576fd
[torch][fx] Add support for EXIR dialect overload ops in normalize_fu…
dulinriley Apr 22, 2025
aa61707
Fix extra heap allocation in Source constructor (#151800)
swolchok Apr 22, 2025
334aab0
Updates NCCLConfig with QOS variable (#151821)
syed-ahmed Apr 22, 2025
72f711e
Revert "[inductor] Change minimum number of SMs to 60 to let Ada use …
pytorchmergebot Apr 23, 2025
49b7ffb
[MPS] Implement _print_Trunc_to_Int (#151964)
dcci Apr 23, 2025
015b526
[MPSInductor] Warn-cast double as floats (#151963)
malfet Apr 23, 2025
68a7501
[Inductor][CPP] Fix Codegen Issue when Parallel Reduction under the v…
leslie-fang-intel Apr 22, 2025
74074fe
[inductor] handle offset in ReinterpretView for alignment (#151859)
shunting314 Apr 22, 2025
13339ce
[dynamic shapes] bound_sympy for size-oblivious min/max reasoning (#1…
pianpwk Apr 23, 2025
cd021d0
Fix circular imports (#151939)
oulgen Apr 22, 2025
2530593
[Cutlass] Implement EVT example tensor creation (#150904)
mlazos Apr 22, 2025
78bbb46
Use /var/tmp instead of /tmp for torch cache directory on fbcode (#15…
oulgen Apr 23, 2025
f9bdfe9
[MegaCache] Return None on no compilation (#151921)
oulgen Apr 22, 2025
cc793e8
[StandaloneCompile] Autotune at compile time (#151922)
oulgen Apr 22, 2025
b37fa20
[FlexAttention] Fix device test instantation (#151846)
drisspg Apr 23, 2025
54f7361
[dynamic shapes] guard_or_false for _reshape_view_helper, utils._infe…
pianpwk Apr 23, 2025
b247e5d
[Inductor][CPU] Add GEMM templates for _weight_int4pack_mm_for_cpu wi…
Xia-Weiwen Apr 22, 2025
097faa9
[audio hash update] update the pinned audio hash (#151729)
pytorchupdatebot Apr 23, 2025
7c97720
[dynamic shapes] rewrite expand with guard_or_false (#150236)
pianpwk Apr 23, 2025
ee81fe4
Support regexes in dynamic sources allowlist (#151766)
bobrenjc93 Apr 23, 2025
62b5649
[Inductor] Test ND block pointers with dynamic shapes (#151646)
blaine-rister Apr 23, 2025
5b9df57
[dynamo] context manager/decorator for dynamo config patching during …
williamwen42 Apr 22, 2025
6d28d61
[CI] Remove protobuf from docker image (#151933)
clee2000 Apr 23, 2025
b32b002
[BE] Replace `std::runtime_error` with `TORCH_CHECK` [1/N] (#151880)
shink Apr 23, 2025
21b0ef5
[Easy] Remove redundant code (#151883)
FFFrog Apr 22, 2025
7310049
Revert "[FlexAttention] Fix device test instantation (#151846)"
pytorchmergebot Apr 23, 2025
dcc32ff
[CUDA][cuBLAS][cuBLASLt] Opt-in unified cuBLAS + cuBLASLt workspaces …
eqy Apr 23, 2025
e31e2d2
Turn on static cuda launcher in OSS (#151691)
jamesjwu Apr 22, 2025
0511467
[ROCm] AtomicAdd specialization on AMD for fp64. (#151724)
naromero77amd Apr 23, 2025
a560216
Update description for torch.random.fork_rng (#151881)
FFFrog Apr 23, 2025
9422e24
[MPS] Fix test_neg_index_mps (#151966)
dcci Apr 23, 2025
2ab752d
Make `torch.jit.Error` inherit from Exception (#151947)
alanhdu Apr 23, 2025
348272e
Revert "[invoke_subgraph][fake tensor] Add finalizer on subgraph inst…
pytorchmergebot Apr 23, 2025
9344da8
Revert "[fake tensor cache] Support index with non bool/int8 indices …
pytorchmergebot Apr 23, 2025
5f63789
[torchbind] fix error message when attr is a real tensor. (#151944)
ydwu4 Apr 23, 2025
aa285e6
Revert "[cutlass backend] delay construction of cutlass presets to wh…
pytorchmergebot Apr 23, 2025
3c1a17a
[Dynamo] Use LazyVariableTracker in base VT (#151847)
anijain2305 Apr 22, 2025
5acc3e2
[Inductor] Add Additional Configs for persistent+TMA version of Trito…
NikhilAPatel Apr 22, 2025
69e41ce
move find_hop_schema into _higher_order_ops/schema.py (#151147)
ydwu4 Apr 22, 2025
99ae7d4
Reland fast gather and index implementation (#151917)
ngimel Apr 23, 2025
c1f51cf
[map] defer importing AOTConfig and create_joint dependency (#151479)
ydwu4 Apr 23, 2025
98c53d8
Revert "[MPS] Fix test_neg_index_mps (#151966)"
pytorchmergebot Apr 23, 2025
5623285
Revert "Turn on static cuda launcher in OSS (#151691)"
pytorchmergebot Apr 23, 2025
dccb7a9
[pytorch] use a mutex in initialize_torch_libraries (#151938)
rmaz Apr 23, 2025
bd19173
[cutlass backend] Stop using GenerateSM80 for SM90 and SM100 (#150781)
henrylhtsang Apr 7, 2025
47ad351
[DRAFT] INitial version of sticky export (#151047)
tugsbayasgalan Apr 23, 2025
fd3d339
[dynamic shapes] be less aggressive with runtime assert CSE for bound…
pianpwk Apr 23, 2025
4d2d833
[CI] Update sleef submodule to v3.8 (#151955)
malfet Apr 23, 2025
8172397
Revert "Update torch-xpu-ops commit pin (#150827)"
pytorchmergebot Apr 24, 2025
f2cfeb2
[Environment Variable][7/N] Use thread-safe getenv functions (#140211)
cyyever Apr 24, 2025
2455ded
[FlexAttention] Fix device test instantation (#151846)
drisspg Apr 23, 2025
4e1d433
[FlexAttention] Remove Old Constraint on lastdim strides (#151959)
drisspg Apr 23, 2025
f39a1a4
Fix typos in meta.rst (#151979)
Stonesjtu Apr 24, 2025
c91acad
[Easy] Add more check for elapsedTime of torch.xxx.Event and torch.Ev…
FFFrog Apr 23, 2025
4ac2ee5
[sigmoid] memory planner C10 deps (#151275)
dolpm Apr 24, 2025
d703f06
[MPS] Adjust test_sum_dtypes so it can run on MPS. (#152064)
dcci Apr 24, 2025
2ee8de5
[dynamic shapes] user-code friendly statically_known_true, has_static…
pianpwk Apr 24, 2025
e2cf60f
[MPS] Fix test_neg_index_mps (#151966)
dcci Apr 24, 2025
43f1b60
Revert "[MPS] Adjust test_sum_dtypes so it can run on MPS. (#152064)"
pytorchmergebot Apr 24, 2025
5de92e6
Don't copy DynamicType argument to DynamicType::create (#151801)
swolchok Apr 24, 2025
fabbcdd
Create and use DynamicTypes for check in DispatchKeyExtractor::makeBi…
swolchok Apr 24, 2025
0559741
Fix return type of TypeFactoryBase<c10::DynamicType>::get (#151803)
swolchok Apr 24, 2025
89a85d0
Add & use Token::text_view() (which returns a string_view unlike text…
swolchok Apr 24, 2025
b237211
Fix easy missing moves in function_schema_parser (#151805)
swolchok Apr 24, 2025
68454b9
Fix a missed c10::TypeFactory::create spot in function_schema_parser …
swolchok Apr 24, 2025
76cc379
Fix missing moves in SchemaTypeParser::parseFakeAndRealType (#151807)
swolchok Apr 24, 2025
2a58d2a
StringCordView: make iterator fast when there is only one piece (#151…
swolchok Apr 24, 2025
2102b3b
[FSDP1] print fqns when debug FlatParamHandle (#151336)
weifengpy Apr 15, 2025
a389835
[MPS] Adjust test_sum_dtypes so it can run on MPS. (#152064)
dcci Apr 24, 2025
5e9bdc9
[MPS] layernorm forward kernel (#152010)
Isalia20 Apr 24, 2025
2ea8653
[vec128] Fix fmsub NEON defintion (#152075)
malfet Apr 24, 2025
78953ee
[pytorch] reland of [cutlass backend] delay construction of cutlass p…
henrylhtsang Apr 24, 2025
5b368fa
Add torch.cuda._compile_kernel() (#151484)
msaroufim Apr 24, 2025
5e320ee
[BE] follow autoformating and linter (#151507)
XilunWu Apr 24, 2025
3278ddd
[invoke_subgraph] Compile time traces (#151409)
anijain2305 Apr 24, 2025
41285f2
[invoke_subgraph][fake tensor] Add finalizer on subgraph instead of t…
anijain2305 Apr 24, 2025
1d73b64
[fake tensor cache] Support index with non bool/int8 indices (#151477)
anijain2305 Apr 24, 2025
d743a7b
[invoke_subgraph] Cache fake tensor if no unbacked symint in the outp…
anijain2305 Apr 24, 2025
3a170a8
Revert "[Cutlass] Implement EVT example tensor creation (#150904)"
pytorchmergebot Apr 24, 2025
56e67ba
Move verbose warning to warning_once (#152044)
wconstab Apr 23, 2025
0eb554e
Better error msg for too big to optimize (#151855)
yushangdi Apr 24, 2025
9c1bc9c
[fake tensor] Cache None, integer and SymInts in the output (#151961)
anijain2305 Apr 24, 2025
402d19c
add basic unit tests and noop config (#152036)
Lucaskabela Apr 23, 2025
03970df
Add functionality for installing free variables (#151134)
Lucaskabela Apr 23, 2025
81c4369
[dynamo] Add guard serialization for tensor matches. (#151318)
zhxchen17 Apr 23, 2025
ff075d0
Update docs dependencies for local build (#151796)
svekars Apr 24, 2025
b11c9e1
[CI][docker] Use install_cusparselt when possible in docker image (#…
clee2000 Apr 24, 2025
b1d055f
Revert "[dynamo] Add guard serialization for tensor matches. (#151318)"
pytorchmergebot Apr 24, 2025
6efc572
[CUDA][CPU] Bump system memory requirement for `test_cross_entropy_la…
eqy Apr 24, 2025
24bda01
Pin theme to a branch (#152046)
svekars Apr 24, 2025
92f125e
[export] improve error message for deserializing custom triton op (#1…
ydwu4 Apr 23, 2025
bd09d87
add Out Notes (#151306)
ILCSFNO Apr 24, 2025
dccc415
Include other accelerators in capturable docstr for optimizers (#149770)
janeyx99 Apr 24, 2025
8a9c66b
Improve stable library apis per Scott's feedback (#152040)
janeyx99 Apr 24, 2025
d78d2af
[CUDA][TF32] Account for TF32 in `test_corrcoef` (#151830)
eqy Apr 24, 2025
6ced5e6
Python 3.11 and 3.13 support for Windows Arm64 (#152109)
iremyux Apr 24, 2025
0413358
Non-deterministic alert in histc_cuda for floating types only (#151701)
amjames Apr 24, 2025
fc6e37c
[Inductor] Record Triton’s Base32 Cache Key in .best_config for Debug…
fulvius31 Apr 24, 2025
2089b22
[xpu] set aot device flags in cpp_extension (#149459)
jingxu10 Apr 24, 2025
d70490e
[Inductor][CPP] Optimize the epilogue for int8 GEMM Template (#152000)
leslie-fang-intel Apr 24, 2025
75c71ab
[Break XPU] generalize newly introduced device bias code in Inductor …
etaf Apr 24, 2025
8313bc2
Revert "Add OIDC permissions to bazel workflow (#151456)"
pytorchmergebot Apr 25, 2025
7f28c03
Adding fbgemm to whitelist (#152079)
jimone1 Apr 25, 2025
1a6d50d
Reducer: add check on received data to avoid segfault (#152143)
d4l3k Apr 25, 2025
e2c7ae5
[ONNX] Add group_norm support from opset 21 (#152138)
justinchuby Apr 25, 2025
dda0c95
[audio hash update] update the pinned audio hash (#152149)
pytorchupdatebot Apr 25, 2025
a936d59
[Cutlass] Implement EVT example tensor creation (#150904)
mlazos Apr 25, 2025
6120cc8
[executorch hash update] update the pinned executorch hash (#151728)
pytorchupdatebot Apr 25, 2025
7b9e7b6
Generate test reports for pytest when option is given
Flamefire Feb 6, 2025
a0b2972
Use correct value for passing --save-xml to subtest
Flamefire Feb 25, 2025
Note: the diff is too large to display in full; only the first 3000 changed files are loaded.
2 changes: 1 addition & 1 deletion .ci/aarch64_linux/aarch64_ci_build.sh
@@ -20,7 +20,7 @@ cd /
# on the mounted pytorch repo
git config --global --add safe.directory /pytorch
pip install -r /pytorch/requirements.txt
pip install auditwheel
pip install auditwheel==6.2.0
if [ "$DESIRED_CUDA" = "cpu" ]; then
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
20 changes: 17 additions & 3 deletions .ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -39,7 +39,7 @@ def build_ArmComputeLibrary() -> None:
"clone",
"https://github.com/ARM-software/ComputeLibrary.git",
"-b",
"v24.09",
"v25.02",
"--depth",
"1",
"--shallow-submodules",
@@ -99,10 +99,14 @@ def update_wheel(wheel_path, desired_cuda) -> None:
if "126" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
"/usr/local/cuda/lib64/libcufile.so.0",
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
]
elif "128" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
"/usr/local/cuda/lib64/libcufile.so.0",
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
]
else:
libs_to_copy += [
@@ -132,6 +136,9 @@ def complete_wheel(folder: str) -> str:
"""
wheel_name = list_dir(f"/{folder}/dist")[0]

# Please note for cuda we don't run auditwheel since we use custom script to package
# the cuda dependencies to the wheel file using update_wheel() method.
# However we need to make sure filename reflects the correct Manylinux platform.
if "pytorch" in folder and not enable_cuda:
print("Repairing Wheel with AuditWheel")
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
@@ -143,7 +150,14 @@
f"/{folder}/dist/{repaired_wheel_name}",
)
else:
repaired_wheel_name = wheel_name
repaired_wheel_name = wheel_name.replace(
"linux_aarch64", "manylinux_2_28_aarch64"
)
print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
os.rename(
f"/{folder}/dist/{wheel_name}",
f"/{folder}/dist/{repaired_wheel_name}",
)

print(f"Copying {repaired_wheel_name} to artifacts")
shutil.copy2(
@@ -204,7 +218,7 @@ def parse_arguments():
else:
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
elif branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

if enable_mkldnn:
build_ArmComputeLibrary()
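The comment added in complete_wheel() above notes that CUDA wheels skip auditwheel (their CUDA dependencies are packaged by update_wheel()) and only need the platform tag in the filename changed. A minimal, self-contained sketch of that renaming step; the directory and wheel filename below are hypothetical, for illustration only:

```python
import os


def retag_cuda_wheel(dist_dir: str, wheel_name: str) -> str:
    # CUDA wheels are packaged by a custom script instead of auditwheel,
    # so only the platform tag in the filename needs to change.
    repaired_wheel_name = wheel_name.replace(
        "linux_aarch64", "manylinux_2_28_aarch64"
    )
    print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
    os.rename(
        os.path.join(dist_dir, wheel_name),
        os.path.join(dist_dir, repaired_wheel_name),
    )
    return repaired_wheel_name


# Hypothetical usage:
# retag_cuda_wheel("/pytorch/dist", "torch-2.8.0.dev20250425-cp311-cp311-linux_aarch64.whl")
# -> "torch-2.8.0.dev20250425-cp311-cp311-manylinux_2_28_aarch64.whl"
```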
20 changes: 3 additions & 17 deletions .ci/aarch64_linux/build_aarch64_wheel.py
@@ -19,13 +19,11 @@

# AMI images for us-east-1, change the following based on your ~/.aws/config
os_amis = {
"ubuntu18_04": "ami-078eece1d8119409f", # login_name: ubuntu
"ubuntu20_04": "ami-052eac90edaa9d08f", # login_name: ubuntu
"ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu
"redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user
}

ubuntu18_04_ami = os_amis["ubuntu18_04"]
ubuntu20_04_ami = os_amis["ubuntu20_04"]


@@ -329,7 +327,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
]
)
host.run_cmd(
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}"
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
)

host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
@@ -659,18 +657,6 @@ def configure_system(
"sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
)
host.run_cmd("pip3 install dataclasses typing-extensions")
# Install and switch to gcc-8 on Ubuntu-18.04
if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8":
host.run_cmd("sudo apt-get install -y g++-8 gfortran-8")
host.run_cmd(
"sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100"
)
host.run_cmd(
"sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100"
)
host.run_cmd(
"sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100"
)
if not use_conda:
print("Installing Cython + numpy from PyPy")
host.run_cmd("sudo pip3 install Cython")
@@ -761,7 +747,7 @@ def start_build(
version = host.check_output("cat pytorch/version.txt").strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
if branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
if enable_mkldnn:
@@ -1026,7 +1012,7 @@ def parse_arguments():
install_condaforge_python(host, args.python_version)
sys.exit(0)

python_version = args.python_version if args.python_version is not None else "3.8"
python_version = args.python_version if args.python_version is not None else "3.9"

if args.use_torch_from_pypi:
configure_system(host, compiler=args.compiler, python_version=python_version)
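The PYTORCH_BUILD_VERSION expression reformatted in the hunk above derives the wheel version from a release branch name by dropping the leading "v" and everything from the first "-". A small illustrative sketch, assuming an rc-style branch name (the example name is hypothetical):

```python
def version_from_branch(branch: str) -> str:
    # "v2.7.0-rc3" -> "2.7.0"; assumes the branch contains a "-" suffix,
    # as release-candidate branches like v1.x/v2.x do.
    return branch[1 : branch.find("-")]


assert version_from_branch("v2.7.0-rc3") == "2.7.0"
```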
2 changes: 1 addition & 1 deletion .ci/docker/README.md
@@ -34,5 +34,5 @@ See `build.sh` for valid build environments (it's the giant switch).
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest

# Set flags (see build.sh) and build image
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
```
3 changes: 3 additions & 0 deletions .ci/docker/almalinux/Dockerfile
@@ -44,6 +44,9 @@ FROM base as cuda
ARG CUDA_VERSION=12.4
RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
COPY ./common/install_cusparselt.sh install_cusparselt.sh
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
# Preserve CUDA_VERSION for the builds
ENV CUDA_VERSION=${CUDA_VERSION}
94 changes: 36 additions & 58 deletions .ci/docker/almalinux/build.sh
@@ -1,82 +1,60 @@
#!/usr/bin/env bash
# Script used only in CD pipeline

set -eou pipefail
set -exou pipefail

image="$1"
shift

if [ -z "${image}" ]; then
echo "Usage: $0 IMAGE"
echo "Usage: $0 IMAGENAME:ARCHTAG"
exit 1
fi

DOCKER_IMAGE_NAME="pytorch/${image}"
# Go from imagename:tag to tag
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

CUDA_VERSION=""
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
# extract cuda version from image name and tag. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
fi

export DOCKER_BUILDKIT=1
TOPDIR=$(git rev-parse --show-toplevel)

CUDA_VERSION=${CUDA_VERSION:-12.1}

case ${CUDA_VERSION} in
case ${DOCKER_TAG_PREFIX} in
cpu)
BASE_TARGET=base
DOCKER_TAG=cpu
;;
all)
BASE_TARGET=all_cuda
DOCKER_TAG=latest
cuda*)
BASE_TARGET=cuda${CUDA_VERSION}
;;
*)
BASE_TARGET=cuda${CUDA_VERSION}
DOCKER_TAG=cuda${CUDA_VERSION}
echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
exit 1
;;
esac

# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker

(
set -x
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker

docker build \
--target final \
--progress plain \
--build-arg "BASE_TARGET=${BASE_TARGET}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=11" \
-t ${DOCKER_IMAGE_NAME} \
$@ \
-f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
${TOPDIR}/.ci/docker/
)

if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
export DOCKER_BUILDKIT=1
TOPDIR=$(git rev-parse --show-toplevel)
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

docker build \
--target final \
--progress plain \
--build-arg "BASE_TARGET=${BASE_TARGET}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=11" \
-t ${tmp_tag} \
$@ \
-f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
${TOPDIR}/.ci/docker/

if [ -n "${CUDA_VERSION}" ]; then
# Test that we're using the right CUDA compiler
(
set -x
docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
)
fi

GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
if [[ "${WITH_PUSH:-}" == true ]]; then
(
set -x
docker push "${DOCKER_IMAGE_NAME}"
if [[ -n ${GITHUB_REF} ]]; then
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
docker push "${DOCKER_IMAGE_BRANCH_TAG}"
docker push "${DOCKER_IMAGE_SHA_TAG}"
fi
)
docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
fi
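The rewritten .ci/docker/almalinux/build.sh above now derives BASE_TARGET and CUDA_VERSION from the image tag passed as IMAGENAME:ARCHTAG instead of from an environment variable. A sketch of just that parsing logic, re-expressed in Python purely for illustration (not part of the change itself):

```python
def parse_docker_tag(image: str) -> tuple[str, str]:
    # Mirrors the tag handling in build.sh; assumes an IMAGENAME:ARCHTAG argument.
    # Go from imagename:tag to tag, e.g. "manylinux2_28-builder:cuda12.8" -> "cuda12.8".
    tag_prefix = image.split(":", 1)[1]
    if tag_prefix == "cpu":
        return "base", ""
    if tag_prefix.startswith("cuda"):
        # "cuda12.8" -> "12.8"
        cuda_version = tag_prefix[len("cuda"):]
        return f"cuda{cuda_version}", cuda_version
    raise ValueError(f"Unknown docker tag {tag_prefix}")


assert parse_docker_tag("manylinux2_28-builder:cuda12.8") == ("cuda12.8", "12.8")
assert parse_docker_tag("almalinux-builder:cpu") == ("base", "")
```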