From 483132f2e82b4ba2818d07301b77b178b53c4b14 Mon Sep 17 00:00:00 2001
From: chuanqiw
Date: Fri, 21 Jun 2024 11:35:15 -0700
Subject: [PATCH 1/2] Add xpu manylinux 2.28 wheel build

---
 .github/workflows/build-manywheel-images.yml |  15 ++
 common/install_xpu.sh                        | 156 +++++++++++++++++++
 manywheel/Dockerfile_2_28                    |   8 +
 manywheel/build.sh                           |   3 +
 manywheel/build_all_docker.sh                |   2 +
 manywheel/build_docker.sh                    |   7 +
 manywheel/build_xpu.sh                       |  61 ++++++++
 7 files changed, 252 insertions(+)
 create mode 100644 common/install_xpu.sh
 create mode 100755 manywheel/build_xpu.sh

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index 243e302f4..b90391a98 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -193,3 +193,18 @@ jobs:
     - name: Build Docker Image
       run: |
         manywheel/build_docker.sh
+  build-docker-xpu:
+    runs-on: linux.12xlarge
+    env:
+      GPU_ARCH_TYPE: xpu
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
diff --git a/common/install_xpu.sh b/common/install_xpu.sh
new file mode 100644
index 000000000..c34bdd8bd
--- /dev/null
+++ b/common/install_xpu.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+set -xe
+
+# Intel® software for general purpose GPU capabilities.
+# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
+
+# Users should update to the latest version as it becomes available
+
+function install_ubuntu() {
+    . /etc/os-release
+    if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
+        echo "Ubuntu version ${VERSION_CODENAME} not supported"
+        exit
+    fi
+
+    apt-get update -y
+    apt-get install -y gpg-agent wget
+
+    # To add the online network package repository for the GPU Driver LTS releases
+    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
+        | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
+        https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}/lts/2350 unified" \
+        | tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
+    # To add the online network package repository for the Intel Support Packages
+    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+        | gpg --dearmor > /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg
+    echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] \
+        https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" \
+        | tee /etc/apt/sources.list.d/intel-for-pytorch-gpu-dev.list
+
+    # Update the packages list and repository index
+    apt-get update
+
+    # The xpu-smi packages
+    apt-get install -y flex bison xpu-smi
+    # Compute and Media Runtimes
+    apt-get install -y \
+        intel-opencl-icd intel-level-zero-gpu level-zero \
+        intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
+        libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+        libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+        mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
+    # Development Packages
+    apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
+    # Install Intel® Support Packages
+    if [ -n "$XPU_VERSION" ]; then
+        apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION}
+    else
+        apt-get install -y intel-for-pytorch-gpu-dev
+    fi
+
+    # Cleanup
+    apt-get autoclean && apt-get clean
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+function install_rhel() {
+    . /etc/os-release
+    if [[ "${ID}" == "rhel" ]]; then
+        if [[ ! " 8.6 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
+            echo "RHEL version ${VERSION_ID} not supported"
+            exit
+        fi
+    elif [[ "${ID}" == "almalinux" ]]; then
+        # Workaround for almalinux8, which is used by quay.io/pypa/manylinux_2_28_x86_64
+        VERSION_ID="8.6"
+    fi
+
+    dnf install -y 'dnf-command(config-manager)'
+    # To add the online network package repository for the GPU Driver LTS releases
+    dnf config-manager --add-repo \
+        https://repositories.intel.com/gpu/rhel/${VERSION_ID}/lts/2350/unified/intel-gpu-${VERSION_ID}.repo
+
+    # To add the online network package repository for the Intel Support Packages
+    tee > /etc/yum.repos.d/intel-for-pytorch-gpu-dev.repo << EOF
+[intel-for-pytorch-gpu-dev]
+name=Intel for Pytorch GPU dev repository
+baseurl=https://yum.repos.intel.com/intel-for-pytorch-gpu-dev
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+EOF
+
+    # The xpu-smi packages
+    dnf install -y xpu-smi
+    # Compute and Media Runtimes
+    dnf install -y \
+        intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2 \
+        level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
+        mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
+        mesa-libxatracker libvpl-tools intel-metrics-discovery \
+        intel-metrics-library intel-igc-core intel-igc-cm \
+        libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
+    # Development packages
+    dnf install -y --refresh \
+        intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
+        level-zero-devel
+    # Install Intel Support Packages
+    yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
+
+    # Cleanup
+    dnf clean all
+    rm -rf /var/cache/yum
+    rm -rf /var/lib/yum/yumdb
+    rm -rf /var/lib/yum/history
+}
+
+function install_sles() {
+    . /etc/os-release
+    VERSION_SP=${VERSION_ID//./sp}
+    if [[ ! " 15sp4 15sp5 " =~ " ${VERSION_SP} " ]]; then
" 15sp4 15sp5 " =~ " ${VERSION_SP} " ]]; then + echo "SLES version ${VERSION_ID} not supported" + exit + fi + + # To add the online network package repository for the GPU Driver LTS releases + zypper addrepo -f -r \ + https://repositories.intel.com/gpu/sles/${VERSION_SP}/lts/2350/unified/intel-gpu-${VERSION_SP}.repo + rpm --import https://repositories.intel.com/gpu/intel-graphics.key + # To add the online network network package repository for the Intel Support Packages + zypper addrepo https://yum.repos.intel.com/intel-for-pytorch-gpu-dev intel-for-pytorch-gpu-dev + rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + + # The xpu-smi packages + zypper install -y lsb-release flex bison xpu-smi + # Compute and Media Runtimes + zypper install -y intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \ + intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1 libmfx1 + # Development packages + zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel + + # Install Intel Support Packages + zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev + +} + + +# The installation depends on the base OS +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +case "$ID" in + ubuntu) + install_ubuntu + ;; + rhel|almalinux) + install_rhel + ;; + sles) + install_sles + ;; + *) + echo "Unable to determine OS..." + exit 1 + ;; +esac diff --git a/manywheel/Dockerfile_2_28 b/manywheel/Dockerfile_2_28 index f5f21bf3d..c564cacb8 100644 --- a/manywheel/Dockerfile_2_28 +++ b/manywheel/Dockerfile_2_28 @@ -142,3 +142,11 @@ RUN yum install -y cmake3 && \ ln -s /usr/bin/cmake3 /usr/bin/cmake ADD ./common/install_miopen.sh install_miopen.sh RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh + +FROM cpu_final as xpu_final +# cmake-3.28.4 from pip +RUN python3 -m pip install --upgrade pip && \ + python3 -mpip install cmake==3.28.4 +ADD ./common/install_xpu.sh install_xpu.sh +RUN bash ./install_xpu.sh && rm install_xpu.sh +RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd diff --git a/manywheel/build.sh b/manywheel/build.sh index a04d05869..4c4d51134 100755 --- a/manywheel/build.sh +++ b/manywheel/build.sh @@ -18,6 +18,9 @@ case "${GPU_ARCH_TYPE:-BLANK}" in cpu | cpu-cxx11-abi | cpu-s390x) bash "${SCRIPTPATH}/build_cpu.sh" ;; + xpu) + bash "${SCRIPTPATH}/build_xpu.sh" + ;; *) echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..." 
         exit 1
diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh
index 3e1059491..bb7f95ed8 100644
--- a/manywheel/build_all_docker.sh
+++ b/manywheel/build_all_docker.sh
@@ -22,3 +22,5 @@ for rocm_version in 6.0 6.1; do
     GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
     MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh"
 done
+
+GPU_ARCH_TYPE=xpu "${TOPDIR}/manywheel/build_docker.sh"
diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh
index 58d74e578..1e7f9ab1b 100755
--- a/manywheel/build_docker.sh
+++ b/manywheel/build_docker.sh
@@ -93,6 +93,13 @@ case ${GPU_ARCH_TYPE} in
         fi
         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
         ;;
+    xpu)
+        TARGET=xpu_final
+        DOCKER_TAG=xpu
+        GPU_IMAGE=amd64/almalinux:8
+        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
+        MANY_LINUX_VERSION="2_28"
+        ;;
     *)
         echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
         exit 1
diff --git a/manywheel/build_xpu.sh b/manywheel/build_xpu.sh
new file mode 100755
index 000000000..acc4b1d06
--- /dev/null
+++ b/manywheel/build_xpu.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+set -ex
+
+export TH_BINARY_BUILD=1
+export USE_CUDA=0
+
+# Keep an array of cmake variables to add to
+if [[ -z "$CMAKE_ARGS" ]]; then
+    # These are passed to tools/build_pytorch_libs.sh::build()
+    CMAKE_ARGS=()
+fi
+if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
+    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
+    EXTRA_CAFFE2_CMAKE_FLAGS=()
+fi
+
+WHEELHOUSE_DIR="wheelhousexpu"
+LIBTORCH_HOUSE_DIR="libtorch_housexpu"
+if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
+    if [[ -z "$BUILD_PYTHONLESS" ]]; then
+        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousexpu"
+    else
+        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housexpu"
+    fi
+fi
+mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
+
+OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
+if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
+    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
+elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
+    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
+elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
+    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
+elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
+    if [[ "$(uname -m)" == "s390x" ]]; then
+        LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
+    else
+        LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
+    fi
+fi
+
+DEPS_LIST=(
+    "$LIBGOMP_PATH"
+)
+
+DEPS_SONAME=(
+    "libgomp.so.1"
+)
+
+rm -rf /usr/local/cuda*
+source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+if [[ -z "$BUILD_PYTHONLESS" ]]; then
+    BUILD_SCRIPT=build_common.sh
+else
+    BUILD_SCRIPT=build_libtorch.sh
+fi
+source ${SOURCE_DIR}/${BUILD_SCRIPT}

From e7e4a1e513023b8f1a29b40bab233a9f37313a1f Mon Sep 17 00:00:00 2001
From: chuanqiw
Date: Tue, 25 Jun 2024 08:59:04 -0700
Subject: [PATCH 2/2] reuse build_cpu.sh for xpu build

---
 manywheel/build.sh     |  5 +---
 manywheel/build_cpu.sh | 16 ++++++++++---
 manywheel/build_xpu.sh | 61 ------------------------------------------
 3 files changed, 13 insertions(+), 69 deletions(-)
 delete mode 100755 manywheel/build_xpu.sh

diff --git a/manywheel/build.sh b/manywheel/build.sh
index 4c4d51134..e79083ee0 100755
--- a/manywheel/build.sh
+++ b/manywheel/build.sh
@@ -15,12 +15,9 @@ case "${GPU_ARCH_TYPE:-BLANK}" in
     rocm)
         bash "${SCRIPTPATH}/build_rocm.sh"
"${SCRIPTPATH}/build_rocm.sh" ;; - cpu | cpu-cxx11-abi | cpu-s390x) + cpu | cpu-cxx11-abi | cpu-s390x | xpu) bash "${SCRIPTPATH}/build_cpu.sh" ;; - xpu) - bash "${SCRIPTPATH}/build_xpu.sh" - ;; *) echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..." exit 1 diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh index 9d982bd30..3bb2affea 100755 --- a/manywheel/build_cpu.sh +++ b/manywheel/build_cpu.sh @@ -2,6 +2,8 @@ set -ex +GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu} + export TH_BINARY_BUILD=1 export USE_CUDA=0 @@ -15,13 +17,19 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then EXTRA_CAFFE2_CMAKE_FLAGS=() fi -WHEELHOUSE_DIR="wheelhousecpu" -LIBTORCH_HOUSE_DIR="libtorch_housecpu" +DIR_SUFFIX=cpu +if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then + DIR_SUFFIX=xpu + source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh +fi + +WHEELHOUSE_DIR="wheelhouse$DIR_SUFFIX" +LIBTORCH_HOUSE_DIR="libtorch_house$DIR_SUFFIX" if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then if [[ -z "$BUILD_PYTHONLESS" ]]; then - PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousecpu" + PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$DIR_SUFFIX" else - PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housecpu" + PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$DIR_SUFFIX" fi fi mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true diff --git a/manywheel/build_xpu.sh b/manywheel/build_xpu.sh deleted file mode 100755 index acc4b1d06..000000000 --- a/manywheel/build_xpu.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -export TH_BINARY_BUILD=1 -export USE_CUDA=0 - -# Keep an array of cmake variables to add to -if [[ -z "$CMAKE_ARGS" ]]; then - # These are passed to tools/build_pytorch_libs.sh::build() - CMAKE_ARGS=() -fi -if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then - # These are passed to tools/build_pytorch_libs.sh::build_caffe2() - EXTRA_CAFFE2_CMAKE_FLAGS=() -fi - -WHEELHOUSE_DIR="wheelhousexpu" -LIBTORCH_HOUSE_DIR="libtorch_housexpu" -if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - if [[ -z "$BUILD_PYTHONLESS" ]]; then - PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousexpu" - else - PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housexpu" - fi -fi -mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - -OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) -if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then - LIBGOMP_PATH="/usr/lib64/libgomp.so.1" -elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then - LIBGOMP_PATH="/usr/lib64/libgomp.so.1" -elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then - LIBGOMP_PATH="/usr/lib64/libgomp.so.1" -elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then - if [[ "$(uname -m)" == "s390x" ]]; then - LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1" - else - LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" - fi -fi - -DEPS_LIST=( - "$LIBGOMP_PATH" -) - -DEPS_SONAME=( - "libgomp.so.1" -) - -rm -rf /usr/local/cuda* -source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -if [[ -z "$BUILD_PYTHONLESS" ]]; then - BUILD_SCRIPT=build_common.sh -else - BUILD_SCRIPT=build_libtorch.sh -fi -source ${SOURCE_DIR}/${BUILD_SCRIPT}