diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index 5f74d0592dbd8..73e3f09394b72 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -84,13 +84,27 @@ fi
 # CMake 3.18 is needed to support CUDA17 language variant
 CMAKE_VERSION=3.18.5
 
-_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
-_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea
+_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
+_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
 
 # It's annoying to rename jobs every time you want to rewrite a
 # configuration, so we hardcode everything here rather than do it
 # from scratch
 case "$image" in
+  pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
+    CUDA_VERSION=12.4.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    TRITON=yes
+    ;;
   pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
     CUDA_VERSION=12.1.1
     CUDNN_VERSION=8
@@ -105,6 +119,21 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
+  pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks)
+    CUDA_VERSION=12.4.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    TRITON=yes
+    INDUCTOR_BENCHMARKS=yes
+    ;;
   pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks)
     CUDA_VERSION=12.1.1
     CUDNN_VERSION=8
diff --git a/.ci/docker/common/install_cudnn.sh b/.ci/docker/common/install_cudnn.sh
index f654c9fee24e6..3afd2f28841f5 100644
--- a/.ci/docker/common/install_cudnn.sh
+++ b/.ci/docker/common/install_cudnn.sh
@@ -4,7 +4,10 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn
     pushd tmp_cudnn
-    if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
+    if [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
+        CUDNN_NAME="cudnn-linux-x86_64-8.9.7.29_cuda12-archive"
+        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
+    elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
         curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
     elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
diff --git a/.ci/docker/common/install_cusparselt.sh b/.ci/docker/common/install_cusparselt.sh
index d418f1c75610e..493982919f8a4 100644
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@@ -5,9 +5,14 @@ set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt
 
-if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
-    CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
+if [[ ${CUDA_VERSION:0:4} =~ ^12\.[1-4]$ ]]; then
+    arch_path='sbsa'
+    export TARGETARCH=${TARGETARCH:-$(uname -m)}
+    if [ "${TARGETARCH}" = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
+        arch_path='x86_64'
+    fi
+    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
+    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
 elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
     CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
diff --git a/.ci/docker/ubuntu-cuda/Dockerfile b/.ci/docker/ubuntu-cuda/Dockerfile
index dcf7312c108fc..f96ee5e3b1070 100644
--- a/.ci/docker/ubuntu-cuda/Dockerfile
+++ b/.ci/docker/ubuntu-cuda/Dockerfile
@@ -152,6 +152,7 @@ RUN rm install_cusparselt.sh
 RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
 RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
 RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi
+RUN if [ -h /usr/local/cuda-12.4/cuda-12.4 ]; then rm /usr/local/cuda-12.4/cuda-12.4; fi
 
 USER jenkins
 CMD ["bash"]
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index 6d822165895eb..9f0dfe973dc9f 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -38,6 +38,8 @@ jobs:
       matrix:
         runner: [linux.12xlarge]
         docker-image-name: [
+          pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9,
+          pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks,
           pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9,
           pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks,
           pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9,