Skip to content

Commit

Permalink
[CUDA] [CI] Add cu124 docker images (#125944)
Browse files Browse the repository at this point in the history
Fixes issues encountered in #121956

Pull Request resolved: #125944
Approved by: https://github.com/atalman
  • Loading branch information
nWEIdia authored and pytorchmergebot committed May 15, 2024
1 parent f060b0c commit 8f30f36
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 6 deletions.
47 changes: 45 additions & 2 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,27 @@ fi
# CMake 3.18 is needed to support CUDA17 language variant
CMAKE_VERSION=3.18.5

_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b

# It's annoying to rename jobs every time you want to rewrite a
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
CUDA_VERSION=12.4.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=8
Expand All @@ -105,6 +119,21 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=8
Expand Down Expand Up @@ -134,6 +163,20 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
CUDA_VERSION=12.4.0
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=8
Expand Down
5 changes: 4 additions & 1 deletion .ci/docker/common/install_cudnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
if [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.7.29_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
Expand Down
11 changes: 8 additions & 3 deletions .ci/docker/common/install_cusparselt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@ set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt

if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[1-4]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
Expand Down
1 change: 1 addition & 0 deletions .ci/docker/ubuntu-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ RUN rm install_cusparselt.sh
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi
RUN if [ -h /usr/local/cuda-12.1/cuda-12.4 ]; then rm /usr/local/cuda-12.1/cuda-12.4; fi

USER jenkins
CMD ["bash"]
2 changes: 2 additions & 0 deletions .github/workflows/docker-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jobs:
matrix:
runner: [linux.12xlarge]
docker-image-name: [
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9,
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9,
Expand Down

0 comments on commit 8f30f36

Please sign in to comment.