Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .ci/docker/common/install_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ function install_cusparselt_062 {
rm -rf tmp_cusparselt
}

function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}

function install_118 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
Expand Down Expand Up @@ -140,7 +151,7 @@ function install_124 {
}

function install_126 {
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.3 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run
Expand All @@ -167,7 +178,7 @@ function install_126 {
cd ..
rm -rf nccl

install_cusparselt_062
install_cusparselt_063

ldconfig
}
Expand Down
25 changes: 18 additions & 7 deletions .ci/docker/common/install_cuda_aarch64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ function install_cusparselt_062 {
rm -rf tmp_cusparselt
}

function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missed this, should have been libcusparse_lt-linux-sbsa-0.6.3.2. This is causing error /usr/local/cuda/lib64/libcusparseLt.so: error adding symbols: file in wrong format

tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}

function install_124 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
Expand Down Expand Up @@ -83,13 +94,13 @@ function prune_124 {
}

function install_126 {
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.2 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux_sbsa.run
chmod +x cuda_12.6.2_560.35.03_linux_sbsa.run
./cuda_12.6.2_560.35.03_linux_sbsa.run --toolkit --silent
rm -f cuda_12.6.2_560.35.03_linux_sbsa.run
# install CUDA 12.6.3 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run
chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run
./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent
rm -f cuda_12.6.3_560.35.05_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
Expand All @@ -110,7 +121,7 @@ function install_126 {
cd ..
rm -rf nccl

install_cusparselt_062
install_cusparselt_063

ldconfig
}
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.6.3.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@atalman I'll probably need this uploaded to the cu126 s3 bucket. Changes are from the https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-toolkit-major-component-versions list

"nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64'"
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'"
),
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading