Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CUDA 11 builds for Windows CI #42420

Closed
wants to merge 11 commits into from
5 changes: 5 additions & 0 deletions .circleci/cimodel/data/windows_build_definitions.py
Expand Up @@ -124,6 +124,11 @@ def TruePred(_):
WindowsJob(1, VcSpec(2019), CudaVersion(10, 1)),
WindowsJob(2, VcSpec(2019), CudaVersion(10, 1)),
WindowsJob("-jit-profiling-tests", VcSpec(2019), CudaVersion(10, 1), master_only_pred=FalsePred),
# VS2019 CUDA-11.0
WindowsJob(None, VcSpec(2019), CudaVersion(11, 0)),
WindowsJob(1, VcSpec(2019), CudaVersion(11, 0)),
WindowsJob(2, VcSpec(2019), CudaVersion(11, 0)),
WindowsJob("-jit-profiling-tests", VcSpec(2019), CudaVersion(11, 0), master_only_pred=FalsePred),
# VS2019 CPU-only
WindowsJob(None, VcSpec(2019), None),
WindowsJob(1, VcSpec(2019), None, master_only_pred=TruePred),
Expand Down
69 changes: 58 additions & 11 deletions .circleci/config.yml
Expand Up @@ -674,10 +674,7 @@ jobs:
name: Install Cudnn
command : |
if [[ "${USE_CUDA}" == "1" ]]; then
cd c:/
curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
.circleci/scripts/windows_cudnn_install.sh
fi
- run:
name: Build
Expand Down Expand Up @@ -734,17 +731,19 @@ jobs:
name: Install Cuda
no_output_timeout: 30m
command: |
if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/windows_cuda_install.sh
if [[ "${CUDA_VERSION}" != "cpu" ]]; then
if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/windows_cuda_install.sh
fi
if [[ "${CUDA_VERSION}" != "10" && "${JOB_EXECUTOR}" == "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/driver_update.bat
fi
fi
- run:
name: Install Cudnn
command : |
if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
cd c:/
curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
if [[ "${CUDA_VERSION}" != "cpu" ]]; then
.circleci/scripts/windows_cudnn_install.sh
fi
- run:
name: Test
Expand Down Expand Up @@ -6356,6 +6355,54 @@ workflows:
vc_product: Community
vc_version: ""
vc_year: "2019"
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
cuda_version: "11"
name: pytorch_windows_vs2019_py36_cuda11.0_build
python_version: "3.6"
use_cuda: "1"
vc_product: Community
vc_version: ""
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
cuda_version: "11"
executor: windows-with-nvidia-gpu
name: pytorch_windows_vs2019_py36_cuda11.0_test1
python_version: "3.6"
requires:
- pytorch_windows_vs2019_py36_cuda11.0_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: Community
vc_version: ""
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
cuda_version: "11"
executor: windows-with-nvidia-gpu
name: pytorch_windows_vs2019_py36_cuda11.0_test2
python_version: "3.6"
requires:
- pytorch_windows_vs2019_py36_cuda11.0_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: Community
vc_version: ""
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
cuda_version: "11"
executor: windows-with-nvidia-gpu
name: pytorch_windows_vs2019_py36_cuda11.0_test-jit-profiling-tests
python_version: "3.6"
requires:
- pytorch_windows_vs2019_py36_cuda11.0_build
test_name: pytorch-windows-test-jit-profiling-tests
use_cuda: "1"
vc_product: Community
vc_version: ""
vc_year: "2019"
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cpu-py3
cuda_version: cpu
Expand Down
8 changes: 8 additions & 0 deletions .circleci/scripts/driver_update.bat
@@ -0,0 +1,8 @@
:: Downloads the NVIDIA Tesla 451.82 driver installer from the ossci-windows
:: S3 bucket, runs it with the -s -noreboot flags, and removes the installer.
:: Exits with code 1 if either the download or the installer reports failure.
set "DRIVER_INSTALLER=451.82-tesla-desktop-winserver-2019-2016-international.exe"
set "DRIVER_DOWNLOAD_LINK=https://s3.amazonaws.com/ossci-windows/%DRIVER_INSTALLER%"

curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %DRIVER_INSTALLER%
if errorlevel 1 exit /b 1

:: start /wait blocks until the installer process has finished.
start /wait %DRIVER_INSTALLER% -s -noreboot
if errorlevel 1 exit /b 1

:: Best-effort cleanup: if del fails, "ver" runs so the script still ends
:: with a zero errorlevel.
del %DRIVER_INSTALLER% || ver > NUL
48 changes: 34 additions & 14 deletions .circleci/scripts/windows_cuda_install.sh
@@ -1,30 +1,50 @@
#!/bin/bash
set -eux -o pipefail

curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe
7z x cuda_10.1.243_426.00_win10.exe -ocuda_10.1.243_426.00_win10
cd cuda_10.1.243_426.00_win10
if [[ "$CUDA_VERSION" == "10" ]]; then
cuda_complete_version="10.1"
cuda_installer_name="cuda_10.1.243_426.00_win10"
msbuild_project_dir="CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
elif [[ "$CUDA_VERSION" == "11" ]]; then
cuda_complete_version="11.0"
cuda_installer_name="cuda_11.0.2_451.48_win10"
msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0"
else
echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
exit 1
fi

cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"

curl --retry 3 -kLO $cuda_installer_link
7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
cd ${cuda_installer_name}
mkdir cuda_install_logs

set +e

./setup.exe -s nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1 -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"

set -e

if [[ "${VC_YEAR}" == "2017" ]]; then
cp -r CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
else
cp -r CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
fi

curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
7z x NvToolsExt.7z -oNvToolsExt
mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
if ! ls "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll"
then
curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
7z x NvToolsExt.7z -oNvToolsExt
mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
fi

if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe"
if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${cuda_complete_version}/bin/nvcc.exe"
then
echo "CUDA installation failed"
mkdir -p /c/w/build-results
Expand All @@ -33,5 +53,5 @@ then
fi

cd ..
rm -rf ./cuda_10.1.243_426.00_win10
rm -f ./cuda_10.1.243_426.00_win10.exe
rm -rf ./${cuda_installer_name}
rm -f ./${cuda_installer_name}.exe
21 changes: 21 additions & 0 deletions .circleci/scripts/windows_cudnn_install.sh
@@ -0,0 +1,21 @@
#!/bin/bash
# Installs cuDNN for the CUDA toolkit selected by CUDA_VERSION.
#
# Downloads the matching cuDNN archive from the ossci-windows S3 bucket,
# extracts it into ./cudnn, copies its cuda/ contents into the toolkit
# directory under "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA", and
# then removes the extracted tree and the archive.
#
# Environment:
#   CUDA_VERSION  major CUDA version; "10" (toolkit 10.1) and "11"
#                 (toolkit 11.0) are supported, anything else exits with 1.
set -eux -o pipefail

if [[ "$CUDA_VERSION" == "10" ]]; then
    cuda_complete_version="10.1"
    cudnn_installer_name="cudnn-10.1-windows10-x64-v7.6.4.38"
elif [[ "$CUDA_VERSION" == "11" ]]; then
    cuda_complete_version="11.0"
    cudnn_installer_name="cudnn-11.0-windows-x64-v8.0.2.39"
else
    echo "CUDNN for CUDA_VERSION $CUDA_VERSION is not supported yet" >&2
    exit 1
fi

cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/${cudnn_installer_name}.zip"

# --fail makes curl exit non-zero on an HTTP error response instead of saving
# the error body as the .zip, so a bad download stops here rather than
# surfacing later as a confusing 7z extraction failure.
curl --fail --retry 3 -O "$cudnn_installer_link"
7z x "${cudnn_installer_name}.zip" -ocudnn
cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${cuda_complete_version}/"

# Clean up the extracted tree and the downloaded archive.
rm -rf cudnn
rm -f "${cudnn_installer_name}.zip"
21 changes: 10 additions & 11 deletions .circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
Expand Up @@ -222,10 +222,7 @@ jobs:
name: Install Cudnn
command : |
if [[ "${USE_CUDA}" == "1" ]]; then
cd c:/
curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
.circleci/scripts/windows_cudnn_install.sh
fi
- run:
name: Build
Expand Down Expand Up @@ -282,17 +279,19 @@ jobs:
name: Install Cuda
no_output_timeout: 30m
command: |
if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/windows_cuda_install.sh
if [[ "${CUDA_VERSION}" != "cpu" ]]; then
if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/windows_cuda_install.sh
fi
if [[ "${CUDA_VERSION}" != "10" && "${JOB_EXECUTOR}" == "windows-with-nvidia-gpu" ]]; then
.circleci/scripts/driver_update.bat
fi
fi
- run:
name: Install Cudnn
command : |
if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
cd c:/
curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
if [[ "${CUDA_VERSION}" != "cpu" ]]; then
.circleci/scripts/windows_cudnn_install.sh
fi
- run:
name: Test
Expand Down
8 changes: 8 additions & 0 deletions .jenkins/pytorch/win-test-helpers/build_pytorch.bat
Expand Up @@ -39,6 +39,7 @@ popd

if "%CUDA_VERSION%" == "9" goto cuda_build_9
if "%CUDA_VERSION%" == "10" goto cuda_build_10
if "%CUDA_VERSION%" == "11" goto cuda_build_11
goto cuda_build_end

:cuda_build_9
Expand All @@ -55,6 +56,13 @@ set CUDA_PATH_V10_1=%CUDA_PATH%

goto cuda_build_common

:cuda_build_11

set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0
set CUDA_PATH_V11_0=%CUDA_PATH%

goto cuda_build_common

:cuda_build_common

set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
Expand Down
@@ -1,8 +1,9 @@
if "%CUDA_VERSION%" == "9" set CUDA_SUFFIX=cuda92
if "%CUDA_VERSION%" == "10" set CUDA_SUFFIX=cuda101
if "%CUDA_VERSION%" == "11" set CUDA_SUFFIX=cuda110

if "%CUDA_SUFFIX%" == "" (
echo unknown CUDA version, please set `CUDA_VERSION` to 9 or 10.
echo unknown CUDA version, please set `CUDA_VERSION` to 9, 10 or 11.
exit /b 1
)

Expand Down
8 changes: 8 additions & 0 deletions .jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
Expand Up @@ -48,6 +48,7 @@ set DISTUTILS_USE_SDK=1

if "%CUDA_VERSION%" == "9" goto cuda_build_9
if "%CUDA_VERSION%" == "10" goto cuda_build_10
if "%CUDA_VERSION%" == "11" goto cuda_build_11
goto cuda_build_end

:cuda_build_9
Expand All @@ -64,6 +65,13 @@ set CUDA_PATH_V10_1=%CUDA_PATH%

goto cuda_build_common

:cuda_build_11

set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0
set CUDA_PATH_V11_0=%CUDA_PATH%

goto cuda_build_common

:cuda_build_common

set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
Expand Down
13 changes: 7 additions & 6 deletions aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu
Expand Up @@ -23,6 +23,7 @@
#include <bitset>
#include <cusparse.h>
#include <cuda_runtime_api.h>
#include <memory>

#define I_INFO(tensor) cuda::detail::getTensorInfo<int64_t, uint64_t>(tensor)
#define V_INFO(tensor) cuda::detail::getTensorInfo<scalar_t, uint64_t>(tensor)
Expand Down Expand Up @@ -732,7 +733,7 @@ Tensor _bmm_sparse_cuda(const SparseTensor& self, const Tensor& mat2, bool deter
return _bmm_out_sparse_cuda(result, self, mat2, deterministic);
}

#if !(defined(__HIP_PLATFORM_HCC__) || defined(_WIN32) || defined(_WIN64))
#if !(defined(__HIP_PLATFORM_HCC__) || (defined(_MSC_VER) && CUSPARSE_VERSION < 11000))
__global__ void search_end_matrix_indices_cuda_kernel(
int64_t* mat_el_end_indices,
int64_t num_matrices,
Expand Down Expand Up @@ -817,9 +818,9 @@ Tensor& bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Tens
Tensor& _bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Tensor& mat2, bool deterministic) {
#if defined __HIP_PLATFORM_HCC__
TORCH_CHECK(false, "bmm sparse-dense is not supported on HIP");
#elif defined(_WIN32) || defined(_WIN64)
TORCH_CHECK(false, "bmm sparse-dense CUDA is not supported on Windows");
#elif defined(CUDART_VERSION) && (CUDART_VERSION >= 10010)
#elif defined(_MSC_VER) && (CUSPARSE_VERSION < 11000)
TORCH_CHECK(false, "bmm sparse-dense CUDA is not supported on Windows with cuda before 11.0");
#elif defined(CUDART_VERSION) && (CUDART_VERSION >= 10010) // linux cuda >= 10.1 or windows cuda >= 11.0

TORCH_CHECK(!mat2.is_sparse(), "bmm_sparse: Tensor 'mat2' must be dense");
TORCH_CHECK(self.dense_dim() == 0, "bmm_sparse: Tensor 'self' must have 0 dense dims, but has ", self.dense_dim());
Expand Down Expand Up @@ -872,13 +873,13 @@ Tensor& _bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Ten
Tensor indices_dim1 = indices[1].to(ScalarType::Int);
Tensor indices_dim2 = indices[2].to(ScalarType::Int);

int64_t mat_el_end_indices_host[num_matrices];
std::unique_ptr<int64_t[]> mat_el_end_indices_host(new int64_t[num_matrices]);
int64_t* mat_el_end_indices_device;

cudaMalloc(&mat_el_end_indices_device, num_matrices*sizeof(int64_t));
search_end_matrix_indices(mat_el_end_indices_device, num_matrices, indices_dim0);
cudaMemcpy(
mat_el_end_indices_host,
mat_el_end_indices_host.get(),
mat_el_end_indices_device,
num_matrices*sizeof(int64_t),
cudaMemcpyDeviceToHost
Expand Down
16 changes: 16 additions & 0 deletions cmake/ProtoBufPatch.cmake
Expand Up @@ -37,6 +37,22 @@ string(
content
"${content}")

# Generated protobuf code of the form
#   constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE = TensorBoundShape_DimType_DimType_MAX + 1;
# fails to compile with
#   error: more than one operator "+" matches these operands:
#     built-in operator "arithmetic + arithmetic"
#     function "c10::operator+(int, c10::BFloat16)"
#     function "c10::operator+(c10::BFloat16, int)"
#     function "c10::operator+(int, c10::Half)"
#     function "c10::operator+(c10::Half, int)"
#     operand types are: const caffe2::ExternalDataProto_SourceType + int
# The replacement below rewrites every "constexpr <type> <name>_ARRAYSIZE =
# <name>_MAX + 1;" so that the _MAX operand is static_cast to the declared
# <type> before 1 is added.
string(
REGEX REPLACE
"constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
"constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;"
content
"${content}")

Comment on lines +40 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this related to the CUDA 11 update?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error (in comments) will be raised for CUDA 11 builds.

foreach(ns ${NAMESPACES})
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
# the namespace and make sure we only do it once in the file. Unfortunately
Expand Down
6 changes: 3 additions & 3 deletions test/test_sparse.py
Expand Up @@ -1030,15 +1030,15 @@ def test_shape(num_mats, dim_i, dim_j, dim_k, nnz):

@cuda_only
@unittest.skipIf(
not IS_WINDOWS,
"this test ensures bmm sparse-dense CUDA gives an error when run on Windows"
not IS_WINDOWS or [int(x) for x in torch.version.cuda.split(".")] >= [11, 0],
"this test ensures bmm sparse-dense CUDA gives an error when run on Windows with CUDA < 11.0"
)
def test_bmm_windows_error(self):
a = torch.rand(2, 2, 2).to_sparse().cuda()
b = torch.rand(2, 2, 2).cuda()
with self.assertRaisesRegex(
RuntimeError,
"bmm sparse-dense CUDA is not supported on Windows"):
"bmm sparse-dense CUDA is not supported on Windows with cuda before 11.0"):
ab = a.bmm(b)

@cuda_only
Expand Down