pytorch · peterjc123 · Aug 2, 2020 · Aug 2, 2020 · Aug 2, 2020 · Aug 2, 2020
diff --git a/.circleci/cimodel/data/windows_build_definitions.py b/.circleci/cimodel/data/windows_build_definitions.py
@@ -124,6 +124,11 @@ def TruePred(_):
     WindowsJob(1, VcSpec(2019), CudaVersion(10, 1)),
     WindowsJob(2, VcSpec(2019), CudaVersion(10, 1)),
     WindowsJob("-jit-profiling-tests", VcSpec(2019), CudaVersion(10, 1), master_only_pred=FalsePred),
+    # VS2019 CUDA-11.0
+    WindowsJob(None, VcSpec(2019), CudaVersion(11, 0)),
+    WindowsJob(1, VcSpec(2019), CudaVersion(11, 0)),
+    WindowsJob(2, VcSpec(2019), CudaVersion(11, 0)),
+    WindowsJob("-jit-profiling-tests", VcSpec(2019), CudaVersion(11, 0), master_only_pred=FalsePred),
     # VS2019 CPU-only
     WindowsJob(None, VcSpec(2019), None),
     WindowsJob(1, VcSpec(2019), None, master_only_pred=TruePred),

diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -674,10 +674,7 @@ jobs:
           name: Install Cudnn
           command : |
             if [[ "${USE_CUDA}" == "1" ]]; then
-              cd c:/
-              curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
-              7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
-              cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
+              .circleci/scripts/windows_cudnn_install.sh
             fi
       - run:
           name: Build
@@ -734,17 +731,19 @@ jobs:
           name: Install Cuda
           no_output_timeout: 30m
           command: |
-            if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
-              .circleci/scripts/windows_cuda_install.sh
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/windows_cuda_install.sh
+              fi
+              if [[ "${CUDA_VERSION}" != "10" && "${JOB_EXECUTOR}" == "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/driver_update.bat
+              fi
             fi
       - run:
           name: Install Cudnn
           command : |
-            if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
-              cd c:/
-              curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
-              7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
-              cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
             fi
       - run:
           name: Test
@@ -6356,6 +6355,54 @@ workflows:
           vc_product: Community
           vc_version: ""
           vc_year: "2019"
+      - pytorch_windows_build:
+          build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
+          cuda_version: "11"
+          name: pytorch_windows_vs2019_py36_cuda11.0_build
+          python_version: "3.6"
+          use_cuda: "1"
+          vc_product: Community
+          vc_version: ""
+          vc_year: "2019"
+      - pytorch_windows_test:
+          build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
+          cuda_version: "11"
+          executor: windows-with-nvidia-gpu
+          name: pytorch_windows_vs2019_py36_cuda11.0_test1
+          python_version: "3.6"
+          requires:
+            - pytorch_windows_vs2019_py36_cuda11.0_build
+          test_name: pytorch-windows-test1
+          use_cuda: "1"
+          vc_product: Community
+          vc_version: ""
+          vc_year: "2019"
+      - pytorch_windows_test:
+          build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
+          cuda_version: "11"
+          executor: windows-with-nvidia-gpu
+          name: pytorch_windows_vs2019_py36_cuda11.0_test2
+          python_version: "3.6"
+          requires:
+            - pytorch_windows_vs2019_py36_cuda11.0_build
+          test_name: pytorch-windows-test2
+          use_cuda: "1"
+          vc_product: Community
+          vc_version: ""
+          vc_year: "2019"
+      - pytorch_windows_test:
+          build_environment: pytorch-win-vs2019-cuda11-cudnn7-py3
+          cuda_version: "11"
+          executor: windows-with-nvidia-gpu
+          name: pytorch_windows_vs2019_py36_cuda11.0_test-jit-profiling-tests
+          python_version: "3.6"
+          requires:
+            - pytorch_windows_vs2019_py36_cuda11.0_build
+          test_name: pytorch-windows-test-jit-profiling-tests
+          use_cuda: "1"
+          vc_product: Community
+          vc_version: ""
+          vc_year: "2019"
       - pytorch_windows_build:
           build_environment: pytorch-win-vs2019-cpu-py3
           cuda_version: cpu

diff --git a/.circleci/scripts/driver_update.bat b/.circleci/scripts/driver_update.bat
@@ -0,0 +1,15 @@
+REM Updates the NVIDIA driver on a Windows CI machine.
+REM The script exits with code 1 as soon as the download or the installer reports an error.
+
+REM Download the 451.82 Tesla driver installer into the current directory.
+set "DRIVER_DOWNLOAD_LINK=https://s3.amazonaws.com/ossci-windows/451.82-tesla-desktop-winserver-2019-2016-international.exe"
+curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output 451.82-tesla-desktop-winserver-2019-2016-international.exe
+if errorlevel 1 exit /b 1
+
+REM "start /wait" blocks until the installer process exits.
+REM NOTE(review): -s -noreboot are presumably the installer's silent / no-reboot switches - confirm.
+start /wait 451.82-tesla-desktop-winserver-2019-2016-international.exe -s -noreboot
+if errorlevel 1 exit /b 1
+
+REM Cleanup: "ver > NUL" runs only when del fails, presumably so the script does not end on del's error status.
+del 451.82-tesla-desktop-winserver-2019-2016-international.exe || ver > NUL
diff --git a/.circleci/scripts/windows_cuda_install.sh b/.circleci/scripts/windows_cuda_install.sh
@@ -1,30 +1,50 @@
 #!/bin/bash
 set -eux -o pipefail
 
-curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe
-7z x cuda_10.1.243_426.00_win10.exe -ocuda_10.1.243_426.00_win10
-cd cuda_10.1.243_426.00_win10
+if [[ "$CUDA_VERSION" == "10" ]]; then
+    cuda_complete_version="10.1"
+    cuda_installer_name="cuda_10.1.243_426.00_win10"
+    msbuild_project_dir="CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
+    cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
+elif [[ "$CUDA_VERSION" == "11" ]]; then
+    cuda_complete_version="11.0"
+    cuda_installer_name="cuda_11.0.2_451.48_win10"
+    msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
+    cuda_install_packages="nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0"
+else
+    echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
+    exit 1
+fi
+
+cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"
+
+curl --retry 3 -kLO $cuda_installer_link
+7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
+cd ${cuda_installer_name}
 mkdir cuda_install_logs
 
 set +e
 
-./setup.exe -s nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1 -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
+./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
 
 set -e
 
 if [[ "${VC_YEAR}" == "2017" ]]; then
-    cp -r CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
+    cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
 else
-    cp -r CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
+    cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
 fi
 
-curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
-7z x NvToolsExt.7z -oNvToolsExt
-mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
-cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
-export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
+if ! ls "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll"
+then
+    curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
+    7z x NvToolsExt.7z -oNvToolsExt
+    mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
+    cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
+    export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
+fi
 
-if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/nvcc.exe"
+if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${cuda_complete_version}/bin/nvcc.exe"
 then
     echo "CUDA installation failed"
     mkdir -p /c/w/build-results
@@ -33,5 +53,5 @@ then
 fi
 
 cd ..
-rm -rf ./cuda_10.1.243_426.00_win10
-rm -f ./cuda_10.1.243_426.00_win10.exe
+rm -rf ./${cuda_installer_name}
+rm -f ./${cuda_installer_name}.exe
diff --git a/.circleci/scripts/windows_cudnn_install.sh b/.circleci/scripts/windows_cudnn_install.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Installs cuDNN into the CUDA toolkit directory selected by $CUDA_VERSION.
+#
+# Environment:
+#   CUDA_VERSION  "10" or "11"; any other value prints a message and exits 1.
+#                 Because of `set -u`, an unset CUDA_VERSION aborts the script.
+#
+# The archive is downloaded and unpacked relative to the current working
+# directory, and both the archive and the unpacked tree are removed afterwards.
+set -eux -o pipefail
+
+# Map the CUDA_VERSION value to the toolkit directory version and the cuDNN
+# archive name to download.
+if [[ "$CUDA_VERSION" == "10" ]]; then
+    cuda_complete_version="10.1"
+    cudnn_installer_name="cudnn-10.1-windows10-x64-v7.6.4.38"
+elif [[ "$CUDA_VERSION" == "11" ]]; then
+    cuda_complete_version="11.0"
+    cudnn_installer_name="cudnn-11.0-windows-x64-v8.0.2.39"
+else
+    echo "CUDNN for CUDA_VERSION $CUDA_VERSION is not supported yet"
+    exit 1
+fi
+
+cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/${cudnn_installer_name}.zip"
+
+# Download the archive, extract it to ./cudnn, and copy the contents of its
+# cuda/ directory into the CUDA v${cuda_complete_version} install directory.
+curl --retry 3 -O $cudnn_installer_link
+7z x ${cudnn_installer_name}.zip -ocudnn
+cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${cuda_complete_version}/"
+# Remove the extracted tree and the downloaded archive.
+rm -rf cudnn
+rm -f ${cudnn_installer_name}.zip
diff --git a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -222,10 +222,7 @@ jobs:
           name: Install Cudnn
           command : |
             if [[ "${USE_CUDA}" == "1" ]]; then
-              cd c:/
-              curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
-              7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
-              cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
+              .circleci/scripts/windows_cudnn_install.sh
             fi
       - run:
           name: Build
@@ -282,17 +279,19 @@ jobs:
           name: Install Cuda
           no_output_timeout: 30m
           command: |
-            if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
-              .circleci/scripts/windows_cuda_install.sh
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/windows_cuda_install.sh
+              fi
+              if [[ "${CUDA_VERSION}" != "10" && "${JOB_EXECUTOR}" == "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/driver_update.bat
+              fi
             fi
       - run:
           name: Install Cudnn
           command : |
-            if [[ "${CUDA_VERSION}" != "cpu" && "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
-              cd c:/
-              curl --retry 3 -O https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip
-              7z x cudnn-10.1-windows10-x64-v7.6.4.38.zip -ocudnn
-              cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/"
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
             fi
       - run:
           name: Test

diff --git a/.jenkins/pytorch/win-test-helpers/build_pytorch.bat b/.jenkins/pytorch/win-test-helpers/build_pytorch.bat
@@ -39,6 +39,7 @@ popd
 
 if "%CUDA_VERSION%" == "9" goto cuda_build_9
 if "%CUDA_VERSION%" == "10" goto cuda_build_10
+if "%CUDA_VERSION%" == "11" goto cuda_build_11
 goto cuda_build_end
 
 :cuda_build_9
@@ -55,6 +56,13 @@ set CUDA_PATH_V10_1=%CUDA_PATH%
 
 goto cuda_build_common
 
+:cuda_build_11
+
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0
+set CUDA_PATH_V11_0=%CUDA_PATH%
+
+goto cuda_build_common
+
 :cuda_build_common
 
 set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64

diff --git a/.jenkins/pytorch/win-test-helpers/installation-helpers/install_magma.bat b/.jenkins/pytorch/win-test-helpers/installation-helpers/install_magma.bat
@@ -1,8 +1,9 @@
 if "%CUDA_VERSION%" == "9" set CUDA_SUFFIX=cuda92
 if "%CUDA_VERSION%" == "10" set CUDA_SUFFIX=cuda101
+if "%CUDA_VERSION%" == "11" set CUDA_SUFFIX=cuda110
 
 if "%CUDA_SUFFIX%" == "" (
-  echo unknown CUDA version, please set `CUDA_VERSION` to 9 or 10.
+  echo unknown CUDA version, please set `CUDA_VERSION` to 9, 10 or 11.
   exit /b 1
 )
 

diff --git a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
@@ -48,6 +48,7 @@ set DISTUTILS_USE_SDK=1
 
 if "%CUDA_VERSION%" == "9" goto cuda_build_9
 if "%CUDA_VERSION%" == "10" goto cuda_build_10
+if "%CUDA_VERSION%" == "11" goto cuda_build_11
 goto cuda_build_end
 
 :cuda_build_9
@@ -64,6 +65,13 @@ set CUDA_PATH_V10_1=%CUDA_PATH%
 
 goto cuda_build_common
 
+:cuda_build_11
+
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0
+set CUDA_PATH_V11_0=%CUDA_PATH%
+
+goto cuda_build_common
+
 :cuda_build_common
 
 set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64

diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu b/aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu
@@ -23,6 +23,7 @@
 #include <bitset>
 #include <cusparse.h>
 #include <cuda_runtime_api.h>
+#include <memory>
 
 #define I_INFO(tensor) cuda::detail::getTensorInfo<int64_t, uint64_t>(tensor)
 #define V_INFO(tensor) cuda::detail::getTensorInfo<scalar_t, uint64_t>(tensor)
@@ -732,7 +733,7 @@ Tensor _bmm_sparse_cuda(const SparseTensor& self, const Tensor& mat2, bool deter
   return _bmm_out_sparse_cuda(result, self, mat2, deterministic);
 }
 
-#if !(defined(__HIP_PLATFORM_HCC__) || defined(_WIN32) || defined(_WIN64))
+#if !(defined(__HIP_PLATFORM_HCC__) || (defined(_MSC_VER) && CUSPARSE_VERSION < 11000))
 __global__ void search_end_matrix_indices_cuda_kernel(
   int64_t* mat_el_end_indices,
   int64_t num_matrices,
@@ -817,9 +818,9 @@ Tensor& bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Tens
 Tensor& _bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Tensor& mat2, bool deterministic) {
 #if defined __HIP_PLATFORM_HCC__
   TORCH_CHECK(false, "bmm sparse-dense is not supported on HIP");
-#elif defined(_WIN32) || defined(_WIN64)
-  TORCH_CHECK(false, "bmm sparse-dense CUDA is not supported on Windows");
-#elif defined(CUDART_VERSION) && (CUDART_VERSION >= 10010)
+#elif defined(_MSC_VER) && (CUSPARSE_VERSION < 11000)
+  TORCH_CHECK(false, "bmm sparse-dense CUDA is not supported on Windows with cuda before 11.0");
+#elif defined(CUDART_VERSION) && (CUDART_VERSION >= 10010)  // linux cuda >= 10.1 or windows cuda >= 11.0
 
   TORCH_CHECK(!mat2.is_sparse(), "bmm_sparse: Tensor 'mat2' must be dense");
   TORCH_CHECK(self.dense_dim() == 0, "bmm_sparse: Tensor 'self' must have 0 dense dims, but has ", self.dense_dim());
@@ -872,13 +873,13 @@ Tensor& _bmm_out_sparse_cuda(Tensor& result, const SparseTensor& self, const Ten
   Tensor indices_dim1 = indices[1].to(ScalarType::Int);
   Tensor indices_dim2 = indices[2].to(ScalarType::Int);
 
-  int64_t mat_el_end_indices_host[num_matrices];
+  std::unique_ptr<int64_t[]> mat_el_end_indices_host(new int64_t[num_matrices]);
   int64_t* mat_el_end_indices_device;
 
   cudaMalloc(&mat_el_end_indices_device, num_matrices*sizeof(int64_t));
   search_end_matrix_indices(mat_el_end_indices_device, num_matrices, indices_dim0);
   cudaMemcpy(
-    mat_el_end_indices_host,
+    mat_el_end_indices_host.get(),
     mat_el_end_indices_device,
     num_matrices*sizeof(int64_t),
     cudaMemcpyDeviceToHost

diff --git a/cmake/ProtoBufPatch.cmake b/cmake/ProtoBufPatch.cmake
@@ -37,6 +37,22 @@ string(
   content
   "${content}")
 
+# constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE = TensorBoundShape_DimType_DimType_MAX + 1;
+# fails to compile with
+# error: more than one operator "+" matches these operands:
+#     built-in operator "arithmetic + arithmetic"
+#     function "c10::operator+(int, c10::BFloat16)"
+#     function "c10::operator+(c10::BFloat16, int)"
+#     function "c10::operator+(int, c10::Half)"
+#     function "c10::operator+(c10::Half, int)"
+#   operand types are: const caffe2::ExternalDataProto_SourceType + int
+string(
+  REGEX REPLACE
+  "constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
+  "constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;"
+  content
+  "${content}")
+
 foreach(ns ${NAMESPACES})
   # Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
   # the namespace and make sure we only do it once in the file. Unfortunately

diff --git a/test/test_sparse.py b/test/test_sparse.py
@@ -1030,15 +1030,20 @@ def test_shape(num_mats, dim_i, dim_j, dim_k, nnz):

     @cuda_only
     @unittest.skipIf(
-        not IS_WINDOWS,
-        "this test ensures bmm sparse-dense CUDA gives an error when run on Windows"
+        # This condition is evaluated while the class body executes, not when the
+        # test runs. torch.version.cuda is None on CPU-only builds, so it must be
+        # checked before it is parsed; otherwise importing this module fails.
+        not IS_WINDOWS
+        or torch.version.cuda is None
+        or [int(x) for x in torch.version.cuda.split(".")] >= [11, 0],
+        "this test ensures bmm sparse-dense CUDA gives an error when run on Windows with CUDA < 11.0"
     )
     def test_bmm_windows_error(self):
         a = torch.rand(2, 2, 2).to_sparse().cuda()
         b = torch.rand(2, 2, 2).cuda()
         with self.assertRaisesRegex(
                 RuntimeError,
-                "bmm sparse-dense CUDA is not supported on Windows"):
+                "bmm sparse-dense CUDA is not supported on Windows with cuda before 11.0"):
             ab = a.bmm(b)
 
     @cuda_only