From 4e6b1378908cf5c9d98426de9e2e9e88a50ab53e Mon Sep 17 00:00:00 2001 From: Dmytro Dzhulgakov Date: Thu, 5 Oct 2023 06:03:29 +0000 Subject: [PATCH] Add 9.0a into cpp_extension support archs --- .../upstream/FindCUDA/select_compute_arch.cmake | 5 +++++ torch/utils/cpp_extension.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake b/cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake index d917738a5c7ef..769ddacfcf2ce 100644 --- a/cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake +++ b/cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake @@ -72,6 +72,11 @@ if(NOT CUDA_VERSION VERSION_LESS "11.8") endif() endif() +if(NOT CUDA_VERSION VERSION_LESS "12.0") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0a") + list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0a") +endif() + ################################################################################################ # A function for automatic detection of GPUs installed (if autodetection is enabled) # Usage: diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 5fe55256cfe2f..6ec39b6817dcd 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -1949,7 +1949,7 @@ def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]: ]) supported_arches = ['3.5', '3.7', '5.0', '5.2', '5.3', '6.0', '6.1', '6.2', - '7.0', '7.2', '7.5', '8.0', '8.6', '8.7', '8.9', '9.0'] + '7.0', '7.2', '7.5', '8.0', '8.6', '8.7', '8.9', '9.0', '9.0a'] valid_arch_strings = supported_arches + [s + "+PTX" for s in supported_arches] # The default is sm_30 for CUDA 9.x and 10.x @@ -1992,7 +1992,7 @@ def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]: if arch not in valid_arch_strings: raise ValueError(f"Unknown CUDA arch ({arch}) or GPU not supported") else: - num = arch[0] + arch[2] + num = arch[0] + arch[2:].split("+")[0] flags.append(f'-gencode=arch=compute_{num},code=sm_{num}') if arch.endswith('+PTX'): flags.append(f'-gencode=arch=compute_{num},code=compute_{num}')