diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6e4009deaf874..783dc7333af7e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -973,6 +973,9 @@ CUDA/HIP Language Changes CUDA Support ^^^^^^^^^^^^ +- Clang now supports CUDA SDK up to 12.3 +- Added support for sm_90a + AIX Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d74a7d1e55dd2..0f2e8260143be 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -26,7 +26,9 @@ #pragma push_macro("SM_87") #pragma push_macro("SM_89") #pragma push_macro("SM_90") -#define SM_90 "sm_90" +#pragma push_macro("SM_90a") +#define SM_90a "sm_90a" +#define SM_90 "sm_90|" SM_90a #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -56,7 +58,11 @@ #pragma push_macro("PTX78") #pragma push_macro("PTX80") #pragma push_macro("PTX81") -#define PTX81 "ptx81" +#pragma push_macro("PTX82") +#pragma push_macro("PTX83") +#define PTX83 "ptx83" +#define PTX82 "ptx82|" PTX83 +#define PTX81 "ptx81|" PTX82 #define PTX80 "ptx80|" PTX81 #define PTX78 "ptx78|" PTX80 #define PTX77 "ptx77|" PTX78 @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_87") #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") +#pragma pop_macro("SM_90a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX78") #pragma pop_macro("PTX80") #pragma pop_macro("PTX81") +#pragma pop_macro("PTX82") +#pragma pop_macro("PTX83") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 2d912bdbbd1bc..916cb4b7ef34a 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -39,9 +39,11 @@ enum class CudaVersion { CUDA_118, CUDA_120, CUDA_121, - FULLY_SUPPORTED = CUDA_118, + CUDA_122, + CUDA_123, + FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_121, // Partially supported. Proceed with a warning. + CUDA_123, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -71,6 +73,7 @@ enum class CudaArch { SM_87, SM_89, SM_90, + SM_90a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 65840b9f20252..1b1da6a1356f2 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(11, 8), CUDA_ENTRY(12, 0), CUDA_ENTRY(12, 1), + CUDA_ENTRY(12, 2), + CUDA_ENTRY(12, 3), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. }; @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = { SM(87), // Jetson/Drive AGX Orin SM(89), // Ada Lovelace SM(90), // Hopper + SM(90a), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_89: case CudaArch::SM_90: return CudaVersion::CUDA_118; + case CudaArch::SM_90a: + return CudaVersion::CUDA_120; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3a4a75b0348f2..5c601812f6175 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case CudaArch::SM_89: return "890"; case CudaArch::SM_90: + case CudaArch::SM_90a: return "900"; } llvm_unreachable("unhandled CudaArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); + if (GPU == CudaArch::SM_90a) + Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 293ccaa3413cd..299ee1460b3db 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::SM_87: case CudaArch::SM_89: case CudaArch::SM_90: + case CudaArch::SM_90a: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index e95ff98e6c940..ef1e77974c1ea 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_120; if (raw_version < 12020) return CudaVersion::CUDA_121; + if (raw_version < 12030) + return CudaVersion::CUDA_122; + if (raw_version < 12040) + return CudaVersion::CUDA_123; return CudaVersion::NEW; } @@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(123, 83); + CASE_CUDA_VERSION(122, 82); CASE_CUDA_VERSION(121, 81); CASE_CUDA_VERSION(120, 80); CASE_CUDA_VERSION(118, 78); diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index e8c5ecbcb5363..e840a9208f5a4 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -29,7 +29,7 @@ // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' -// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}} +// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}} // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 // R600: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 02fa2a4ee81ec..f2a4ce381b40b 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td" // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -class FeatureSM: - SubtargetFeature<"sm_"# version, "SmVersion", - "" # version, - "Target SM " # version>; -def SM90a: FeatureSM<90>; +class FeatureSM: + SubtargetFeature<"sm_"# sm, "FullSmVersion", + "" # value, + "Target SM " # sm>; class FeaturePTX: SubtargetFeature<"ptx"# version, "PTXVersion", "" # version, "Use PTX version " # version>; -foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53, - 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in - def SM#version: FeatureSM; +foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, + 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in + def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; + +def SM90a: FeatureSM<"90a", 901>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in + 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in def PTX#version: FeaturePTX; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7fa64af196b93..420065585b384 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS); + // Re-map SM version numbers, SmVersion carries the regular SMs which do + // have relative order, while FullSmVersion allows distinguishing sm_90 from + // sm_90a, which would *not* be a subset of sm_91. + SmVersion = getSmVersion(); + // Set default to PTX 6.0 (CUDA 9.0) if (PTXVersion == 0) { PTXVersion = 60; @@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0), - SmVersion(20), TM(TM), + FullSmVersion(200), SmVersion(getSmVersion()), TM(TM), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {} bool NVPTXSubtarget::hasImageHandles() const { diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 93af11c258b48..3ca4c1a24c79a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned PTXVersion; - // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 + // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310 + // sm_90a == 901 + unsigned int FullSmVersion; + + // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from + // FullSmVersion. unsigned int SmVersion; const NVPTXTargetMachine &TM; @@ -80,7 +85,15 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool allowFP16Math() const; bool hasMaskOperator() const { return PTXVersion >= 71; } bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; } - unsigned int getSmVersion() const { return SmVersion; } + unsigned int getFullSmVersion() const { return FullSmVersion; } + unsigned int getSmVersion() const { return getFullSmVersion() / 10; } + // GPUs with "a" suffix have include architecture-accelerated features that + // are supported on the specified architecture only, hence such targets do not + // follow the onion layer model. hasAAFeatures() allows distinguishing such + // GPU variants from the base GPU architecture. + // - 0 represents base GPU model, + // - non-zero value identifies particular architecture-accelerated variant. + bool hasAAFeatures() const { return getFullSmVersion() % 10; } std::string getTargetName() const { return TargetName; } // Get maximum value of required alignments among the supported data types.