Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDA] Add support for CUDA-12.3 and sm_90a #74895

Merged
merged 3 commits into from
Dec 11, 2023
Merged

[CUDA] Add support for CUDA-12.3 and sm_90a #74895

merged 3 commits into from
Dec 11, 2023

Conversation

Artem-B
Copy link
Member

@Artem-B Artem-B commented Dec 8, 2023

No description provided.

@Artem-B Artem-B requested a review from jhuber6 December 8, 2023 23:18
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen clang:openmp OpenMP related changes to Clang labels Dec 8, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 8, 2023

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-clang-driver

@llvm/pr-subscribers-clang-codegen

Author: Artem Belevich (Artem-B)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/74895.diff

11 Files Affected:

  • (modified) clang/docs/ReleaseNotes.rst (+3)
  • (modified) clang/include/clang/Basic/BuiltinsNVPTX.def (+11-2)
  • (modified) clang/include/clang/Basic/Cuda.h (+5-2)
  • (modified) clang/lib/Basic/Cuda.cpp (+5)
  • (modified) clang/lib/Basic/Targets/NVPTX.cpp (+3)
  • (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+1)
  • (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+6)
  • (modified) clang/test/Misc/target-invalid-cpu-note.c (+1-1)
  • (modified) llvm/lib/Target/NVPTX/NVPTX.td (+10-9)
  • (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp (+6-1)
  • (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.h (+9-2)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 89ea2f0930cec..1bf68a46a64da 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -937,6 +937,9 @@ CUDA/HIP Language Changes
 CUDA Support
 ^^^^^^^^^^^^
 
+- Clang now supports CUDA SDK up to 12.3
+- Added support for sm_90a
+
 AIX Support
 ^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index d74a7d1e55dd2..0f2e8260143be 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -26,7 +26,9 @@
 #pragma push_macro("SM_87")
 #pragma push_macro("SM_89")
 #pragma push_macro("SM_90")
-#define SM_90 "sm_90"
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
 #define SM_89 "sm_89|" SM_90
 #define SM_87 "sm_87|" SM_89
 #define SM_86 "sm_86|" SM_87
@@ -56,7 +58,11 @@
 #pragma push_macro("PTX78")
 #pragma push_macro("PTX80")
 #pragma push_macro("PTX81")
-#define PTX81 "ptx81"
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
 #define PTX80 "ptx80|" PTX81
 #define PTX78 "ptx78|" PTX80
 #define PTX77 "ptx77|" PTX78
@@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
 #pragma pop_macro("SM_87")
 #pragma pop_macro("SM_89")
 #pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
 #pragma pop_macro("PTX42")
 #pragma pop_macro("PTX60")
 #pragma pop_macro("PTX61")
@@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
 #pragma pop_macro("PTX78")
 #pragma pop_macro("PTX80")
 #pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 2d912bdbbd1bc..916cb4b7ef34a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -39,9 +39,11 @@ enum class CudaVersion {
   CUDA_118,
   CUDA_120,
   CUDA_121,
-  FULLY_SUPPORTED = CUDA_118,
+  CUDA_122,
+  CUDA_123,
+  FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
-      CUDA_121, // Partially supported. Proceed with a warning.
+      CUDA_123, // Partially supported. Proceed with a warning.
   NEW = 10000,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
@@ -71,6 +73,7 @@ enum class CudaArch {
   SM_87,
   SM_89,
   SM_90,
+  SM_90a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 65840b9f20252..1b1da6a1356f2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
     CUDA_ENTRY(11, 8),
     CUDA_ENTRY(12, 0),
     CUDA_ENTRY(12, 1),
+    CUDA_ENTRY(12, 2),
+    CUDA_ENTRY(12, 3),
     {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
     {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
     SM(87),                          // Jetson/Drive AGX Orin
     SM(89),                          // Ada Lovelace
     SM(90),                          // Hopper
+    SM(90a),                         // Hopper
     GFX(600),  // gfx600
     GFX(601),  // gfx601
     GFX(602),  // gfx602
@@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
   case CudaArch::SM_89:
   case CudaArch::SM_90:
     return CudaVersion::CUDA_118;
+  case CudaArch::SM_90a:
+    return CudaVersion::CUDA_120;
   default:
     llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 3a4a75b0348f2..5c601812f6175 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       case CudaArch::SM_89:
         return "890";
       case CudaArch::SM_90:
+      case CudaArch::SM_90a:
         return "900";
       }
       llvm_unreachable("unhandled CudaArch");
     }();
     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+    if (GPU == CudaArch::SM_90a)
+      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
   }
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 293ccaa3413cd..299ee1460b3db 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
       case CudaArch::SM_87:
       case CudaArch::SM_89:
       case CudaArch::SM_90:
+      case CudaArch::SM_90a:
       case CudaArch::GFX600:
       case CudaArch::GFX601:
       case CudaArch::GFX602:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index e95ff98e6c940..ef1e77974c1ea 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
     return CudaVersion::CUDA_120;
   if (raw_version < 12020)
     return CudaVersion::CUDA_121;
+  if (raw_version < 12030)
+    return CudaVersion::CUDA_122;
+  if (raw_version < 12040)
+    return CudaVersion::CUDA_123;
   return CudaVersion::NEW;
 }
 
@@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   case CudaVersion::CUDA_##CUDA_VER:                                           \
     PtxFeature = "+ptx" #PTX_VER;                                              \
     break;
+    CASE_CUDA_VERSION(123, 83);
+    CASE_CUDA_VERSION(122, 82);
     CASE_CUDA_VERSION(121, 81);
     CASE_CUDA_VERSION(120, 80);
     CASE_CUDA_VERSION(118, 78);
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index c7146e63add5f..5475b1d8bd052 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
 
 // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
 // R600: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 02fa2a4ee81ec..f2a4ce381b40b 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td"
 //   TableGen in NVPTXGenSubtarget.inc.
 //===----------------------------------------------------------------------===//
 
-class FeatureSM<int version>:
-   SubtargetFeature<"sm_"# version, "SmVersion",
-                    "" # version,
-                    "Target SM " # version>;
-def SM90a: FeatureSM<90>;
+class FeatureSM<string sm, int value>:
+   SubtargetFeature<"sm_"# sm, "FullSmVersion",
+                    "" # value,
+                    "Target SM " # sm>;
 
 class FeaturePTX<int version>:
    SubtargetFeature<"ptx"# version, "PTXVersion",
                     "" # version,
                     "Use PTX version " # version>;
 
-foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
-                   60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
-  def SM#version: FeatureSM<version>;
+foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
+              60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
+  def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
+
+def SM90a: FeatureSM<"90a", 901>;
 
 foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
-                   70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
+                   70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in
   def PTX#version: FeaturePTX<version>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7fa64af196b93..420065585b384 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
 
     ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
 
+    // Re-map SM version numbers, SmVersion carries the regular SMs which do
+    // have relative order, while FullSmVersion allows distinguishing sm_90 from
+    // sm_90a, which would *not* be a subset of sm_91.
+    SmVersion = getSmVersion();
+
     // Set default to PTX 6.0 (CUDA 9.0)
     if (PTXVersion == 0) {
       PTXVersion = 60;
@@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                                const std::string &FS,
                                const NVPTXTargetMachine &TM)
     : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
-      SmVersion(20), TM(TM),
+      FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
       TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
 
 bool NVPTXSubtarget::hasImageHandles() const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 93af11c258b48..951962d1e68be 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
   unsigned PTXVersion;
 
-  // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
+  // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
+  // sm_90a == 901
+  unsigned int FullSmVersion;
+
+  // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
+  // FullSmVersion.
   unsigned int SmVersion;
 
   const NVPTXTargetMachine &TM;
@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool allowFP16Math() const;
   bool hasMaskOperator() const { return PTXVersion >= 71; }
   bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
-  unsigned int getSmVersion() const { return SmVersion; }
+  unsigned int getSmVersion() const { return FullSmVersion / 10; }
+  unsigned int getFullSmVersion() const { return FullSmVersion; }
   std::string getTargetName() const { return TargetName; }
+  bool isSm90a() const { return getFullSmVersion() == 901; }
 
   // Get maximum value of required alignments among the supported data types.
   // From the PTX ISA doc, section 8.2.3:

@Artem-B
Copy link
Member Author

Artem-B commented Dec 8, 2023

@ezhulenev FYI.

Copy link

github-actions bot commented Dec 8, 2023

:white_check_mark: With the latest revision this PR passed the C/C++ code formatter.

@Artem-B
Copy link
Member Author

Artem-B commented Dec 11, 2023

Tested the changes with cuda test-suite, with cuda-12.1 and 12.3 targeting sm_{60,70,80,90,90a}.

@Artem-B Artem-B merged commit 631c6e8 into llvm:main Dec 11, 2023
4 of 5 checks passed
@Artem-B Artem-B deleted the sm90a branch December 11, 2023 20:18
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:codegen clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:openmp OpenMP related changes to Clang clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants