Skip to content

Commit

Permalink
[CUDA] Add support for CUDA-12.3 and sm_90a (#74895)
Browse files Browse the repository at this point in the history
  • Loading branch information
Artem-B committed Dec 11, 2023
1 parent ced631e commit 631c6e8
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 17 deletions.
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,9 @@ CUDA/HIP Language Changes
CUDA Support
^^^^^^^^^^^^

- Clang now supports CUDA SDK up to 12.3
- Added support for sm_90a

AIX Support
^^^^^^^^^^^

Expand Down
13 changes: 11 additions & 2 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#define SM_90 "sm_90"
#pragma push_macro("SM_90a")
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
Expand Down Expand Up @@ -56,7 +58,11 @@
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#define PTX81 "ptx81"
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#define PTX83 "ptx83"
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
Expand Down Expand Up @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_87")
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
Expand All @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX78")
#pragma pop_macro("PTX80")
#pragma pop_macro("PTX81")
#pragma pop_macro("PTX82")
#pragma pop_macro("PTX83")
7 changes: 5 additions & 2 deletions clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ enum class CudaVersion {
CUDA_118,
CUDA_120,
CUDA_121,
FULLY_SUPPORTED = CUDA_118,
CUDA_122,
CUDA_123,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_121, // Partially supported. Proceed with a warning.
CUDA_123, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down Expand Up @@ -71,6 +73,7 @@ enum class CudaArch {
SM_87,
SM_89,
SM_90,
SM_90a,
GFX600,
GFX601,
GFX602,
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(11, 8),
CUDA_ENTRY(12, 0),
CUDA_ENTRY(12, 1),
CUDA_ENTRY(12, 2),
CUDA_ENTRY(12, 3),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
SM(87), // Jetson/Drive AGX Orin
SM(89), // Ada Lovelace
SM(90), // Hopper
SM(90a), // Hopper
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
case CudaArch::SM_89:
case CudaArch::SM_90:
return CudaVersion::CUDA_118;
case CudaArch::SM_90a:
return CudaVersion::CUDA_120;
default:
llvm_unreachable("invalid enum");
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::SM_89:
return "890";
case CudaArch::SM_90:
case CudaArch::SM_90a:
return "900";
}
llvm_unreachable("unhandled CudaArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == CudaArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
}
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::SM_87:
case CudaArch::SM_89:
case CudaArch::SM_90:
case CudaArch::SM_90a:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_120;
if (raw_version < 12020)
return CudaVersion::CUDA_121;
if (raw_version < 12030)
return CudaVersion::CUDA_122;
if (raw_version < 12040)
return CudaVersion::CUDA_123;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(123, 83);
CASE_CUDA_VERSION(122, 82);
CASE_CUDA_VERSION(121, 81);
CASE_CUDA_VERSION(120, 80);
CASE_CUDA_VERSION(118, 78);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}

// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td"
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//

class FeatureSM<int version>:
SubtargetFeature<"sm_"# version, "SmVersion",
"" # version,
"Target SM " # version>;
def SM90a: FeatureSM<90>;
class FeatureSM<string sm, int value>:
SubtargetFeature<"sm_"# sm, "FullSmVersion",
"" # value,
"Target SM " # sm>;

class FeaturePTX<int version>:
SubtargetFeature<"ptx"# version, "PTXVersion",
"" # version,
"Use PTX version " # version>;

foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
def SM#version: FeatureSM<version>;
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;

def SM90a: FeatureSM<"90a", 901>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in
def PTX#version: FeaturePTX<version>;

//===----------------------------------------------------------------------===//
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,

ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);

// Re-map SM version numbers, SmVersion carries the regular SMs which do
// have relative order, while FullSmVersion allows distinguishing sm_90 from
// sm_90a, which would *not* be a subset of sm_91.
SmVersion = getSmVersion();

// Set default to PTX 6.0 (CUDA 9.0)
if (PTXVersion == 0) {
PTXVersion = 60;
Expand All @@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
SmVersion(20), TM(TM),
FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}

bool NVPTXSubtarget::hasImageHandles() const {
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;

// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
// Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
// sm_90a == 901
unsigned int FullSmVersion;

// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
// FullSmVersion.
unsigned int SmVersion;

const NVPTXTargetMachine &TM;
Expand Down Expand Up @@ -80,7 +85,15 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
unsigned int getSmVersion() const { return SmVersion; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
// GPUs with "a" suffix have include architecture-accelerated features that
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasAAFeatures() allows distinguishing such
// GPU variants from the base GPU architecture.
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
std::string getTargetName() const { return TargetName; }

// Get maximum value of required alignments among the supported data types.
Expand Down

0 comments on commit 631c6e8

Please sign in to comment.