350 changes: 79 additions & 271 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ const char *CudaVersionToString(CudaVersion V) {
return "10.0";
case CudaVersion::CUDA_101:
return "10.1";
case CudaVersion::CUDA_102:
return "10.2";
case CudaVersion::CUDA_110:
return "11.0";
}
llvm_unreachable("invalid enum");
}
Expand All @@ -42,253 +46,87 @@ CudaVersion CudaStringToVersion(const llvm::Twine &S) {
.Case("9.2", CudaVersion::CUDA_92)
.Case("10.0", CudaVersion::CUDA_100)
.Case("10.1", CudaVersion::CUDA_101)
.Case("10.2", CudaVersion::CUDA_102)
.Case("11.0", CudaVersion::CUDA_110)
.Default(CudaVersion::UNKNOWN);
}

/// Returns the canonical name of GPU architecture \p A, e.g. "sm_70" or
/// "gfx906". CudaArch::UNKNOWN yields "unknown".
///
/// CudaArch::LAST has no name of its own: it breaks out of the switch and
/// reaches llvm_unreachable, as does any value outside the enumerators listed
/// here.
const char *CudaArchToString(CudaArch A) {
  switch (A) {
  case CudaArch::LAST:
    break;
  case CudaArch::UNKNOWN:
    return "unknown";
  case CudaArch::SM_20:
    return "sm_20";
  case CudaArch::SM_21:
    return "sm_21";
  case CudaArch::SM_30:
    return "sm_30";
  case CudaArch::SM_32:
    return "sm_32";
  case CudaArch::SM_35:
    return "sm_35";
  case CudaArch::SM_37:
    return "sm_37";
  case CudaArch::SM_50:
    return "sm_50";
  case CudaArch::SM_52:
    return "sm_52";
  case CudaArch::SM_53:
    return "sm_53";
  case CudaArch::SM_60:
    return "sm_60";
  case CudaArch::SM_61:
    return "sm_61";
  case CudaArch::SM_62:
    return "sm_62";
  case CudaArch::SM_70:
    return "sm_70";
  case CudaArch::SM_72:
    return "sm_72";
  case CudaArch::SM_75:
    return "sm_75";
  // SM_80 is used by the version and target-define code elsewhere, so it
  // needs a name here too; without this case it would hit llvm_unreachable.
  case CudaArch::SM_80:
    return "sm_80";
  case CudaArch::GFX600: // tahiti
    return "gfx600";
  case CudaArch::GFX601: // pitcairn, verde, oland,hainan
    return "gfx601";
  case CudaArch::GFX700: // kaveri
    return "gfx700";
  case CudaArch::GFX701: // hawaii
    return "gfx701";
  case CudaArch::GFX702: // 290,290x,R390,R390x
    return "gfx702";
  case CudaArch::GFX703: // kabini mullins
    return "gfx703";
  case CudaArch::GFX704: // bonaire
    return "gfx704";
  case CudaArch::GFX801: // carrizo
    return "gfx801";
  case CudaArch::GFX802: // tonga,iceland
    return "gfx802";
  case CudaArch::GFX803: // fiji,polaris10
    return "gfx803";
  case CudaArch::GFX810: // stoney
    return "gfx810";
  case CudaArch::GFX900: // vega, instinct
    return "gfx900";
  case CudaArch::GFX902: // TBA
    return "gfx902";
  case CudaArch::GFX904: // TBA
    return "gfx904";
  case CudaArch::GFX906: // TBA
    return "gfx906";
  case CudaArch::GFX908: // TBA
    return "gfx908";
  case CudaArch::GFX909: // TBA
    return "gfx909";
  case CudaArch::GFX1010: // TBA
    return "gfx1010";
  case CudaArch::GFX1011: // TBA
    return "gfx1011";
  case CudaArch::GFX1012: // TBA
    return "gfx1012";
  }
  llvm_unreachable("invalid enum");
}
namespace {
/// One row of the architecture name table: ties a CudaArch enumerator to the
/// name of the real architecture and the name of its virtual architecture.
///
/// Kept in an anonymous namespace because it is a helper type for this file
/// only and should not be visible to (or collide with) other translation
/// units.
struct CudaArchToStringMap {
  CudaArch arch;                 // enumerator this row describes
  const char *arch_name;         // real architecture name, e.g. "sm_70"
  const char *virtual_arch_name; // virtual architecture name, e.g. "compute_70"
};
} // namespace

/// Maps an architecture name such as "sm_70" or "gfx906" to its CudaArch
/// enumerator. The match is exact; any string that is not listed below
/// returns CudaArch::UNKNOWN.
CudaArch StringToCudaArch(llvm::StringRef S) {
  return llvm::StringSwitch<CudaArch>(S)
      .Case("sm_20", CudaArch::SM_20)
      .Case("sm_21", CudaArch::SM_21)
      .Case("sm_30", CudaArch::SM_30)
      .Case("sm_32", CudaArch::SM_32)
      .Case("sm_35", CudaArch::SM_35)
      .Case("sm_37", CudaArch::SM_37)
      .Case("sm_50", CudaArch::SM_50)
      .Case("sm_52", CudaArch::SM_52)
      .Case("sm_53", CudaArch::SM_53)
      .Case("sm_60", CudaArch::SM_60)
      .Case("sm_61", CudaArch::SM_61)
      .Case("sm_62", CudaArch::SM_62)
      .Case("sm_70", CudaArch::SM_70)
      .Case("sm_72", CudaArch::SM_72)
      .Case("sm_75", CudaArch::SM_75)
      // SM_80 is a known architecture elsewhere in this file; without this
      // entry "sm_80" would be reported as UNKNOWN.
      .Case("sm_80", CudaArch::SM_80)
      .Case("gfx600", CudaArch::GFX600)
      .Case("gfx601", CudaArch::GFX601)
      .Case("gfx700", CudaArch::GFX700)
      .Case("gfx701", CudaArch::GFX701)
      .Case("gfx702", CudaArch::GFX702)
      .Case("gfx703", CudaArch::GFX703)
      .Case("gfx704", CudaArch::GFX704)
      .Case("gfx801", CudaArch::GFX801)
      .Case("gfx802", CudaArch::GFX802)
      .Case("gfx803", CudaArch::GFX803)
      .Case("gfx810", CudaArch::GFX810)
      .Case("gfx900", CudaArch::GFX900)
      .Case("gfx902", CudaArch::GFX902)
      .Case("gfx904", CudaArch::GFX904)
      .Case("gfx906", CudaArch::GFX906)
      .Case("gfx908", CudaArch::GFX908)
      .Case("gfx909", CudaArch::GFX909)
      .Case("gfx1010", CudaArch::GFX1010)
      .Case("gfx1011", CudaArch::GFX1011)
      .Case("gfx1012", CudaArch::GFX1012)
      .Default(CudaArch::UNKNOWN);
}
// Row builders for the arch_names table; all three are #undef'd right after
// the table so they do not leak into the rest of the file.
//   SM2(sm, ca) - NVIDIA arch "sm_<sm>" with an explicit virtual arch name.
//   SM(sm)      - NVIDIA arch "sm_<sm>" whose virtual arch is "compute_<sm>".
//   GFX(gpu)    - AMD arch "gfx<gpu>"; virtual arch is always "compute_amdgcn".
#define SM2(sm, ca)                                                            \
  { CudaArch::SM_##sm, "sm_" #sm, ca }
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu)                                                               \
  { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
/// Table of every named GPU architecture with its real and virtual
/// architecture names. It is searched linearly by the lookup helpers.
///
/// The table is read-only data private to this file, hence `static const`:
/// nothing may modify it at run time and it gets no external symbol.
static const CudaArchToStringMap arch_names[] = {
    // clang-format off
    SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
    SM(30), SM(32), SM(35), SM(37),  // Kepler
    SM(50), SM(52), SM(53),          // Maxwell
    SM(60), SM(61), SM(62),          // Pascal
    SM(70), SM(72),                  // Volta
    SM(75),                          // Turing
    SM(80),                          // Ampere
    GFX(600), // tahiti
    GFX(601), // pitcairn, verde, oland,hainan
    GFX(700), // kaveri
    GFX(701), // hawaii
    GFX(702), // 290,290x,R390,R390x
    GFX(703), // kabini mullins
    GFX(704), // bonaire
    GFX(801), // carrizo
    GFX(802), // tonga,iceland
    GFX(803), // fiji,polaris10
    GFX(810), // stoney
    GFX(900), // vega, instinct
    GFX(902), GFX(904), GFX(906), GFX(908), GFX(909),
    GFX(1010), GFX(1011), GFX(1012),
    // clang-format on
};
#undef SM
#undef SM2
#undef GFX

const char *CudaVirtualArchToString(CudaVirtualArch A) {
switch (A) {
case CudaVirtualArch::UNKNOWN:
const char *CudaArchToString(CudaArch A) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[A](const CudaArchToStringMap &map) { return A == map.arch; });
if (result == std::end(arch_names))
return "unknown";
case CudaVirtualArch::COMPUTE_20:
return "compute_20";
case CudaVirtualArch::COMPUTE_30:
return "compute_30";
case CudaVirtualArch::COMPUTE_32:
return "compute_32";
case CudaVirtualArch::COMPUTE_35:
return "compute_35";
case CudaVirtualArch::COMPUTE_37:
return "compute_37";
case CudaVirtualArch::COMPUTE_50:
return "compute_50";
case CudaVirtualArch::COMPUTE_52:
return "compute_52";
case CudaVirtualArch::COMPUTE_53:
return "compute_53";
case CudaVirtualArch::COMPUTE_60:
return "compute_60";
case CudaVirtualArch::COMPUTE_61:
return "compute_61";
case CudaVirtualArch::COMPUTE_62:
return "compute_62";
case CudaVirtualArch::COMPUTE_70:
return "compute_70";
case CudaVirtualArch::COMPUTE_72:
return "compute_72";
case CudaVirtualArch::COMPUTE_75:
return "compute_75";
case CudaVirtualArch::COMPUTE_AMDGCN:
return "compute_amdgcn";
}
llvm_unreachable("invalid enum");
return result->arch_name;
}

CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
return llvm::StringSwitch<CudaVirtualArch>(S)
.Case("compute_20", CudaVirtualArch::COMPUTE_20)
.Case("compute_30", CudaVirtualArch::COMPUTE_30)
.Case("compute_32", CudaVirtualArch::COMPUTE_32)
.Case("compute_35", CudaVirtualArch::COMPUTE_35)
.Case("compute_37", CudaVirtualArch::COMPUTE_37)
.Case("compute_50", CudaVirtualArch::COMPUTE_50)
.Case("compute_52", CudaVirtualArch::COMPUTE_52)
.Case("compute_53", CudaVirtualArch::COMPUTE_53)
.Case("compute_60", CudaVirtualArch::COMPUTE_60)
.Case("compute_61", CudaVirtualArch::COMPUTE_61)
.Case("compute_62", CudaVirtualArch::COMPUTE_62)
.Case("compute_70", CudaVirtualArch::COMPUTE_70)
.Case("compute_72", CudaVirtualArch::COMPUTE_72)
.Case("compute_75", CudaVirtualArch::COMPUTE_75)
.Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN)
.Default(CudaVirtualArch::UNKNOWN);
const char *CudaArchToVirtualArchString(CudaArch A) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[A](const CudaArchToStringMap &map) { return A == map.arch; });
if (result == std::end(arch_names))
return "unknown";
return result->virtual_arch_name;
}

CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
switch (A) {
case CudaArch::LAST:
break;
case CudaArch::UNKNOWN:
return CudaVirtualArch::UNKNOWN;
case CudaArch::SM_20:
case CudaArch::SM_21:
return CudaVirtualArch::COMPUTE_20;
case CudaArch::SM_30:
return CudaVirtualArch::COMPUTE_30;
case CudaArch::SM_32:
return CudaVirtualArch::COMPUTE_32;
case CudaArch::SM_35:
return CudaVirtualArch::COMPUTE_35;
case CudaArch::SM_37:
return CudaVirtualArch::COMPUTE_37;
case CudaArch::SM_50:
return CudaVirtualArch::COMPUTE_50;
case CudaArch::SM_52:
return CudaVirtualArch::COMPUTE_52;
case CudaArch::SM_53:
return CudaVirtualArch::COMPUTE_53;
case CudaArch::SM_60:
return CudaVirtualArch::COMPUTE_60;
case CudaArch::SM_61:
return CudaVirtualArch::COMPUTE_61;
case CudaArch::SM_62:
return CudaVirtualArch::COMPUTE_62;
case CudaArch::SM_70:
return CudaVirtualArch::COMPUTE_70;
case CudaArch::SM_72:
return CudaVirtualArch::COMPUTE_72;
case CudaArch::SM_75:
return CudaVirtualArch::COMPUTE_75;
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
return CudaVirtualArch::COMPUTE_AMDGCN;
}
llvm_unreachable("invalid enum");
CudaArch StringToCudaArch(llvm::StringRef S) {
auto result = std::find_if(
std::begin(arch_names), std::end(arch_names),
[S](const CudaArchToStringMap &map) { return S == map.arch_name; });
if (result == std::end(arch_names))
return CudaArch::UNKNOWN;
return result->arch;
}

CudaVersion MinVersionForCudaArch(CudaArch A) {
switch (A) {
case CudaArch::LAST:
break;
case CudaArch::UNKNOWN:
if (A == CudaArch::UNKNOWN)
return CudaVersion::UNKNOWN;

// AMD GPUs do not depend on CUDA versions.
if (IsAMDGpuArch(A))
return CudaVersion::CUDA_70;

switch (A) {
case CudaArch::SM_20:
case CudaArch::SM_21:
case CudaArch::SM_30:
Expand All @@ -309,53 +147,23 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
return CudaVersion::CUDA_91;
case CudaArch::SM_75:
return CudaVersion::CUDA_100;
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
return CudaVersion::CUDA_70;
case CudaArch::SM_80:
return CudaVersion::CUDA_110;
default:
llvm_unreachable("invalid enum");
}
llvm_unreachable("invalid enum");
}

CudaVersion MaxVersionForCudaArch(CudaArch A) {
// AMD GPUs do not depend on CUDA versions.
if (IsAMDGpuArch(A))
return CudaVersion::LATEST;

switch (A) {
case CudaArch::UNKNOWN:
return CudaVersion::UNKNOWN;
case CudaArch::SM_20:
case CudaArch::SM_21:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
return CudaVersion::CUDA_80;
default:
return CudaVersion::LATEST;
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
if (!Feature.startswith("+ptx"))
continue;
PTXVersion = llvm::StringSwitch<unsigned>(Feature)
.Case("+ptx70", 70)
.Case("+ptx65", 65)
.Case("+ptx64", 64)
.Case("+ptx63", 63)
.Case("+ptx61", 61)
Expand Down Expand Up @@ -231,6 +233,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
return "720";
case CudaArch::SM_75:
return "750";
case CudaArch::SM_80:
return "800";
}
llvm_unreachable("unhandled CudaArch");
}();
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4992,6 +4992,7 @@ void CGOpenMPRuntimeNVPTX::processRequiresDirective(
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
case CudaArch::SM_80:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
Expand Down Expand Up @@ -5049,6 +5050,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
case CudaArch::SM_80:
return {84, 32};
case CudaArch::GFX600:
case CudaArch::GFX601:
Expand Down
74 changes: 42 additions & 32 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,22 @@ void CudaInstallationDetector::ParseCudaVersionFile(llvm::StringRef V) {
return;
DetectedVersion = join_items(".", VersionParts[0], VersionParts[1]);
Version = CudaStringToVersion(DetectedVersion);
if (Version != CudaVersion::UNKNOWN)
if (Version != CudaVersion::UNKNOWN) {
// TODO(tra): remove the warning once we have all features of 10.2 and 11.0
// implemented.
DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
return;
}

Version = CudaVersion::LATEST;
Version = CudaVersion::LATEST_SUPPORTED;
DetectedVersionIsNotSupported = true;
}

/// Emits diag::warn_drv_unknown_cuda_version if the detected CUDA version was
/// flagged as not supported; does nothing otherwise.
///
/// The diagnostic receives two operands: the version string that was detected
/// and the name of CudaVersion::LATEST_SUPPORTED.
void CudaInstallationDetector::WarnIfUnsupportedVersion() {
  if (!DetectedVersionIsNotSupported)
    return;

  // Exactly one operand chain: the detected version, then the newest version
  // that is fully supported.
  D.Diag(diag::warn_drv_unknown_cuda_version)
      << DetectedVersion
      << CudaVersionToString(CudaVersion::LATEST_SUPPORTED);
}

CudaInstallationDetector::CudaInstallationDetector(
Expand Down Expand Up @@ -161,13 +166,13 @@ CudaInstallationDetector::CudaInstallationDetector(
// CUDA-9+ uses single libdevice file for all GPU variants.
std::string FilePath = LibDevicePath + "/libdevice.10.bc";
if (FS.exists(FilePath)) {
for (const char *GpuArchName :
{"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
"sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
const CudaArch GpuArch = StringToCudaArch(GpuArchName);
if (Version >= MinVersionForCudaArch(GpuArch) &&
Version <= MaxVersionForCudaArch(GpuArch))
LibDeviceMap[GpuArchName] = FilePath;
for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
++Arch) {
CudaArch GpuArch = static_cast<CudaArch>(Arch);
if (!IsNVIDIAGpuArch(GpuArch))
continue;
std::string GpuArchName(CudaArchToString(GpuArch));
LibDeviceMap[GpuArchName] = FilePath;
}
}
} else {
Expand Down Expand Up @@ -471,10 +476,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
continue;
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
const char *Arch =
(II.getType() == types::TY_PP_Asm)
? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
: gpu_arch_str;
const char *Arch = (II.getType() == types::TY_PP_Asm)
? CudaArchToVirtualArchString(gpu_arch)
: gpu_arch_str;
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
Arch + ",file=" + II.getFilename()));
}
Expand Down Expand Up @@ -640,24 +644,30 @@ void CudaToolChain::addClangTargetOptions(
// by new PTX version, so we need to raise PTX level to enable them in NVPTX
// back-end.
const char *PtxFeature = nullptr;
switch(CudaInstallation.version()) {
case CudaVersion::CUDA_101:
PtxFeature = "+ptx64";
break;
case CudaVersion::CUDA_100:
PtxFeature = "+ptx63";
break;
case CudaVersion::CUDA_92:
PtxFeature = "+ptx61";
break;
case CudaVersion::CUDA_91:
PtxFeature = "+ptx61";
break;
case CudaVersion::CUDA_90:
PtxFeature = "+ptx60";
break;
default:
PtxFeature = "+ptx42";
switch (CudaInstallation.version()) {
case CudaVersion::CUDA_110:
PtxFeature = "+ptx70";
break;
case CudaVersion::CUDA_102:
PtxFeature = "+ptx65";
break;
case CudaVersion::CUDA_101:
PtxFeature = "+ptx64";
break;
case CudaVersion::CUDA_100:
PtxFeature = "+ptx63";
break;
case CudaVersion::CUDA_92:
PtxFeature = "+ptx61";
break;
case CudaVersion::CUDA_91:
PtxFeature = "+ptx61";
break;
case CudaVersion::CUDA_90:
PtxFeature = "+ptx60";
break;
default:
PtxFeature = "+ptx42";
}
CC1Args.append({"-target-feature", PtxFeature});
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
Expand Down
43 changes: 29 additions & 14 deletions clang/test/Driver/cuda-detect.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,49 +51,64 @@
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE20
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_32 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE20
// sm_30, sm_6x map to compute_30.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
// sm_5x is a special case. Maps to compute_30 for cuda-7.x only.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
// sm_35 and sm_37 -> compute_35
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_37 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
// sm_5x -> compute_50 for CUDA-8.0 and newer.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE50

// CUDA-9+ uses the same libdevice for all GPU variants:
// RUN: %clang -### -v --target=x86_64-unknown-linux --cuda-gpu-arch=sm_30 \
// RUN: --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON64 \
// RUN: -check-prefixes PTX60,LIBDEVICE,LIBDEVICE10
// RUN: %clang -### -v --target=x86_64-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON64 \
// RUN: -check-prefixes PTX60,LIBDEVICE,LIBDEVICE10
// RUN: %clang -### -v --target=x86_64-unknown-linux --cuda-gpu-arch=sm_60 \
// RUN: --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON64 \
// RUN: -check-prefixes PTX60,LIBDEVICE,LIBDEVICE10


// Verify that -nocudainc prevents adding include path to CUDA headers.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35

// We should not add any CUDA include paths if there's no valid CUDA installation
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
Expand Down Expand Up @@ -154,16 +169,16 @@
// MISSINGLIBDEVICE: error: cannot find libdevice for sm_20.

// COMMON: "-triple" "nvptx-nvidia-cuda"
// COMMON64: "-triple" "nvptx64-nvidia-cuda"
// COMMON-SAME: "-fcuda-is-device"
// LIBDEVICE-SAME: "-mlink-builtin-bitcode"
// NOLIBDEVICE-NOT: "-mlink-builtin-bitcode"
// LIBDEVICE10-SAME: libdevice.10.bc
// LIBDEVICE20-SAME: libdevice.compute_20.10.bc
// LIBDEVICE30-SAME: libdevice.compute_30.10.bc
// LIBDEVICE35-SAME: libdevice.compute_35.10.bc
// LIBDEVICE50-SAME: libdevice.compute_50.10.bc
// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
// LIBDEVICE-SAME: "-target-feature" "+ptx42"
// NOLIBDEVICE-NOT: "-target-feature" "+ptx42"
// PTX42-SAME: "-target-feature" "+ptx42"
// PTX60-SAME: "-target-feature" "+ptx60"
// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
// NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/cuda/include"
// CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
// Subtarget feature "sm_75" (Turing): sets SmVersion to 75.
def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
"Target SM 7.5">;
// Subtarget feature "sm_80" (Ampere): sets SmVersion to 80.
def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80",
"Target SM 8.0">;

// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
Expand All @@ -77,6 +79,10 @@ def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
"Use PTX version 6.3">;
// Subtarget feature "ptx64": sets PTXVersion to 64 (PTX 6.4).
def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
"Use PTX version 6.4">;
// Subtarget feature "ptx65": sets PTXVersion to 65 (PTX 6.5).
def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65",
"Use PTX version 6.5">;
// Subtarget feature "ptx70": sets PTXVersion to 70 (PTX 7.0).
def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70",
"Use PTX version 7.0">;

//===----------------------------------------------------------------------===//
// NVPTX supported processors.
Expand All @@ -100,6 +106,7 @@ def : Proc<"sm_62", [SM62, PTX50]>;
// Each Proc record pairs a processor name with an SM feature and a PTX
// version feature (presumably the PTX level that processor gets by default --
// confirm against the Proc class definition).
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
def : Proc<"sm_75", [SM75, PTX63]>;
def : Proc<"sm_80", [SM80, PTX70]>;

// InstrInfo record for the NVPTX target; the body is empty, so no fields are
// overridden here.
def NVPTXInstrInfo : InstrInfo {
}
Expand Down