Skip to content

Commit

Permalink
[NVPTX] Add support for maxclusterrank in launch_bounds (#66496)
Browse files Browse the repository at this point in the history
Since SM_90 CUDA supports specifying additional argument to the
launch_bounds attribute: maxBlocksPerCluster, to express the maximum
number of CTAs that can be part of the cluster. See:
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cluster-dimension-directives-maxclusterrank
and

https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#launch-bounds
for details.
  • Loading branch information
jchlanda committed Sep 27, 2023
1 parent d1653c8 commit dfab31b
Show file tree
Hide file tree
Showing 15 changed files with 262 additions and 61 deletions.
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,8 @@ def CUDAInvalidTarget : InheritableAttr {

def CUDALaunchBounds : InheritableAttr {
let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">];
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>,
ExprArgument<"MaxBlocks", 1>];
let LangOpts = [CUDA];
let Subjects = SubjectList<[ObjCMethod, FunctionLike]>;
// An AST node is created for this attribute, but is not used by other parts
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11853,6 +11853,10 @@ def err_sycl_special_type_num_init_method : Error<
"types with 'sycl_special_class' attribute must have one and only one '__init' "
"method defined">;

def warn_cuda_maxclusterrank_sm_90 : Warning<
"maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
"%1 attribute">, InGroup<IgnoredAttributes>;

def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
"have a bit size of at least %select{2|1}0">;
def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit "
Expand Down
5 changes: 3 additions & 2 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -11061,12 +11061,13 @@ class Sema final {
/// Create an CUDALaunchBoundsAttr attribute.
CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI,
Expr *MaxThreads,
Expr *MinBlocks);
Expr *MinBlocks,
Expr *MaxBlocks);

/// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular
/// declaration.
void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *MaxThreads, Expr *MinBlocks);
Expr *MaxThreads, Expr *MinBlocks, Expr *MaxBlocks);

/// AddModeAttr - Adds a mode attribute to a particular declaration.
void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {

bool hasBitIntType() const override { return true; }
bool hasBFloat16Type() const override { return true; }

CudaArch getGPU() const { return GPU; }
};
} // namespace targets
} // namespace clang
Expand Down
12 changes: 10 additions & 2 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
MaxThreads.getExtValue());

// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
// not specified in __launch_bounds__ or if the user specified a 0 value,
// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
// was not specified in __launch_bounds__ or if the user specified a 0 value,
// we don't have to add a PTX directive.
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
Expand All @@ -307,6 +307,14 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
MinBlocks.getExtValue());
}
if (Attr->getMaxBlocks()) {
llvm::APSInt MaxBlocks(32);
MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
if (MaxBlocks > 0)
// Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
MaxBlocks.getExtValue());
}
}

std::unique_ptr<TargetCodeGenInfo>
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Parse/ParseOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3739,7 +3739,8 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) {
continue;
if (auto *A = Actions.CreateLaunchBoundsAttr(
PA, PA.getArgAsExpr(0),
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr))
PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr,
PA.getNumArgs() > 2 ? PA.getArgAsExpr(2) : nullptr))
Attrs.push_back(A);
continue;
default:
Expand Down
43 changes: 34 additions & 9 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

#include "../Basic/Targets/NVPTX.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
Expand Down Expand Up @@ -5608,6 +5609,13 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
return false;
}

// Helper to get CudaArch.
static CudaArch getCudaArch(const TargetInfo &TI) {
if (!TI.getTriple().isNVPTX())
llvm_unreachable("getCudaArch is only valid for NVPTX triple");
return static_cast<const targets::NVPTXTargetInfo *>(&TI)->getGPU();
}

// Checks whether an argument of launch_bounds attribute is
// acceptable, performs implicit conversion to Rvalue, and returns
// non-nullptr Expr result on success. Otherwise, it returns nullptr
Expand Down Expand Up @@ -5651,34 +5659,51 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,

CUDALaunchBoundsAttr *
Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
Expr *MinBlocks) {
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks);
Expr *MinBlocks, Expr *MaxBlocks) {
CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
if (MaxThreads == nullptr)
if (!MaxThreads)
return nullptr;

if (MinBlocks) {
MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
if (MinBlocks == nullptr)
if (!MinBlocks)
return nullptr;
}

if (MaxBlocks) {
// '.maxclusterrank' ptx directive requires .target sm_90 or higher.
auto SM = getCudaArch(Context.getTargetInfo());
if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
<< CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
// Ignore it by setting MaxBlocks to null;
MaxBlocks = nullptr;
} else {
MaxBlocks = makeLaunchBoundsArgExpr(*this, MaxBlocks, TmpAttr, 2);
if (!MaxBlocks)
return nullptr;
}
}

return ::new (Context)
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks);
CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks);
}

void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *MaxThreads, Expr *MinBlocks) {
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks))
Expr *MaxThreads, Expr *MinBlocks,
Expr *MaxBlocks) {
if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks, MaxBlocks))
D->addAttr(Attr);
}

static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 2))
if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 3))
return;

S.AddLaunchBoundsAttr(D, AL, AL.getArgAsExpr(0),
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr);
AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
}

static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
Expand Down
10 changes: 9 additions & 1 deletion clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,15 @@ static void instantiateDependentCUDALaunchBoundsAttr(
MinBlocks = Result.getAs<Expr>();
}

S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
Expr *MaxBlocks = nullptr;
if (Attr.getMaxBlocks()) {
Result = S.SubstExpr(Attr.getMaxBlocks(), TemplateArgs);
if (Result.isInvalid())
return;
MaxBlocks = Result.getAs<Expr>();
}

S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks, MaxBlocks);
}

static void
Expand Down
69 changes: 69 additions & 0 deletions clang/test/CodeGenCUDA/launch-bounds.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -target-cpu sm_90 -DUSE_MAX_BLOCKS -fcuda-is-device -emit-llvm -o - | FileCheck -check-prefix=CHECK_MAX_BLOCKS %s

#include "Inputs/cuda.h"

#define MAX_THREADS_PER_BLOCK 256
#define MIN_BLOCKS_PER_MP 2
#ifdef USE_MAX_BLOCKS
#define MAX_BLOCKS_PER_MP 4
#endif

// Test both max threads per block and Min cta per sm.
extern "C" {
Expand All @@ -17,6 +21,21 @@ Kernel1()
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
// Test max threads per block and min/max cta per sm.
extern "C" {
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP )
Kernel1_sm_90()
{
}
}

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
#endif // USE_MAX_BLOCKS

// Test only max threads per block. Min cta per sm defaults to 0, and
// CodeGen doesn't output a zero value for minctasm.
extern "C" {
Expand Down Expand Up @@ -50,6 +69,20 @@ template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}

#ifdef USE_MAX_BLOCKS
template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
__global__ void
__launch_bounds__(max_threads_per_block, min_blocks_per_mp, max_blocks_per_mp)
Kernel4_sm_90()
{
}
template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
#endif //USE_MAX_BLOCKS

const int constint = 100;
template <int max_threads_per_block, int min_blocks_per_mp>
__global__ void
Expand All @@ -63,6 +96,23 @@ template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}

#ifdef USE_MAX_BLOCKS

template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
__global__ void
__launch_bounds__(max_threads_per_block + constint,
min_blocks_per_mp + max_threads_per_block,
max_blocks_per_mp + max_threads_per_block)
Kernel5_sm_90()
{
}
template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();

// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
#endif //USE_MAX_BLOCKS

// Make sure we don't emit negative launch bounds values.
__global__ void
__launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
Expand All @@ -80,7 +130,26 @@ Kernel7()
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",

#ifdef USE_MAX_BLOCKS
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
Kernel7_sm_90()
{
}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
#endif // USE_MAX_BLOCKS

const char constchar = 12;
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12

#ifdef USE_MAX_BLOCKS
const char constchar_2 = 14;
__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {}
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
#endif // USE_MAX_BLOCKS
7 changes: 5 additions & 2 deletions clang/test/SemaCUDA/launch_bounds.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_75 -verify %s

#include "Inputs/cuda.h"

Expand All @@ -11,8 +11,9 @@ __launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-

__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(128, 2, -8) void TestNegArg2(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}

__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}}
__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}

int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}
Expand Down Expand Up @@ -47,3 +48,5 @@ __launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error

template <int... Args>
__launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}

__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}}
57 changes: 57 additions & 0 deletions clang/test/SemaCUDA/launch_bounds_sm_90.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_90 -verify %s

#include "Inputs/cuda.h"

__launch_bounds__(128, 7) void Test2Args(void);
__launch_bounds__(128) void Test1Arg(void);

__launch_bounds__(0xffffffff) void TestMaxArg(void);
__launch_bounds__(0x100000000) void TestTooBigArg(void); // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
__launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}
__launch_bounds__(1, 1, 0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}}

__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(-128, 1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
__launch_bounds__(128, -1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(128, 1, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
// expected-warning@20 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@20 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
__launch_bounds__(-128, -1, 7) void TestNegArg2(void);
// expected-warning@23 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@23 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
__launch_bounds__(-128, 1, -7) void TestNegArg2(void);
// expected-warning@27 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}}
// expected-warning@27 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}}
// expected-warning@27 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}}
__launch_bounds__(-128, -1, -7) void TestNegArg2(void);


__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}}
__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}

int TestNoFunction __launch_bounds__(128, 7, 13); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}}

__launch_bounds__(true) void TestBool(void);
__launch_bounds__(128, 1, 128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}
__launch_bounds__(128, 1, (void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}

int nonconstint = 256;
__launch_bounds__(125, 1, nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}}

const int constint = 512;
__launch_bounds__(128, 1, constint) void TestConstInt(void);
__launch_bounds__(128, 1, constint * 2 + 3) void TestConstIntExpr(void);

template <int a, int b, int c> __launch_bounds__(a, b, c) void TestTemplate2Args(void) {}
template void TestTemplate2Args<128,7, 13>(void);

template <int a, int b, int c>
__launch_bounds__(a + b, c + constint, a + b + c + constint) void TestTemplateExpr(void) {}
template void TestTemplateExpr<128+constint, 3, 7>(void);

template <int... Args>
__launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}

template <int... Args>
__launch_bounds__(1, 22, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}}

0 comments on commit dfab31b

Please sign in to comment.