diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2842b63197ff1..e14c92eae0afe 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -194,6 +194,12 @@ Removed Compiler Flags Attribute Changes in Clang -------------------------- +- Introduced a new function attribute ``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or + ``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target. This attribute can be + attached to HIP or OpenCL kernel function definitions to provide an optimization hint. The parameters + ``x``, ``y``, and ``z`` specify the maximum number of workgroups for the respective dimensions, + and each must be a positive integer when provided. The parameter ``x`` is required, while ``y`` and + ``z`` are optional with default value of 1. Improvements to Clang's diagnostics ----------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 080340669b60a..63efd85dcd4e5 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2054,6 +2054,13 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def AMDGPUMaxNumWorkGroups : InheritableAttr { + let Spellings = [Clang<"amdgpu_max_num_work_groups", 0>]; + let Args = [ExprArgument<"MaxNumWorkGroupsX">, ExprArgument<"MaxNumWorkGroupsY", 1>, ExprArgument<"MaxNumWorkGroupsZ", 1>]; + let Documentation = [AMDGPUMaxNumWorkGroupsDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; +} + def AMDGPUKernelCall : DeclOrTypeAttr { let Spellings = [Clang<"amdgpu_kernel">]; let Documentation = [Undocumented]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2c07cd09b0d5b..d61f96ade557d 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2741,6 +2741,33 @@ An error will be given if: }]; } +def AMDGPUMaxNumWorkGroupsDocs : Documentation { + let Category = DocCatAMDGPUAttributes; + let Content = [{ +This attribute specifies the max number of work groups when the kernel +is dispatched. + +Clang supports the +``__attribute__((amdgpu_max_num_work_groups(, , )))`` or +``[[clang::amdgpu_max_num_work_groups(, , )]]`` attribute for the +AMDGPU target. This attribute may be attached to HIP or OpenCL kernel function +definitions and is an optimization hint. + +The ```` parameter specifies the maximum number of work groups in the x dimension. +Similarly ```` and ```` are for the y and z dimensions respectively. +Each of the three values must be greater than 0 when provided. The ```` parameter +is required, while ```` and ```` are optional with default value of 1. + +If specified, the AMDGPU target backend might be able to produce better machine +code. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes. + }]; +} + def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { let Content = [{ Clang supports several different calling conventions, depending on the target diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 267c79cc057cb..b226851f03038 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3911,6 +3911,16 @@ class Sema final { void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max); + /// Create an AMDGPUMaxNumWorkGroupsAttr attribute. + AMDGPUMaxNumWorkGroupsAttr * + CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, Expr *XExpr, + Expr *YExpr, Expr *ZExpr); + + /// addAMDGPUMaxNumWorkGroupsAttr - Adds an amdgpu_max_num_work_groups + /// attribute to a particular declaration. + void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XExpr, Expr *YExpr, Expr *ZExpr); + DLLImportAttr *mergeDLLImportAttr(Decl *D, const AttributeCommonInfo &CI); DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI); MSInheritanceAttr *mergeMSInheritanceAttr(Decl *D, diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 03ac6b78598fc..44e86c0b40f68 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -356,6 +356,29 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (const auto *Attr = FD->getAttr()) { + uint32_t X = Attr->getMaxNumWorkGroupsX() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + // Y and Z dimensions default to 1 if not specified + uint32_t Y = Attr->getMaxNumWorkGroupsY() + ? Attr->getMaxNumWorkGroupsY() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue() + : 1; + uint32_t Z = Attr->getMaxNumWorkGroupsZ() + ? Attr->getMaxNumWorkGroupsZ() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue() + : 1; + + llvm::SmallString<32> AttrVal; + llvm::raw_svector_ostream OS(AttrVal); + OS << X << ',' << Y << ',' << Z; + + F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str()); + } } /// Emits control constants used to change per-architecture behaviour in the diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index c00120b59d396..e3da3e606435f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8079,6 +8079,65 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR)); } +static bool +checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr, + Expr *ZExpr, + const AMDGPUMaxNumWorkGroupsAttr &Attr) { + if (S.DiagnoseUnexpandedParameterPack(XExpr) || + (YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) || + (ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr))) + return true; + + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) || + (ZExpr && ZExpr->isValueDependent())) + return false; + + uint32_t NumWG = 0; + Expr *Exprs[3] = {XExpr, YExpr, ZExpr}; + for (int i = 0; i < 3; i++) { + if (Exprs[i]) { + if (!checkUInt32Argument(S, Attr, Exprs[i], NumWG, i, + /*StrictlyUnsigned=*/true)) + return true; + if (NumWG == 0) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero) + << &Attr << Exprs[i]->getSourceRange(); + return true; + } + } + } + + return false; +} + +AMDGPUMaxNumWorkGroupsAttr * +Sema::CreateAMDGPUMaxNumWorkGroupsAttr(const AttributeCommonInfo &CI, + Expr *XExpr, Expr *YExpr, Expr *ZExpr) { + AMDGPUMaxNumWorkGroupsAttr TmpAttr(Context, CI, XExpr, YExpr, ZExpr); + + if (checkAMDGPUMaxNumWorkGroupsArguments(*this, XExpr, YExpr, ZExpr, TmpAttr)) + return nullptr; + + return ::new (Context) + AMDGPUMaxNumWorkGroupsAttr(Context, CI, XExpr, YExpr, ZExpr); +} + +void Sema::addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XExpr, Expr *YExpr, + Expr *ZExpr) { + if (auto *Attr = CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr)) + D->addAttr(Attr); +} + +static void handleAMDGPUMaxNumWorkGroupsAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + Expr *YExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr; + Expr *ZExpr = (AL.getNumArgs() > 2) ? AL.getArgAsExpr(2) : nullptr; + S.addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr); +} + static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // If we try to apply it to a function pointer, don't warn, but don't @@ -9183,6 +9242,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AMDGPUNumVGPR: handleAMDGPUNumVGPRAttr(S, D, AL); break; + case ParsedAttr::AT_AMDGPUMaxNumWorkGroups: + handleAMDGPUMaxNumWorkGroupsAttr(S, D, AL); + break; case ParsedAttr::AT_AVRSignal: handleAVRSignalAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 20c2c93ac9c7b..8ef8bfdf2a7b5 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -607,6 +607,29 @@ static void instantiateDependentAMDGPUWavesPerEUAttr( S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr); } +static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const AMDGPUMaxNumWorkGroupsAttr &Attr, Decl *New) { + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs); + if (!ResultX.isUsable()) + return; + ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs); + if (!ResultY.isUsable()) + return; + ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs); + if (!ResultZ.isUsable()) + return; + + Expr *XExpr = ResultX.getAs(); + Expr *YExpr = ResultY.getAs(); + Expr *ZExpr = ResultZ.getAs(); + + S.addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr); +} + // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. @@ -792,6 +815,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *AMDGPUFlatWorkGroupSize, New); } + if (const auto *AMDGPUMaxNumWorkGroups = + dyn_cast(TmplAttr)) { + instantiateDependentAMDGPUMaxNumWorkGroupsAttr( + *this, TemplateArgs, *AMDGPUMaxNumWorkGroups, New); + } + if (const auto *ParamAttr = dyn_cast(TmplAttr)) { instantiateDependentHLSLParamModifierAttr(*this, TemplateArgs, ParamAttr, New); diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index a1642421af2c8..11a133fd1351d 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -40,12 +40,45 @@ __attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics __global__ void num_vgpr_64() { // CHECK: define{{.*}} amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]] } +__attribute__((amdgpu_max_num_work_groups(32, 4, 2))) // expected-no-diagnostics +__global__ void max_num_work_groups_32_4_2() { +// CHECK: define{{.*}} amdgpu_kernel void @_Z26max_num_work_groups_32_4_2v() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]] +} +__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics +__global__ void max_num_work_groups_32() { +// CHECK: define{{.*}} amdgpu_kernel void @_Z22max_num_work_groups_32v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]] +} +__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics +__global__ void max_num_work_groups_32_1() { +// CHECK: define{{.*}} amdgpu_kernel void @_Z24max_num_work_groups_32_1v() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]] +} + + + +template +__attribute__((amdgpu_max_num_work_groups(a, 4, 2))) +__global__ void template_a_4_2_max_num_work_groups() {} +template __global__ void template_a_4_2_max_num_work_groups<32>(); +// CHECK: define{{.*}} amdgpu_kernel void @_Z34template_a_4_2_max_num_work_groupsILj32EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]] + +template +__attribute__((amdgpu_max_num_work_groups(32, a, 2))) +__global__ void template_32_a_2_max_num_work_groups() {} +template __global__ void template_32_a_2_max_num_work_groups<4>(); +// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_a_2_max_num_work_groupsILj4EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]] + +template +__attribute__((amdgpu_max_num_work_groups(32, 4, a))) +__global__ void template_32_4_a_max_num_work_groups() {} +template __global__ void template_32_4_a_max_num_work_groups<2>(); +// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_4_a_max_num_work_groupsILj2EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]] // Make sure this is silently accepted on other targets. // NAMD-NOT: "amdgpu-flat-work-group-size" // NAMD-NOT: "amdgpu-waves-per-eu" // NAMD-NOT: "amdgpu-num-vgpr" // NAMD-NOT: "amdgpu-num-sgpr" +// NAMD-NOT: "amdgpu-max-num-work-groups" // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" @@ -53,5 +86,7 @@ __global__ void num_vgpr_64() { // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32" // CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1" // NOUB-NOT: "uniform-work-group-size"="true" diff --git a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl index b0dfc97b53b2c..5648bc13458e1 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -139,6 +139,46 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() { // CHECK: define{{.*}} amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]] } +__attribute__((amdgpu_max_num_work_groups(1, 1, 1))) // expected-no-diagnostics +kernel void max_num_work_groups_1_1_1() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_1() [[MAX_NUM_WORK_GROUPS_1_1_1:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(32, 1, 1))) // expected-no-diagnostics +kernel void max_num_work_groups_32_1_1() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(32, 8, 1))) // expected-no-diagnostics +kernel void max_num_work_groups_32_8_1() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_8_1() [[MAX_NUM_WORK_GROUPS_32_8_1:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(1, 1, 32))) // expected-no-diagnostics +kernel void max_num_work_groups_1_1_32() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_1_32() [[MAX_NUM_WORK_GROUPS_1_1_32:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(1, 8, 32))) // expected-no-diagnostics +kernel void max_num_work_groups_1_8_32() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_1_8_32() [[MAX_NUM_WORK_GROUPS_1_8_32:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(4, 8, 32))) // expected-no-diagnostics +kernel void max_num_work_groups_4_8_32() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_4_8_32() [[MAX_NUM_WORK_GROUPS_4_8_32:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(32))) // expected-no-diagnostics +kernel void max_num_work_groups_32() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]] +} + +__attribute__((amdgpu_max_num_work_groups(32,1))) // expected-no-diagnostics +kernel void max_num_work_groups_32_1() { +// CHECK: define{{.*}} amdgpu_kernel void @max_num_work_groups_32_1() [[MAX_NUM_WORK_GROUPS_32_1_1:#[0-9]+]] +} + void a_function() { // CHECK: define{{.*}} void @a_function() [[A_FUNCTION:#[0-9]+]] } @@ -189,5 +229,12 @@ kernel void default_kernel() { // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,1" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,1,1" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_8_1]] = {{.*}} "amdgpu-max-num-workgroups"="32,8,1" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_1_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,1,32" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_1_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="1,8,32" +// CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_4_8_32]] = {{.*}} "amdgpu-max-num-workgroups"="4,8,32" + // CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index ec84ebdc6abe7..318bfb2df2a7a 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -4,6 +4,7 @@ // CHECK: #pragma clang attribute supports the following attributes: // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) +// CHECK-NEXT: AMDGPUMaxNumWorkGroups (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function) diff --git a/clang/test/SemaCUDA/amdgpu-attrs.cu b/clang/test/SemaCUDA/amdgpu-attrs.cu index 4811ef796c66b..e04b32d121bc8 100644 --- a/clang/test/SemaCUDA/amdgpu-attrs.cu +++ b/clang/test/SemaCUDA/amdgpu-attrs.cu @@ -63,6 +63,16 @@ __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_6 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {} +__attribute__((amdgpu_max_num_work_groups(32, 1, 1))) +__global__ void max_num_work_groups_32_1_1() {} + +__attribute__((amdgpu_max_num_work_groups(32, 1, 1), amdgpu_flat_work_group_size(32, 64))) +__global__ void max_num_work_groups_32_1_1_flat_work_group_size_32_64() {} + +__attribute__((amdgpu_max_num_work_groups(32, 1, 1), amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) +__global__ void max_num_work_groups_32_1_1_flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {} + + // expected-error@+2{{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel function}} __attribute__((reqd_work_group_size(32, 64, 64))) __global__ void reqd_work_group_size_32_64_64() {} @@ -194,3 +204,125 @@ __global__ void non_cexpr_waves_per_eu_2() {} // expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} __attribute__((amdgpu_waves_per_eu(2, ipow2(2)))) __global__ void non_cexpr_waves_per_eu_2_4() {} + +__attribute__((amdgpu_max_num_work_groups(32))) +__global__ void max_num_work_groups_32() {} + +__attribute__((amdgpu_max_num_work_groups(32, 1))) +__global__ void max_num_work_groups_32_1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute takes no more than 3 arguments}} +__attribute__((amdgpu_max_num_work_groups(32, 1, 1, 1))) +__global__ void max_num_work_groups_32_1_1_1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute takes at least 1 argument}} +__attribute__((amdgpu_max_num_work_groups())) +__global__ void max_num_work_groups_no_arg() {} + +// expected-error@+1{{expected expression}} +__attribute__((amdgpu_max_num_work_groups(,1,1))) +__global__ void max_num_work_groups_empty_1_1() {} + +// expected-error@+1{{expected expression}} +__attribute__((amdgpu_max_num_work_groups(32,,1))) +__global__ void max_num_work_groups_32_empty_1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_max_num_work_groups(ipow2(5), 1, 1))) +__global__ void max_num_work_groups_32_1_1_non_int_arg0() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_max_num_work_groups(32, "1", 1))) +__global__ void max_num_work_groups_32_1_1_non_int_arg1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}} +__attribute__((amdgpu_max_num_work_groups(-32, 1, 1))) +__global__ void max_num_work_groups_32_1_1_neg_int_arg0() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}} +__attribute__((amdgpu_max_num_work_groups(32, -1, 1))) +__global__ void max_num_work_groups_32_1_1_neg_int_arg1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires a non-negative integral compile time constant expression}} +__attribute__((amdgpu_max_num_work_groups(32, 1, -1))) +__global__ void max_num_work_groups_32_1_1_neg_int_arg2() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +__attribute__((amdgpu_max_num_work_groups(0, 1, 1))) +__global__ void max_num_work_groups_0_1_1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +__attribute__((amdgpu_max_num_work_groups(32, 0, 1))) +__global__ void max_num_work_groups_32_0_1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +__attribute__((amdgpu_max_num_work_groups(32, 1, 0))) +__global__ void max_num_work_groups_32_1_0() {} + +__attribute__((amdgpu_max_num_work_groups(4294967295))) +__global__ void max_num_work_groups_max_unsigned_int() {} + +// expected-error@+1{{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_max_num_work_groups(4294967296))) +__global__ void max_num_work_groups_max_unsigned_int_plus1() {} + +// expected-error@+1{{integer constant expression evaluates to value 10000000000 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_max_num_work_groups(10000000000))) +__global__ void max_num_work_groups_too_large() {} + +int num_wg_x = 32; +int num_wg_y = 1; +int num_wg_z = 1; +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_max_num_work_groups(num_wg_x, 1, 1))) +__global__ void max_num_work_groups_32_1_1_non_const_arg0() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_max_num_work_groups(32, num_wg_y, 1))) +__global__ void max_num_work_groups_32_1_1_non_const_arg1() {} + +// expected-error@+1{{'amdgpu_max_num_work_groups' attribute requires parameter 2 to be an integer constant}} +__attribute__((amdgpu_max_num_work_groups(32, 1, num_wg_z))) +__global__ void max_num_work_groups_32_1_1_non_const_arg2() {} + +const int c_num_wg_x = 32; +__attribute__((amdgpu_max_num_work_groups(c_num_wg_x, 1, 1))) +__global__ void max_num_work_groups_32_1_1_const_arg0() {} + +template +__attribute__((amdgpu_max_num_work_groups(a, 1, 1))) +__global__ void template_a_1_1_max_num_work_groups() {} +template __global__ void template_a_1_1_max_num_work_groups<32>(); + +template +__attribute__((amdgpu_max_num_work_groups(32, a, 1))) +__global__ void template_32_a_1_max_num_work_groups() {} +template __global__ void template_32_a_1_max_num_work_groups<1>(); + +template +__attribute__((amdgpu_max_num_work_groups(32, 1, a))) +__global__ void template_32_1_a_max_num_work_groups() {} +template __global__ void template_32_1_a_max_num_work_groups<1>(); + +// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +// expected-note@+4{{in instantiation of}} +template +__attribute__((amdgpu_max_num_work_groups(b, 1, 1))) +__global__ void template_b_1_1_max_num_work_groups() {} +template __global__ void template_b_1_1_max_num_work_groups<0>(); + +// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +// expected-note@+4{{in instantiation of}} +template +__attribute__((amdgpu_max_num_work_groups(32, b, 1))) +__global__ void template_32_b_1_max_num_work_groups() {} +template __global__ void template_32_b_1_max_num_work_groups<0>(); + +// expected-error@+3{{'amdgpu_max_num_work_groups' attribute must be greater than 0}} +// expected-note@+4{{in instantiation of}} +template +__attribute__((amdgpu_max_num_work_groups(32, 1, b))) +__global__ void template_32_1_b_max_num_work_groups() {} +template __global__ void template_32_1_b_max_num_work_groups<0>(); + + diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index f5f37d9e8a3b0..99d7a482710f5 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1442,6 +1442,11 @@ The AMDGPU backend supports the following LLVM IR attributes. the frame. This is an internal detail of how LDS variables are lowered, language front ends should not set this attribute. + "amdgpu-max-num-workgroups"="x,y,z" Specify the maximum number of work groups for the kernel dispatch in the + X, Y, and Z dimensions. Generated by the ``amdgpu_max_num_work_groups`` + CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all + the three numbers are >= 1. + ======================================= ========================================================== Calling Conventions @@ -3917,6 +3922,11 @@ same *vendor-name*. If omitted, "normal" is assumed. + ".max_num_work_groups_{x,y,z}" integer The max number of + launched work-groups + in the X, Y, and Z + dimensions. Each number + must be >=1. =================================== ============== ========= ================================ .. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index c20fdd51607a5..9e288ab50e170 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -494,6 +494,14 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, Kern[".max_flat_workgroup_size"] = Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); + unsigned NumWGX = MFI.getMaxNumWorkGroupsX(); + unsigned NumWGY = MFI.getMaxNumWorkGroupsY(); + unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ(); + if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) { + Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX); + Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY); + Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ); + } Kern[".sgpr_spill_count"] = Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); Kern[".vgpr_spill_count"] = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index bcc7dedf32296..fa77b94fc22de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -432,7 +432,7 @@ std::pair AMDGPUSubtarget::getEffectiveWavesPerEU( std::pair Default(1, getMaxWavesPerEU()); // If minimum/maximum flat work group sizes were explicitly requested using - // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum + // "amdgpu-flat-workgroup-size" attribute, then set default minimum/maximum // number of waves per execution unit to values implied by requested // minimum/maximum flat work group sizes. unsigned MinImpliedByFlatWorkGroupSize = @@ -1108,3 +1108,8 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) { unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() { return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs; } + +SmallVector +AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const { + return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b72697973be7a..e2d8b5d1ce979 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -288,6 +288,9 @@ class AMDGPUSubtarget { /// 2) dimension. unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; + /// Return the number of work groups for the function. + SmallVector getMaxNumWorkGroups(const Function &F) const; + /// Return true if only a single workitem can be active in a wave. bool isSingleLaneExecution(const Function &Kernel) const; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 52d6fe6c7ba51..2569f40fec0e4 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,6 +46,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, const GCNSubtarget &ST = *static_cast(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); + MaxNumWorkGroups = ST.getMaxNumWorkGroups(F); + assert(MaxNumWorkGroups.size() == 3); Occupancy = ST.computeOccupancy(F, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 0336ec4985ea7..7d0c1ba8448e6 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; + // Default/requested number of work groups for the function. + SmallVector MaxNumWorkGroups = {0, 0, 0}; + private: unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -1072,6 +1075,13 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // \returns true if a function needs or may need AGPRs. bool usesAGPRs(const MachineFunction &MF) const; + + /// \returns Default/requested number of work groups for this function. + SmallVector getMaxNumWorkGroups() const { return MaxNumWorkGroups; } + + unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; } + unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; } + unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index edb0e50da2896..aa47dccf2dd28 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -11,6 +11,7 @@ #include "AMDGPUAsmUtils.h" #include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -1298,6 +1299,42 @@ getIntegerPairAttribute(const Function &F, StringRef Name, return Ints; } +SmallVector getIntegerVecAttribute(const Function &F, StringRef Name, + unsigned Size) { + assert(Size > 2); + SmallVector Default(Size, 0); + + Attribute A = F.getFnAttribute(Name); + if (!A.isStringAttribute()) + return Default; + + SmallVector Vals(Size, 0); + + LLVMContext &Ctx = F.getContext(); + + StringRef S = A.getValueAsString(); + unsigned i = 0; + for (; !S.empty() && i < Size; i++) { + std::pair Strs = S.split(','); + unsigned IntVal; + if (Strs.first.trim().getAsInteger(0, IntVal)) { + Ctx.emitError("can't parse integer attribute " + Strs.first + " in " + + Name); + return Default; + } + Vals[i] = IntVal; + S = Strs.second; + } + + if (!S.empty() || i < Size) { + Ctx.emitError("attribute " + Name + + " has incorrect number of integers; expected " + + llvm::utostr(Size)); + return Default; + } + return Vals; +} + unsigned getVmcntBitMask(const IsaVersion &Version) { return (1 << (getVmcntBitWidthLo(Version.Major) + getVmcntBitWidthHi(Version.Major))) - diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index d7ea2a3eff4b7..f8521cba077c6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -863,6 +863,14 @@ bool isReadOnlySegment(const GlobalValue *GV); /// target triple \p TT, false otherwise. bool shouldEmitConstantsToTextSection(const Triple &TT); +/// \returns Integer value requested using \p F's \p Name attribute. +/// +/// \returns \p Default if attribute is not present. +/// +/// \returns \p Default and emits error if requested value cannot be converted +/// to integer. +int getIntegerAttribute(const Function &F, StringRef Name, int Default); + /// \returns A pair of integer values requested using \p F's \p Name attribute /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired /// is false). @@ -877,6 +885,16 @@ getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, bool OnlyFirstRequired = false); +/// \returns Generate a vector of integer values requested using \p F's \p Name +/// attribute. +/// +/// \returns true if exactly Size (>2) number of integers are found in the +/// attribute. +/// +/// \returns false if any error occurs. +SmallVector getIntegerVecAttribute(const Function &F, StringRef Name, + unsigned Size); + /// Represents the counter values to wait for in an s_waitcnt instruction. /// /// Large values (including the maximum possible integer) can be used to diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll new file mode 100644 index 0000000000000..bc58222076ac0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s + +; Attribute not specified. +; CHECK-LABEL: {{^}}empty_no_attribute: +define amdgpu_kernel void @empty_no_attribute() { +entry: + ret void +} + +; Ignore if number of work groups for x dimension is 0. +; CHECK-LABEL: {{^}}empty_max_num_workgroups_x0: +define amdgpu_kernel void @empty_max_num_workgroups_x0() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-max-num-workgroups"="0,2,3"} + +; Ignore if number of work groups for y dimension is 0. +; CHECK-LABEL: {{^}}empty_max_num_workgroups_y0: +define amdgpu_kernel void @empty_max_num_workgroups_y0() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-max-num-workgroups"="1,0,3"} + +; Ignore if number of work groups for z dimension is 0. +; CHECK-LABEL: {{^}}empty_max_num_workgroups_z0: +define amdgpu_kernel void @empty_max_num_workgroups_z0() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-max-num-workgroups"="1,2,0"} + +; CHECK-LABEL: {{^}}empty_max_num_workgroups_1_2_3: +define amdgpu_kernel void @empty_max_num_workgroups_1_2_3() #3 { +entry: + ret void +} +attributes #3 = {"amdgpu-max-num-workgroups"="1,2,3"} + +; CHECK-LABEL: {{^}}empty_max_num_workgroups_1024_1024_1024: +define amdgpu_kernel void @empty_max_num_workgroups_1024_1024_1024() #4 { +entry: + ret void +} +attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"} + + +; CHECK: .amdgpu_metadata +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .name: empty_no_attribute +; CHECK-NEXT: .private_segment_fixed_size: 0 + +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .name: empty_max_num_workgroups_x0 +; CHECK-NEXT: .private_segment_fixed_size: 0 + +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .name: empty_max_num_workgroups_y0 +; CHECK-NEXT: .private_segment_fixed_size: 0 + +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .name: empty_max_num_workgroups_z0 +; CHECK-NEXT: .private_segment_fixed_size: 0 + +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .max_num_workgroups_x: 1 +; CHECK-NEXT: .max_num_workgroups_y: 2 +; CHECK-NEXT: .max_num_workgroups_z: 3 +; CHECK-NEXT: .name: empty_max_num_workgroups_1_2_3 +; CHECK-NEXT: .private_segment_fixed_size: 0 + +; CHECK: - .args: +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK-NEXT: .max_num_workgroups_x: 1024 +; CHECK-NEXT: .max_num_workgroups_y: 1024 +; CHECK-NEXT: .max_num_workgroups_z: 1024 +; CHECK-NEXT: .name: empty_max_num_workgroups_1024_1024_1024 +; CHECK-NEXT: .private_segment_fixed_size: 0 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll new file mode 100644 index 0000000000000..6d86d2d7c1a34 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll @@ -0,0 +1,71 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck --check-prefix=ERROR %s + +; ERROR: error: can't parse integer attribute -1 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_neg_num1() #21 { +entry: + ret void +} +attributes #21 = {"amdgpu-max-num-workgroups"="-1,2,3"} + +; ERROR: error: can't parse integer attribute -2 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_neg_num2() #22 { +entry: + ret void +} +attributes #22 = {"amdgpu-max-num-workgroups"="1,-2,3"} + +; ERROR: error: can't parse integer attribute -3 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_neg_num3() #23 { +entry: + ret void +} +attributes #23 = {"amdgpu-max-num-workgroups"="1,2,-3"} + +; ERROR: error: can't parse integer attribute 1.0 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_non_int1() #31 { +entry: + ret void +} +attributes #31 = {"amdgpu-max-num-workgroups"="1.0,2,3"} + +; ERROR: error: can't parse integer attribute 2.0 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_non_int2() #32 { +entry: + ret void +} +attributes #32 = {"amdgpu-max-num-workgroups"="1,2.0,3"} + +; ERROR: error: can't parse integer attribute 3.0 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_non_int3() #33 { +entry: + ret void +} +attributes #33 = {"amdgpu-max-num-workgroups"="1,2,3.0"} + +; ERROR: error: can't parse integer attribute 10000000000 in amdgpu-max-num-workgroups +define amdgpu_kernel void @empty_max_num_workgroups_too_large() #41 { +entry: + ret void +} +attributes #41 = {"amdgpu-max-num-workgroups"="10000000000,2,3"} + +; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3 +define amdgpu_kernel void @empty_max_num_workgroups_1_arg() #51 { +entry: + ret void +} +attributes #51 = {"amdgpu-max-num-workgroups"="1"} + +; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3 +define amdgpu_kernel void @empty_max_num_workgroups_2_args() #52 { +entry: + ret void +} +attributes #52 = {"amdgpu-max-num-workgroups"="1,2"} + +; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3 +define amdgpu_kernel void @empty_max_num_workgroups_4_args() #53 { +entry: + ret void +} +attributes #53 = {"amdgpu-max-num-workgroups"="1,2,3,4"}