-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) ChangesKernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use. Furthermore, this change (unintentionally) fixes a latent bug relating to some uses of the AMDHSA_BITS_SET macro for which the VAL argument is not encapsulated and can result in unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor). Patch is 63.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80855.diff 11 Files Affected:
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 84cac3ef700e05..9c5d8fa1c1a607 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,6 +52,10 @@
#endif // AMDHSA_BITS_SET
namespace llvm {
+
+class MCContext;
+class MCExpr;
+
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
@@ -238,18 +242,40 @@ enum : int32_t {
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
- uint32_t group_segment_fixed_size;
- uint32_t private_segment_fixed_size;
- uint32_t kernarg_size;
+ const MCExpr *group_segment_fixed_size;
+ const MCExpr *private_segment_fixed_size;
+ const MCExpr *kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
- uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
- uint32_t compute_pgm_rsrc1;
- uint32_t compute_pgm_rsrc2;
- uint16_t kernel_code_properties;
- uint16_t kernarg_preload;
+ const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
+ const MCExpr *compute_pgm_rsrc1;
+ const MCExpr *compute_pgm_rsrc2;
+ const MCExpr *kernel_code_properties;
+ const MCExpr *kernarg_preload;
uint8_t reserved3[4];
+
+ static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+ static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+};
+
+// Sizes for kernel_descriptor_t properties, should add up to 64.
+enum : uint32_t {
+ SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_KERNARG_SIZE = 4,
+ SIZEOF_RESERVED0 = 4,
+ SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
+ SIZEOF_RESERVED1 = 20,
+ SIZEOF_COMPUTE_PGM_RSRC3 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC1 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC2 = 4,
+ SIZEOF_KERNEL_CODE_PROPERTIES = 2,
+ SIZEOF_KERNARG_PRELOAD = 2,
+ SIZEOF_RESERVED3 = 4,
+ SIZEOF_KERNEL_DESCRIPTOR = 64
};
enum : uint32_t {
@@ -267,43 +293,6 @@ enum : uint32_t {
RESERVED3_OFFSET = 60
};
-static_assert(
- sizeof(kernel_descriptor_t) == 64,
- "invalid size for kernel_descriptor_t");
-static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
- GROUP_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for group_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
- PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for private_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
- KERNARG_SIZE_OFFSET,
- "invalid offset for kernarg_size");
-static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
- "invalid offset for reserved0");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
- KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
- "invalid offset for kernel_code_entry_byte_offset");
-static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
- "invalid offset for reserved1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
- COMPUTE_PGM_RSRC3_OFFSET,
- "invalid offset for compute_pgm_rsrc3");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
- COMPUTE_PGM_RSRC1_OFFSET,
- "invalid offset for compute_pgm_rsrc1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
- COMPUTE_PGM_RSRC2_OFFSET,
- "invalid offset for compute_pgm_rsrc2");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
- KERNEL_CODE_PROPERTIES_OFFSET,
- "invalid offset for kernel_code_properties");
-static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
- KERNARG_PRELOAD_OFFSET,
- "invalid offset for kernarg_preload");
-static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
- "invalid offset for reserved3");
-
} // end namespace amdhsa
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..d68c7e499f62c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -434,24 +434,30 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, MF.getContext());
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, MF.getContext());
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), MF.getContext());
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), MF.getContext());
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), MF.getContext());
+ KernelDescriptor.kernel_code_properties = MCConstantExpr::create(
+ getAmdhsaKernelCodeProperties(MF), MF.getContext());
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0,
+ MF.getContext());
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ MF.getContext());
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 225e781588668f..2331af628fb730 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5236,7 +5236,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ kernel_descriptor_t KD =
+ getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
StringSet<> Seen;
@@ -5276,89 +5277,107 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+#define EXPR_SHOULD_RESOLVE() \
+ if (!EvaluatableExpr) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_GROUP_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_KERNARG_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_SHOULD_RESOLVE();
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_SHOULD_RESOLVE();
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5367,34 +5386,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5402,42 +5426,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_SHOULD_RESOLVE();
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_SHOULD_RESOLVE();
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_SHOULD_RESOLVE();
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_SHOULD_RESOLVE();
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5457,97 +5487,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add some examples where the expression has to refer to other functions' values?
llvm/test/MC/AMDGPU/hsa-sym-exprs.s
Outdated
@@ -0,0 +1,68 @@ | |||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test doesn't look like it comprehensively tests every field representable as an expression
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also needs to test more subtargets to hit all the streamer paths
uint64_t Val = IVal; | ||
bool EvaluatableExpr; | ||
if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { | ||
if (IVal < 0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs tests for these assorted errors
Currently there is no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage. |
I would expect the MC part to be decoupled from the codegen change to make use of it. I'm mostly wondering what the syntax ends up looking like |
What I got so far for assembly format is emitting set directives for each resource usage info property gathered by AMDGPUResourceUsageAnalysis which are then combined with its callees' property (through a MAX/OR target specific MCExpr).
These symbols could then be used to construct any derived fields (e.g., some of the kernel descriptor fields). |
Can we define a single struct symbol per function, and emit that as a constant instead of individually setting each bit like this? |
As in, amdgpu directives instead of .set?
|
I mean what precisely does .set do? Is it defining a new external symbol for each field? I'm worried about blowing up the size of the symbol table by N entries for every function, and how you map from one function to the fields of another function |
It defines a symbol with its assigned MCExpr. Do correct me if I'm wrong, but can't we set them as temporary symbols that don't end up in the symbol table (i.e., prefix the symbols with ".L")? When going from IR to the object file directly, it will just use fixups so no symbols end up in the symbol table. These symbols won't be needed after assembling as well but given how there is no concept of fixups in the asm emit mechanism, we'll need them to avoid being order dependent for propagating the resource info in the case of going from .s -> .o. |
This would solve the issue of the day, but doesn't solve the general external object linking case. We need some way to refer to a specific external symbol's resource uses for that |
Would like to add a test that demonstrates the propagation to kernel descriptor expressions but will mean this depends on #82022.
What I've got so far mimics the behaviour of AMDGPUResourceUsageInfo which take the module scope maximums in case of external calls. Can't say I've looked into the scenario of linking, I wonder how nicely the required MCExpr operations such as the ones introduced in #82022 will play with relocations (if at all). |
KernelDescriptor.kernarg_preload = | ||
static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs()); | ||
KernelDescriptor.kernarg_preload = CreateExpr( | ||
AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would hope we could just unconditionally call getNumKernargPreloadedSGPRs, but that's a preexisting issue
llvm/test/MC/AMDGPU/hsa-sym-exprs.s
Outdated
@@ -0,0 +1,68 @@ | |||
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also needs to test more subtargets to hit all the streamer paths
806eca7
to
6d6ee5b
Compare
Rebase |
…hsa symbolic expression tests, apply feedback
aef34d0
to
9449e63
Compare
Rebase |
This PR causes a linker error :( |
The same issue happens on PPC, as well. For example, https://lab.llvm.org/buildbot/#/builders/57/builds/33632/steps/5/logs/stdio |
+1, can we revert? |
The BUILD_SHARED_LIBS=on linker error was due to library layerying violation. https://llvm.org/docs/CodingStandards.html#library-layering |
Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr.
Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Relands #80855 with fixes
Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.
Furthermore, this change (unintentionally) fixes a latent bug relating to some uses of the AMDHSA_BITS_SET macro for which the VAL argument is not encapsulated and can result in unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor).