[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855

JanekvO · 2024-02-06T15:46:08Z

This change converts kernel descriptor attributes, along with their respective emit and asm parse functionality, to MCExpr. This is required for moving function/program resource usage information propagation to the MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.

Furthermore, this change (unintentionally) fixes a latent bug relating to some uses of the AMDHSA_BITS_SET macro for which the VAL argument is not encapsulated and can result in unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor).

llvmbot · 2024-02-06T15:46:27Z

@llvm/pr-subscribers-mc
@llvm/pr-subscribers-llvm-support

@llvm/pr-subscribers-backend-amdgpu

Author: Janek van Oirschot (JanekvO)

Changes

This change converts kernel descriptor attributes, along with their respective emit and asm parse functionality, to MCExpr. This is required for moving function/program resource usage information propagation to the MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.

Furthermore, this change (unintentionally) fixes a latent bug relating to some uses of the AMDHSA_BITS_SET macro for which the VAL argument is not encapsulated and can result in unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor).

Patch is 63.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80855.diff

11 Files Affected:

(modified) llvm/include/llvm/Support/AMDHSAKernelDescriptor.h (+34-45)
(modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+18-12)
(modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+119-69)
(modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+236-145)
(added) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp (+32)
(modified) llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt (+1)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+53-31)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3-2)
(modified) llvm/test/MC/AMDGPU/hsa-gfx12-v4.s (+3-3)
(added) llvm/test/MC/AMDGPU/hsa-sym-exprs.s (+68)
(added) llvm/test/MC/AMDGPU/hsa-tg-split.s (+74)

diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 84cac3ef700e05..9c5d8fa1c1a607 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,6 +52,10 @@
 #endif // AMDHSA_BITS_SET
 
 namespace llvm {
+
+class MCContext;
+class MCExpr;
+
 namespace amdhsa {
 
 // Floating point rounding modes. Must match hardware definition.
@@ -238,18 +242,40 @@ enum : int32_t {
 
 // Kernel descriptor. Must be kept backwards compatible.
 struct kernel_descriptor_t {
-  uint32_t group_segment_fixed_size;
-  uint32_t private_segment_fixed_size;
-  uint32_t kernarg_size;
+  const MCExpr *group_segment_fixed_size;
+  const MCExpr *private_segment_fixed_size;
+  const MCExpr *kernarg_size;
   uint8_t reserved0[4];
   int64_t kernel_code_entry_byte_offset;
   uint8_t reserved1[20];
-  uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
-  uint32_t compute_pgm_rsrc1;
-  uint32_t compute_pgm_rsrc2;
-  uint16_t kernel_code_properties;
-  uint16_t kernarg_preload;
+  const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
+  const MCExpr *compute_pgm_rsrc1;
+  const MCExpr *compute_pgm_rsrc2;
+  const MCExpr *kernel_code_properties;
+  const MCExpr *kernarg_preload;
   uint8_t reserved3[4];
+
+  static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+                       uint32_t Mask, MCContext &Ctx);
+  static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+                                uint32_t Mask, MCContext &Ctx);
+};
+
+// Sizes for kernel_descriptor_t properties, should add up to 64.
+enum : uint32_t {
+  SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
+  SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
+  SIZEOF_KERNARG_SIZE = 4,
+  SIZEOF_RESERVED0 = 4,
+  SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
+  SIZEOF_RESERVED1 = 20,
+  SIZEOF_COMPUTE_PGM_RSRC3 = 4,
+  SIZEOF_COMPUTE_PGM_RSRC1 = 4,
+  SIZEOF_COMPUTE_PGM_RSRC2 = 4,
+  SIZEOF_KERNEL_CODE_PROPERTIES = 2,
+  SIZEOF_KERNARG_PRELOAD = 2,
+  SIZEOF_RESERVED3 = 4,
+  SIZEOF_KERNEL_DESCRIPTOR = 64
 };
 
 enum : uint32_t {
@@ -267,43 +293,6 @@ enum : uint32_t {
   RESERVED3_OFFSET = 60
 };
 
-static_assert(
-    sizeof(kernel_descriptor_t) == 64,
-    "invalid size for kernel_descriptor_t");
-static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
-                  GROUP_SEGMENT_FIXED_SIZE_OFFSET,
-              "invalid offset for group_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
-                  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
-              "invalid offset for private_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
-                  KERNARG_SIZE_OFFSET,
-              "invalid offset for kernarg_size");
-static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
-              "invalid offset for reserved0");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
-                  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
-              "invalid offset for kernel_code_entry_byte_offset");
-static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
-              "invalid offset for reserved1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
-                  COMPUTE_PGM_RSRC3_OFFSET,
-              "invalid offset for compute_pgm_rsrc3");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
-                  COMPUTE_PGM_RSRC1_OFFSET,
-              "invalid offset for compute_pgm_rsrc1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
-                  COMPUTE_PGM_RSRC2_OFFSET,
-              "invalid offset for compute_pgm_rsrc2");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
-                  KERNEL_CODE_PROPERTIES_OFFSET,
-              "invalid offset for kernel_code_properties");
-static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
-                  KERNARG_PRELOAD_OFFSET,
-              "invalid offset for kernarg_preload");
-static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
-              "invalid offset for reserved3");
-
 } // end namespace amdhsa
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..d68c7e499f62c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -434,24 +434,30 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
   assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
   assert(isUInt<32>(PI.getComputePGMRSrc2()));
 
-  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
-  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+  KernelDescriptor.group_segment_fixed_size =
+      MCConstantExpr::create(PI.LDSSize, MF.getContext());
+  KernelDescriptor.private_segment_fixed_size =
+      MCConstantExpr::create(PI.ScratchSize, MF.getContext());
 
   Align MaxKernArgAlign;
-  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+  KernelDescriptor.kernarg_size = MCConstantExpr::create(
+      STM.getKernArgSegmentSize(F, MaxKernArgAlign), MF.getContext());
 
-  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
-  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
-  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+  KernelDescriptor.compute_pgm_rsrc1 =
+      MCConstantExpr::create(PI.getComputePGMRSrc1(STM), MF.getContext());
+  KernelDescriptor.compute_pgm_rsrc2 =
+      MCConstantExpr::create(PI.getComputePGMRSrc2(), MF.getContext());
+  KernelDescriptor.kernel_code_properties = MCConstantExpr::create(
+      getAmdhsaKernelCodeProperties(MF), MF.getContext());
 
   assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
-  if (STM.hasGFX90AInsts())
-    KernelDescriptor.compute_pgm_rsrc3 =
-      CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+  KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+      STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0,
+      MF.getContext());
 
-  if (AMDGPU::hasKernargPreload(STM))
-    KernelDescriptor.kernarg_preload =
-        static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+  KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+      MF.getContext());
 
   return KernelDescriptor;
 }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 225e781588668f..2331af628fb730 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5236,7 +5236,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   if (getParser().parseIdentifier(KernelName))
     return true;
 
-  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+  kernel_descriptor_t KD =
+      getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
 
   StringSet<> Seen;
 
@@ -5276,89 +5277,107 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       return TokError(".amdhsa_ directives cannot be repeated");
 
     SMLoc ValStart = getLoc();
-    int64_t IVal;
-    if (getParser().parseAbsoluteExpression(IVal))
+    const MCExpr *ExprVal;
+    if (getParser().parseExpression(ExprVal))
       return true;
     SMLoc ValEnd = getLoc();
     SMRange ValRange = SMRange(ValStart, ValEnd);
 
-    if (IVal < 0)
-      return OutOfRangeError(ValRange);
-
+    int64_t IVal = 0;
     uint64_t Val = IVal;
+    bool EvaluatableExpr;
+    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+      if (IVal < 0)
+        return OutOfRangeError(ValRange);
+      Val = IVal;
+    }
 
 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
-  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
+  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
     return OutOfRangeError(RANGE);                                             \
-  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+  kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,            \
+                                getContext());
+
+#define EXPR_SHOULD_RESOLVE()                                                  \
+  if (!EvaluatableExpr)                                                        \
+    return Error(IDRange.Start, "directive should have resolvable expression", \
+                 IDRange);
 
     if (ID == ".amdhsa_group_segment_fixed_size") {
-      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_GROUP_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.group_segment_fixed_size = Val;
+      KD.group_segment_fixed_size = ExprVal;
     } else if (ID == ".amdhsa_private_segment_fixed_size") {
-      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.private_segment_fixed_size = Val;
+      KD.private_segment_fixed_size = ExprVal;
     } else if (ID == ".amdhsa_kernarg_size") {
-      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_KERNARG_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.kernarg_size = Val;
+      KD.kernarg_size = ExprVal;
     } else if (ID == ".amdhsa_user_sgpr_count") {
+      EXPR_SHOULD_RESOLVE();
       ExplicitUserSGPRCount = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+      EXPR_SHOULD_RESOLVE();
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 4;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+      EXPR_SHOULD_RESOLVE();
       if (!hasKernargPreload())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 
       if (Val > getMaxNumUserSGPRs())
         return OutOfRangeError(ValRange);
-      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
                        ValRange);
       if (Val) {
         ImpliedUserSGPRCount += Val;
         PreloadLength = Val;
       }
     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+      EXPR_SHOULD_RESOLVE();
       if (!hasKernargPreload())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 
       if (Val >= 1024)
         return OutOfRangeError(ValRange);
-      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
                        ValRange);
       if (Val)
         PreloadOffset = Val;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
@@ -5367,34 +5386,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
-                       ValRange);
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 1;
     } else if (ID == ".amdhsa_wavefront_size32") {
+      EXPR_SHOULD_RESOLVE();
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
       EnableWavefrontSize32 = Val;
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
-                       Val, ValRange);
+                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_uses_dynamic_stack") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_enable_private_segment") {
       if (!hasArchitectedFlatScratch())
         return Error(
@@ -5402,42 +5426,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
             "directive is not supported without architected flat scratch",
             IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_next_free_vgpr") {
+      EXPR_SHOULD_RESOLVE();
       VGPRRange = ValRange;
       NextFreeVGPR = Val;
     } else if (ID == ".amdhsa_next_free_sgpr") {
+      EXPR_SHOULD_RESOLVE();
       SGPRRange = ValRange;
       NextFreeSGPR = Val;
     } else if (ID == ".amdhsa_accum_offset") {
       if (!isGFX90A())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      EXPR_SHOULD_RESOLVE();
       AccumOffset = Val;
     } else if (ID == ".amdhsa_reserve_vcc") {
+      EXPR_SHOULD_RESOLVE();
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveVCC = Val;
     } else if (ID == ".amdhsa_reserve_flat_scratch") {
+      EXPR_SHOULD_RESOLVE();
       if (IVersion.Major < 7)
         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
       if (hasArchitectedFlatScratch())
@@ -5457,97 +5487,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                                  IDRange);
     } else if (ID == ".amdhsa_float_round_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_round_mode_16_64") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_denorm_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_dx10_clamp") {
       if (IVersion.Major >= 12)
         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_ieee_mode") {
       if (IVersion.Major >= 12)
         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_fp16_overflow") {
       if (IVersion.Major < 9)
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+      PARSE...
[truncated]

arsenm

Can you add some examples where the expression has to refer to other functions' values?

llvm/include/llvm/Support/AMDHSAKernelDescriptor.h

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp

arsenm · 2024-02-06T16:27:09Z

llvm/test/MC/AMDGPU/hsa-sym-exprs.s

@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s


This test doesn't look like it comprehensively tests every field representable as an expression

Also needs to test more subtargets to hit all the streamer paths

arsenm · 2024-02-06T17:06:35Z

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

    uint64_t Val = IVal;
+    bool EvaluatableExpr;
+    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+      if (IVal < 0)


Needs tests for these assorted errors

JanekvO · 2024-02-06T20:17:57Z

Can you add some examples where the expression has to refer to other functions' values?

Currently there are no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage.

arsenm · 2024-02-08T10:31:47Z

Can you add some examples where the expression has to refer to other functions' values?

Currently there is no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage.

I would expect the MC part to be decoupled from the codegen change to make use of it. I'm mostly wondering what the syntax ends up looking like

JanekvO · 2024-02-08T13:34:21Z

Can you add some examples where the expression has to refer to other functions' values?

Currently there is no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage.

I would expect the MC part to be decoupled from the codegen change to make use of it. I'm mostly wondering what the syntax ends up looking like

What I've got so far for the assembly format is emitting .set directives for each resource usage info property gathered by AMDGPUResourceUsageAnalysis, which are then combined with the callees' properties (through a MAX/OR target-specific MCExpr).
For example:

with_call:
    call uses_vcc
    ...

.set with_call.num_vgpr, MAX(41, uses_vcc.num_vgpr)
.set with_call.num_agpr, MAX(0, uses_vcc.num_agpr)
.set with_call.num_sgpr, MAX(34, uses_vcc.num_sgpr)
.set with_call.private_seg_size, 16+(MAX(uses_vcc.private_seg_size))
.set with_call.uses_vcc, OR(1, uses_vcc.uses_vcc)
.set with_call.uses_flat_scratch, OR(0, uses_vcc.uses_flat_scratch)
.set with_call.has_dyn_sized_stack, OR(0, uses_vcc.has_dyn_sized_stack)
.set with_call.has_recursion, OR(0, uses_vcc.has_recursion)
.set with_call.has_indirect_call, OR(0, uses_vcc.has_indirect_call)

uses_vcc:
    ...

.set uses_vcc.num_vgpr, 0
.set uses_vcc.num_agpr, 0
.set uses_vcc.num_sgpr, 32
.set uses_vcc.private_seg_size, 0
.set uses_vcc.uses_vcc, 1
.set uses_vcc.uses_flat_scratch, 0
.set uses_vcc.has_dyn_sized_stack, 0
.set uses_vcc.has_recursion, 0
.set uses_vcc.has_indirect_call, 0

These symbols could then be used to construct any derived fields (e.g., some of the kernel descriptor fields).

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h

arsenm · 2024-02-08T14:58:43Z

Can you add some examples where the expression has to refer to other functions' values?

Currently there is no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage.

I would expect the MC part to be decoupled from the codegen change to make use of it. I'm mostly wondering what the syntax ends up looking like

What I got so far for assembly format is emitting set directives for each resource usage info property gathered by AMDGPUResourceUsageAnalysis which are then combined with its callees' property (through a MAX/OR target specific MCExpr). For example:
with_call:
    call uses_vcc
    ...

.set with_call.num_vgpr, MAX(41, uses_vcc.num_vgpr)
.set with_call.num_agpr, MAX(0, uses_vcc.num_agpr)
.set with_call.num_sgpr, MAX(34, uses_vcc.num_sgpr)
.set with_call.private_seg_size, 16+(MAX(uses_vcc.private_seg_size))
.set with_call.uses_vcc, OR(1, uses_vcc.uses_vcc)
.set with_call.uses_flat_scratch, OR(0, uses_vcc.uses_flat_scratch)
.set with_call.has_dyn_sized_stack, OR(0, uses_vcc.has_dyn_sized_stack)
.set with_call.has_recursion, OR(0, uses_vcc.has_recursion)
.set with_call.has_indirect_call, OR(0, uses_vcc.has_indirect_call)

uses_vcc:
    ...

.set uses_vcc.num_vgpr, 0
.set uses_vcc.num_agpr, 0
.set uses_vcc.num_sgpr, 32
.set uses_vcc.private_seg_size, 0
.set uses_vcc.uses_vcc, 1
.set uses_vcc.uses_flat_scratch, 0
.set uses_vcc.has_dyn_sized_stack, 0
.set uses_vcc.has_recursion, 0
.set uses_vcc.has_indirect_call, 0
These symbols could then be used to construct any derived fields (e.g., some of the kernel descriptor fields).

Can we define a single struct symbol per function, and emit that as a constant instead of individually setting each bit like this?

JanekvO · 2024-02-08T16:27:35Z

Can you add some examples where the expression has to refer to other functions' values?

Currently there is no explicit tests I can add for this. I have one patch that converts AMDGPUResourceUsageAnalysis from a module level pass to a MachineFunction level pass and will include the infrastructure for resource usage information to be propagated through MCExprs and symbols. This propagation may mean that a symbol may be used before it is defined. However, I can't put a PR up for that unless I add support for the MC layer emit/parse for all (meta)data derived from resource usage.

I would expect the MC part to be decoupled from the codegen change to make use of it. I'm mostly wondering what the syntax ends up looking like

What I got so far for assembly format is emitting set directives for each resource usage info property gathered by AMDGPUResourceUsageAnalysis which are then combined with its callees' property (through a MAX/OR target specific MCExpr). For example:
with_call:
    call uses_vcc
    ...

.set with_call.num_vgpr, MAX(41, uses_vcc.num_vgpr)
.set with_call.num_agpr, MAX(0, uses_vcc.num_agpr)
.set with_call.num_sgpr, MAX(34, uses_vcc.num_sgpr)
.set with_call.private_seg_size, 16+(MAX(uses_vcc.private_seg_size))
.set with_call.uses_vcc, OR(1, uses_vcc.uses_vcc)
.set with_call.uses_flat_scratch, OR(0, uses_vcc.uses_flat_scratch)
.set with_call.has_dyn_sized_stack, OR(0, uses_vcc.has_dyn_sized_stack)
.set with_call.has_recursion, OR(0, uses_vcc.has_recursion)
.set with_call.has_indirect_call, OR(0, uses_vcc.has_indirect_call)

uses_vcc:
    ...

.set uses_vcc.num_vgpr, 0
.set uses_vcc.num_agpr, 0
.set uses_vcc.num_sgpr, 32
.set uses_vcc.private_seg_size, 0
.set uses_vcc.uses_vcc, 1
.set uses_vcc.uses_flat_scratch, 0
.set uses_vcc.has_dyn_sized_stack, 0
.set uses_vcc.has_recursion, 0
.set uses_vcc.has_indirect_call, 0
These symbols could then be used to construct any derived fields (e.g., some of the kernel descriptor fields).
Can we define a single struct symbol per function and emit that as a constant instead of individually setting each bit like this?

As in, amdgpu directives instead of .set?
e.g.,

with_call:
    call uses_vcc
    ...

.amdgpu_num_vgpr, MAX(41, uses_vcc.num_vgpr)
.amdgpu_num_agpr, MAX(0, uses_vcc.num_agpr)
.amdgpu_num_sgpr, MAX(34, uses_vcc.num_sgpr)
.amdgpu_private_seg_size, 16+(MAX(uses_vcc.private_seg_size))
.amdgpu_uses_vcc, OR(1, uses_vcc.uses_vcc)
.amdgpu_uses_flat_scratch, OR(0, uses_vcc.uses_flat_scratch)
.amdgpu_has_dyn_sized_stack, OR(0, uses_vcc.has_dyn_sized_stack)
.amdgpu_has_recursion, OR(0, uses_vcc.has_recursion)
.amdgpu_has_indirect_call, OR(0, uses_vcc.has_indirect_call)

uses_vcc:
    ...

.amdgpu_num_vgpr, 0
.amdgpu_num_agpr, 0
.amdgpu_num_sgpr, 32
.amdgpu_private_seg_size, 0
.amdgpu_uses_vcc, 1
.amdgpu_uses_flat_scratch, 0
.amdgpu_has_dyn_sized_stack, 0
.amdgpu_has_recursion, 0
.amdgpu_has_indirect_call, 0

arsenm · 2024-02-09T16:45:14Z

As in, amdgpu directives instead of .set? e.g.,

I mean what precisely does .set do? Is it defining a new external symbol for each field? I'm worried about blowing up the size of the symbol table by N entries for every function, and how you map from one function to the fields of another function

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

JanekvO · 2024-02-12T13:50:13Z

As in, amdgpu directives instead of .set? e.g.,

I mean what precisely does .set do? Is it defining a new external symbol for each field? I'm worried about blowing up the size of the symbol table by N entries for every function, and how you map from one function to the fields of another function

It defines a symbol with its assigned MCExpr. Do correct me if I'm wrong, but can't we set them as temporary symbols that don't end up in the symbol table (i.e., prefix the symbols with ".L")?

When going from IR to the object file directly, it will just use fixups, so no symbols end up in the symbol table. These symbols won't be needed after assembling either, but since there is no concept of fixups in the asm emit mechanism, we'll need them to avoid being order-dependent when propagating the resource info in the case of going from .s -> .o.

arsenm · 2024-02-14T08:19:45Z

As in, amdgpu directives instead of .set? e.g.,

I mean what precisely does .set do? Is it defining a new external symbol for each field? I'm worried about blowing up the size of the symbol table by N entries for every function, and how you map from one function to the fields of another function

It defines a symbol with its assigned MCExpr. Do correct me if I'm wrong, but can't we set them as temporary symbols that don't end up in the symbol table (i.e., prefix the symbols with ".L")?

This would solve the issue of the day, but doesn't solve the general external object linking case. We need some way to refer to a specific external symbol's resource uses for that

JanekvO · 2024-02-16T19:00:39Z

I would like to add a test that demonstrates the propagation to kernel descriptor expressions, but that would mean this depends on #82022.

This would solve the issue of the day, but doesn't solve the general external object linking case. We need some way to refer to a specific external symbol's resource uses for that

What I've got so far mimics the behaviour of AMDGPUResourceUsageInfo, which takes the module-scope maximums in case of external calls. I can't say I've looked into the linking scenario; I wonder how nicely the required MCExpr operations, such as the ones introduced in #82022, will play with relocations (if at all).

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

arsenm · 2024-02-29T08:47:32Z

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

-    KernelDescriptor.kernarg_preload =
-        static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+  KernelDescriptor.kernarg_preload = CreateExpr(
+      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0);


I would hope we could just unconditionally call getNumKernargPreloadedSGPRs, but that's a preexisting issue

arsenm · 2024-02-29T08:50:47Z

llvm/test/MC/AMDGPU/hsa-sym-exprs.s

@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s


Also needs to test more subtargets to hit all the streamer paths

llvm/test/MC/AMDGPU/hsa-gfx12-v4.s

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h

JanekvO · 2024-03-14T16:51:15Z

Rebase

…hsa symbolic expression tests, apply feedback

…lied

JanekvO · 2024-03-19T14:44:54Z

Rebase

dtcxzyw · 2024-03-21T15:04:26Z

&& /usr/bin/c++ -fPIC -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -w -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -Wl,-z,defs -Wl,-z,nodelete -Wl,-rpath-link,/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/./lib -Wl,--gc-sections -shared -Wl,-soname,libLLVMAMDGPUUtils.so.19.0git -o lib/libLLVMAMDGPUUtils.so.19.0git lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUAsmUtils.cpp.o lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUBaseInfo.cpp.o lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUMemoryUtils.cpp.o lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUPALMetadata.cpp.o lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDKernelCodeTUtils.cpp.o -Wl,-rpath,"$ORIGIN/../lib:/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/lib:" lib/libLLVMAnalysis.so.19.0git lib/libLLVMCodeGenTypes.so.19.0git lib/libLLVMCore.so.19.0git lib/libLLVMMC.so.19.0git lib/libLLVMBinaryFormat.so.19.0git lib/libLLVMTargetParser.so.19.0git lib/libLLVMSupport.so.19.0git -Wl,-rpath-link,/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/lib && :
/usr/bin/ld: lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUBaseInfo.cpp.o: in function `llvm::AMDGPU::getDefaultAmdhsaKernelDescriptor(llvm::MCSubtargetInfo const*, llvm::MCContext&)':
AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0xbb): undefined reference to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)'
/usr/bin/ld: AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0xdf): undefined reference to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)'
/usr/bin/ld: AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0x111): undefined reference to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)'
/usr/bin/ld: AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0x145): undefined reference to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)'
/usr/bin/ld: AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0x164): undefined reference to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)'
/usr/bin/ld: lib/Target/AMDGPU/Utils/CMakeFiles/LLVMAMDGPUUtils.dir/AMDGPUBaseInfo.cpp.o:AMDGPUBaseInfo.cpp:(.text._ZN4llvm6AMDGPU32getDefaultAmdhsaKernelDescriptorEPKNS_15MCSubtargetInfoERNS_9MCContextE+0x17c): more undefined references to `llvm::AMDGPU::MCKernelDescriptor::bits_set(llvm::MCExpr const*&, llvm::MCExpr const*, unsigned int, unsigned int, llvm::MCContext&)' follow
collect2: error: ld returned 1 exit status
ninja: build stopped: subcommand failed.

This PR causes a linker error :(

amy-kwan · 2024-03-21T15:53:11Z

The same issue happens on PPC, as well. For example, https://lab.llvm.org/buildbot/#/builders/57/builds/33632/steps/5/logs/stdio

goldsteinn · 2024-03-21T16:38:36Z

+1, can we revert?

This reverts commit 857161c.

Reverts #80855

MaskRay · 2024-03-21T17:21:53Z

The BUILD_SHARED_LIBS=on linker error was due to a library layering violation. https://llvm.org/docs/CodingStandards.html#library-layering
AMDGPU/Utils/ cannot reference symbols defined in AMDGPU/MCTargetDesc.

Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr.

Reverts llvm#80855

Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Relands #80855 with fixes

JanekvO added the backend:AMDGPU label Feb 6, 2024

JanekvO requested review from arsenm, kerbowa and Pierre-vh February 6, 2024 15:46

llvmbot added mc Machine (object) code llvm:support labels Feb 6, 2024

arsenm reviewed Feb 6, 2024

View reviewed changes

arsenm reviewed Feb 8, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h Outdated Show resolved Hide resolved

Pierre-vh reviewed Feb 12, 2024

View reviewed changes

arsenm reviewed Feb 29, 2024

View reviewed changes

llvm/test/MC/AMDGPU/hsa-gfx12-v4.s Outdated Show resolved Hide resolved

Pierre-vh reviewed Mar 1, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h Show resolved Hide resolved

JanekvO mentioned this pull request Mar 1, 2024

[AMDGPU] Add AMDGPU specific variadic operation MCExprs #82022

Merged

JanekvO force-pushed the MCExpr-kernel-descriptor branch from 806eca7 to 6d6ee5b Compare March 14, 2024 16:50

JanekvO added 4 commits March 19, 2024 12:12

MCExpr-ify MC layer kernel descriptor

769ee73

Separate MCExpr kernel descriptor from kernel_descriptor_t, add more …

8d3a7d1

…hsa symbolic expression tests, apply feedback

Apply feedback: nullptr MCExpr, remove superfluous memset

20fc1e4

Removed SIZEOF_* constants, renamings, convenience lambda, feedback app…

8885b13

…lied

Feedback, move comments to header, add more subtarget tests

9449e63

JanekvO force-pushed the MCExpr-kernel-descriptor branch from aef34d0 to 9449e63 Compare March 19, 2024 14:44

arsenm approved these changes Mar 19, 2024

View reviewed changes

Test with AMDGPU MCExpr (resolves for .o)

939da34

JanekvO merged commit 857161c into llvm:main Mar 21, 2024
4 checks passed

JanekvO added a commit that referenced this pull request Mar 21, 2024

Revert "[AMDGPU] MCExpr-ify MC layer kernel descriptor (#80855)"

ddabe09

This reverts commit 857161c.

JanekvO mentioned this pull request Mar 21, 2024

Revert "[AMDGPU] MCExpr-ify MC layer kernel descriptor" #86151

Merged

MaskRay pushed a commit that referenced this pull request Mar 21, 2024

Revert "[AMDGPU] MCExpr-ify MC layer kernel descriptor" (#86151)

797336b

Reverts #80855

chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024

[AMDGPU] MCExpr-ify MC layer kernel descriptor (llvm#80855)

aafeb99

Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr.

chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024

Revert "[AMDGPU] MCExpr-ify MC layer kernel descriptor" (llvm#86151)

814bf96

Reverts llvm#80855

JanekvO mentioned this pull request Mar 25, 2024

Reland [AMDGPU] MCExpr-ify MC layer kernel descriptor #86494

Merged

JanekvO added a commit that referenced this pull request Mar 27, 2024

Reland [AMDGPU] MCExpr-ify MC layer kernel descriptor (#86494)

1103a2a

Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Relands #80855 with fixes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855

[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855

JanekvO commented Feb 6, 2024

llvmbot commented Feb 6, 2024 •

edited

arsenm left a comment

arsenm Feb 6, 2024

arsenm Feb 29, 2024

arsenm Feb 6, 2024

JanekvO commented Feb 6, 2024

arsenm commented Feb 8, 2024

JanekvO commented Feb 8, 2024

arsenm commented Feb 8, 2024

JanekvO commented Feb 8, 2024

arsenm commented Feb 9, 2024

JanekvO commented Feb 12, 2024

arsenm commented Feb 14, 2024

JanekvO commented Feb 16, 2024

arsenm Feb 29, 2024

arsenm Feb 29, 2024

JanekvO commented Mar 14, 2024

JanekvO commented Mar 19, 2024

dtcxzyw commented Mar 21, 2024

amy-kwan commented Mar 21, 2024

goldsteinn commented Mar 21, 2024

MaskRay commented Mar 21, 2024

		@@ -0,0 +1,68 @@
		// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s

[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855

[AMDGPU] MCExpr-ify MC layer kernel descriptor #80855

Conversation

JanekvO commented Feb 6, 2024

llvmbot commented Feb 6, 2024 • edited

arsenm left a comment

Choose a reason for hiding this comment

arsenm Feb 6, 2024

Choose a reason for hiding this comment

arsenm Feb 29, 2024

Choose a reason for hiding this comment

arsenm Feb 6, 2024

Choose a reason for hiding this comment

JanekvO commented Feb 6, 2024

arsenm commented Feb 8, 2024

JanekvO commented Feb 8, 2024

arsenm commented Feb 8, 2024

JanekvO commented Feb 8, 2024

arsenm commented Feb 9, 2024

JanekvO commented Feb 12, 2024

arsenm commented Feb 14, 2024

JanekvO commented Feb 16, 2024

arsenm Feb 29, 2024

Choose a reason for hiding this comment

arsenm Feb 29, 2024

Choose a reason for hiding this comment

JanekvO commented Mar 14, 2024

JanekvO commented Mar 19, 2024

dtcxzyw commented Mar 21, 2024

amy-kwan commented Mar 21, 2024

goldsteinn commented Mar 21, 2024

MaskRay commented Mar 21, 2024

llvmbot commented Feb 6, 2024 •

edited