From 28cc8d8c7be45e8b5b1416ecb75741ebec5b7ecc Mon Sep 17 00:00:00 2001 From: David Stuttard Date: Mon, 1 Sep 2025 09:37:48 +0100 Subject: [PATCH 1/3] [AMDGPU] Pre-commit test for folding abs64 error --- llvm/test/CodeGen/AMDGPU/fold-abs64.mir | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-abs64.mir diff --git a/llvm/test/CodeGen/AMDGPU/fold-abs64.mir b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir new file mode 100644 index 0000000000000..6b9d8d94ff550 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir @@ -0,0 +1,40 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=si-fold-operands %s -o - | FileCheck %s + +--- | + @sym = external constant i32 + define void @fn() { ret void } + define void @fn2() { ret void } +... + +--- +name: fn +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: fn + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef [[DEF]].sub0, target-flags(amdgpu-abs64) @sym, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + %0:vreg_64 = IMPLICIT_DEF + %1:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @sym + %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef %1.sub0, undef %0.sub0, 0, implicit $exec + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: fn2 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: fn2 + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 591751049, undef [[DEF]].sub0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + %0:vreg_64 = IMPLICIT_DEF + %1:sreg_64 = S_MOV_B64 4886718345 + %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef %1.sub0, undef %0.sub0, 0, implicit $exec + S_ENDPGM 0, implicit %2 +... +## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +# CHECK: {{.*}} From f6e09b8b1abbd7a3d548164797c42c190cac6137 Mon Sep 17 00:00:00 2001 From: David Stuttard Date: Thu, 28 Aug 2025 16:10:43 +0100 Subject: [PATCH 2/3] [AMDGPU] SIFoldOperands check that shrunk op is valid Folding can result in a global ABS64 relocation being used in an add instruction that doesn't support 64-bit immediates. This can cause issues as the relocation overflows into the next instruction and corrupts it. Checking that the shrunk instruction is valid, and not performing the fold when it is invalid, fixes the issue. This fixes https://github.com/llvm/llvm-project/issues/153812 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 ++ llvm/test/CodeGen/AMDGPU/fold-abs64.mir | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 3979e1e0c44aa..9bb94e6dd2ad4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -868,6 +868,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList( // Make sure to get the 32-bit version of the commuted opcode. 
unsigned MaybeCommutedOpc = MI->getOpcode(); Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); + if (TII->pseudoToMCOpcode(Op32) == -1) + return false; } appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, /*Commuted=*/true, diff --git a/llvm/test/CodeGen/AMDGPU/fold-abs64.mir b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir index 6b9d8d94ff550..d78456afc2be2 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-abs64.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir @@ -14,7 +14,8 @@ body: | bb.0: ; CHECK-LABEL: name: fn ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef [[DEF]].sub0, target-flags(amdgpu-abs64) @sym, 0, implicit $exec + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @sym + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef [[DEF]].sub0, undef [[S_MOV_B64_]].sub0, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] %0:vreg_64 = IMPLICIT_DEF %1:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @sym From 78798d4044d5fad5b773a3944987449d98976b47 Mon Sep 17 00:00:00 2001 From: David Stuttard Date: Tue, 2 Sep 2025 08:43:37 +0100 Subject: [PATCH 3/3] Fix test --- llvm/test/CodeGen/AMDGPU/fold-abs64.mir | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-abs64.mir b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir index d78456afc2be2..fceb186717344 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-abs64.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-abs64.mir @@ -37,5 +37,3 @@ body: | %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 undef %1.sub0, undef %0.sub0, 0, implicit $exec S_ENDPGM 0, implicit %2 ... -## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -# CHECK: {{.*}}