From 5e68649e0a5f43e4b764617881a57beed5334d5c Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 26 Sep 2025 13:47:35 +0900
Subject: [PATCH] AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo

This is primarily to avoid folding a frame index materialized into an
SGPR into the pseudo; this would end up looking like:

  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg

which is not useful. Match the check used for the b64 case.

The check is limited to the pseudo to avoid a regression due to
gfx908's special case: that target expects to reach this point with
v_accvgpr_write_b32 for the illegal cases, and to stay in the
intermediate state with an SGPR input. This avoids regressions in a
future patch.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |   9 ++
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir     |   4 +-
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir    |   8 +-
 .../AMDGPU/fold-operands-frame-index-agpr.mir | 131 ++++++++++++++++++
 4 files changed, 146 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..fed37788802b9 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
   if (MovSrcRC) {
     if (UseSubReg)
       MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+
+    // FIXME: We should be able to directly check immediate operand legality
+    // for all cases, but gfx908 hacks break.
+    if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
+        (!OpToFold.isImm() ||
+         !TII->isImmOperandLegal(MovDesc, SrcIdx,
+                                 *OpToFold.getEffectiveImmVal())))
+      break;
+
     if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
       break;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 73cdcddbef135..a3b2191695734 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -209,8 +209,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:sreg_32 = S_MOV_B32 65, implicit $exec
     %1:av_32 = COPY %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index dfcf9a1f5c5ae..bec188e4e8378 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -240,8 +240,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:sreg_32 = S_MOV_B32 999
     %1:av_32 = COPY %0
@@ -257,8 +257,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
     %1:av_32 = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
new file mode 100644
index 0000000000000..32a209608a4d0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    SI_RETURN implicit %1
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 1234
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 8
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_0
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_regression_0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = COPY %0
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_1
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_regression_1
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_MOV_B32 killed %0
+    %2:sreg_64 = S_MOV_B64 0
+    %3:av_32 = COPY %1
+    %4:vgpr_32 = COPY %3, implicit $exec
+    $vgpr0 = COPY %4
+    SI_RETURN implicit $vgpr0
+
+...
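--
A minimal before/after sketch of the behavior this change pins down,
reconstructed from the tests above (illustrative MIR, not part of the
applied diff). A frame index materialized into an SGPR is no longer
folded into the pseudo; the intermediate state is kept as-is:

  %0:sreg_32 = S_MOV_B32 %stack.0
  %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec

A legal inline immediate still folds all the way through; e.g. in
fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v the
input sequence

  %0:sreg_32 = S_MOV_B32 8
  %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
  %2:vgpr_32 = COPY %1, implicit $exec

collapses to a single move:

  %2:vgpr_32 = V_MOV_B32_e32 8, implicit $exec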