From 5e68649e0a5f43e4b764617881a57beed5334d5c Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 26 Sep 2025 13:47:35 +0900
Subject: [PATCH] AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo

This is primarily to avoid folding a frame index materialized into an
SGPR into the pseudo; this would end up looking like:

  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg

which is not useful. Match the check used for the b64 case.

The check is limited to the pseudo to avoid a regression due to
gfx908's special case: that target expects to reach this point with
v_accvgpr_write_b32 for the illegal cases, and to stay in the
intermediate state with an SGPR input. This avoids regressions in a
future patch.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |   9 ++
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir     |   4 +-
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir    |   8 +-
 .../AMDGPU/fold-operands-frame-index-agpr.mir | 131 ++++++++++++++++++
 4 files changed, 146 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..fed37788802b9 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
   if (MovSrcRC) {
     if (UseSubReg)
       MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+
+    // FIXME: We should be able to directly check immediate operand legality
+    // for all cases, but gfx908 hacks break.
+    if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
+        (!OpToFold.isImm() ||
+         !TII->isImmOperandLegal(MovDesc, SrcIdx,
+                                 *OpToFold.getEffectiveImmVal())))
+      break;
+
     if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
       break;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 73cdcddbef135..a3b2191695734 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -209,8 +209,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:sreg_32 = S_MOV_B32 65, implicit $exec
     %1:av_32 = COPY %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index dfcf9a1f5c5ae..bec188e4e8378 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -240,8 +240,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:sreg_32 = S_MOV_B32 999
     %1:av_32 = COPY %0
@@ -257,8 +257,8 @@ body: |
   bb.0:
     ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    ; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
     %1:av_32 = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
new file mode 100644
index 0000000000000..32a209608a4d0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    SI_RETURN implicit %1
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
+    ; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 1234
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 8
+    %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_0
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_regression_0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:av_32 = COPY %0
+    %2:vgpr_32 = COPY %1, implicit $exec
+    $vgpr0 = COPY %2
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: fold_frame_index_av_regression_1
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index_av_regression_1
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_MOV_B32 killed %0
+    %2:sreg_64 = S_MOV_B64 0
+    %3:av_32 = COPY %1
+    %4:vgpr_32 = COPY %3, implicit $exec
+    $vgpr0 = COPY %4
+    SI_RETURN implicit $vgpr0
+
+...
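--
A minimal before/after sketch of the behavior this change pins down,
reconstructed from the tests above (illustrative MIR, not part of the
applied diff). A frame index materialized into an SGPR is no longer
folded into the pseudo; the intermediate state is kept as-is:

  %0:sreg_32 = S_MOV_B32 %stack.0
  %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec

A legal inline immediate still folds all the way through; e.g. in
fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v the
input sequence

  %0:sreg_32 = S_MOV_B32 8
  %1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
  %2:vgpr_32 = COPY %1, implicit $exec

collapses to a single move:

  %2:vgpr_32 = V_MOV_B32_e32 8, implicit $exec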