Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 15, 2025

This probably should have turned into a regular integer constant
earlier. This is to defend against future regressions.

Copy link
Contributor Author

arsenm commented Nov 15, 2025

@llvmbot
Copy link
Member

llvmbot commented Nov 15, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

This probably should have turned into a regular integer constant
earlier. This is to defend against future regressions.


Full diff: https://github.com/llvm/llvm-project/pull/168168.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+18)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (+2-15)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll (+6-6)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll (+18-18)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b8b419d93021a..9308934c8baf8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -470,6 +470,24 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
 }
 
+SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,
+                                              SelectionDAG &DAG) const {
+  // TODO: Handle undef as zero
+
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+  uint32_t LHSVal, RHSVal;
+  if (getConstantValue(N->getOperand(0), LHSVal) &&
+      getConstantValue(N->getOperand(1), RHSVal)) {
+    SDLoc SL(N);
+    uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
+    return DAG.getMachineNode(
+        isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
+        N->getValueType(0), DAG.getTargetConstant(K, SL, MVT::i32));
+  }
+
+  return nullptr;
+}
+
 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
   EVT VT = N->getValueType(0);
   unsigned NumVectorElts = VT.getVectorNumElements();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..c902b7e7f1d87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -45,21 +45,6 @@ static inline bool getConstantValue(SDValue N, uint32_t &Out) {
   return false;
 }
 
-// TODO: Handle undef as zero
-static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
-  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
-  uint32_t LHSVal, RHSVal;
-  if (getConstantValue(N->getOperand(0), LHSVal) &&
-      getConstantValue(N->getOperand(1), RHSVal)) {
-    SDLoc SL(N);
-    uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
-    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
-                              DAG.getTargetConstant(K, SL, MVT::i32));
-  }
-
-  return nullptr;
-}
-
 /// AMDGPU specific code to select AMDGPU machine instructions for
 /// SelectionDAG operations.
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -115,6 +100,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
 
   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
 
+  SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) const;
+
   SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
   SDNode *glueCopyToM0LDSInit(SDNode *N) const;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
index 5b6fc6ae2cb91..69ed10c7c02a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -497,10 +497,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
 ; GFX1030:       ; %bb.0: ; %main_body
 ; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX1030-NEXT:    v_mov_b32_e32 v4, 2.0
-; GFX1030-NEXT:    v_mov_b32_e32 v5, 0x44004200
-; GFX1030-NEXT:    v_mov_b32_e32 v6, 0x46004500
 ; GFX1030-NEXT:    v_mov_b32_e32 v7, 0x48004700
+; GFX1030-NEXT:    v_mov_b32_e32 v6, 0x46004500
+; GFX1030-NEXT:    v_mov_b32_e32 v5, 0x44004200
+; GFX1030-NEXT:    v_mov_b32_e32 v4, 2.0
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s0, s0, v2
 ; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
@@ -810,11 +810,11 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
 ; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX1030-NEXT:    v_mov_b32_e32 v3, 0
+; GFX1030-NEXT:    v_mov_b32_e32 v8, 0x48004700
+; GFX1030-NEXT:    v_mov_b32_e32 v7, 0x46004500
+; GFX1030-NEXT:    v_mov_b32_e32 v6, 0x44004200
 ; GFX1030-NEXT:    v_mov_b32_e32 v5, 2.0
 ; GFX1030-NEXT:    v_mov_b32_e32 v4, 1.0
-; GFX1030-NEXT:    v_mov_b32_e32 v6, 0x44004200
-; GFX1030-NEXT:    v_mov_b32_e32 v7, 0x46004500
-; GFX1030-NEXT:    v_mov_b32_e32 v8, 0x48004700
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s4, s6, v0
 ; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s7, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
index 7d3b316915923..c98feeb96232d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -746,9 +746,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
 ; GFX9-NEXT:    s_mov_b64 exec, 0
 ; GFX9-NEXT:  .LBB6_6: ; %.continue1
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX9-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX9-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX9-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX9-NEXT:    s_endpgm
 ; GFX9-NEXT:  .LBB6_7:
 ; GFX9-NEXT:    s_mov_b64 exec, 0
@@ -792,9 +792,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
 ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
 ; GFX10-32-NEXT:  .LBB6_6: ; %.continue1
 ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX10-32-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX10-32-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX10-32-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-32-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX10-32-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX10-32-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX10-32-NEXT:    s_endpgm
 ; GFX10-32-NEXT:  .LBB6_7:
 ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
@@ -838,9 +838,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
 ; GFX10-64-NEXT:    s_mov_b64 exec, 0
 ; GFX10-64-NEXT:  .LBB6_6: ; %.continue1
 ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
-; GFX10-64-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX10-64-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX10-64-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-64-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX10-64-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX10-64-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX10-64-NEXT:    s_endpgm
 ; GFX10-64-NEXT:  .LBB6_7:
 ; GFX10-64-NEXT:    s_mov_b64 exec, 0
@@ -1005,9 +1005,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
 ; GFX9-NEXT:  .LBB7_8: ; %.return
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9-NEXT:    s_and_b64 exec, exec, s[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX9-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX9-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX9-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX9-NEXT:    s_endpgm
 ; GFX9-NEXT:  .LBB7_9:
 ; GFX9-NEXT:    s_mov_b64 exec, 0
@@ -1068,9 +1068,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
 ; GFX10-32-NEXT:  .LBB7_8: ; %.return
 ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s0
-; GFX10-32-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX10-32-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX10-32-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-32-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX10-32-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX10-32-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX10-32-NEXT:    s_endpgm
 ; GFX10-32-NEXT:  .LBB7_9:
 ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
@@ -1131,9 +1131,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
 ; GFX10-64-NEXT:  .LBB7_8: ; %.return
 ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[0:1]
-; GFX10-64-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; GFX10-64-NEXT:    v_bfrev_b32_e32 v1, 60
-; GFX10-64-NEXT:    exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-64-NEXT:    v_bfrev_b32_e32 v0, 60
+; GFX10-64-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX10-64-NEXT:    exp mrt0 v1, v1, v0, v0 done compr vm
 ; GFX10-64-NEXT:    s_endpgm
 ; GFX10-64-NEXT:  .LBB7_9:
 ; GFX10-64-NEXT:    s_mov_b64 exec, 0

@arsenm arsenm marked this pull request as ready for review November 15, 2025 02:10
@arsenm arsenm force-pushed the users/arsenm/use-v-mov-b32-divergent-anyext-i64 branch from 41140ce to d5be7f1 Compare November 15, 2025 04:25
@arsenm arsenm force-pushed the users/arsenm/amdgpu/check-isVGPRImm-build-vector-v2i16-bitcast branch from ee34e82 to 9a8a0ec Compare November 15, 2025 04:25
Base automatically changed from users/arsenm/use-v-mov-b32-divergent-anyext-i64 to main November 15, 2025 04:58
This probably should have turned into a regular integer constant
earlier. This is to defend against future regressions.
@arsenm arsenm force-pushed the users/arsenm/amdgpu/check-isVGPRImm-build-vector-v2i16-bitcast branch from 9a8a0ec to f3c3a66 Compare November 15, 2025 05:07
@arsenm arsenm merged commit 9fecebf into main Nov 15, 2025
9 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/check-isVGPRImm-build-vector-v2i16-bitcast branch November 15, 2025 05:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants