-
Notifications
You must be signed in to change notification settings - Fork 15.2k
AMDGPU: Consider isVGPRImm when forming constant from build_vector #168168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
arsenm
merged 1 commit into
main
from
users/arsenm/amdgpu/check-isVGPRImm-build-vector-v2i16-bitcast
Nov 15, 2025
Merged
AMDGPU: Consider isVGPRImm when forming constant from build_vector #168168
arsenm
merged 1 commit into
main
from
users/arsenm/amdgpu/check-isVGPRImm-build-vector-v2i16-bitcast
Nov 15, 2025
+44
−39
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Contributor
Author
This was referenced Nov 15, 2025
Member
|
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: This probably should have turned into a regular integer constant earlier. This is to defend against future regressions. Full diff: https://github.com/llvm/llvm-project/pull/168168.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b8b419d93021a..9308934c8baf8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -470,6 +470,24 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
+SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,
+ SelectionDAG &DAG) const {
+ // TODO: Handle undef as zero
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(
+ isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
+ N->getValueType(0), DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..c902b7e7f1d87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -45,21 +45,6 @@ static inline bool getConstantValue(SDValue N, uint32_t &Out) {
return false;
}
-// TODO: Handle undef as zero
-static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- SDLoc SL(N);
- uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
- return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
- DAG.getTargetConstant(K, SL, MVT::i32));
- }
-
- return nullptr;
-}
-
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -115,6 +100,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+ SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) const;
+
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
SDNode *glueCopyToM0LDSInit(SDNode *N) const;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
index 5b6fc6ae2cb91..69ed10c7c02a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -497,10 +497,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x44004200
-; GFX1030-NEXT: v_mov_b32_e32 v6, 0x46004500
; GFX1030-NEXT: v_mov_b32_e32 v7, 0x48004700
+; GFX1030-NEXT: v_mov_b32_e32 v6, 0x46004500
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x44004200
+; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_add_co_u32 v0, s0, s0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
@@ -810,11 +810,11 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
+; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
+; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
; GFX1030-NEXT: v_mov_b32_e32 v5, 2.0
; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0
-; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
-; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
-; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_add_co_u32 v0, s4, s6, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
index 7d3b316915923..c98feeb96232d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -746,9 +746,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX9-NEXT: s_mov_b64 exec, 0
; GFX9-NEXT: .LBB6_6: ; %.continue1
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX9-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX9-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .LBB6_7:
; GFX9-NEXT: s_mov_b64 exec, 0
@@ -792,9 +792,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-32-NEXT: .LBB6_6: ; %.continue1
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-32-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX10-32-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX10-32-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX10-32-NEXT: s_endpgm
; GFX10-32-NEXT: .LBB6_7:
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
@@ -838,9 +838,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX10-64-NEXT: s_mov_b64 exec, 0
; GFX10-64-NEXT: .LBB6_6: ; %.continue1
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-64-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX10-64-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX10-64-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB6_7:
; GFX10-64-NEXT: s_mov_b64 exec, 0
@@ -1005,9 +1005,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: .LBB7_8: ; %.return
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX9-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX9-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX9-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .LBB7_9:
; GFX9-NEXT: s_mov_b64 exec, 0
@@ -1068,9 +1068,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-32-NEXT: .LBB7_8: ; %.return
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
-; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX10-32-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-32-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX10-32-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX10-32-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX10-32-NEXT: s_endpgm
; GFX10-32-NEXT: .LBB7_9:
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
@@ -1131,9 +1131,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: .LBB7_8: ; %.return
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
-; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
-; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
-; GFX10-64-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
+; GFX10-64-NEXT: v_bfrev_b32_e32 v0, 60
+; GFX10-64-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX10-64-NEXT: exp mrt0 v1, v1, v0, v0 done compr vm
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB7_9:
; GFX10-64-NEXT: s_mov_b64 exec, 0
|
shiltian
approved these changes
Nov 15, 2025
41140ce to
d5be7f1
Compare
ee34e82 to
9a8a0ec
Compare
Base automatically changed from
users/arsenm/use-v-mov-b32-divergent-anyext-i64
to
main
November 15, 2025 04:58
This probably should have turned into a regular integer constant earlier. This is to defend against future regressions.
9a8a0ec to
f3c3a66
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.

This probably should have turned into a regular integer constant
earlier. This is to defend against future regressions.