From d5be7f1f086f86d5d22a60fd0c35800bb67f36e4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Nov 2025 16:32:25 -0800 Subject: [PATCH] AMDGPU: Use vgpr to implement divergent i32->i64 anyext Handle this for consistency with the zext case. --- llvm/lib/Target/AMDGPU/SIInstructions.td | 7 ++++- llvm/test/CodeGen/AMDGPU/rem_i128.ll | 30 +++++++------------ .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 3 +- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 6 ++-- 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b7256b81ee826..6cc9b3cc67530 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2951,10 +2951,15 @@ def : GCNPat < >; def : GCNPat < - (i64 (anyext i32:$src)), + (i64 (UniformUnaryFrag i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1) >; +def : GCNPat < + (i64 (anyext i32:$src)), + (REG_SEQUENCE VReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1) +>; + class ZExt_i64_i1_Pat : GCNPat < (i64 (ext i1:$src)), (REG_SEQUENCE VReg_64, diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index d36d95d182ab7..32862f73d2f29 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -1161,9 +1161,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_add3_u32 v8, v0, v2, v8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9] ; GFX9-O0-NEXT: v_mov_b32_e32 v14, v9 ; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec @@ -1190,9 +1189,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_add3_u32 v14, v9, v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] ; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 ; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec @@ -1221,9 +1219,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[14:15] ; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 ; GFX9-O0-NEXT: v_or_b32_e64 v9, v9, v14 @@ -1240,9 +1237,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15 ; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9 @@ -1280,9 +1276,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v23, v24 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v24, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s4, v[23:24] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v24 ; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 @@ -2447,9 +2442,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[17:18], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4 @@ -2476,9 +2470,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec @@ -2507,9 +2500,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16] ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16 ; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12 @@ -2526,9 +2518,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16] ; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 ; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v17 @@ -2566,9 +2557,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v20, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20] ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20 ; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index fe183287c46c2..81e17400973a4 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -475,9 +475,8 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline { ; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr35 ; GFX9-O0-NEXT: ; implicit-def: $sgpr36 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s35 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35 ; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s34, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 2aacb96ca4306..72672c8b6efad 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -479,9 +479,8 @@ define i64 @called_i64(i64 %a) noinline { ; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec @@ -1310,9 +1309,8 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline { ; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec