Expand Up
@@ -2360,6 +2360,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v6, 5, s2
; GFX12-NEXT: v_lshrrev_b16 v9, 7, s2
; GFX12-NEXT: v_lshrrev_b16 v13, 3, s2
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v14, 5, s3
; GFX12-NEXT: v_lshrrev_b16 v18, 1, s3
; GFX12-NEXT: v_lshrrev_b16 v21, 3, s3
Expand Down
Expand Up
@@ -2397,6 +2398,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s2, s2, 0x10015
; GFX12-NEXT: v_and_b32_e32 v22, 1, v2
; GFX12-NEXT: v_dual_mov_b32 v28, s8 :: v_dual_and_b32 v1, 1, v10
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v29, s7 :: v_dual_and_b32 v2, 1, v11
; GFX12-NEXT: v_dual_mov_b32 v31, s3 :: v_dual_and_b32 v6, 1, v7
; GFX12-NEXT: v_and_b32_e32 v4, 1, v5
Expand Down
Expand Up
@@ -2794,6 +2796,7 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10013
; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10012
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v16, 4, s3
; GFX12-NEXT: v_lshrrev_b16 v20, 5, s3
; GFX12-NEXT: v_lshrrev_b16 v21, 6, s3
Expand All
@@ -2807,7 +2810,7 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10016
; GFX12-NEXT: s_bfe_i32 s11, s2, 0x10014
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10015
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v32, 0 :: v_dual_mov_b32 v25, s2
; GFX12-NEXT: v_bfe_i32 v15, v14, 0, 1
; GFX12-NEXT: v_dual_mov_b32 v24, s11 :: v_dual_mov_b32 v27, s9
Expand Down
Expand Up
@@ -3454,6 +3457,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v3, 11, s2
; GFX12-NEXT: v_lshrrev_b16 v9, 13, s3
; GFX12-NEXT: v_and_b32_e32 v44, 1, v1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v1, 1, s4
; GFX12-NEXT: s_lshr_b32 s5, s2, 24
; GFX12-NEXT: v_dual_mov_b32 v64, 0 :: v_dual_and_b32 v41, 1, v2
Expand All
@@ -3467,14 +3471,16 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v13, 7, s3
; GFX12-NEXT: v_lshrrev_b16 v14, 1, s3
; GFX12-NEXT: v_lshrrev_b16 v17, 5, s4
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v2, 5, s5
; GFX12-NEXT: s_and_b32 s7, s2, 1
; GFX12-NEXT: s_bfe_u32 s18, s3, 0x10010
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v52, s18 :: v_dual_and_b32 v35, 1, v9
; GFX12-NEXT: v_and_b32_e32 v9, 1, v1
; GFX12-NEXT: v_lshrrev_b16 v1, 3, s4
; GFX12-NEXT: s_bfe_u32 s19, s3, 0x10017
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v51, s19 :: v_dual_and_b32 v42, 1, v3
; GFX12-NEXT: v_lshrrev_b16 v3, 3, s5
; GFX12-NEXT: v_lshrrev_b16 v15, 3, s3
Expand All
@@ -3489,30 +3495,34 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s9, s2, 0x10012
; GFX12-NEXT: s_bfe_u32 s10, s2, 0x10011
; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10017
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v59, s12 :: v_dual_and_b32 v22, 1, v13
; GFX12-NEXT: v_dual_mov_b32 v62, s9 :: v_dual_and_b32 v13, 1, v17
; GFX12-NEXT: v_lshrrev_b16 v17, 6, s5
; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10016
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v58, s13 :: v_dual_and_b32 v23, 1, v14
; GFX12-NEXT: s_bfe_u32 s14, s2, 0x10015
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v57, s14 :: v_dual_and_b32 v26, 1, v11
; GFX12-NEXT: v_and_b32_e32 v11, 1, v1
; GFX12-NEXT: v_lshrrev_b16 v1, 1, s5
; GFX12-NEXT: s_bfe_u32 s15, s3, 0x10013
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v55, s15 :: v_dual_and_b32 v34, 1, v7
; GFX12-NEXT: v_lshrrev_b16 v7, 7, s5
; GFX12-NEXT: s_bfe_u32 s16, s3, 0x10012
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v54, s16 :: v_dual_and_b32 v31, 1, v10
; GFX12-NEXT: s_bfe_u32 s17, s3, 0x10011
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v53, s17 :: v_dual_and_b32 v38, 1, v5
; GFX12-NEXT: s_bfe_u32 s20, s3, 0x10016
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v50, s20 :: v_dual_and_b32 v39, 1, v6
; GFX12-NEXT: v_lshrrev_b16 v6, 2, s5
; GFX12-NEXT: s_bfe_u32 s21, s3, 0x10014
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v48, s21 :: v_dual_and_b32 v43, 1, v4
; GFX12-NEXT: v_lshrrev_b16 v4, 4, s5
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10018
Expand All
@@ -3522,7 +3532,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v24, 8, s3
; GFX12-NEXT: v_lshrrev_b16 v18, 2, s3
; GFX12-NEXT: s_bfe_u32 s11, s2, 0x10010
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v60, s11 :: v_dual_and_b32 v19, 1, v15
; GFX12-NEXT: v_lshrrev_b16 v0, 12, s2
; GFX12-NEXT: v_lshrrev_b16 v8, 14, s2
Expand All
@@ -3541,6 +3551,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s3, s3, 0x10015
; GFX12-NEXT: v_and_b32_e32 v1, 1, v1
; GFX12-NEXT: v_and_b32_e32 v3, 0xffff, v2
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v63, s8 :: v_dual_and_b32 v2, 1, v6
; GFX12-NEXT: v_and_b32_e32 v6, 1, v17
; GFX12-NEXT: v_and_b32_e32 v17, 0xffff, v23
Expand Down
Expand Up
@@ -4266,6 +4277,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v0, 12, s2
; GFX12-NEXT: v_lshrrev_b16 v8, 13, s2
; GFX12-NEXT: v_lshrrev_b16 v32, 15, s2
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v12, 4, s4
; GFX12-NEXT: v_lshrrev_b16 v13, 5, s4
; GFX12-NEXT: v_lshrrev_b16 v14, 6, s4
Expand Down
Expand Up
@@ -4311,7 +4323,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_i32 s20, s3, 0x10016
; GFX12-NEXT: s_bfe_i32 s21, s3, 0x10014
; GFX12-NEXT: s_bfe_i32 s3, s3, 0x10015
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v64, 0 :: v_dual_mov_b32 v49, s3
; GFX12-NEXT: v_bfe_i32 v23, v23, 0, 1
; GFX12-NEXT: v_bfe_i32 v22, v22, 0, 1
Expand Down
Expand Up
@@ -6791,6 +6803,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
; GFX12-NEXT: v_lshrrev_b16 v4, 9, s2
; GFX12-NEXT: v_lshrrev_b16 v8, 7, s2
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v16, 7, s3
; GFX12-NEXT: v_lshrrev_b16 v18, 6, s3
; GFX12-NEXT: v_lshrrev_b16 v17, 5, s3
Expand All
@@ -6808,6 +6821,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v28, 1, v21
; GFX12-NEXT: v_dual_mov_b32 v42, v1 :: v_dual_and_b32 v31, 1, v2
; GFX12-NEXT: v_dual_mov_b32 v32, v1 :: v_dual_and_b32 v33, 0xffff, v0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_and_b32 v21, 0xffff, v3
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014
Expand All
@@ -6817,6 +6831,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v11, 1, v11
; GFX12-NEXT: v_and_b32_e32 v13, 1, v13
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s6
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10012
Expand All
@@ -6827,6 +6842,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v9, 1, v17
; GFX12-NEXT: v_and_b32_e32 v29, 1, v23
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s6
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_lshrrev_b16 v5, 15, s2
Expand All
@@ -6842,6 +6858,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v6, 0xffff, v13
; GFX12-NEXT: v_and_b32_e32 v17, 0xffff, v24
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v13, v1
; GFX12-NEXT: v_and_b32_e32 v43, 0xffff, v26
Expand Down
Expand Up
@@ -7554,6 +7571,7 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX12-NEXT: global_store_b128 v62, v[30:33], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v62, v[26:29], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v62, v[8:11], s[0:1] offset:32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v10, s2 :: v_dual_mov_b32 v11, s3
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v62, v[4:7], s[0:1] offset:16
Expand Down
Expand Up
@@ -8449,6 +8467,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v43, 1, v10
; GFX12-NEXT: v_dual_mov_b32 v68, v1 :: v_dual_and_b32 v69, 1, v2
; GFX12-NEXT: v_dual_mov_b32 v62, v1 :: v_dual_and_b32 v71, 0xffff, v0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_and_b32 v67, 0xffff, v3
; GFX12-NEXT: v_mov_b32_e32 v66, v1
; GFX12-NEXT: v_dual_mov_b32 v2, s9 :: v_dual_mov_b32 v3, v1
Expand All
@@ -8457,6 +8476,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v14, 13, s3
; GFX12-NEXT: v_lshrrev_b16 v18, 9, s3
; GFX12-NEXT: v_dual_mov_b32 v47, v1 :: v_dual_and_b32 v38, 1, v6
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v4, 5, s4
; GFX12-NEXT: v_lshrrev_b16 v6, 3, s4
; GFX12-NEXT: s_bfe_u32 s8, s3, 0x10016
Expand All
@@ -8465,6 +8485,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v45, 1, v12
; GFX12-NEXT: v_and_b32_e32 v41, 1, v16
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s9
; GFX12-NEXT: v_mov_b32_e32 v0, s8
; GFX12-NEXT: s_lshr_b32 s5, s2, 24
Expand All
@@ -8473,6 +8494,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_dual_mov_b32 v49, v1 :: v_dual_and_b32 v40, 1, v8
; GFX12-NEXT: v_and_b32_e32 v44, 1, v14
; GFX12-NEXT: v_and_b32_e32 v14, 1, v6
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_lshrrev_b16 v6, 5, s5
; GFX12-NEXT: v_lshrrev_b16 v8, 1, s5
; GFX12-NEXT: v_lshrrev_b16 v10, 3, s5
Expand All
@@ -8483,6 +8505,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s9, s3, 0x10013
; GFX12-NEXT: v_and_b32_e32 v33, 1, v20
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s9
; GFX12-NEXT: v_mov_b32_e32 v0, s8
; GFX12-NEXT: v_lshrrev_b16 v9, 15, s3
Expand All
@@ -8509,6 +8532,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10018
; GFX12-NEXT: s_bfe_u32 s3, s3, 0x10010
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:400
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s8
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10016
Expand All
@@ -8518,6 +8542,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v82, 0xffff, v35
; GFX12-NEXT: v_and_b32_e32 v35, 1, v27
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:384
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v27, v1
; GFX12-NEXT: v_and_b32_e32 v81, 0xffff, v4
Expand All
@@ -8529,6 +8554,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v26, 0xffff, v31
; GFX12-NEXT: v_and_b32_e32 v31, 1, v29
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s8
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10012
Expand All
@@ -8538,6 +8564,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_lshrrev_b16 v21, 2, s2
; GFX12-NEXT: v_and_b32_e32 v33, 0xffff, v33
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s8
; GFX12-NEXT: v_lshrrev_b16 v15, 8, s2
Expand All
@@ -8561,6 +8588,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_and_b32_e32 v10, 0xffff, v39
; GFX12-NEXT: v_and_b32_e32 v39, 1, v25
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_and_b32 v77, 1, v7
; GFX12-NEXT: v_and_b32_e32 v79, 0xffff, v5
Expand Down
Expand Up
@@ -9818,6 +9846,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_dual_mov_b32 v75, s42 :: v_dual_mov_b32 v76, s43
; GFX12-NEXT: v_bfe_i32 v79, v1, 0, 1
; GFX12-NEXT: v_bfe_i32 v85, v65, 0, 1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v65, s40
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v12, v[69:72], s[0:1] offset:144
Expand Down
Expand Up
@@ -9903,6 +9932,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX12-NEXT: v_ashrrev_i32_e32 v50, 31, v49
; GFX12-NEXT: v_ashrrev_i32_e32 v88, 31, v87
; GFX12-NEXT: v_ashrrev_i32_e32 v86, 31, v85
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v34, s19 :: v_dual_mov_b32 v17, s4
; GFX12-NEXT: v_dual_mov_b32 v18, s5 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15
Expand Down