Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,17 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
PreloadedScratchRsrcReg,
ScratchRsrcReg, ScratchWaveOffsetReg);
}

if (ST.hasWaitXCnt()) {
// Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
// replay. This aligns hardware behavior with the compiler's s_wait_xcnt
// insertion logic, which assumes multi-group mode by default.
unsigned RegEncoding =
AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 25, 1);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(1)
.addImm(RegEncoding);
}
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Large diffs are not rendered by default.

157 changes: 130 additions & 27 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
;
; GFX1250-LABEL: abs_sgpr_i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_sext_i32_i16 s0, s0
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_abs_i32 s0, s0
Expand All @@ -43,10 +44,26 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
}

; Scalar i32 abs on an inreg argument in an amdgpu_cs entry function.
; The per-target checks all expect a single s_abs_i32; the GFX1250 checks
; additionally expect s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 as
; the first instruction of the function.
; NOTE(review): this function carries both a block using the plain GFX prefix
; and per-target blocks with the same body - confirm which set is current
; before regenerating the checks.
define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
; GFX-LABEL: abs_sgpr_i32:
; GFX: ; %bb.0:
; GFX-NEXT: s_abs_i32 s0, s0
; GFX-NEXT: ; return to shader part epilog
; GFX6-LABEL: abs_sgpr_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_abs_i32 s0, s0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: abs_sgpr_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_abs_i32 s0, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: abs_sgpr_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: abs_sgpr_i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_abs_i32 s0, s0
; GFX1250-NEXT: ; return to shader part epilog
%res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
ret i32 %res
}
Expand Down Expand Up @@ -81,6 +98,7 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
;
; GFX1250-LABEL: abs_sgpr_i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_ashr_i32 s2, s1, 31
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1250-NEXT: s_mov_b32 s3, s2
Expand All @@ -93,13 +111,38 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
}

; Scalar <4 x i32> abs on an inreg argument in an amdgpu_cs entry function.
; The per-target checks expect one s_abs_i32 per element (s0..s3); the GFX1250
; checks additionally expect s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; as the first instruction of the function.
; NOTE(review): this function carries both a block using the plain GFX prefix
; and per-target blocks with the same body - confirm which set is current
; before regenerating the checks.
define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
; GFX-LABEL: abs_sgpr_v4i32:
; GFX: ; %bb.0:
; GFX-NEXT: s_abs_i32 s0, s0
; GFX-NEXT: s_abs_i32 s1, s1
; GFX-NEXT: s_abs_i32 s2, s2
; GFX-NEXT: s_abs_i32 s3, s3
; GFX-NEXT: ; return to shader part epilog
; GFX6-LABEL: abs_sgpr_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_abs_i32 s0, s0
; GFX6-NEXT: s_abs_i32 s1, s1
; GFX6-NEXT: s_abs_i32 s2, s2
; GFX6-NEXT: s_abs_i32 s3, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: abs_sgpr_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_abs_i32 s0, s0
; GFX8-NEXT: s_abs_i32 s1, s1
; GFX8-NEXT: s_abs_i32 s2, s2
; GFX8-NEXT: s_abs_i32 s3, s3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: abs_sgpr_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: s_abs_i32 s2, s2
; GFX10-NEXT: s_abs_i32 s3, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: abs_sgpr_v4i32:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_abs_i32 s0, s0
; GFX1250-NEXT: s_abs_i32 s1, s1
; GFX1250-NEXT: s_abs_i32 s2, s2
; GFX1250-NEXT: s_abs_i32 s3, s3
; GFX1250-NEXT: ; return to shader part epilog
%res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
ret <4 x i32> %res
}
Expand Down Expand Up @@ -278,13 +321,38 @@ define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
}

; Scalar <2 x i8> abs on an inreg argument in an amdgpu_cs entry function.
; The per-target checks expect each element to be sign-extended with
; s_sext_i32_i8 and then passed through s_abs_i32; the GFX1250 checks
; additionally expect s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 as
; the first instruction of the function.
; NOTE(review): this function carries both a block using the plain GFX prefix
; and per-target blocks with the same body - confirm which set is current
; before regenerating the checks.
define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
; GFX-LABEL: abs_sgpr_v2i8:
; GFX: ; %bb.0:
; GFX-NEXT: s_sext_i32_i8 s0, s0
; GFX-NEXT: s_sext_i32_i8 s1, s1
; GFX-NEXT: s_abs_i32 s0, s0
; GFX-NEXT: s_abs_i32 s1, s1
; GFX-NEXT: ; return to shader part epilog
; GFX6-LABEL: abs_sgpr_v2i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: s_sext_i32_i8 s1, s1
; GFX6-NEXT: s_abs_i32 s0, s0
; GFX6-NEXT: s_abs_i32 s1, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: abs_sgpr_v2i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i8 s0, s0
; GFX8-NEXT: s_sext_i32_i8 s1, s1
; GFX8-NEXT: s_abs_i32 s0, s0
; GFX8-NEXT: s_abs_i32 s1, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: abs_sgpr_v2i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: abs_sgpr_v2i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_sext_i32_i8 s0, s0
; GFX1250-NEXT: s_sext_i32_i8 s1, s1
; GFX1250-NEXT: s_abs_i32 s0, s0
; GFX1250-NEXT: s_abs_i32 s1, s1
; GFX1250-NEXT: ; return to shader part epilog
%res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
ret <2 x i8> %res
}
Expand Down Expand Up @@ -340,15 +408,46 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
}

; Scalar <3 x i8> abs on an inreg argument in an amdgpu_cs entry function.
; The per-target checks expect each of the three elements to be sign-extended
; with s_sext_i32_i8 and then passed through s_abs_i32; the GFX1250 checks
; additionally expect s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 as
; the first instruction of the function.
; NOTE(review): this function carries both a block using the plain GFX prefix
; and per-target blocks with the same body - confirm which set is current
; before regenerating the checks.
define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
; GFX-LABEL: abs_sgpr_v3i8:
; GFX: ; %bb.0:
; GFX-NEXT: s_sext_i32_i8 s0, s0
; GFX-NEXT: s_sext_i32_i8 s1, s1
; GFX-NEXT: s_sext_i32_i8 s2, s2
; GFX-NEXT: s_abs_i32 s0, s0
; GFX-NEXT: s_abs_i32 s1, s1
; GFX-NEXT: s_abs_i32 s2, s2
; GFX-NEXT: ; return to shader part epilog
; GFX6-LABEL: abs_sgpr_v3i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_sext_i32_i8 s0, s0
; GFX6-NEXT: s_sext_i32_i8 s1, s1
; GFX6-NEXT: s_sext_i32_i8 s2, s2
; GFX6-NEXT: s_abs_i32 s0, s0
; GFX6-NEXT: s_abs_i32 s1, s1
; GFX6-NEXT: s_abs_i32 s2, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: abs_sgpr_v3i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i8 s0, s0
; GFX8-NEXT: s_sext_i32_i8 s1, s1
; GFX8-NEXT: s_sext_i32_i8 s2, s2
; GFX8-NEXT: s_abs_i32 s0, s0
; GFX8-NEXT: s_abs_i32 s1, s1
; GFX8-NEXT: s_abs_i32 s2, s2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: abs_sgpr_v3i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_sext_i32_i8 s2, s2
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: s_abs_i32 s2, s2
; GFX10-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: abs_sgpr_v3i8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_sext_i32_i8 s0, s0
; GFX1250-NEXT: s_sext_i32_i8 s1, s1
; GFX1250-NEXT: s_sext_i32_i8 s2, s2
; GFX1250-NEXT: s_abs_i32 s0, s0
; GFX1250-NEXT: s_abs_i32 s1, s1
; GFX1250-NEXT: s_abs_i32 s2, s2
; GFX1250-NEXT: ; return to shader part epilog
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
ret <3 x i8> %res
}
Expand Down Expand Up @@ -446,6 +545,7 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
;
; GFX1250-LABEL: abs_sgpr_v2i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_sext_i32_i16 s1, s0
; GFX1250-NEXT: s_ashr_i32 s0, s0, 16
; GFX1250-NEXT: s_abs_i32 s1, s1
Expand Down Expand Up @@ -536,6 +636,7 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
;
; GFX1250-LABEL: abs_sgpr_v3i16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_sext_i32_i16 s2, s0
; GFX1250-NEXT: s_ashr_i32 s0, s0, 16
; GFX1250-NEXT: s_abs_i32 s2, s2
Expand Down Expand Up @@ -598,3 +699,5 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
ret <3 x i16> %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX: {{.*}}
34 changes: 29 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
;
; GFX1250-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX1250-UNALIGNED: ; %bb.0:
; GFX1250-UNALIGNED-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1]
; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0
Expand All @@ -940,6 +941,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
;
; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
; GFX1250-NOUNALIGNED: ; %bb.0:
; GFX1250-NOUNALIGNED-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NOUNALIGNED-NEXT: s_clause 0xb
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s2, s[0:1], 0x1
; GFX1250-NOUNALIGNED-NEXT: s_load_u8 s3, s[0:1], 0x3
Expand Down Expand Up @@ -1208,6 +1210,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
;
; GFX1250-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX1250-UNALIGNED: ; %bb.0:
; GFX1250-UNALIGNED-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1]
; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1218,6 +1221,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
;
; GFX1250-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
; GFX1250-NOUNALIGNED: ; %bb.0:
; GFX1250-NOUNALIGNED-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NOUNALIGNED-NEXT: s_clause 0x5
; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s2, s[0:1], 0x2
; GFX1250-NOUNALIGNED-NEXT: s_load_u16 s3, s[0:1], 0x6
Expand Down Expand Up @@ -1362,6 +1366,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg
;
; GFX1250-LABEL: s_load_constant_v3i32_align4:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
Expand Down Expand Up @@ -1413,6 +1418,7 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
;
; GFX1250-LABEL: s_load_constant_i96_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
Expand Down Expand Up @@ -1464,6 +1470,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg
;
; GFX1250-LABEL: s_load_constant_v3i32_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
Expand Down Expand Up @@ -1515,6 +1522,7 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg
;
; GFX1250-LABEL: s_load_constant_v6i16_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
Expand Down Expand Up @@ -1593,6 +1601,7 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg
;
; GFX1250-LABEL: s_load_constant_v12i8_align8:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_mov_b32 s4, s0
; GFX1250-NEXT: s_mov_b32 s5, s1
; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
Expand Down Expand Up @@ -1670,11 +1679,24 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg
}

define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: s_load_constant_v3i32_align16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align16:
; GFX12-UNALIGNED: ; %bb.0:
; GFX12-UNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
;
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align16:
; GFX12-NOUNALIGNED: ; %bb.0:
; GFX12-NOUNALIGNED-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX1250-LABEL: s_load_constant_v3i32_align16:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: ; return to shader part epilog
;
; GCN-LABEL: s_load_constant_v3i32_align16:
; GCN: ; %bb.0:
Expand All @@ -1684,3 +1706,5 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg
%load = load <3 x i32>, ptr addrspace(4) %ptr, align 16
ret <3 x i32> %load
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX12: {{.*}}
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs-i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ define i64 @test_abs_i64(i64 %a) {
define amdgpu_ps i64 @test_umin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umin_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; CHECK-NEXT: v_min_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
Expand All @@ -144,6 +145,7 @@ define amdgpu_ps i64 @test_umin_i64_s(i64 inreg %a, i64 inreg %b) {
define amdgpu_ps i64 @test_umax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umax_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; CHECK-NEXT: v_max_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
Expand All @@ -156,6 +158,7 @@ define amdgpu_ps i64 @test_umax_i64_s(i64 inreg %a, i64 inreg %b) {
define amdgpu_ps i64 @test_smin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smin_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; CHECK-NEXT: v_min_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
Expand All @@ -168,6 +171,7 @@ define amdgpu_ps i64 @test_smin_i64_s(i64 inreg %a, i64 inreg %b) {
define amdgpu_ps i64 @test_smax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smax_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; CHECK-NEXT: v_max_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
Expand All @@ -180,6 +184,7 @@ define amdgpu_ps i64 @test_smax_i64_s(i64 inreg %a, i64 inreg %b) {
define amdgpu_ps i64 @test_abs_i64_s(i64 inreg %a) {
; CHECK-LABEL: test_abs_i64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; CHECK-NEXT: s_ashr_i32 s2, s1, 31
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT: s_mov_b32 s3, s2
Expand Down
Loading