2,820 changes: 2,373 additions & 447 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll

Large diffs are not rendered by default.

63 changes: 15 additions & 48 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -2055,30 +2055,18 @@ body: |
; VI-LABEL: name: test_fdiv_s16_constant_one_rcp
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fdiv_s16_constant_one_rcp
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_one_rcp
; GFX9-UNSAFE: liveins: $vgpr0
Expand All @@ -2091,16 +2079,10 @@ body: |
; GFX10-LABEL: name: test_fdiv_s16_constant_one_rcp
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s16) = G_FCONSTANT half 1.0
%1:_(s32) = COPY $vgpr0
Expand Down Expand Up @@ -2143,30 +2125,20 @@ body: |
; VI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00
; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-LABEL: name: test_fdiv_s16_constant_negative_one_rcp
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_negative_one_rcp
; GFX9-UNSAFE: liveins: $vgpr0
Expand All @@ -2180,16 +2152,11 @@ body: |
; GFX10-LABEL: name: test_fdiv_s16_constant_negative_one_rcp
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16)
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s16) = G_FCONSTANT half -1.0
%1:_(s32) = COPY $vgpr0
Expand Down
27 changes: 23 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,7 @@ entry:

; GCN-LABEL: {{^}}reciprocal_f16_rounded:
; GFX8PLUS: {{flat|global}}_load_{{ushort|u16}} [[VAL16:v[0-9]+]], v{{.+}}
; GFX8PLUS: v_cvt_f32_f16_e32 [[CVT_TO32:v[0-9]+]], [[VAL16]]
; GFX8PLUS: v_rcp_f32_e32 [[RCP32:v[0-9]+]], [[CVT_TO32]]
; GFX8PLUS: v_cvt_f16_f32_e32 [[CVT_BACK16:v[0-9]+]], [[RCP32]]
; GFX8PLUS: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK16]], [[VAL16]], 1.0
; GFX8PLUS: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL16]]
; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
entry:
Expand Down Expand Up @@ -265,6 +262,28 @@ define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(ptr addrspace(1) %out) #0 {
ret void
}

; GCN-LABEL: {{^}}v_fdiv_f16_arcp:
; SI: v_rcp_f32
; SI: v_mul_f32

; GFX8PLUS: v_rcp_f16
; GFX8PLUS: v_mul_f16
define half @v_fdiv_f16_arcp(half %x, half %y) {
%fdiv = fdiv arcp half %x, %y
ret half %fdiv
}

; GCN-LABEL: {{^}}v_fdiv_f16_afn_nsz:
; SI: v_rcp_f32
; SI: v_mul_f32

; GFX8PLUS: v_rcp_f16
; GFX8PLUS: v_mul_f16
define half @v_fdiv_f16_afn_nsz(half %x, half %y) {
%fdiv = fdiv afn nsz half %x, %y
ret half %fdiv
}

declare i32 @llvm.amdgcn.workitem.id.x() #2
declare half @llvm.sqrt.f16(half) #2
declare half @llvm.fabs.f16(half) #2
Expand Down
24 changes: 22 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,35 @@ entry:
ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; FUNC-LABEL: {{^}}fdiv_f32_arcp_daz:
; GCN: v_div_scale_f32
; GCN-DAG: v_rcp_f32
; GCN-DAG: v_div_scale_f32
; GCN: {{s_setreg_imm32_b32|s_denorm_mode}}
; GCN: v_fma{{c?}}_f32
; GCN: v_fma{{c?}}_f32
; GCN: v_fma{{c?}}_f32
; GCN: v_fma{{c?}}_f32
; GCN: {{s_setreg_imm32_b32|s_denorm_mode}}
; GCN: v_div_fmas_f32
; GCN: v_div_fixup_f32
; GCN-NOT: v_mul_f32
define amdgpu_kernel void @fdiv_f32_arcp_daz(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%fdiv = fdiv arcp float %a, %b
store float %fdiv, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_arcp_ninf:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_{{dword|b32}} [[RESULT]]
define amdgpu_kernel void @fdiv_f32_arcp_math(ptr addrspace(1) %out, float %a, float %b) #0 {
define amdgpu_kernel void @fdiv_f32_arcp_ninf(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%fdiv = fdiv arcp ninf float %a, %b
store float %fdiv, ptr addrspace(1) %out
Expand Down
45 changes: 15 additions & 30 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -527,69 +527,54 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
;
; VI-SAFE-LABEL: fneg_fadd_0_f16:
; VI-SAFE: ; %bb.0: ; %.entry
; VI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, s1
; VI-SAFE-NEXT: v_mov_b32_e32 v2, s0
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-SAFE-NEXT: v_rcp_f32_e32 v0, v0
; VI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SAFE-NEXT: v_div_fixup_f16 v0, v0, s1, 1.0
; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
; VI-SAFE-NEXT: v_xor_b32_e32 v3, 0x8000, v0
; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-SAFE-NEXT: ; return to shader part epilog
;
; VI-NSZ-LABEL: fneg_fadd_0_f16:
; VI-NSZ: ; %bb.0: ; %.entry
; VI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, s1
; VI-NSZ-NEXT: v_mov_b32_e32 v2, s0
; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
; VI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-NSZ-NEXT: v_div_fixup_f16 v0, v0, s1, 1.0
; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-NSZ-NEXT: ; return to shader part epilog
;
; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
; GFX11-SAFE: ; %bb.0: ; %.entry
; GFX11-SAFE-NEXT: v_cvt_f32_f16_e32 v0, s1
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
; GFX11-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-SAFE-NEXT: v_div_fixup_f16 v0, v0, s1, 1.0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-SAFE-NEXT: ; return to shader part epilog
;
; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
; GFX11-NSZ: ; %bb.0: ; %.entry
; GFX11-NSZ-NEXT: v_cvt_f32_f16_e32 v0, s1
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NSZ-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NSZ-NEXT: v_div_fixup_f16 v0, v0, s1, 1.0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
; GFX11-NSZ-NEXT: ; return to shader part epilog
Expand Down