From db23bab29fef934a41e22098f00bf6fe3360b876 Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Sat, 27 Sep 2025 12:05:53 +0800 Subject: [PATCH 1/2] [TargetLowering] Remove NoSignedZerosFPMath uses --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4145c8a54a6fe..db3b21b4f83f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7492,7 +7492,6 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Pre-increment recursion depth for use in recursive calls. ++Depth; const SDNodeFlags Flags = Op->getFlags(); - const TargetOptions &Options = DAG.getTarget().Options; EVT VT = Op.getValueType(); unsigned Opcode = Op.getOpcode(); @@ -7572,7 +7571,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getBuildVector(VT, DL, Ops); } case ISD::FADD: { - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + if (!Flags.hasNoSignedZeros()) break; // After operation legalization, it might not be legal to create new FSUBs. @@ -7617,7 +7616,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FSUB: { // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + if (!Flags.hasNoSignedZeros()) break; SDValue X = Op.getOperand(0), Y = Op.getOperand(1); @@ -7678,7 +7677,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FMA: case ISD::FMAD: { - if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + if (!Flags.hasNoSignedZeros()) break; SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2); @@ -8797,7 +8796,6 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node, EVT VT = Node->getValueType(0); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); bool IsMax = Opc == ISD::FMAXIMUMNUM; - const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = Node->getFlags(); unsigned NewOp = @@ -8856,8 +8854,8 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node, // TODO: We need quiet sNaN if strictfp. // Fixup signed zero behavior. - if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() || - DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) { + if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) || + DAG.isKnownNeverZeroFloat(RHS)) { return MinMax; } SDValue TestZero = From 48ceaa5827ef7c3f127caf203a0364863a6dcf2d Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Sat, 27 Sep 2025 13:04:35 +0800 Subject: [PATCH 2/2] fix tests --- .../AMDGPU/select-fabs-fneg-extract.v2f16.ll | 341 +++++++++++++----- llvm/test/CodeGen/AMDGPU/v_mac.ll | 9 +- llvm/test/CodeGen/AMDGPU/v_mac_f16.ll | 14 +- llvm/test/CodeGen/PowerPC/scalar_cmp.ll | 226 ++++++++---- llvm/test/CodeGen/X86/negative-sin.ll | 15 +- 5 files changed, 427 insertions(+), 178 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll index 92d3277d5d3e3..bb22144b815a1 100644 --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll @@ -4148,28 +4148,28 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; -------------------------------------------------------------------------------- define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y) { -; CI-SAFE-LABEL: select_fneg_posk_src_add_v2f16: -; CI-SAFE: ; %bb.0: -; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_add_f32_e32 v3, 4.0, v3 -; CI-SAFE-NEXT: v_add_f32_e32 v2, 4.0, v2 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3 -; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2 -; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc -; CI-SAFE-NEXT: s_setpc_b64 s[30:31] +; CI-LABEL: select_fneg_posk_src_add_v2f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_add_f32_e32 v3, 4.0, v3 +; CI-NEXT: v_add_f32_e32 v2, 4.0, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_or_b32_e32 v2, v2, v3 +; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc +; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16: ; VI-SAFE: ; %bb.0: @@ -4229,21 +4229,6 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, < ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16: -; CI-NSZ: ; %bb.0: -; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NSZ-NEXT: v_sub_f32_e32 v2, -4.0, v2 -; CI-NSZ-NEXT: v_sub_f32_e32 v3, -4.0, v3 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc -; CI-NSZ-NEXT: s_setpc_b64 s[30:31] -; ; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16: ; VI-NSZ: ; %bb.0: ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4302,6 +4287,105 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, < ret <2 x half> %select } +define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %y) { +; CI-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_sub_f32_e32 v2, -4.0, v2 +; CI-NEXT: v_sub_f32_e32 v3, -4.0, v3 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0xc400 +; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_sub_f16_e32 v2, -4.0, v2 +; VI-NEXT: v_mov_b32_e32 v3, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-SAFE-TRUE16: ; %bb.0: +; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-SAFE-FAKE16: ; %bb.0: +; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-NSZ-TRUE16: ; %bb.0: +; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-NSZ-FAKE16: ; %bb.0: +; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] + %cmp = icmp eq <2 x i32> %c, zeroinitializer + %add = fadd nsz <2 x half> %x, + %fneg = fneg <2 x half> %add + %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> + ret <2 x half> %select +} + define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { ; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: ; CI-SAFE: ; %bb.0: @@ -4704,34 +4788,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, < } define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %z) { -; CI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: -; CI-SAFE: ; %bb.0: -; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-SAFE-NEXT: v_mul_f32_e32 v3, 4.0, v3 -; CI-SAFE-NEXT: v_add_f32_e32 v3, v3, v5 -; CI-SAFE-NEXT: v_mul_f32_e32 v2, 4.0, v2 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_add_f32_e32 v2, v2, v4 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3 -; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2 -; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc -; CI-SAFE-NEXT: s_setpc_b64 s[30:31] +; CI-LABEL: select_fneg_posk_src_fmad_v2f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_mul_f32_e32 v3, 4.0, v3 +; CI-NEXT: v_add_f32_e32 v3, v3, v5 +; CI-NEXT: v_mul_f32_e32 v2, 4.0, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_add_f32_e32 v2, v2, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_or_b32_e32 v2, v2, v3 +; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc +; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: ; VI-SAFE: ; %bb.0: @@ -4793,27 +4877,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, ; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: -; CI-NSZ: ; %bb.0: -; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NSZ-NEXT: v_mul_f32_e32 v2, -4.0, v2 -; CI-NSZ-NEXT: v_mul_f32_e32 v3, -4.0, v3 -; CI-NSZ-NEXT: v_sub_f32_e32 v2, v2, v4 -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-NSZ-NEXT: v_sub_f32_e32 v3, v3, v5 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc -; CI-NSZ-NEXT: s_setpc_b64 s[30:31] -; ; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: ; VI-NSZ: ; %bb.0: ; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4873,6 +4936,112 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, ret <2 x half> %select } +define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> %x, <2 x half> %z) { +; CI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_mul_f32_e32 v2, -4.0, v2 +; CI-NEXT: v_mul_f32_e32 v3, -4.0, v3 +; CI-NEXT: v_sub_f32_e32 v2, v2, v4 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_sub_f32_e32 v3, v3, v5 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; VI-NEXT: v_fma_f16 v1, v4, -4.0, -v1 +; VI-NEXT: v_fma_f16 v2, v2, -4.0, -v3 +; VI-NEXT: v_mov_b32_e32 v3, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-SAFE-TRUE16: ; %bb.0: +; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-SAFE-FAKE16: ; %bb.0: +; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-NSZ-TRUE16: ; %bb.0: +; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-NSZ-FAKE16: ; %bb.0: +; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] + %cmp = icmp eq <2 x i32> %c, zeroinitializer + %fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> , <2 x half> %z) + %fneg = fneg <2 x half> %fmad + %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> + ret <2 x half> %select +} + declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll index c12871536bafa..f5dc824aae35f 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mac.ll +++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll @@ -116,7 +116,7 @@ entry: ; GCN-LABEL: {{^}}nsz_mad_sub0_src0: ; GCN-NOT: v_mac_f32 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} -define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { +define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2 @@ -125,7 +125,7 @@ entry: %b = load float, ptr addrspace(1) %b_ptr %c = load float, ptr addrspace(1) %c_ptr - %neg_a = fsub float 0.0, %a + %neg_a = fsub nsz float 0.0, %a %tmp0 = fmul float %neg_a, %b %tmp1 = fadd float %tmp0, %c @@ -176,7 +176,7 @@ entry: ; GCN-LABEL: {{^}}nsz_mad_sub0_src1: ; GCN-NOT: v_mac_f32 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} -define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { +define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2 @@ -185,7 +185,7 @@ entry: %b = load float, ptr addrspace(1) %b_ptr %c = load float, ptr addrspace(1) %c_ptr - %neg_b = fsub float 0.0, %b + %neg_b = fsub nsz float 0.0, %b %tmp0 = fmul float %a, %neg_b %tmp1 = fadd float %tmp0, %c @@ -310,6 +310,5 @@ define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-m declare i32 @llvm.amdgcn.workitem.id.x() #2 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" } -attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll index bcc60b06db291..8da6f2348690a 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll +++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -236,7 +236,7 @@ entry: %b.val = load half, ptr addrspace(1) %b %c.val = load half, ptr addrspace(1) %c - %a.neg = fsub half 0.0, %a.val + %a.neg = fsub nsz half 0.0, %a.val %t.val = fmul half %a.neg, %b.val %r.val = fadd half %t.val, %c.val @@ -263,7 +263,7 @@ entry: %b.val = load half, ptr addrspace(1) %b %c.val = load half, ptr addrspace(1) %c - %b.neg = fsub half 0.0, %b.val + %b.neg = fsub nsz half 0.0, %b.val %t.val = fmul half %a.val, %b.neg %r.val = fadd half %t.val, %c.val @@ -290,7 +290,7 @@ entry: %b.val = load half, ptr addrspace(1) %b %c.val = load half, ptr addrspace(1) %c - %c.neg = fsub half 0.0, %c.val + %c.neg = fsub nsz half 0.0, %c.val %t.val = fmul half %a.val, %b.val %r.val = fadd half %t.val, %c.neg @@ -601,7 +601,7 @@ entry: %b.val = load <2 x half>, ptr addrspace(1) %b %c.val = load <2 x half>, ptr addrspace(1) %c - %a.neg = fsub <2 x half> , %a.val + %a.neg = fsub nsz <2 x half> , %a.val %t.val = fmul <2 x half> %a.neg, %b.val %r.val = fadd <2 x half> %t.val, %c.val @@ -634,7 +634,7 @@ entry: %b.val = load <2 x half>, ptr addrspace(1) %b %c.val = load <2 x half>, ptr addrspace(1) %c - %b.neg = fsub <2 x half> , %b.val + %b.neg = fsub nsz <2 x half> , %b.val %t.val = fmul <2 x half> %a.val, %b.neg %r.val = fadd <2 x half> %t.val, %c.val @@ -667,7 +667,7 @@ entry: %b.val = load <2 x half>, ptr addrspace(1) %b %c.val = load <2 x half>, ptr addrspace(1) %c - %c.neg = fsub <2 x half> , %c.val + %c.neg = fsub nsz <2 x half> , %c.val %t.val = fmul <2 x half> %a.val, %b.val %r.val = fadd <2 x half> %t.val, %c.neg @@ -678,5 +678,5 @@ entry: declare void @llvm.amdgcn.s.barrier() #2 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" } -attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" } +attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #2 = { nounwind convergent } diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll index aaabd76e163bb..fd0b494d57677 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll @@ -20,18 +20,18 @@ define float @select_oeq_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_oeq_float: ; FAST-P8: # %bb.0: # %entry -; FAST-P8-NEXT: xssubsp f0, f2, f1 -; FAST-P8-NEXT: xssubsp f1, f1, f2 -; FAST-P8-NEXT: fsel f1, f1, f3, f4 -; FAST-P8-NEXT: fsel f1, f0, f1, f4 +; FAST-P8-NEXT: xssubsp f0, f1, f2 +; FAST-P8-NEXT: xsnegdp f1, f0 +; FAST-P8-NEXT: fsel f0, f0, f3, f4 +; FAST-P8-NEXT: fsel f1, f1, f0, f4 ; FAST-P8-NEXT: blr ; ; FAST-P9-LABEL: select_oeq_float: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: xssubsp f0, f2, f1 -; FAST-P9-NEXT: xssubsp f1, f1, f2 -; FAST-P9-NEXT: fsel f1, f1, f3, f4 -; FAST-P9-NEXT: fsel f1, f0, f1, f4 +; FAST-P9-NEXT: xssubsp f0, f1, f2 +; FAST-P9-NEXT: xsnegdp f1, f0 +; FAST-P9-NEXT: fsel f0, f0, f3, f4 +; FAST-P9-NEXT: fsel f1, f1, f0, f4 ; FAST-P9-NEXT: blr ; ; NO-FAST-P8-LABEL: select_oeq_float: @@ -59,6 +59,48 @@ entry: ret float %cond } +define float @select_oeq_float_nsz(float %a, float %b, float %c, float %d) { +; FAST-P8-LABEL: select_oeq_float_nsz: +; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubsp f0, f2, f1 +; FAST-P8-NEXT: xssubsp f1, f1, f2 +; FAST-P8-NEXT: fsel f1, f1, f3, f4 +; FAST-P8-NEXT: fsel f1, f0, f1, f4 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: select_oeq_float_nsz: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xssubsp f0, f2, f1 +; FAST-P9-NEXT: xssubsp f1, f1, f2 +; FAST-P9-NEXT: fsel f1, f1, f3, f4 +; FAST-P9-NEXT: fsel f1, f0, f1, f4 +; FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: select_oeq_float_nsz: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: beq cr0, .LBB1_2 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f3, f4 +; NO-FAST-P8-NEXT: .LBB1_2: # %entry +; NO-FAST-P8-NEXT: fmr f1, f3 +; NO-FAST-P8-NEXT: blr +; +; NO-FAST-P9-LABEL: select_oeq_float_nsz: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P9-NEXT: beq cr0, .LBB1_2 +; NO-FAST-P9-NEXT: # %bb.1: # %entry +; NO-FAST-P9-NEXT: fmr f3, f4 +; NO-FAST-P9-NEXT: .LBB1_2: # %entry +; NO-FAST-P9-NEXT: fmr f1, f3 +; NO-FAST-P9-NEXT: blr +entry: + %cmp = fcmp nsz oeq float %a, %b + %cond = select i1 %cmp, float %c, float %d + ret float %cond +} + define double @select_oeq_double(double %a, double %b, double %c, double %d) { ; FAST-P8-LABEL: select_oeq_double: ; FAST-P8: # %bb.0: # %entry @@ -79,20 +121,20 @@ define double @select_oeq_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8-LABEL: select_oeq_double: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P8-NEXT: beq cr0, .LBB1_2 +; NO-FAST-P8-NEXT: beq cr0, .LBB2_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB1_2: # %entry +; NO-FAST-P8-NEXT: .LBB2_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; ; NO-FAST-P9-LABEL: select_oeq_double: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P9-NEXT: beq cr0, .LBB1_2 +; NO-FAST-P9-NEXT: beq cr0, .LBB2_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB1_2: # %entry +; NO-FAST-P9-NEXT: .LBB2_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -182,13 +224,57 @@ entry: define float @select_one_float(float %a, float %b, float %c, float %d) { ; FAST-P8-LABEL: select_one_float: ; FAST-P8: # %bb.0: # %entry +; FAST-P8-NEXT: xssubsp f0, f1, f2 +; FAST-P8-NEXT: xsnegdp f1, f0 +; FAST-P8-NEXT: fsel f0, f0, f4, f3 +; FAST-P8-NEXT: fsel f1, f1, f0, f3 +; FAST-P8-NEXT: blr +; +; FAST-P9-LABEL: select_one_float: +; FAST-P9: # %bb.0: # %entry +; FAST-P9-NEXT: xssubsp f0, f1, f2 +; FAST-P9-NEXT: xsnegdp f1, f0 +; FAST-P9-NEXT: fsel f0, f0, f4, f3 +; FAST-P9-NEXT: fsel f1, f1, f0, f3 +; FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: select_one_float: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB5_2 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f3, f4 +; NO-FAST-P8-NEXT: .LBB5_2: # %entry +; NO-FAST-P8-NEXT: fmr f1, f3 +; NO-FAST-P8-NEXT: blr +; +; NO-FAST-P9-LABEL: select_one_float: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 +; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB5_2 +; NO-FAST-P9-NEXT: # %bb.1: # %entry +; NO-FAST-P9-NEXT: fmr f3, f4 +; NO-FAST-P9-NEXT: .LBB5_2: # %entry +; NO-FAST-P9-NEXT: fmr f1, f3 +; NO-FAST-P9-NEXT: blr +entry: + %cmp = fcmp one float %a, %b + %cond = select i1 %cmp, float %c, float %d + ret float %cond +} + +define float @select_one_float_nsz(float %a, float %b, float %c, float %d) { +; FAST-P8-LABEL: select_one_float_nsz: +; FAST-P8: # %bb.0: # %entry ; FAST-P8-NEXT: xssubsp f0, f2, f1 ; FAST-P8-NEXT: xssubsp f1, f1, f2 ; FAST-P8-NEXT: fsel f1, f1, f4, f3 ; FAST-P8-NEXT: fsel f1, f0, f1, f3 ; FAST-P8-NEXT: blr ; -; FAST-P9-LABEL: select_one_float: +; FAST-P9-LABEL: select_one_float_nsz: ; FAST-P9: # %bb.0: # %entry ; FAST-P9-NEXT: xssubsp f0, f2, f1 ; FAST-P9-NEXT: xssubsp f1, f1, f2 @@ -196,29 +282,29 @@ define float @select_one_float(float %a, float %b, float %c, float %d) { ; FAST-P9-NEXT: fsel f1, f0, f1, f3 ; FAST-P9-NEXT: blr ; -; NO-FAST-P8-LABEL: select_one_float: +; NO-FAST-P8-LABEL: select_one_float_nsz: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB4_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB6_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB4_2: # %entry +; NO-FAST-P8-NEXT: .LBB6_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; -; NO-FAST-P9-LABEL: select_one_float: +; NO-FAST-P9-LABEL: select_one_float_nsz: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB4_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB6_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB4_2: # %entry +; NO-FAST-P9-NEXT: .LBB6_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: - %cmp = fcmp one float %a, %b + %cmp = fcmp nsz one float %a, %b %cond = select i1 %cmp, float %c, float %d ret float %cond } @@ -244,10 +330,10 @@ define double @select_one_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, eq -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB5_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB7_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB5_2: # %entry +; NO-FAST-P8-NEXT: .LBB7_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -255,10 +341,10 @@ define double @select_one_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, eq -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB5_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB7_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB5_2: # %entry +; NO-FAST-P9-NEXT: .LBB7_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -362,10 +448,10 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, lt -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB8_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB10_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB8_2: # %entry +; NO-FAST-P8-NEXT: .LBB10_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -373,10 +459,10 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, lt -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB8_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB10_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB8_2: # %entry +; NO-FAST-P9-NEXT: .LBB10_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -402,10 +488,10 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, lt -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB9_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB9_2: # %entry +; NO-FAST-P8-NEXT: .LBB11_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -413,10 +499,10 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, lt -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB9_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB9_2: # %entry +; NO-FAST-P9-NEXT: .LBB11_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -503,20 +589,20 @@ define float @select_olt_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P8-LABEL: select_olt_float: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 -; NO-FAST-P8-NEXT: blt cr0, .LBB12_2 +; NO-FAST-P8-NEXT: blt cr0, .LBB14_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB12_2: # %entry +; NO-FAST-P8-NEXT: .LBB14_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; ; NO-FAST-P9-LABEL: select_olt_float: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 -; NO-FAST-P9-NEXT: blt cr0, .LBB12_2 +; NO-FAST-P9-NEXT: blt cr0, .LBB14_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB12_2: # %entry +; NO-FAST-P9-NEXT: .LBB14_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -541,20 +627,20 @@ define double @select_olt_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8-LABEL: select_olt_double: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P8-NEXT: blt cr0, .LBB13_2 +; NO-FAST-P8-NEXT: blt cr0, .LBB15_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB13_2: # %entry +; NO-FAST-P8-NEXT: .LBB15_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; ; NO-FAST-P9-LABEL: select_olt_double: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P9-NEXT: blt cr0, .LBB13_2 +; NO-FAST-P9-NEXT: blt cr0, .LBB15_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB13_2: # %entry +; NO-FAST-P9-NEXT: .LBB15_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -641,20 +727,20 @@ define float @select_ogt_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P8-LABEL: select_ogt_float: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 -; NO-FAST-P8-NEXT: bgt cr0, .LBB16_2 +; NO-FAST-P8-NEXT: bgt cr0, .LBB18_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB16_2: # %entry +; NO-FAST-P8-NEXT: .LBB18_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; ; NO-FAST-P9-LABEL: select_ogt_float: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 -; NO-FAST-P9-NEXT: bgt cr0, .LBB16_2 +; NO-FAST-P9-NEXT: bgt cr0, .LBB18_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB16_2: # %entry +; NO-FAST-P9-NEXT: .LBB18_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -679,20 +765,20 @@ define double @select_ogt_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8-LABEL: select_ogt_double: ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P8-NEXT: bgt cr0, .LBB17_2 +; NO-FAST-P8-NEXT: bgt cr0, .LBB19_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB17_2: # %entry +; NO-FAST-P8-NEXT: .LBB19_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; ; NO-FAST-P9-LABEL: select_ogt_double: ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f2 -; NO-FAST-P9-NEXT: bgt cr0, .LBB17_2 +; NO-FAST-P9-NEXT: bgt cr0, .LBB19_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB17_2: # %entry +; NO-FAST-P9-NEXT: .LBB19_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -780,10 +866,10 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, gt -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB20_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB22_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB20_2: # %entry +; NO-FAST-P8-NEXT: .LBB22_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -791,10 +877,10 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) { ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, gt -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB20_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB22_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB20_2: # %entry +; NO-FAST-P9-NEXT: .LBB22_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -820,10 +906,10 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P8: # %bb.0: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P8-NEXT: crnor 4*cr5+lt, un, gt -; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB21_2 +; NO-FAST-P8-NEXT: bc 12, 4*cr5+lt, .LBB23_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f3, f4 -; NO-FAST-P8-NEXT: .LBB21_2: # %entry +; NO-FAST-P8-NEXT: .LBB23_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -831,10 +917,10 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) { ; NO-FAST-P9: # %bb.0: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f2 ; NO-FAST-P9-NEXT: crnor 4*cr5+lt, un, gt -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB21_2 +; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB23_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f3, f4 -; NO-FAST-P9-NEXT: .LBB21_2: # %entry +; NO-FAST-P9-NEXT: .LBB23_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -926,13 +1012,13 @@ define double @onecmp1(double %a, double %y, double %z) { ; NO-FAST-P8-NEXT: vspltisw v2, 1 ; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f0 -; NO-FAST-P8-NEXT: bc 12, lt, .LBB24_3 +; NO-FAST-P8-NEXT: bc 12, lt, .LBB26_3 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fcmpu cr0, f1, f1 -; NO-FAST-P8-NEXT: bc 12, un, .LBB24_3 +; NO-FAST-P8-NEXT: bc 12, un, .LBB26_3 ; NO-FAST-P8-NEXT: # %bb.2: # %entry ; NO-FAST-P8-NEXT: fmr f3, f2 -; NO-FAST-P8-NEXT: .LBB24_3: # %entry +; NO-FAST-P8-NEXT: .LBB26_3: # %entry ; NO-FAST-P8-NEXT: fmr f1, f3 ; NO-FAST-P8-NEXT: blr ; @@ -941,13 +1027,13 @@ define double @onecmp1(double %a, double %y, double %z) { ; NO-FAST-P9-NEXT: vspltisw v2, 1 ; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f0 -; NO-FAST-P9-NEXT: bc 12, lt, .LBB24_3 +; NO-FAST-P9-NEXT: bc 12, lt, .LBB26_3 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f1 -; NO-FAST-P9-NEXT: bc 12, un, .LBB24_3 +; NO-FAST-P9-NEXT: bc 12, un, .LBB26_3 ; NO-FAST-P9-NEXT: # %bb.2: # %entry ; NO-FAST-P9-NEXT: fmr f3, f2 -; NO-FAST-P9-NEXT: .LBB24_3: # %entry +; NO-FAST-P9-NEXT: .LBB26_3: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -978,10 +1064,10 @@ define double @onecmp2(double %a, double %y, double %z) { ; NO-FAST-P8-NEXT: vspltisw v2, 1 ; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f0 -; NO-FAST-P8-NEXT: bgt cr0, .LBB25_2 +; NO-FAST-P8-NEXT: bgt cr0, .LBB27_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f2, f3 -; NO-FAST-P8-NEXT: .LBB25_2: # %entry +; NO-FAST-P8-NEXT: .LBB27_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f2 ; NO-FAST-P8-NEXT: blr ; @@ -990,10 +1076,10 @@ define double @onecmp2(double %a, double %y, double %z) { ; NO-FAST-P9-NEXT: vspltisw v2, 1 ; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0 -; NO-FAST-P9-NEXT: bgt cr0, .LBB25_2 +; NO-FAST-P9-NEXT: bgt cr0, .LBB27_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f2, f3 -; NO-FAST-P9-NEXT: .LBB25_2: # %entry +; NO-FAST-P9-NEXT: .LBB27_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f2 ; NO-FAST-P9-NEXT: blr entry: @@ -1028,10 +1114,10 @@ define double @onecmp3(double %a, double %y, double %z) { ; NO-FAST-P8-NEXT: vspltisw v2, 1 ; NO-FAST-P8-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f0 -; NO-FAST-P8-NEXT: beq cr0, .LBB26_2 +; NO-FAST-P8-NEXT: beq cr0, .LBB28_2 ; NO-FAST-P8-NEXT: # %bb.1: # %entry ; NO-FAST-P8-NEXT: fmr f2, f3 -; NO-FAST-P8-NEXT: .LBB26_2: # %entry +; NO-FAST-P8-NEXT: .LBB28_2: # %entry ; NO-FAST-P8-NEXT: fmr f1, f2 ; NO-FAST-P8-NEXT: blr ; @@ -1040,10 +1126,10 @@ define double @onecmp3(double %a, double %y, double %z) { ; NO-FAST-P9-NEXT: vspltisw v2, 1 ; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0 -; NO-FAST-P9-NEXT: beq cr0, .LBB26_2 +; NO-FAST-P9-NEXT: beq cr0, .LBB28_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry ; NO-FAST-P9-NEXT: fmr f2, f3 -; NO-FAST-P9-NEXT: .LBB26_2: # %entry +; NO-FAST-P9-NEXT: .LBB28_2: # %entry ; NO-FAST-P9-NEXT: fmr f1, f2 ; NO-FAST-P9-NEXT: blr entry: diff --git a/llvm/test/CodeGen/X86/negative-sin.ll b/llvm/test/CodeGen/X86/negative-sin.ll index f24507d3a4f38..4836da2ad7797 100644 --- a/llvm/test/CodeGen/X86/negative-sin.ll +++ b/llvm/test/CodeGen/X86/negative-sin.ll @@ -82,18 +82,13 @@ define double @semi_strict2(double %e) nounwind { ret double %h } -; FIXME: -; Auto-upgrade function attribute to IR-level fast-math-flags. - -define double @fn_attr(double %e) nounwind #0 { -; CHECK-LABEL: fn_attr: +define double @nsz_flag(double %e) nounwind { +; CHECK-LABEL: nsz_flag: ; CHECK: # %bb.0: ; CHECK-NEXT: jmp sin@PLT # TAILCALL - %f = fsub double 0.0, %e - %g = call double @sin(double %f) readonly - %h = fsub double 0.0, %g + %f = fsub nsz double 0.0, %e + %g = call nsz double @sin(double %f) readonly + %h = fsub nsz double 0.0, %g ret double %h } -attributes #0 = { "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" } -