diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 51212837fbb17..a0a61fdf850cf 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -793,6 +793,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FCANONICALIZE, MVT::f64, Legal); setOperationAction(ISD::FCANONICALIZE, MVT::f32, Legal); } @@ -827,8 +831,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal); + setOperationAction(ISD::FMINNUM_IEEE, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FCANONICALIZE, VT, Legal); } // Vector instructions introduced in P8 diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d72201df5b002..d5d3420a61445 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1868,6 +1868,9 @@ def FpMinMax { dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC), (COPY_TO_REGCLASS $B, VSFRC)), VSSRC); + dag F32Can = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC), + (COPY_TO_REGCLASS $A, VSFRC)), + VSSRC); } def ScalarLoads { @@ -2757,14 +2760,26 @@ def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)), (COPY_TO_REGCLASS $vB, VSRC), (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def : Pat<(v4f32 (fmaxnum_ieee v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMAXSP $src1, $src2))>; def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)), (v4f32 (XVMAXSP $src1, $src2))>; +def : Pat<(v4f32 (fminnum_ieee v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMINSP $src1, $src2))>; def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)), (v4f32 (XVMINSP $src1, $src2))>; +def : Pat<(v4f32 (fcanonicalize v4f32:$src1)), + (v4f32 (XVMAXSP $src1, $src1))>; +def : Pat<(v2f64 (fmaxnum_ieee v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMAXDP $src1, $src2))>; def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)), (v2f64 (XVMAXDP $src1, $src2))>; +def : Pat<(v2f64 (fminnum_ieee v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMINDP $src1, $src2))>; def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)), (v2f64 (XVMINDP $src1, $src2))>; +def : Pat<(v2f64 (fcanonicalize v2f64:$src1)), + (v2f64 (XVMAXDP $src1, $src1))>; // f32 abs def : Pat<(f32 (fabs f32:$S)), @@ -2776,43 +2791,27 @@ def : Pat<(f32 (fneg (fabs f32:$S))), (f32 (COPY_TO_REGCLASS (XSNABSDP (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -// f32 Min. +// Max and Min def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), (f32 FpMinMax.F32Min)>; -def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)), - (f32 FpMinMax.F32Min)>; -def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Min)>; -def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Min)>; -// F32 Max. def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)), (f32 FpMinMax.F32Max)>; -def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)), - (f32 FpMinMax.F32Max)>; -def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Max)>; -def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), +def : Pat<(f32 (fminnum f32:$A, f32:$B)), + (f32 FpMinMax.F32Min)>; +def : Pat<(f32 (fmaxnum f32:$A, f32:$B)), (f32 FpMinMax.F32Max)>; - -// f64 Min. +def : Pat<(f32 (fcanonicalize f32:$A)), + (f32 FpMinMax.F32Can)>; def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)), (f64 (XSMINDP $A, $B))>; -def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)), - (f64 (XSMINDP $A, $B))>; -def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))), - (f64 (XSMINDP $A, $B))>; -def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), - (f64 (XSMINDP $A, $B))>; -// f64 Max. def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)), (f64 (XSMAXDP $A, $B))>; -def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)), - (f64 (XSMAXDP $A, $B))>; -def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), - (f64 (XSMAXDP $A, $B))>; -def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), +def : Pat<(f64 (fminnum f64:$A, f64:$B)), + (f64 (XSMINDP $A, $B))>; +def : Pat<(f64 (fmaxnum f64:$A, f64:$B)), (f64 (XSMAXDP $A, $B))>; +def : Pat<(f64 (fcanonicalize f64:$A)), + (f64 (XSMAXDP $A, $A))>; def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll index a99c25a4e4479..39cf136e10d77 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -301,22 +301,13 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; VSX-NEXT: xvcmpeqsp 1, 35, 35 ; VSX-NEXT: xvcmpeqsp 2, 34, 34 ; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; VSX-NEXT: xxleqv 36, 36, 36 -; VSX-NEXT: xvminsp 0, 34, 35 -; VSX-NEXT: vslw 4, 4, 4 ; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l ; VSX-NEXT: xxlnor 1, 1, 1 ; VSX-NEXT: xxlnor 2, 2, 2 -; VSX-NEXT: vcmpequw 5, 2, 4 +; VSX-NEXT: xvminsp 0, 34, 35 ; VSX-NEXT: xxlor 1, 2, 1 ; VSX-NEXT: lxvd2x 2, 0, 3 -; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxlxor 2, 2, 2 -; VSX-NEXT: xvcmpeqsp 2, 0, 2 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequw 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 -; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: xxsel 34, 0, 2, 1 ; VSX-NEXT: blr ; ; AIX-LABEL: v4f32_minimum: @@ -324,21 +315,12 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; AIX-NEXT: xvcmpeqsp 1, 35, 35 ; AIX-NEXT: xvcmpeqsp 2, 34, 34 ; AIX-NEXT: ld 3, L..C4(2) # %const.0 -; AIX-NEXT: xxleqv 36, 36, 36 ; AIX-NEXT: xvminsp 0, 34, 35 -; AIX-NEXT: vslw 4, 4, 4 ; AIX-NEXT: xxlnor 1, 1, 1 ; AIX-NEXT: xxlnor 2, 2, 2 -; AIX-NEXT: vcmpequw 5, 2, 4 ; AIX-NEXT: xxlor 1, 2, 1 ; AIX-NEXT: lxvw4x 2, 0, 3 -; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxlxor 2, 2, 2 -; AIX-NEXT: xvcmpeqsp 2, 0, 2 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequw 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 -; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: xxsel 34, 0, 2, 1 ; AIX-NEXT: blr entry: %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) @@ -377,16 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; VSX-NEXT: xxlnor 1, 1, 1 ; VSX-NEXT: xxlnor 2, 2, 2 ; VSX-NEXT: xvmaxsp 0, 34, 35 -; VSX-NEXT: xxlxor 36, 36, 36 -; VSX-NEXT: vcmpequw 5, 2, 4 ; VSX-NEXT: xxlor 1, 2, 1 ; VSX-NEXT: lxvd2x 2, 0, 3 -; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xvcmpeqsp 2, 0, 36 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequw 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 -; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: xxsel 34, 0, 2, 1 ; VSX-NEXT: blr ; ; AIX-LABEL: v4f32_maximum: @@ -395,18 +370,11 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; AIX-NEXT: xvcmpeqsp 2, 34, 34 ; AIX-NEXT: ld 3, L..C5(2) # %const.0 ; AIX-NEXT: xvmaxsp 0, 34, 35 -; AIX-NEXT: xxlxor 36, 36, 36 ; AIX-NEXT: xxlnor 1, 1, 1 ; AIX-NEXT: xxlnor 2, 2, 2 -; AIX-NEXT: vcmpequw 5, 2, 4 ; AIX-NEXT: xxlor 1, 2, 1 ; AIX-NEXT: lxvw4x 2, 0, 3 -; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xvcmpeqsp 2, 0, 36 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequw 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 -; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: xxsel 34, 0, 2, 1 ; AIX-NEXT: blr entry: %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) @@ -493,47 +461,28 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; VSX-LABEL: v2f64_minimum: ; VSX: # %bb.0: # %entry ; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; VSX-NEXT: xvcmpeqdp 36, 35, 35 -; VSX-NEXT: xvcmpeqdp 37, 34, 34 -; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l -; VSX-NEXT: xxlnor 36, 36, 36 -; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xvmindp 0, 34, 35 +; VSX-NEXT: xvcmpeqdp 35, 35, 35 +; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l +; VSX-NEXT: xvcmpeqdp 34, 34, 34 +; VSX-NEXT: xxlnor 35, 35, 35 +; VSX-NEXT: xxlnor 34, 34, 34 ; VSX-NEXT: lxvd2x 2, 0, 3 -; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha -; VSX-NEXT: xxlor 1, 37, 36 -; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l -; VSX-NEXT: lxvd2x 36, 0, 3 -; VSX-NEXT: vcmpequd 5, 2, 4 -; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxlxor 2, 2, 2 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequd 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 -; VSX-NEXT: xvcmpeqdp 34, 0, 2 -; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: xxlor 1, 34, 35 +; VSX-NEXT: xxsel 34, 0, 2, 1 ; VSX-NEXT: blr ; ; AIX-LABEL: v2f64_minimum: ; AIX: # %bb.0: # %entry ; AIX-NEXT: ld 3, L..C6(2) # %const.0 -; AIX-NEXT: xvcmpeqdp 36, 35, 35 -; AIX-NEXT: xvcmpeqdp 37, 34, 34 -; AIX-NEXT: lxvd2x 2, 0, 3 -; AIX-NEXT: ld 3, L..C7(2) # %const.1 -; AIX-NEXT: xxlnor 36, 36, 36 -; AIX-NEXT: xxlnor 37, 37, 37 ; AIX-NEXT: xvmindp 0, 34, 35 -; AIX-NEXT: xxlor 1, 37, 36 -; AIX-NEXT: lxvd2x 36, 0, 3 -; AIX-NEXT: vcmpequd 5, 2, 4 -; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxlxor 2, 2, 2 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequd 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 -; AIX-NEXT: xvcmpeqdp 34, 0, 2 -; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: xvcmpeqdp 35, 35, 35 +; AIX-NEXT: lxvd2x 2, 0, 3 +; AIX-NEXT: xvcmpeqdp 34, 34, 34 +; AIX-NEXT: xxlnor 35, 35, 35 +; AIX-NEXT: xxlnor 34, 34, 34 +; AIX-NEXT: xxlor 1, 34, 35 +; AIX-NEXT: xxsel 34, 0, 2, 1 ; AIX-NEXT: blr entry: %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) @@ -618,42 +567,28 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; VSX-LABEL: v2f64_maximum: ; VSX: # %bb.0: # %entry ; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; VSX-NEXT: xvcmpeqdp 36, 35, 35 -; VSX-NEXT: xvcmpeqdp 37, 34, 34 -; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l -; VSX-NEXT: xxlnor 36, 36, 36 -; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xvmaxdp 0, 34, 35 +; VSX-NEXT: xvcmpeqdp 35, 35, 35 +; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l +; VSX-NEXT: xvcmpeqdp 34, 34, 34 +; VSX-NEXT: xxlnor 35, 35, 35 +; VSX-NEXT: xxlnor 34, 34, 34 ; VSX-NEXT: lxvd2x 2, 0, 3 -; VSX-NEXT: xxlor 1, 37, 36 -; VSX-NEXT: xxlxor 36, 36, 36 -; VSX-NEXT: vcmpequd 5, 2, 4 -; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequd 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 -; VSX-NEXT: xvcmpeqdp 34, 0, 36 -; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: xxlor 1, 34, 35 +; VSX-NEXT: xxsel 34, 0, 2, 1 ; VSX-NEXT: blr ; ; AIX-LABEL: v2f64_maximum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: ld 3, L..C8(2) # %const.0 -; AIX-NEXT: xvcmpeqdp 36, 35, 35 -; AIX-NEXT: xvcmpeqdp 37, 34, 34 -; AIX-NEXT: lxvd2x 2, 0, 3 -; AIX-NEXT: xxlnor 36, 36, 36 -; AIX-NEXT: xxlnor 37, 37, 37 +; AIX-NEXT: ld 3, L..C7(2) # %const.0 ; AIX-NEXT: xvmaxdp 0, 34, 35 -; AIX-NEXT: xxlor 1, 37, 36 -; AIX-NEXT: xxlxor 36, 36, 36 -; AIX-NEXT: vcmpequd 5, 2, 4 -; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequd 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 -; AIX-NEXT: xvcmpeqdp 34, 0, 36 -; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: xvcmpeqdp 35, 35, 35 +; AIX-NEXT: lxvd2x 2, 0, 3 +; AIX-NEXT: xvcmpeqdp 34, 34, 34 +; AIX-NEXT: xxlnor 35, 35, 35 +; AIX-NEXT: xxlnor 34, 34, 34 +; AIX-NEXT: xxlor 1, 34, 35 +; AIX-NEXT: xxsel 34, 0, 2, 1 ; AIX-NEXT: blr entry: %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll index 5f637e3ecddd3..a85f68c3ea039 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll @@ -24,19 +24,24 @@ entry: ret float %cond } -define dso_local double @testdmax(double %a, double %b) local_unnamed_addr { -; P9-LABEL: testdmax: -; P9: # %bb.0: # %entry -; P9-NEXT: xsmaxcdp f1, f1, f2 -; P9-NEXT: blr +define double @testdmax(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: testdmax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr ; -; P8-LABEL: testdmax: -; P8: # %bb.0: # %entry -; P8-NEXT: xscmpudp cr0, f1, f2 -; P8-NEXT: bgtlr cr0 -; P8-NEXT: # %bb.1: # %entry -; P8-NEXT: fmr f1, f2 -; P8-NEXT: blr +; NO-FAST-P9-LABEL: testdmax: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxcdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmax: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xscmpudp cr0, f1, f2 +; NO-FAST-P8-NEXT: bgtlr cr0 +; NO-FAST-P8-NEXT: # %bb.1: # %entry +; NO-FAST-P8-NEXT: fmr f1, f2 +; NO-FAST-P8-NEXT: blr entry: %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, double %a, double %b @@ -82,62 +87,322 @@ entry: } define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr { -; P9-LABEL: testfmax_fast: -; P9: # %bb.0: # %entry -; P9-NEXT: xsmaxdp f1, f1, f2 -; P9-NEXT: blr +; CHECK-LABEL: testfmax_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr ; -; P8-LABEL: testfmax_fast: -; P8: # %bb.0: # %entry -; P8-NEXT: xsmaxdp f1, f1, f2 -; P8-NEXT: blr +; NO-FAST-P9-LABEL: testfmax_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmax_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P8-NEXT: blr entry: %cmp = fcmp nnan ninf ogt float %a, %b %cond = select nnan nsz i1 %cmp, float %a, float %b ret float %cond } define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr { -; P9-LABEL: testdmax_fast: -; P9: # %bb.0: # %entry -; P9-NEXT: xsmaxdp f1, f1, f2 -; P9-NEXT: blr +; CHECK-LABEL: testdmax_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr ; -; P8-LABEL: testdmax_fast: -; P8: # %bb.0: # %entry -; P8-NEXT: xsmaxdp f1, f1, f2 -; P8-NEXT: blr +; NO-FAST-P9-LABEL: testdmax_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmax_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P8-NEXT: blr entry: %cmp = fcmp nnan ninf ogt double %a, %b %cond = select nnan nsz i1 %cmp, double %a, double %b ret double %cond } define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr { -; P9-LABEL: testfmin_fast: -; P9: # %bb.0: # %entry -; P9-NEXT: xsmindp f1, f1, f2 -; P9-NEXT: blr +; CHECK-LABEL: testfmin_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr ; -; P8-LABEL: testfmin_fast: -; P8: # %bb.0: # %entry -; P8-NEXT: xsmindp f1, f1, f2 -; P8-NEXT: blr +; NO-FAST-P9-LABEL: testfmin_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmin_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P8-NEXT: blr entry: %cmp = fcmp nnan ninf olt float %a, %b %cond = select nnan nsz i1 %cmp, float %a, float %b ret float %cond } define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr { -; P9-LABEL: testdmin_fast: -; P9: # %bb.0: # %entry -; P9-NEXT: xsmindp f1, f1, f2 -; P9-NEXT: blr +; CHECK-LABEL: testdmin_fast: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr ; -; P8-LABEL: testdmin_fast: -; P8: # %bb.0: # %entry -; P8-NEXT: xsmindp f1, f1, f2 -; P8-NEXT: blr +; NO-FAST-P9-LABEL: testdmin_fast: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmin_fast: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P8-NEXT: blr entry: %cmp = fcmp nnan ninf olt double %a, %b %cond = select nnan nsz i1 %cmp, double %a, double %b ret double %cond } + +define float @testfminnum(float %a, float %b) { +; CHECK-LABEL: testfminnum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfminnum: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfminnum: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call float @llvm.minnum.f32(float %a, float %b) + ret float %0 +} + +define float @testfmaxnum(float %a, float %b) { +; CHECK-LABEL: testfmaxnum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmaxnum: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmaxnum: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call float @llvm.maxnum.f32(float %a, float %b) + ret float %0 +} + +define float @testfcanonicalize(float %a) { +; CHECK-LABEL: testfcanonicalize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f1 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfcanonicalize: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f1 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfcanonicalize: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f1 +; NO-FAST-P8-NEXT: blr +entry: + %canonicalize = tail call float @llvm.canonicalize.f32(float %a) + ret float %canonicalize +} + +define double @testdminnum(double %a, double %b) { +; CHECK-LABEL: testdminnum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmindp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdminnum: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdminnum: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmindp f1, f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call double @llvm.minnum.f64(double %a, double %b) + ret double %0 +} + +define double @testdmaxnum(double %a, double %b) { +; CHECK-LABEL: testdmaxnum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f2 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmaxnum: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmaxnum: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f2 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call double @llvm.maxnum.f64(double %a, double %b) + ret double %0 +} + +define double @testdcanonicalize(double %a) { +; CHECK-LABEL: testdcanonicalize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsmaxdp f1, f1, f1 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdcanonicalize: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xsmaxdp f1, f1, f1 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdcanonicalize: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xsmaxdp f1, f1, f1 +; NO-FAST-P8-NEXT: blr +entry: + %canonicalize = tail call double @llvm.canonicalize.f64(double %a) + ret double %canonicalize +} + +define <4 x float> @testfminnum_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: testfminnum_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvminsp vs34, vs34, vs35 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfminnum_v4f32: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvminsp vs34, vs34, vs35 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfminnum_v4f32: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvminsp vs34, vs34, vs35 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +define <4 x float> @testfmaxnum_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: testfmaxnum_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxsp vs34, vs34, vs35 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfmaxnum_v4f32: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvmaxsp vs34, vs34, vs35 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfmaxnum_v4f32: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvmaxsp vs34, vs34, vs35 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +define <4 x float> @testfcanonicalize_v4f32(<4 x float> %a) { +; CHECK-LABEL: testfcanonicalize_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxsp vs34, vs34, vs34 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testfcanonicalize_v4f32: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvmaxsp vs34, vs34, vs34 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testfcanonicalize_v4f32: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvmaxsp vs34, vs34, vs34 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define <2 x double> @testdminnum_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: testdminnum_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmindp vs34, vs34, vs35 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdminnum_v2f64: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvmindp vs34, vs34, vs35 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdminnum_v2f64: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvmindp vs34, vs34, vs35 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %0 +} + +define <2 x double> @testdmaxnum_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: testdmaxnum_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxdp vs34, vs34, vs35 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdmaxnum_v2f64: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvmaxdp vs34, vs34, vs35 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdmaxnum_v2f64: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvmaxdp vs34, vs34, vs35 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %0 +} + +define <2 x double> @testdcanonicalize_v2f64(<2 x double> %a) { +; CHECK-LABEL: testdcanonicalize_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxdp vs34, vs34, vs34 +; CHECK-NEXT: blr +; +; NO-FAST-P9-LABEL: testdcanonicalize_v2f64: +; NO-FAST-P9: # %bb.0: # %entry +; NO-FAST-P9-NEXT: xvmaxdp vs34, vs34, vs34 +; NO-FAST-P9-NEXT: blr +; +; NO-FAST-P8-LABEL: testdcanonicalize_v2f64: +; NO-FAST-P8: # %bb.0: # %entry +; NO-FAST-P8-NEXT: xvmaxdp vs34, vs34, vs34 +; NO-FAST-P8-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %a) + ret <2 x double> %0 +}