From 7ad7792088ce128bc9bb4dd945cad00aed7f213c Mon Sep 17 00:00:00 2001 From: Scott Douglass Date: Wed, 8 Apr 2015 17:18:28 +0000 Subject: [PATCH] [ARM] make vminnm/vmaxnm work with ?le, ?ge and no-nans-fp-math Because -menable-no-nans causes fcmp conditions to be rewritten without 'o' or 'u', the recognition code in LowerSELECT_CC needs to cope. Also extended it to handle 'le' and 'ge'. Differential Revision: http://reviews.llvm.org/D8725 llvm-svn: 234421 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 27 ++++--- llvm/test/CodeGen/ARM/vminmaxnm.ll | 97 ++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 3b1b8dd1b8ee4..72afd2c76aacf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // Try to generate VSEL on ARMv8. + // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can select VMAXNM/VMINNM from a compare followed by a select with the + // We can use VMAXNM/VMINNM for a compare followed by a select with the // same operands, as follows: - // c = fcmp [ogt, olt, ugt, ult] a, b + // c = fcmp [?gt, ?ge, ?lt, ?le] a, b // select c, a, b - // We only do this in unsafe-fp-math, because signed zeros and NaNs are - // handled differently than the original code sequence. + // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. + // We only do this transformation in UnsafeFPMath and for no-NaNs + // comparisons, because signed zeros and NaNs are handled differently than + // the original code sequence. + // FIXME: There are more cases that can be transformed even with NaNs, + // signed zeroes and safe math. E.g. 
in the following, the result will be + // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too. + // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0. + // FIXME: There is similar code that allows some extensions in + // AArch64TargetLowering::LowerSELECT_CC that should be shared with this + // code. if (getTargetMachine().Options.UnsafeFPMath) { if (LHS == TrueVal && RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } else if (LHS == FalseVal && RHS == TrueVal) { - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } } diff --git a/llvm/test/CodeGen/ARM/vminmaxnm.ll b/llvm/test/CodeGen/ARM/vminmaxnm.ll index a1832842fe029..2e2648d11f795 100644 --- a/llvm/test/CodeGen/ARM/vminmaxnm.ll +++ b/llvm/test/CodeGen/ARM/vminmaxnm.ll @@ -1,5 +1,8 @@ -; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s -; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s +; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 \ +; RUN: -enable-no-nans-fp-math -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST + +; vectors define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vmaxnmq: @@ -37,6 +40,8 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind { ret <2 x float> %tmp3 } +; scalars + define float @fp-armv8_vminnm_o(float %a, float %b) { ; 
CHECK-FAST-LABEL: "fp-armv8_vminnm_o": ; CHECK-FAST-NOT: vcmp @@ -48,6 +53,17 @@ define float @fp-armv8_vminnm_o(float %a, float %b) { ret float %cond } +define double @fp-armv8_vminnm_ole(double %a, double %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_ole": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_ole": +; CHECK-NOT: vminnm.f64 + %cmp = fcmp ole double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond +} + define float @fp-armv8_vminnm_o_rev(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev": ; CHECK-FAST-NOT: vcmp @@ -59,6 +75,17 @@ define float @fp-armv8_vminnm_o_rev(float %a, float %b) { ret float %cond } +define double @fp-armv8_vminnm_oge_rev(double %a, double %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_oge_rev": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_oge_rev": +; CHECK-NOT: vminnm.f64 + %cmp = fcmp oge double %a, %b + %cond = select i1 %cmp, double %b, double %a + ret double %cond +} + define float @fp-armv8_vminnm_u(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vminnm_u": ; CHECK-FAST-NOT: vcmp @@ -70,6 +97,17 @@ define float @fp-armv8_vminnm_u(float %a, float %b) { ret float %cond } +define float @fp-armv8_vminnm_ule(float %a, float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_ule": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK-LABEL: "fp-armv8_vminnm_ule": +; CHECK-NOT: vminnm.f32 + %cmp = fcmp ule float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + define float @fp-armv8_vminnm_u_rev(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev": ; CHECK-FAST-NOT: vcmp @@ -81,6 +119,17 @@ define float @fp-armv8_vminnm_u_rev(float %a, float %b) { ret float %cond } +define double @fp-armv8_vminnm_uge_rev(double %a, double %b) { +; CHECK-FAST-LABEL: "fp-armv8_vminnm_uge_rev": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f64 +; CHECK-LABEL: "fp-armv8_vminnm_uge_rev": +; CHECK-NOT: 
vminnm.f64 + %cmp = fcmp uge double %a, %b + %cond = select i1 %cmp, double %b, double %a + ret double %cond +} + define float @fp-armv8_vmaxnm_o(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o": ; CHECK-FAST-NOT: vcmp @@ -92,6 +141,17 @@ define float @fp-armv8_vmaxnm_o(float %a, float %b) { ret float %cond } +define float @fp-armv8_vmaxnm_oge(float %a, float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_oge": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_oge": +; CHECK-NOT: vmaxnm.f32 + %cmp = fcmp oge float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev": ; CHECK-FAST-NOT: vcmp @@ -103,6 +163,17 @@ define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) { ret float %cond } +define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ole_rev": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev": +; CHECK-NOT: vmaxnm.f32 + %cmp = fcmp ole float %a, %b + %cond = select i1 %cmp, float %b, float %a + ret float %cond +} + define float @fp-armv8_vmaxnm_u(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u": ; CHECK-FAST-NOT: vcmp @@ -114,6 +185,17 @@ define float @fp-armv8_vmaxnm_u(float %a, float %b) { ret float %cond } +define float @fp-armv8_vmaxnm_uge(float %a, float %b) { +; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_uge": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f32 +; CHECK-LABEL: "fp-armv8_vmaxnm_uge": +; CHECK-NOT: vmaxnm.f32 + %cmp = fcmp uge float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) { ; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev": ; CHECK-FAST-NOT: vcmp @@ -125,6 +207,17 @@ define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) { ret float %cond } +define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) { 
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ule_rev": +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f64 +; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev": +; CHECK-NOT: vmaxnm.f64 + %cmp = fcmp ule double %a, %b + %cond = select i1 %cmp, double %b, double %a + ret double %cond +} + declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone