Skip to content

Commit

Permalink
[ARM] make vminnm/vmaxnm work with ?le, ?ge and no-nans-fp-math
Browse files Browse the repository at this point in the history
Because -menable-no-nans causes fcmp conditions to be rewritten
without 'o' or 'u', the recognition code needs to cope. Also
extended it to handle 'le' and 'ge'.

Differential Revision: http://reviews.llvm.org/D8725

llvm-svn: 234421
  • Loading branch information
Scott Douglass committed Apr 8, 2015
1 parent 71df405 commit 7ad7792
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 11 deletions.
27 changes: 18 additions & 9 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);

// Try to generate VSEL on ARMv8.
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
// We can select VMAXNM/VMINNM from a compare followed by a select with the
// We can use VMAXNM/VMINNM for a compare followed by a select with the
// same operands, as follows:
// c = fcmp [ogt, olt, ugt, ult] a, b
// c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
// We only do this in unsafe-fp-math, because signed zeros and NaNs are
// handled differently than the original code sequence.
// In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
// We only do this transformation in UnsafeFPMath and for no-NaNs
// comparisons, because signed zeros and NaNs are handled differently than
// the original code sequence.
// FIXME: There are more cases that can be transformed even with NaNs,
// signed zeroes and safe math. E.g. in the following, the result will be
// FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too.
// c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0.
// FIXME: There is similar code that allows some extensions in
// AArch64TargetLowering::LowerSELECT_CC that should be shared with this
// code.
if (getTargetMachine().Options.UnsafeFPMath) {
if (LHS == TrueVal && RHS == FalseVal) {
if (CC == ISD::SETOGT || CC == ISD::SETUGT)
if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
if (CC == ISD::SETOLT || CC == ISD::SETULT)
if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
} else if (LHS == FalseVal && RHS == TrueVal) {
if (CC == ISD::SETOLT || CC == ISD::SETULT)
if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
if (CC == ISD::SETOGT || CC == ISD::SETUGT)
if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
}
}
Expand Down
97 changes: 95 additions & 2 deletions llvm/test/CodeGen/ARM/vminmaxnm.ll
@@ -1,5 +1,8 @@
; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 \
; RUN: -enable-no-nans-fp-math -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST

; vectors

define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vmaxnmq:
Expand Down Expand Up @@ -37,6 +40,8 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
ret <2 x float> %tmp3
}

; scalars

define float @fp-armv8_vminnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o":
; CHECK-FAST-NOT: vcmp
Expand All @@ -48,6 +53,17 @@ define float @fp-armv8_vminnm_o(float %a, float %b) {
ret float %cond
}

; Scalar f64 minimum written as select(a <= b, a, b) using the ordered
; 'ole' predicate.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vminnm.f64 with no vcmp between the label and it.
; Under the default CHECK prefix vminnm.f64 must not appear after the label.
define double @fp-armv8_vminnm_ole(double %a, double %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_ole":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_ole":
; CHECK-NOT: vminnm.f64
%cmp = fcmp ole double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}

define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev":
; CHECK-FAST-NOT: vcmp
Expand All @@ -59,6 +75,17 @@ define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
ret float %cond
}

; Scalar f64 minimum in "reversed" form: select(a >= b, b, a) using the
; ordered 'oge' predicate, i.e. the select operands are swapped relative to
; the compare operands.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vminnm.f64 with no vcmp between the label and it.
; Under the default CHECK prefix vminnm.f64 must not appear after the label.
define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_oge_rev":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
; CHECK-NOT: vminnm.f64
%cmp = fcmp oge double %a, %b
; Note the swapped operands: %b is chosen when %a >= %b.
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}

define float @fp-armv8_vminnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u":
; CHECK-FAST-NOT: vcmp
Expand All @@ -70,6 +97,17 @@ define float @fp-armv8_vminnm_u(float %a, float %b) {
ret float %cond
}

; Scalar f32 minimum written as select(a <= b, a, b) using the unordered
; 'ule' predicate.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vminnm.f32 with no vcmp between the label and it.
; Under the default CHECK prefix vminnm.f32 must not appear after the label.
define float @fp-armv8_vminnm_ule(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_ule":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_ule":
; CHECK-NOT: vminnm.f32
%cmp = fcmp ule float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}

define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev":
; CHECK-FAST-NOT: vcmp
Expand All @@ -81,6 +119,17 @@ define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
ret float %cond
}

; Scalar f64 minimum in "reversed" form: select(a >= b, b, a) using the
; unordered 'uge' predicate, i.e. the select operands are swapped relative to
; the compare operands.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vminnm.f64 with no vcmp between the label and it.
; Under the default CHECK prefix vminnm.f64 must not appear after the label.
define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_uge_rev":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
; CHECK-NOT: vminnm.f64
%cmp = fcmp uge double %a, %b
; Note the swapped operands: %b is chosen when the compare is true.
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}

define float @fp-armv8_vmaxnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o":
; CHECK-FAST-NOT: vcmp
Expand All @@ -92,6 +141,17 @@ define float @fp-armv8_vmaxnm_o(float %a, float %b) {
ret float %cond
}

; Scalar f32 maximum written as select(a >= b, a, b) using the ordered
; 'oge' predicate.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vmaxnm.f32 with no vcmp between the label and it.
; Under the default CHECK prefix vmaxnm.f32 must not appear after the label.
define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_oge":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
; CHECK-NOT: vmaxnm.f32
%cmp = fcmp oge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}

define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev":
; CHECK-FAST-NOT: vcmp
Expand All @@ -103,6 +163,17 @@ define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
ret float %cond
}

; Scalar f32 maximum in "reversed" form: select(a <= b, b, a) using the
; ordered 'ole' predicate, i.e. the select operands are swapped relative to
; the compare operands.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vmaxnm.f32 with no vcmp between the label and it.
; Under the default CHECK prefix vmaxnm.f32 must not appear after the label.
define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ole_rev":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
; CHECK-NOT: vmaxnm.f32
%cmp = fcmp ole float %a, %b
; Note the swapped operands: %b is chosen when %a <= %b.
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}

define float @fp-armv8_vmaxnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u":
; CHECK-FAST-NOT: vcmp
Expand All @@ -114,6 +185,17 @@ define float @fp-armv8_vmaxnm_u(float %a, float %b) {
ret float %cond
}

; Scalar f32 maximum written as select(a >= b, a, b) using the unordered
; 'uge' predicate.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vmaxnm.f32 with no vcmp between the label and it.
; Under the default CHECK prefix vmaxnm.f32 must not appear after the label.
define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_uge":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
; CHECK-NOT: vmaxnm.f32
%cmp = fcmp uge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}

define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev":
; CHECK-FAST-NOT: vcmp
Expand All @@ -125,6 +207,17 @@ define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
ret float %cond
}

; Scalar f64 maximum in "reversed" form: select(a <= b, b, a) using the
; unordered 'ule' predicate, i.e. the select operands are swapped relative to
; the compare operands.
; Under the CHECK-FAST prefix (RUN lines passing -enable-unsafe-fp-math) the
; output must contain vmaxnm.f64 with no vcmp between the label and it.
; Under the default CHECK prefix vmaxnm.f64 must not appear after the label.
define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ule_rev":
; CHECK-FAST-NOT: vcmp
; CHECK-FAST: vmaxnm.f64
; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
; CHECK-NOT: vmaxnm.f64
%cmp = fcmp ule double %a, %b
; Note the swapped operands: %b is chosen when the compare is true.
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}


declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
Expand Down

0 comments on commit 7ad7792

Please sign in to comment.