Skip to content

Commit

Permalink
[SelectionDAG] Add fcmp UNDEF handling to SelectionDAG::FoldSetCC
Browse files Browse the repository at this point in the history
Second half of PR40800: this patch adds DAG undef handling to fcmp instructions to match the behavior in llvm::ConstantFoldCompareInstruction. This permits constant folding of vector comparisons where some elements have been reduced to UNDEF (by SimplifyDemandedVectorElts, etc.).

This involves a lot of tweaking to reduced tests, as bugpoint loves to reduce fcmp arguments to undef.

Differential Revision: https://reviews.llvm.org/D60006

llvm-svn: 357765
  • Loading branch information
RKSimon committed Apr 5, 2019
1 parent 84f2271 commit 17586cd
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 212 deletions.
11 changes: 8 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Expand Up @@ -2085,14 +2085,19 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
OpVT);
}
} else if (N1CFP && OpVT.isSimple()) {
} else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
// Ensure that the constant occurs on the RHS.
ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
return SDValue();
return getSetCC(dl, VT, N2, N1, SwappedCond);
} else if (N2CFP && N2CFP->getValueAPF().isNaN()) {
// If an operand is known to be a nan, we can fold it.
} else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
(OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
// If an operand is known to be a nan (or undef that could be a nan), we can
// fold it.
// Choosing NaN for the undef will always make unordered comparisons succeed
// and ordered comparisons fail.
// Matches behavior in llvm::ConstantFoldCompareInstruction.
switch (ISD::getUnorderedFlavor(Cond)) {
default:
llvm_unreachable("Unknown flavor!");
Expand Down
17 changes: 10 additions & 7 deletions llvm/test/CodeGen/AArch64/half.ll
Expand Up @@ -96,24 +96,27 @@ define void @test_trunc64(double %in, half* %addr) {
ret void
}

define i16 @test_fccmp(i1 %a) {
define i16 @test_fccmp(i1 %a, i16 %in) {
; CHECK-LABEL: test_fccmp:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #24576
; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: movk w8, #15974, lsl #16
; CHECK-NEXT: mov w9, #16384
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: movk w9, #15428, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcmp s0, s0
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: cset w8, pl
; CHECK-NEXT: fccmp s0, s0, #8, pl
; CHECK-NEXT: fccmp s0, s1, #8, pl
; CHECK-NEXT: mov w9, #4
; CHECK-NEXT: csinc w9, w9, wzr, mi
; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret
%cmp0 = fcmp ogt half 0xH3333, undef
%cmp1 = fcmp ogt half 0xH2222, undef
%f16 = bitcast i16 %in to half
%cmp0 = fcmp ogt half 0xH3333, %f16
%cmp1 = fcmp ogt half 0xH2222, %f16
%x = select i1 %cmp0, i16 0, i16 1
%or = or i1 %cmp1, %cmp0
%y = select i1 %or, i16 4, i16 1
Expand Down
67 changes: 34 additions & 33 deletions llvm/test/CodeGen/ARM/fcmp-xo.ll
Expand Up @@ -2,34 +2,35 @@
; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8 | FileCheck %s --check-prefixes=CHECK,VMOVSR
; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8,+neon,+neonfp | FileCheck %s --check-prefixes=CHECK,NEON

define arm_aapcs_vfpcc float @foo0() local_unnamed_addr {
define arm_aapcs_vfpcc float @foo0(float %a0) local_unnamed_addr {
; CHECK-LABEL: foo0:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #5.000000e-01
; CHECK-NEXT: vmov.f32 s2, #-5.000000e-01
; CHECK-NEXT: vcmpe.f32 s0, #0
; CHECK-NEXT: vmov.f32 s2, #5.000000e-01
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov.f32 s4, #-5.000000e-01
; CHECK-NEXT: it mi
; CHECK-NEXT: vmovmi.f32 s0, s2
; CHECK-NEXT: vmovmi.f32 s2, s4
; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: bx lr
%1 = fcmp nsz olt float undef, 0.000000e+00
%1 = fcmp nsz olt float %a0, 0.000000e+00
%2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
ret float %2
}

define arm_aapcs_vfpcc float @float1() local_unnamed_addr {
define arm_aapcs_vfpcc float @float1(float %a0) local_unnamed_addr {
; CHECK-LABEL: float1:
; CHECK: @ %bb.0: @ %.end
; CHECK-NEXT: vmov.f32 s0, #1.000000e+00
; CHECK-NEXT: vmov.f32 s2, #5.000000e-01
; CHECK-NEXT: vmov.f32 s4, #-5.000000e-01
; CHECK-NEXT: vcmpe.f32 s0, s0
; CHECK-NEXT: vmov.f32 s2, #1.000000e+00
; CHECK-NEXT: vmov.f32 s4, #5.000000e-01
; CHECK-NEXT: vmov.f32 s6, #-5.000000e-01
; CHECK-NEXT: vcmpe.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s4, s2
; CHECK-NEXT: vselgt.f32 s0, s6, s4
; CHECK-NEXT: bx lr
br i1 undef, label %.end, label %1

%2 = fcmp nsz olt float undef, 1.000000e+00
%2 = fcmp nsz olt float %a0, 1.000000e+00
%3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
br label %.end

Expand All @@ -38,62 +39,62 @@ define arm_aapcs_vfpcc float @float1() local_unnamed_addr {
ret float %4
}

define arm_aapcs_vfpcc float @float128() local_unnamed_addr {
define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
; VMOVSR-LABEL: float128:
; VMOVSR: @ %bb.0:
; VMOVSR-NEXT: mov.w r0, #1124073472
; VMOVSR-NEXT: vmov.f32 s2, #5.000000e-01
; VMOVSR-NEXT: vmov s0, r0
; VMOVSR-NEXT: vmov.f32 s4, #-5.000000e-01
; VMOVSR-NEXT: vcmpe.f32 s0, s0
; VMOVSR-NEXT: vmov.f32 s4, #5.000000e-01
; VMOVSR-NEXT: vmov s2, r0
; VMOVSR-NEXT: vmov.f32 s6, #-5.000000e-01
; VMOVSR-NEXT: vcmpe.f32 s2, s0
; VMOVSR-NEXT: vmrs APSR_nzcv, fpscr
; VMOVSR-NEXT: vselgt.f32 s0, s4, s2
; VMOVSR-NEXT: vselgt.f32 s0, s6, s4
; VMOVSR-NEXT: bx lr
;
; NEON-LABEL: float128:
; NEON: @ %bb.0:
; NEON-NEXT: vmov.f32 s0, #5.000000e-01
; NEON-NEXT: mov.w r0, #1124073472
; NEON-NEXT: vmov d2, r0, r0
; NEON-NEXT: vmov.f32 s2, #-5.000000e-01
; NEON-NEXT: vcmpe.f32 s4, s0
; NEON-NEXT: vmov.f32 s2, #5.000000e-01
; NEON-NEXT: vmov d3, r0, r0
; NEON-NEXT: vmov.f32 s4, #-5.000000e-01
; NEON-NEXT: vcmpe.f32 s6, s0
; NEON-NEXT: vmrs APSR_nzcv, fpscr
; NEON-NEXT: vselgt.f32 s0, s2, s0
; NEON-NEXT: vselgt.f32 s0, s4, s2
; NEON-NEXT: bx lr
%1 = fcmp nsz olt float undef, 128.000000e+00
%1 = fcmp nsz olt float %a0, 128.000000e+00
%2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
ret float %2
}

define arm_aapcs_vfpcc double @double1() local_unnamed_addr {
define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr {
; CHECK-LABEL: double1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
; CHECK-NEXT: vmov.f64 d18, #1.000000e+00
; CHECK-NEXT: vcmpe.f64 d18, d16
; CHECK-NEXT: vcmpe.f64 d18, d0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
; CHECK-NEXT: vselgt.f64 d0, d17, d16
; CHECK-NEXT: bx lr
%1 = fcmp nsz olt double undef, 1.000000e+00
%1 = fcmp nsz olt double %a0, 1.000000e+00
%2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
ret double %2
}

define arm_aapcs_vfpcc double @double128() local_unnamed_addr {
define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr {
; CHECK-LABEL: double128:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movt r0, #16480
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: movt r0, #16480
; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
; CHECK-NEXT: vmov d18, r1, r0
; CHECK-NEXT: vcmpe.f64 d18, d16
; CHECK-NEXT: vcmpe.f64 d18, d0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
; CHECK-NEXT: vselgt.f64 d0, d17, d16
; CHECK-NEXT: bx lr
%1 = fcmp nsz olt double undef, 128.000000e+00
%1 = fcmp nsz olt double %a0, 128.000000e+00
%2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
ret double %2
}

0 comments on commit 17586cd

Please sign in to comment.