Skip to content

Commit e5c3a53

Browse files
wzssyqanikicarsenm
authored
expandFMINIMUMNUM_FMAXIMUMNUM: Improve compare between zeros (#140193)
1. On GPR32 platforms, expandIS_FPCLASS may fail because the ISD::BITCAST from double to int64 may fail. Let's FP_ROUND the double to float first. Since we use the result only if MinMax is zero, the flushing won't break anything. 2. Only one IS_FPCLASS is needed. MinMax will always be RHS if the operands are equal, so we can select between LHS and MinMax. It is safe even if FP_ROUND flushes a small LHS: if LHS is not zero, then MinMax won't be zero, so we will always use MinMax. --------- Co-authored-by: Nikita Popov <github@npopov.com> Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
1 parent a29b29c commit e5c3a53

File tree

8 files changed

+19547
-22889
lines changed

8 files changed

+19547
-22889
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8855,6 +8855,7 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
88558855
RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
88568856
}
88578857

8858+
// Always prefer RHS if equal.
88588859
SDValue MinMax =
88598860
DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
88608861

@@ -8869,13 +8870,19 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
88698870
DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
88708871
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
88718872
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8872-
SDValue LCmp = DAG.getSelect(
8873-
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8873+
EVT IntVT = VT.changeTypeToInteger();
8874+
EVT FloatVT = VT.changeElementType(MVT::f32);
8875+
SDValue LHSTrunc = LHS;
8876+
if (!isTypeLegal(IntVT) && !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)) {
8877+
LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
8878+
DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8879+
}
8880+
// It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
8881+
// we preferred RHS when generating MinMax, if the operands are equal.
8882+
SDValue RetZero = DAG.getSelect(
8883+
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
88748884
MinMax, Flags);
8875-
SDValue RCmp = DAG.getSelect(
8876-
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8877-
Flags);
8878-
return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8885+
return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
88798886
}
88808887

88818888
/// Returns a true value if this FPClassTest can be performed with an ordered

llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,68 @@ entry:
10761076
ret <16 x half> %c
10771077
}
10781078

1079+
;;;;;;;;;;;;;;;; max_f128
1080+
define fp128 @max_fp128(fp128 %x, fp128 %y) {
1081+
; AARCH64-LABEL: max_fp128:
1082+
; AARCH64: // %bb.0: // %start
1083+
; AARCH64-NEXT: sub sp, sp, #48
1084+
; AARCH64-NEXT: str x30, [sp, #32] // 8-byte Spill
1085+
; AARCH64-NEXT: .cfi_def_cfa_offset 48
1086+
; AARCH64-NEXT: .cfi_offset w30, -16
1087+
; AARCH64-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
1088+
; AARCH64-NEXT: mov v1.16b, v0.16b
1089+
; AARCH64-NEXT: bl __unordtf2
1090+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1091+
; AARCH64-NEXT: cmp w0, #0
1092+
; AARCH64-NEXT: b.eq .LBB32_2
1093+
; AARCH64-NEXT: // %bb.1: // %start
1094+
; AARCH64-NEXT: str q0, [sp] // 16-byte Spill
1095+
; AARCH64-NEXT: .LBB32_2: // %start
1096+
; AARCH64-NEXT: mov v1.16b, v0.16b
1097+
; AARCH64-NEXT: bl __unordtf2
1098+
; AARCH64-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1099+
; AARCH64-NEXT: cmp w0, #0
1100+
; AARCH64-NEXT: b.eq .LBB32_4
1101+
; AARCH64-NEXT: // %bb.3: // %start
1102+
; AARCH64-NEXT: mov v1.16b, v0.16b
1103+
; AARCH64-NEXT: .LBB32_4: // %start
1104+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1105+
; AARCH64-NEXT: str q1, [sp, #16] // 16-byte Spill
1106+
; AARCH64-NEXT: bl __gttf2
1107+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1108+
; AARCH64-NEXT: cmp w0, #0
1109+
; AARCH64-NEXT: b.le .LBB32_6
1110+
; AARCH64-NEXT: // %bb.5: // %start
1111+
; AARCH64-NEXT: str q0, [sp, #16] // 16-byte Spill
1112+
; AARCH64-NEXT: .LBB32_6: // %start
1113+
; AARCH64-NEXT: str q0, [sp] // 16-byte Spill
1114+
; AARCH64-NEXT: bl __trunctfsf2
1115+
; AARCH64-NEXT: fmov w8, s0
1116+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1117+
; AARCH64-NEXT: mov v1.16b, v0.16b
1118+
; AARCH64-NEXT: cmp w8, #0
1119+
; AARCH64-NEXT: b.ne .LBB32_8
1120+
; AARCH64-NEXT: // %bb.7: // %start
1121+
; AARCH64-NEXT: ldr q1, [sp] // 16-byte Reload
1122+
; AARCH64-NEXT: .LBB32_8: // %start
1123+
; AARCH64-NEXT: adrp x8, .LCPI32_0
1124+
; AARCH64-NEXT: str q1, [sp] // 16-byte Spill
1125+
; AARCH64-NEXT: ldr q1, [x8, :lo12:.LCPI32_0]
1126+
; AARCH64-NEXT: bl __eqtf2
1127+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1128+
; AARCH64-NEXT: cmp w0, #0
1129+
; AARCH64-NEXT: b.ne .LBB32_10
1130+
; AARCH64-NEXT: // %bb.9: // %start
1131+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1132+
; AARCH64-NEXT: .LBB32_10: // %start
1133+
; AARCH64-NEXT: ldr x30, [sp, #32] // 8-byte Reload
1134+
; AARCH64-NEXT: add sp, sp, #48
1135+
; AARCH64-NEXT: ret
1136+
start:
1137+
%0 = tail call fp128 @llvm.maximumnum.f128(fp128 %x, fp128 %y)
1138+
ret fp128 %0
1139+
}
1140+
10791141
;;;;;;;;;;;;;;;; max_f64
10801142
define double @max_f64(double %a, double %b) {
10811143
; AARCH64-LABEL: max_f64:
@@ -1658,6 +1720,68 @@ entry:
16581720
ret <16 x half> %c
16591721
}
16601722

1723+
;;;;;;;;;;;;;;;; min_f128
1724+
define fp128 @min_fp128(fp128 %x, fp128 %y) {
1725+
; AARCH64-LABEL: min_fp128:
1726+
; AARCH64: // %bb.0: // %start
1727+
; AARCH64-NEXT: sub sp, sp, #48
1728+
; AARCH64-NEXT: str x30, [sp, #32] // 8-byte Spill
1729+
; AARCH64-NEXT: .cfi_def_cfa_offset 48
1730+
; AARCH64-NEXT: .cfi_offset w30, -16
1731+
; AARCH64-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
1732+
; AARCH64-NEXT: mov v1.16b, v0.16b
1733+
; AARCH64-NEXT: bl __unordtf2
1734+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1735+
; AARCH64-NEXT: cmp w0, #0
1736+
; AARCH64-NEXT: b.eq .LBB49_2
1737+
; AARCH64-NEXT: // %bb.1: // %start
1738+
; AARCH64-NEXT: str q0, [sp] // 16-byte Spill
1739+
; AARCH64-NEXT: .LBB49_2: // %start
1740+
; AARCH64-NEXT: mov v1.16b, v0.16b
1741+
; AARCH64-NEXT: bl __unordtf2
1742+
; AARCH64-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
1743+
; AARCH64-NEXT: cmp w0, #0
1744+
; AARCH64-NEXT: b.eq .LBB49_4
1745+
; AARCH64-NEXT: // %bb.3: // %start
1746+
; AARCH64-NEXT: mov v1.16b, v0.16b
1747+
; AARCH64-NEXT: .LBB49_4: // %start
1748+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1749+
; AARCH64-NEXT: str q1, [sp, #16] // 16-byte Spill
1750+
; AARCH64-NEXT: bl __gttf2
1751+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1752+
; AARCH64-NEXT: cmp w0, #0
1753+
; AARCH64-NEXT: b.le .LBB49_6
1754+
; AARCH64-NEXT: // %bb.5: // %start
1755+
; AARCH64-NEXT: str q0, [sp, #16] // 16-byte Spill
1756+
; AARCH64-NEXT: .LBB49_6: // %start
1757+
; AARCH64-NEXT: str q0, [sp] // 16-byte Spill
1758+
; AARCH64-NEXT: bl __trunctfsf2
1759+
; AARCH64-NEXT: fmov w8, s0
1760+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1761+
; AARCH64-NEXT: mov v1.16b, v0.16b
1762+
; AARCH64-NEXT: cmp w8, #0
1763+
; AARCH64-NEXT: b.ne .LBB49_8
1764+
; AARCH64-NEXT: // %bb.7: // %start
1765+
; AARCH64-NEXT: ldr q1, [sp] // 16-byte Reload
1766+
; AARCH64-NEXT: .LBB49_8: // %start
1767+
; AARCH64-NEXT: adrp x8, .LCPI49_0
1768+
; AARCH64-NEXT: str q1, [sp] // 16-byte Spill
1769+
; AARCH64-NEXT: ldr q1, [x8, :lo12:.LCPI49_0]
1770+
; AARCH64-NEXT: bl __eqtf2
1771+
; AARCH64-NEXT: ldr q0, [sp, #16] // 16-byte Reload
1772+
; AARCH64-NEXT: cmp w0, #0
1773+
; AARCH64-NEXT: b.ne .LBB49_10
1774+
; AARCH64-NEXT: // %bb.9: // %start
1775+
; AARCH64-NEXT: ldr q0, [sp] // 16-byte Reload
1776+
; AARCH64-NEXT: .LBB49_10: // %start
1777+
; AARCH64-NEXT: ldr x30, [sp, #32] // 8-byte Reload
1778+
; AARCH64-NEXT: add sp, sp, #48
1779+
; AARCH64-NEXT: ret
1780+
start:
1781+
%0 = tail call fp128 @llvm.maximumnum.f128(fp128 %x, fp128 %y)
1782+
ret fp128 %0
1783+
}
1784+
16611785
;;;;;;;;;;;;;;;; min_f64
16621786
define double @min_f64(double %a, double %b) {
16631787
; AARCH64-LABEL: min_f64:

0 commit comments

Comments
 (0)