Skip to content

Commit

Permalink
InstCombine: Handle folding fcmp of 0 into llvm.is.fpclass
Browse files Browse the repository at this point in the history
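This needs to consider the denormal mode: a compare with 0 is only exactly equivalent to an fcZero class test if input denormals are not flushed.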
  • Loading branch information
arsenm committed Mar 15, 2023
1 parent 4109e3f commit 0d18f31
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 167 deletions.
33 changes: 28 additions & 5 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Expand Up @@ -1273,17 +1273,37 @@ static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
return Builder.CreateFCmp(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1);
}

/// Check whether input denormals of \p Val's floating-point type are handled
/// with IEEE semantics in \p F.
///
/// The scalar (element) type of \p Val is used to look up the denormal mode
/// that \p F reports for that type's float semantics; the result is true only
/// when the input half of that mode is DenormalMode::IEEE.
static bool inputDenormalIsIEEE(const Function &F, const Value *Val) {
  // Vectors are queried through their element type.
  const auto &Semantics = Val->getType()->getScalarType()->getFltSemantics();
  const auto Mode = F.getDenormalMode(Semantics);
  return Mode.Input == DenormalMode::IEEE;
}

/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
/// same result as an fcmp with the given operands.
static std::pair<Value *, unsigned> fcmpToClassTest(FCmpInst::Predicate Pred,
const Function &F,
Value *LHS, Value *RHS) {
const APFloat *ConstRHS;
if (!match(RHS, m_APFloat(ConstRHS)))
return {nullptr, 0};

if (ConstRHS->isZero()) {
// Compares with 0 are only exactly equal to fcZero if input denormals are
// not flushed.
if (FCmpInst::isEquality(Pred) && !inputDenormalIsIEEE(F, LHS))
return {nullptr, 0};

switch (Pred) {
// TODO: Compares eq/ne with 0 depends on the denormal handling mode.
case FCmpInst::FCMP_OEQ: // Match x == 0.0
return {LHS, fcZero};
case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
return {LHS, fcZero | fcNan};
case FCmpInst::FCMP_UNE: // Match (x != 0.0)
return {LHS, ~fcZero & fcAllFlags};
case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
return {LHS, ~fcNan & ~fcZero & fcAllFlags};
case FCmpInst::FCMP_ORD:
// Canonical form of ord/uno is with a zero. We could also handle
// non-canonical other non-NaN constants or LHS == RHS.
Expand Down Expand Up @@ -1501,9 +1521,11 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
// potentially eliminate 4-6 instructions. If we can represent a test with a
// single fcmp with fneg and fabs, that's likely a better canonical form.
if (LHS->hasOneUse() && RHS->hasOneUse()) {
auto [ClassValRHS, ClassMaskRHS] = fcmpToClassTest(PredR, RHS0, RHS1);
auto [ClassValRHS, ClassMaskRHS] =
fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1);
if (ClassValRHS) {
auto [ClassValLHS, ClassMaskLHS] = fcmpToClassTest(PredL, LHS0, LHS1);
auto [ClassValLHS, ClassMaskLHS] =
fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1);
if (ClassValLHS == ClassValRHS) {
unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
: (ClassMaskLHS | ClassMaskRHS);
Expand All @@ -1525,8 +1547,9 @@ static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,
if (!FCmp || !FCmp->hasOneUse())
return false;

std::tie(ClassVal, ClassMask) = fcmpToClassTest(
FCmp->getPredicate(), FCmp->getOperand(0), FCmp->getOperand(1));
std::tie(ClassVal, ClassMask) =
fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
FCmp->getOperand(0), FCmp->getOperand(1));
return ClassVal != nullptr;
}

Expand Down
48 changes: 16 additions & 32 deletions llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
Expand Up @@ -211,10 +211,8 @@ define <2 x i1> @class_finite_or_fcmp_issubnormal_vector(<2 x half> %x) {

define i1 @fcmp_oeq_zero_or_class_normal(half %x) {
; CHECK-LABEL: @fcmp_oeq_zero_or_class_normal(
; CHECK-NEXT: [[OEQ_INF:%.*]] = fcmp oeq half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[OEQ_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 360)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%oeq.inf = fcmp oeq half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand Down Expand Up @@ -276,10 +274,8 @@ define <2 x i1> @fcmp_oeq_zero_or_class_normal_dynamic_v2f16(<2 x half> %x) #2 {

define i1 @class_normal_or_fcmp_oeq_zero(half %x) {
; CHECK-LABEL: @class_normal_or_fcmp_oeq_zero(
; CHECK-NEXT: [[OEQ_INF:%.*]] = fcmp oeq half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[OEQ_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 360)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%oeq.inf = fcmp oeq half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand All @@ -289,10 +285,8 @@ define i1 @class_normal_or_fcmp_oeq_zero(half %x) {

define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
; CHECK-LABEL: @fcmp_ueq_zero_or_class_normal(
; CHECK-NEXT: [[UEQ_INF:%.*]] = fcmp ueq half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[UEQ_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%ueq.inf = fcmp ueq half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand All @@ -302,10 +296,8 @@ define i1 @fcmp_ueq_zero_or_class_normal(half %x) {

define i1 @class_normal_or_fcmp_ueq_zero(half %x) {
; CHECK-LABEL: @class_normal_or_fcmp_ueq_zero(
; CHECK-NEXT: [[UEQ_INF:%.*]] = fcmp ueq half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[UEQ_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%ueq.inf = fcmp ueq half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand All @@ -315,10 +307,8 @@ define i1 @class_normal_or_fcmp_ueq_zero(half %x) {

define i1 @fcmp_one_zero_or_class_normal(half %x) {
; CHECK-LABEL: @fcmp_one_zero_or_class_normal(
; CHECK-NEXT: [[ONE_INF:%.*]] = fcmp one half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[ONE_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 924)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%one.inf = fcmp one half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand Down Expand Up @@ -354,10 +344,8 @@ define i1 @fcmp_one_zero_or_class_normal_dynamic(half %x) #2 {

define i1 @class_normal_or_fcmp_one_zero(half %x) {
; CHECK-LABEL: @class_normal_or_fcmp_one_zero(
; CHECK-NEXT: [[ONE_INF:%.*]] = fcmp one half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[ONE_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 924)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%one.inf = fcmp one half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand All @@ -367,10 +355,8 @@ define i1 @class_normal_or_fcmp_one_zero(half %x) {

define i1 @fcmp_une_zero_or_class_normal(half %x) {
; CHECK-LABEL: @fcmp_une_zero_or_class_normal(
; CHECK-NEXT: [[UNE_INF:%.*]] = fcmp une half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNE_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = fcmp une half [[X:%.*]], 0xH0000
; CHECK-NEXT: ret i1 [[CLASS]]
;
%une.inf = fcmp une half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand All @@ -380,10 +366,8 @@ define i1 @fcmp_une_zero_or_class_normal(half %x) {

define i1 @class_normal_or_fcmp_une_zero(half %x) {
; CHECK-LABEL: @class_normal_or_fcmp_une_zero(
; CHECK-NEXT: [[UNE_INF:%.*]] = fcmp une half [[X:%.*]], 0xH0000
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X]], i32 264)
; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNE_INF]], [[CLASS]]
; CHECK-NEXT: ret i1 [[OR]]
; CHECK-NEXT: [[CLASS:%.*]] = fcmp une half [[X:%.*]], 0xH0000
; CHECK-NEXT: ret i1 [[CLASS]]
;
%une.inf = fcmp une half %x, 0.0
%class = call i1 @llvm.is.fpclass.f16(half %x, i32 264)
Expand Down

0 comments on commit 0d18f31

Please sign in to comment.