-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] fold float clamp pattern into llvm.max/min #159652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
%v0 = fcmp nsz ogt float %arg0, 0.000000e+00 %v1 = select nsz i1 %v0, float %arg0, float 0.000000e+00 ==== %v0 = call float @llvm.maxnum.f32(float %arg0, float 0.000000e+00[) ==== fcmp + select patterns can be folded into fmax/fmin. This patch handles this transformation for OGT and OLT. Fixes llvm#157486
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms Author: Vedant Paranjape (VedantParanjape) Changes%v0 = fcmp nsz ogt float %arg0, 0.000000e+00 fcmp + select patterns can be folded into fmax/fmin. This patch handles this transformation for OGT and OLT. Fixes #157486 Patch is 41.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159652.diff 14 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8f9d0bf6240d5..028a432cdba44 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3963,6 +3963,60 @@ static Value *foldSelectIntoAddConstant(SelectInst &SI,
return nullptr;
}
+// fcmp + sel patterns into max/min intrinsic.
+static Value *foldSelectICmpIntoMaxMin(SelectInst &SI,
+ InstCombiner::BuilderTy &Builder) {
+
+ auto TryFoldIntoMaxMinIntrinsic =
+ [&Builder, &SI](CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS,
+ Value *TVal, Value *FVal) -> Value * {
+ // Early exit if the operands are not in the expected form.
+ if ((CmpRHS != TVal || CmpLHS != FVal) &&
+ (CmpLHS != TVal || CmpRHS != FVal))
+ return nullptr;
+
+ bool isSwapped = (CmpLHS == FVal && CmpRHS == TVal);
+ // Only these relational predicates can be transformed into maxnum/minnum
+ // intrinsic.
+ // X > C ? X : C --> maxnum(X, C)
+ // X > C ? C : X --> minnum(X, C)
+ if (Pred == CmpInst::FCMP_OGT) {
+ Intrinsic::ID MaxMinIID =
+ isSwapped ? Intrinsic::minnum : Intrinsic::maxnum;
+ return Builder.CreateIntrinsic(SI.getType(), MaxMinIID, {TVal, FVal},
+ &SI);
+ }
+
+ // X < C ? X : C --> minnum(X, C)
+ // X < C ? C : X --> maxnum(X, C)
+ if (Pred == CmpInst::FCMP_OLT) {
+ Intrinsic::ID MaxMinIID =
+ isSwapped ? Intrinsic::maxnum : Intrinsic::minnum;
+ return Builder.CreateIntrinsic(SI.getType(), MaxMinIID, {TVal, FVal},
+ &SI);
+ }
+
+ return nullptr;
+ };
+
+ // select((fcmp Pred, X, Y), X, Y)
+ // => minnum/maxnum(X, Y)
+ //
+ // Pred := OGT and OLT
+ Value *X, *Y;
+ Value *TVal, *FVal;
+ CmpPredicate Pred;
+
+ // Note: OneUse check for `Cmp` is necessary because it makes sure that other
+ // InstCombine folds don't undo this transformation and cause an infinite
+ // loop. Furthermore, it could also increase the operation count.
+ if (match(&SI, m_OneUse(m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Y))),
+ m_Value(TVal), m_Value(FVal)))))
+ return TryFoldIntoMaxMinIntrinsic(Pred, X, Y, TVal, FVal);
+
+ return nullptr;
+}
+
static Value *foldSelectBitTest(SelectInst &Sel, Value *CondVal, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy &Builder,
@@ -4455,6 +4509,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Value *V = foldSelectIntoAddConstant(SI, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectICmpIntoMaxMin(SI, Builder))
+ return replaceInstUsesWith(SI, V);
+
// select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
// Load inst is intentionally not checked for hasOneUse()
if (match(FalseVal, m_Zero()) &&
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
index 7f3276608c5da..9d33a7835af8a 100644
--- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -5,10 +5,9 @@
; (X < C1) ? C1 : MIN(X, C2)
define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
-; CHECK-NEXT: [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R1]]
;
%cmp2 = fcmp fast olt float %x, 255.0
@@ -21,10 +20,9 @@ define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
; (X <= C1) ? C1 : MIN(X, C2)
define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
-; CHECK-NEXT: [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R1]]
;
%cmp2 = fcmp fast olt float %x, 255.0
@@ -37,10 +35,9 @@ define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
; (X > C1) ? C1 : MAX(X, C2)
define float @clamp_float_fast_ordered_strict_minmax(float %x) {
; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
-; CHECK-NEXT: [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R1]]
;
%cmp2 = fcmp fast ogt float %x, 1.0
@@ -53,10 +50,9 @@ define float @clamp_float_fast_ordered_strict_minmax(float %x) {
; (X >= C1) ? C1 : MAX(X, C2)
define float @clamp_float_fast_ordered_nonstrict_minmax(float %x) {
; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
-; CHECK-NEXT: [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R1]]
;
%cmp2 = fcmp fast ogt float %x, 1.0
@@ -191,8 +187,7 @@ define float @clamp_negative_same_op(float %x) {
; First, check that we don't do bad things in the presence of signed zeros
define float @clamp_float_with_zero1(float %x) {
; CHECK-LABEL: @clamp_float_with_zero1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 0.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -206,8 +201,7 @@ define float @clamp_float_with_zero1(float %x) {
define float @clamp_float_with_zero2(float %x) {
; CHECK-LABEL: @clamp_float_with_zero2(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -228,8 +222,7 @@ define float @clamp_float_with_zero2(float %x) {
; (X < C1) ? C1 : MIN(X, C2)
define float @clamp_float_ordered_strict_maxmin1(float %x) {
; CHECK-LABEL: @clamp_float_ordered_strict_maxmin1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -259,8 +252,7 @@ define float @clamp_float_ordered_strict_maxmin2(float %x) {
; (X <= C1) ? C1 : MIN(X, C2)
define float @clamp_float_ordered_nonstrict_maxmin1(float %x) {
; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -290,8 +282,7 @@ define float @clamp_float_ordered_nonstrict_maxmin2(float %x) {
; (X > C1) ? C1 : MAX(X, C2)
define float @clamp_float_ordered_strict_minmax1(float %x) {
; CHECK-LABEL: @clamp_float_ordered_strict_minmax1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -321,8 +312,7 @@ define float @clamp_float_ordered_strict_minmax2(float %x) {
; (X >= C1) ? C1 : MAX(X, C2)
define float @clamp_float_ordered_nonstrict_minmax1(float %x) {
; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -355,8 +345,7 @@ define float @clamp_float_ordered_nonstrict_minmax2(float %x) {
; (X < C1) ? C1 : MIN(X, C2)
define float @clamp_float_unordered_strict_maxmin1(float %x) {
; CHECK-LABEL: @clamp_float_unordered_strict_maxmin1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -386,8 +375,7 @@ define float @clamp_float_unordered_strict_maxmin2(float %x) {
; (X <= C1) ? C1 : MIN(X, C2)
define float @clamp_float_unordered_nonstrict_maxmin1(float %x) {
; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -417,8 +405,7 @@ define float @clamp_float_unordered_nonstrict_maxmin2(float %x) {
; (X > C1) ? C1 : MAX(X, C2)
define float @clamp_float_unordered_strict_minmax1(float %x) {
; CHECK-LABEL: @clamp_float_unordered_strict_minmax1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -448,8 +435,7 @@ define float @clamp_float_unordered_strict_minmax2(float %x) {
; (X >= C1) ? C1 : MAX(X, C2)
define float @clamp_float_unordered_nonstrict_minmax1(float %x) {
; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax1(
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -527,8 +513,7 @@ define float @mixed_clamp_to_float_1(i32 %x) {
define i32 @mixed_clamp_to_i32_1(float %x) {
; CHECK-LABEL: @mixed_clamp_to_i32_1(
-; CHECK-NEXT: [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[FLOAT_MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
; CHECK-NEXT: [[I32_X:%.*]] = fptosi float [[X]] to i32
; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[I32_X]], 0
@@ -561,8 +546,7 @@ define float @mixed_clamp_to_float_2(i32 %x) {
define i32 @mixed_clamp_to_i32_2(float %x) {
; CHECK-LABEL: @mixed_clamp_to_i32_2(
-; CHECK-NEXT: [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT: [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[FLOAT_MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
; CHECK-NEXT: [[LO_CMP:%.*]] = fcmp olt float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], i32 1, i32 [[I32_MIN]]
diff --git a/llvm/test/Transforms/InstCombine/fcmp-select.ll b/llvm/test/Transforms/InstCombine/fcmp-select.ll
index b622c8636eccb..63e72fed55350 100644
--- a/llvm/test/Transforms/InstCombine/fcmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp-select.ll
@@ -202,10 +202,8 @@ define <2 x i1> @test_fcmp_select_const_const_vec(<2 x double> %x) {
define double @test_fcmp_select_clamp(double %x) {
; CHECK-LABEL: @test_fcmp_select_clamp(
-; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[X:%.*]], 9.000000e-01
-; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], double 9.000000e-01, double [[X]]
-; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[SEL1]], 5.000000e-01
-; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], double 5.000000e-01, double [[SEL1]]
+; CHECK-NEXT: [[SEL1:%.*]] = call double @llvm.minnum.f64(double [[X:%.*]], double 9.000000e-01)
+; CHECK-NEXT: [[SEL2:%.*]] = call double @llvm.maxnum.f64(double [[SEL1]], double 5.000000e-01)
; CHECK-NEXT: ret double [[SEL2]]
;
%cmp1 = fcmp ogt double %x, 9.000000e-01
@@ -284,8 +282,7 @@ define i1 @test_fcmp_ord_select_fcmp_oeq_var_const(double %x) {
define float @test_select_nnan_nsz_fcmp_olt(float %x) {
; CHECK-LABEL: @test_select_nnan_nsz_fcmp_olt(
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt float [[X:%.*]], -0.000000e+00
-; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[TMP1]], float [[X]], float -0.000000e+00
+; CHECK-NEXT: [[SEL1:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float -0.000000e+00)
; CHECK-NEXT: ret float [[SEL1]]
;
%cmp = fcmp olt float %x, 0.000000e+00
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index 0b5d8b3cd06e0..faf9ec2aab5f0 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -116,8 +116,9 @@ define half @fptrunc_select_true_val_extra_use(half %x, float %y, i1 %cond) {
define half @fptrunc_max(half %arg) {
; CHECK-LABEL: @fptrunc_max(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt half [[ARG:%.*]], 0xH0000
-; CHECK-NEXT: [[NARROW_SEL:%.*]] = select i1 [[CMP]], half 0xH0000, half [[ARG]]
+; CHECK-NEXT: [[EXT:%.*]] = fpext half [[ARG:%.*]] to double
+; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.maxnum.f64(double [[EXT]], double 0.000000e+00)
+; CHECK-NEXT: [[NARROW_SEL:%.*]] = fptrunc double [[MAX]] to half
; CHECK-NEXT: ret half [[NARROW_SEL]]
;
%ext = fpext half %arg to double
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
index 3c43618b13569..8fc30442c2766 100644
--- a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
+++ b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
@@ -20,8 +20,7 @@ define void @_Z3foov() {
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr @b, i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[CMP_I]], float [[TMP2]], float [[TMP1]]
+; CHECK-NEXT: [[DOTV:%.*]] = call float @llvm.maxnum.f32(float [[TMP2]], float [[TMP1]])
; CHECK-NEXT: store float [[DOTV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
@@ -79,8 +78,7 @@ define void @bitcasted_minmax_with_select_of_pointers(ptr %loadaddr1, ptr %loada
; CHECK-LABEL: @bitcasted_minmax_with_select_of_pointers(
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[LOADADDR1:%.*]], align 4
; CHECK-NEXT: [[LD2:%.*]] = load float, ptr [[LOADADDR2:%.*]], align 4
-; CHECK-NEXT: [[COND:%.*]] = fcmp ogt float [[LD1]], [[LD2]]
-; CHECK-NEXT: [[LD_V:%.*]] = select i1 [[COND]], float [[LD1]], float [[LD2]]
+; CHECK-NEXT: [[LD_V:%.*]] = call float @llvm.maxnum.f32(float [[LD1]], float [[LD2]])
; CHECK-NEXT: store float [[LD_V]], ptr [[STOREADDR:%.*]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 761129979445c..102e67a126d3c 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -236,8 +236,7 @@ define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
-; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT: [[L_SEL:%.*]] = call double @llvm.maxnum.f64(double [[L_B]], double [[L_A]])
; CHECK-NEXT: ret double [[L_SEL]]
;
entry:
@@ -255,8 +254,7 @@ define double @preserve_load_metadata_after_select_transform_metadata_missing_1(
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]]
; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
-; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT: [[L_SEL:%.*]] = call double @llvm.maxnum.f64(dou...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
02ac735
to
affb811
Compare
affb811
to
76a3098
Compare
@dtcxzyw I want to do performance testing for this change as it seems to have changed many testcases, and the testcase in the issue was mined from ffmpeg. Does your llvm-opt-benchmark script do performance bechmarking? |
No. It doesn't provide actual performance numbers. |
@zyw-bot mfuzz |
This is a canonicalize introducing transformation. If %arg0 is a denormal, and the transformed result will be flushed but it wouldn't in the original |
That's a fair point, I was not aware of this while working. Will having a flush-all attribute on this instruction guarantee FTZ even in the backend? |
Can you refine llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp Lines 4171 to 4196 in 24b03d3
ieee .
|
ack |
to
fcmp + select patterns can be folded into fmax/fmin. This patch handles this transformation for OGT and OLT.
Fixes #157486