Skip to content

Conversation

VedantParanjape
Copy link
Contributor

@VedantParanjape VedantParanjape commented Sep 18, 2025

%v0 = fcmp nsz ogt float %arg0, 0.000000e+00
%v1 = select nsz i1 %v0, float %arg0, float 0.000000e+00 

to

%v0 = call float @llvm.maxnum.f32(float %arg0, float 0.000000e+00

fcmp + select patterns can be folded into fmax/fmin. This patch handles this transformation for OGT and OLT.

Fixes #157486

%v0 = fcmp nsz ogt float %arg0, 0.000000e+00
%v1 = select nsz i1 %v0, float %arg0, float 0.000000e+00
====
%v0 = call float @llvm.maxnum.f32(float %arg0, float 0.000000e+00[)
====

fcmp + select patterns can be folded into fmax/fmin. This patch handles
this transformation for OGT and OLT.

Fixes llvm#157486
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Sep 18, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 18, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-transforms

Author: Vedant Paranjape (VedantParanjape)

Changes

%v0 = fcmp nsz ogt float %arg0, 0.000000e+00
%v1 = select nsz i1 %v0, float %arg0, float 0.000000e+00 ====
%v0 = call float @llvm.maxnum.f32(float %arg0, float 0.000000e+00[) ====

fcmp + select patterns can be folded into fmax/fmin. This patch handles this transformation for OGT and OLT.

Fixes #157486


Patch is 41.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159652.diff

14 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+57)
  • (modified) llvm/test/Transforms/InstCombine/clamp-to-minmax.ll (+24-40)
  • (modified) llvm/test/Transforms/InstCombine/fcmp-select.ll (+3-6)
  • (modified) llvm/test/Transforms/InstCombine/fptrunc.ll (+3-2)
  • (modified) llvm/test/Transforms/InstCombine/load-bitcast-select.ll (+2-4)
  • (modified) llvm/test/Transforms/InstCombine/loadstore-metadata.ll (+5-10)
  • (modified) llvm/test/Transforms/InstCombine/minmax-fold.ll (+2-4)
  • (modified) llvm/test/Transforms/InstCombine/minmax-fp.ll (+11-22)
  • (modified) llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll (+2-4)
  • (modified) llvm/test/Transforms/InstCombine/select-select.ll (+3-6)
  • (modified) llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll (+5-10)
  • (modified) llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll (+2-4)
  • (modified) llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll (+6-8)
  • (modified) llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll (+1-2)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8f9d0bf6240d5..028a432cdba44 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3963,6 +3963,60 @@ static Value *foldSelectIntoAddConstant(SelectInst &SI,
   return nullptr;
 }
 
+// fcmp + sel patterns into max/min intrinsic.
+static Value *foldSelectICmpIntoMaxMin(SelectInst &SI,
+                                       InstCombiner::BuilderTy &Builder) {
+
+  auto TryFoldIntoMaxMinIntrinsic =
+      [&Builder, &SI](CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS,
+                      Value *TVal, Value *FVal) -> Value * {
+    // Early exit if the operands are not in the expected form.
+    if ((CmpRHS != TVal || CmpLHS != FVal) &&
+        (CmpLHS != TVal || CmpRHS != FVal))
+      return nullptr;
+
+    bool isSwapped = (CmpLHS == FVal && CmpRHS == TVal);
+    // Only these relational predicates can be transformed into maxnum/minnum
+    // intrinsic.
+    // X > C ? X : C --> maxnum(X, C)
+    // X > C ? C : X --> minnum(X, C)
+    if (Pred == CmpInst::FCMP_OGT) {
+      Intrinsic::ID MaxMinIID =
+          isSwapped ? Intrinsic::minnum : Intrinsic::maxnum;
+      return Builder.CreateIntrinsic(SI.getType(), MaxMinIID, {TVal, FVal},
+                                     &SI);
+    }
+
+    // X < C ? X : C --> minnum(X, C)
+    // X < C ? C : X --> maxnum(X, C)
+    if (Pred == CmpInst::FCMP_OLT) {
+      Intrinsic::ID MaxMinIID =
+          isSwapped ? Intrinsic::maxnum : Intrinsic::minnum;
+      return Builder.CreateIntrinsic(SI.getType(), MaxMinIID, {TVal, FVal},
+                                     &SI);
+    }
+
+    return nullptr;
+  };
+
+  // select((fcmp Pred, X, Y), X, Y)
+  //      => minnum/maxnum(X, Y)
+  //
+  // Pred := OGT and OLT
+  Value *X, *Y;
+  Value *TVal, *FVal;
+  CmpPredicate Pred;
+
+  // Note: OneUse check for `Cmp` is necessary because it makes sure that other
+  // InstCombine folds don't undo this transformation and cause an infinite
+  // loop. Furthermore, it could also increase the operation count.
+  if (match(&SI, m_OneUse(m_Select(m_OneUse(m_FCmp(Pred, m_Value(X), m_Value(Y))),
+                          m_Value(TVal), m_Value(FVal)))))
+    return TryFoldIntoMaxMinIntrinsic(Pred, X, Y, TVal, FVal);
+
+  return nullptr;
+}
+
 static Value *foldSelectBitTest(SelectInst &Sel, Value *CondVal, Value *TrueVal,
                                 Value *FalseVal,
                                 InstCombiner::BuilderTy &Builder,
@@ -4455,6 +4509,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   if (Value *V = foldSelectIntoAddConstant(SI, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectICmpIntoMaxMin(SI, Builder))
+    return replaceInstUsesWith(SI, V);
+
   // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
   // Load inst is intentionally not checked for hasOneUse()
   if (match(FalseVal, m_Zero()) &&
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
index 7f3276608c5da..9d33a7835af8a 100644
--- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -5,10 +5,9 @@
 ; (X < C1) ? C1 : MIN(X, C2)
 define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
 ; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
-; CHECK-NEXT:    [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt float [[X]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast olt float %x, 255.0
@@ -21,10 +20,9 @@ define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
 ; (X <= C1) ? C1 : MIN(X, C2)
 define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
 ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
-; CHECK-NEXT:    [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ole float [[X]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast olt float %x, 255.0
@@ -37,10 +35,9 @@ define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
 ; (X > C1) ? C1 : MAX(X, C2)
 define float @clamp_float_fast_ordered_strict_minmax(float %x) {
 ; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
-; CHECK-NEXT:    [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt float [[X]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ogt float %x, 1.0
@@ -53,10 +50,9 @@ define float @clamp_float_fast_ordered_strict_minmax(float %x) {
 ; (X >= C1) ? C1 : MAX(X, C2)
 define float @clamp_float_fast_ordered_nonstrict_minmax(float %x) {
 ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
-; CHECK-NEXT:    [[R1:%.*]] = select nnan ninf i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast oge float [[X]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ogt float %x, 1.0
@@ -191,8 +187,7 @@ define float @clamp_negative_same_op(float %x) {
 ; First, check that we don't do bad things in the presence of signed zeros
 define float @clamp_float_with_zero1(float %x) {
 ; CHECK-LABEL: @clamp_float_with_zero1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ole float [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -206,8 +201,7 @@ define float @clamp_float_with_zero1(float %x) {
 
 define float @clamp_float_with_zero2(float %x) {
 ; CHECK-LABEL: @clamp_float_with_zero2(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[X]], 0.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -228,8 +222,7 @@ define float @clamp_float_with_zero2(float %x) {
 ; (X < C1) ? C1 : MIN(X, C2)
 define float @clamp_float_ordered_strict_maxmin1(float %x) {
 ; CHECK-LABEL: @clamp_float_ordered_strict_maxmin1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -259,8 +252,7 @@ define float @clamp_float_ordered_strict_maxmin2(float %x) {
 ; (X <= C1) ? C1 : MIN(X, C2)
 define float @clamp_float_ordered_nonstrict_maxmin1(float %x) {
 ; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -290,8 +282,7 @@ define float @clamp_float_ordered_nonstrict_maxmin2(float %x) {
 ; (X > C1) ? C1 : MAX(X, C2)
 define float @clamp_float_ordered_strict_minmax1(float %x) {
 ; CHECK-LABEL: @clamp_float_ordered_strict_minmax1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -321,8 +312,7 @@ define float @clamp_float_ordered_strict_minmax2(float %x) {
 ; (X >= C1) ? C1 : MAX(X, C2)
 define float @clamp_float_ordered_nonstrict_minmax1(float %x) {
 ; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -355,8 +345,7 @@ define float @clamp_float_ordered_nonstrict_minmax2(float %x) {
 ; (X < C1) ? C1 : MIN(X, C2)
 define float @clamp_float_unordered_strict_maxmin1(float %x) {
 ; CHECK-LABEL: @clamp_float_unordered_strict_maxmin1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -386,8 +375,7 @@ define float @clamp_float_unordered_strict_maxmin2(float %x) {
 ; (X <= C1) ? C1 : MIN(X, C2)
 define float @clamp_float_unordered_nonstrict_maxmin1(float %x) {
 ; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -417,8 +405,7 @@ define float @clamp_float_unordered_nonstrict_maxmin2(float %x) {
 ; (X > C1) ? C1 : MAX(X, C2)
 define float @clamp_float_unordered_strict_minmax1(float %x) {
 ; CHECK-LABEL: @clamp_float_unordered_strict_minmax1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -448,8 +435,7 @@ define float @clamp_float_unordered_strict_minmax2(float %x) {
 ; (X >= C1) ? C1 : MAX(X, C2)
 define float @clamp_float_unordered_nonstrict_minmax1(float %x) {
 ; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax1(
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
 ; CHECK-NEXT:    ret float [[R]]
@@ -527,8 +513,7 @@ define float @mixed_clamp_to_float_1(i32 %x) {
 
 define i32 @mixed_clamp_to_i32_1(float %x) {
 ; CHECK-LABEL: @mixed_clamp_to_i32_1(
-; CHECK-NEXT:    [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT:    [[FLOAT_MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
 ; CHECK-NEXT:    [[I32_X:%.*]] = fptosi float [[X]] to i32
 ; CHECK-NEXT:    [[LO_CMP:%.*]] = icmp eq i32 [[I32_X]], 0
@@ -561,8 +546,7 @@ define float @mixed_clamp_to_float_2(i32 %x) {
 
 define i32 @mixed_clamp_to_i32_2(float %x) {
 ; CHECK-LABEL: @mixed_clamp_to_i32_2(
-; CHECK-NEXT:    [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
-; CHECK-NEXT:    [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT:    [[FLOAT_MIN:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
 ; CHECK-NEXT:    [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
 ; CHECK-NEXT:    [[LO_CMP:%.*]] = fcmp olt float [[X]], 1.000000e+00
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[LO_CMP]], i32 1, i32 [[I32_MIN]]
diff --git a/llvm/test/Transforms/InstCombine/fcmp-select.ll b/llvm/test/Transforms/InstCombine/fcmp-select.ll
index b622c8636eccb..63e72fed55350 100644
--- a/llvm/test/Transforms/InstCombine/fcmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp-select.ll
@@ -202,10 +202,8 @@ define <2 x i1> @test_fcmp_select_const_const_vec(<2 x double> %x) {
 
 define double @test_fcmp_select_clamp(double %x) {
 ; CHECK-LABEL: @test_fcmp_select_clamp(
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[X:%.*]], 9.000000e-01
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], double 9.000000e-01, double [[X]]
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp olt double [[SEL1]], 5.000000e-01
-; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], double 5.000000e-01, double [[SEL1]]
+; CHECK-NEXT:    [[SEL1:%.*]] = call double @llvm.minnum.f64(double [[X:%.*]], double 9.000000e-01)
+; CHECK-NEXT:    [[SEL2:%.*]] = call double @llvm.maxnum.f64(double [[SEL1]], double 5.000000e-01)
 ; CHECK-NEXT:    ret double [[SEL2]]
 ;
   %cmp1 = fcmp ogt double %x, 9.000000e-01
@@ -284,8 +282,7 @@ define i1 @test_fcmp_ord_select_fcmp_oeq_var_const(double %x) {
 
 define float @test_select_nnan_nsz_fcmp_olt(float %x) {
 ; CHECK-LABEL: @test_select_nnan_nsz_fcmp_olt(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt float [[X:%.*]], -0.000000e+00
-; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[TMP1]], float [[X]], float -0.000000e+00
+; CHECK-NEXT:    [[SEL1:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float -0.000000e+00)
 ; CHECK-NEXT:    ret float [[SEL1]]
 ;
   %cmp = fcmp olt float %x, 0.000000e+00
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index 0b5d8b3cd06e0..faf9ec2aab5f0 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -116,8 +116,9 @@ define half @fptrunc_select_true_val_extra_use(half %x, float %y, i1 %cond) {
 
 define half @fptrunc_max(half %arg) {
 ; CHECK-LABEL: @fptrunc_max(
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt half [[ARG:%.*]], 0xH0000
-; CHECK-NEXT:    [[NARROW_SEL:%.*]] = select i1 [[CMP]], half 0xH0000, half [[ARG]]
+; CHECK-NEXT:    [[EXT:%.*]] = fpext half [[ARG:%.*]] to double
+; CHECK-NEXT:    [[MAX:%.*]] = call double @llvm.maxnum.f64(double [[EXT]], double 0.000000e+00)
+; CHECK-NEXT:    [[NARROW_SEL:%.*]] = fptrunc double [[MAX]] to half
 ; CHECK-NEXT:    ret half [[NARROW_SEL]]
 ;
   %ext = fpext half %arg to double
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
index 3c43618b13569..8fc30442c2766 100644
--- a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
+++ b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
@@ -20,8 +20,7 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr @b, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[DOTV:%.*]] = select i1 [[CMP_I]], float [[TMP2]], float [[TMP1]]
+; CHECK-NEXT:    [[DOTV:%.*]] = call float @llvm.maxnum.f32(float [[TMP2]], float [[TMP1]])
 ; CHECK-NEXT:    store float [[DOTV]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
@@ -79,8 +78,7 @@ define void @bitcasted_minmax_with_select_of_pointers(ptr %loadaddr1, ptr %loada
 ; CHECK-LABEL: @bitcasted_minmax_with_select_of_pointers(
 ; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[LOADADDR1:%.*]], align 4
 ; CHECK-NEXT:    [[LD2:%.*]] = load float, ptr [[LOADADDR2:%.*]], align 4
-; CHECK-NEXT:    [[COND:%.*]] = fcmp ogt float [[LD1]], [[LD2]]
-; CHECK-NEXT:    [[LD_V:%.*]] = select i1 [[COND]], float [[LD1]], float [[LD2]]
+; CHECK-NEXT:    [[LD_V:%.*]] = call float @llvm.maxnum.f32(float [[LD1]], float [[LD2]])
 ; CHECK-NEXT:    store float [[LD_V]], ptr [[STOREADDR:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 761129979445c..102e67a126d3c 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -236,8 +236,7 @@ define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
 ; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
-; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = call double @llvm.maxnum.f64(double [[L_B]], double [[L_A]])
 ; CHECK-NEXT:    ret double [[L_SEL]]
 ;
 entry:
@@ -255,8 +254,7 @@ define double @preserve_load_metadata_after_select_transform_metadata_missing_1(
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[L_A:%.*]] = load double, ptr [[A]], align 8, !llvm.access.group [[META6]]
 ; CHECK-NEXT:    [[L_B:%.*]] = load double, ptr [[B]], align 8, !tbaa [[SCALAR_TYPE_TBAA0]], !llvm.access.group [[META6]]
-; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
-; CHECK-NEXT:    [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
+; CHECK-NEXT:    [[L_SEL:%.*]] = call double @llvm.maxnum.f64(dou...
[truncated]

Copy link

github-actions bot commented Sep 18, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@llvmbot llvmbot added the clang Clang issues not falling into any other category label Sep 18, 2025
@VedantParanjape
Copy link
Contributor Author

VedantParanjape commented Sep 18, 2025

@dtcxzyw I want to do performance testing for this change as it seems to have changed many testcases, and the testcase in the issue was mined from ffmpeg. Does your llvm-opt-benchmark script do performance bechmarking?

@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 19, 2025

Does your llvm-opt-benchmark script do performance bechmarking?

No. It doesn't provide actual performance numbers.

@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 19, 2025

@zyw-bot mfuzz

@dtcxzyw dtcxzyw requested a review from arsenm September 19, 2025 02:38
@arsenm
Copy link
Contributor

arsenm commented Sep 19, 2025

This is a canonicalize introducing transformation. If %arg0 is a denormal, and the transformed result will be flushed but it wouldn't in the original

@arsenm arsenm added the floating-point Floating-point math label Sep 19, 2025
@VedantParanjape
Copy link
Contributor Author

VedantParanjape commented Sep 19, 2025

This is a canonicalize introducing transformation. If %arg0 is a denormal, and the transformed result will be flushed but it wouldn't in the original

That's a fair point, I was not aware of this while working. Will having a flush-all attribute on this instruction guarantee FTZ even in the backend?

@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 19, 2025

Can you refine

// Canonicalize select of FP values where NaN and -0.0 are not valid as
// minnum/maxnum intrinsics.
if (SIFPOp->hasNoNaNs() &&
(SIFPOp->hasNoSignedZeros() ||
(SIFPOp->hasOneUse() &&
canIgnoreSignBitOfZero(*SIFPOp->use_begin())))) {
Value *X, *Y;
if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y)))) {
Value *BinIntr =
Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI);
if (auto *BinIntrInst = dyn_cast<Instruction>(BinIntr)) {
BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs());
BinIntrInst->setHasNoInfs(FCmp->hasNoInfs());
}
return replaceInstUsesWith(SI, BinIntr);
}
if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y)))) {
Value *BinIntr =
Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI);
if (auto *BinIntrInst = dyn_cast<Instruction>(BinIntr)) {
BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs());
BinIntrInst->setHasNoInfs(FCmp->hasNoInfs());
}
return replaceInstUsesWith(SI, BinIntr);
}
to handle this fold? As for the denormal flushing problem, we can check if the denormal mode of current function is ieee.

@VedantParanjape
Copy link
Contributor Author

Can you refine

// Canonicalize select of FP values where NaN and -0.0 are not valid as
// minnum/maxnum intrinsics.
if (SIFPOp->hasNoNaNs() &&
(SIFPOp->hasNoSignedZeros() ||
(SIFPOp->hasOneUse() &&
canIgnoreSignBitOfZero(*SIFPOp->use_begin())))) {
Value *X, *Y;
if (match(&SI, m_OrdOrUnordFMax(m_Value(X), m_Value(Y)))) {
Value *BinIntr =
Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI);
if (auto *BinIntrInst = dyn_cast<Instruction>(BinIntr)) {
BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs());
BinIntrInst->setHasNoInfs(FCmp->hasNoInfs());
}
return replaceInstUsesWith(SI, BinIntr);
}
if (match(&SI, m_OrdOrUnordFMin(m_Value(X), m_Value(Y)))) {
Value *BinIntr =
Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI);
if (auto *BinIntrInst = dyn_cast<Instruction>(BinIntr)) {
BinIntrInst->setHasNoNaNs(FCmp->hasNoNaNs());
BinIntrInst->setHasNoInfs(FCmp->hasNoInfs());
}
return replaceInstUsesWith(SI, BinIntr);
}

to handle this fold? As for the denormal flushing problem, we can check if the denormal mode of current function is ieee.

ack

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang Clang issues not falling into any other category floating-point Floating-point math llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Missed Optimization: Convert float clamp pattern to llvm.minnum/llvm.maxnum
4 participants