[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast #87047

vfdff · 2024-03-29T08:59:29Z

foldFDivPowDivisor can address A / powi(x, y) to A * powi(x, -y),
while for small const value y, for example y=2, the instcombine will
transform powi(x, 2) to fmul x, x, so it is not optimal for A / powi(x, 2).

Fix #77171

llvmbot · 2024-03-29T09:00:01Z

@llvm/pr-subscribers-llvm-transforms

Author: Allen (vfdff)

Changes

foldFDivPowDivisor can address A / powi(x, y) to A * powi(x, -y),
while for small const value y, for example y=2, the instcombine will
transform powi(x, 2) to fmul x, x, so it is not optimal for A / powi(x, 2).

Fix #77171

Full diff: https://github.com/llvm/llvm-project/pull/87047.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (+34-14)
(modified) llvm/test/Transforms/InstCombine/powi.ll (+8)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8c698e52b5a0e6..4f4c8ad062cd87 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -580,15 +580,21 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
 
 Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
-                           Value *Y, Value *Z) {
+                           Value *Y, Value *Z, bool UpdateUsers = true) {
     InstCombiner::BuilderTy &Builder = IC.Builder;
     Value *YZ = Builder.CreateAdd(Y, Z);
-    auto *NewPow = Builder.CreateIntrinsic(
+    Instruction *NewPow = Builder.CreateIntrinsic(
         Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
-    return IC.replaceInstUsesWith(I, NewPow);
+    if (UpdateUsers)
+      return IC.replaceInstUsesWith(I, NewPow);
+    else
+      return NewPow;
   };
 
   Value *X, *Y, *Z;
+  unsigned Opcode = I.getOpcode();
+  assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
+         "Unexpected opcode");
 
   // powi(X, Y) * X --> powi(X, Y+1)
   // X * powi(X, Y) --> powi(X, Y+1)
@@ -603,7 +609,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   // powi(x, y) * powi(x, z) -> powi(x, y + z)
   Value *Op0 = I.getOperand(0);
   Value *Op1 = I.getOperand(1);
-  if (I.isOnlyUserOfAnyOperand() &&
+  if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
       match(Op0, m_AllowReassoc(
                      m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
@@ -611,16 +617,30 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
       Y->getType() == Z->getType())
     return createPowiExpr(I, *this, X, Y, Z);
 
-  // powi(X, Y) / X --> powi(X, Y-1)
-  // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
-  // are required.
-  // TODO: Multi-use may be also better off creating Powi(x,y-1)
-  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
-      match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
-                     m_Specific(Op1), m_Value(Y))))) &&
-      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
-    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
-    return createPowiExpr(I, *this, Op1, Y, NegOne);
+  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
+    // powi(X, Y) / X --> powi(X, Y-1)
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Specific(Op1), m_Value(Y))))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      return createPowiExpr(I, *this, Op1, Y, NegOne);
+    }
+
+    // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
+    // This is legal when (Y - 1) can't wraparound, in which case reassoc and
+    // nnan are required.
+    // TODO: Multi-use may be also better off creating Powi(x,y-1)
+    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                       m_Value(X), m_Value(Y))))) &&
+        match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
+        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+      auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne, false);
+      return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
+    }
   }
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/powi.ll b/llvm/test/Transforms/InstCombine/powi.ll
index 6c0575e8b71971..abf08542e1df0a 100644
--- a/llvm/test/Transforms/InstCombine/powi.ll
+++ b/llvm/test/Transforms/InstCombine/powi.ll
@@ -401,6 +401,14 @@ define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
   ret double %div
 }
 
+; powi(X,C1)/ (X * Z) --> powi(X,C1 - 1)/ Z
+define double @fdiv_fmul_powi(double %a) {
+  %pow = call reassoc double @llvm.powi.f64.i32(double %a, i32 5)
+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}
+
 ; powi(X, Y) * X --> powi(X, Y+1)
 define double @powi_fmul_powi_x(double noundef %x) {
 ; CHECK-LABEL: @powi_fmul_powi_x(

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

arsenm · 2024-03-29T09:20:24Z

llvm/test/Transforms/InstCombine/powi.ll

+  %square = fmul reassoc double %a, %a
+  %div = fdiv reassoc nnan double %pow, %square
+  ret double %div
+}


Needs all the missing flag negative tests, negative may-wrap cases, plus vectors. Also variable case that may not wrap

add new tests according your comment, thanks

arsenm · 2024-03-29T09:22:28Z

llvm/test/Transforms/InstCombine/powi.ll

@@ -401,6 +401,18 @@ define double @fdiv_pow_powi_negative_variable(double %x, i32 %y) {
  ret double %div
 }

+; powi(X,C1)/ (X * Z) --> powi(X,C1 - 1)/ Z


The final result doesn't have the divide in the comment?

yes, there is 2 steps in the combine
a) powi(X,C1)/ (X * X) --> powi(X,C1-1)/ X -- this patch improves this case
b) powi(X,C1-1)/ X --> powi(X,C1-2) -- already supported by prior patch

arsenm · 2024-03-29T09:24:16Z

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

@@ -580,15 +580,21 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {

 Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
  auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
-                           Value *Y, Value *Z) {
+                           Value *Y, Value *Z, bool UpdateUsers = true) {


Don't understand why UpdateUsers would be an option

In the new branch , the result of *createPowiExpr(I, this, X, Y, NegOne, false) is not return directly.
so it will missing to replace the operand in the later new created instruction BinaryOperator::CreateFDivFMF(NewPow, Z, &I)

Then can't the individual uses take care of the insert? Pull out the insertion handling to the context instead of passing this in

Apply your comment, thanks @arsenm

vfdff · 2024-04-14T02:55:47Z

ping

arsenm · 2024-04-17T14:42:18Z

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

+    return replaceInstUsesWith(I, NewPow);
+  }
+
+  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {


We could probably do a no-nan operand check, but it will multiply the complexity I think

Thanks @arsenm.
so need I add a comment[1] or leave it as it is?

[1] TODO: Add a no-nan operand check

I don't feel like adding extra no-nan operand check because It is same as the following case, where we can ignore the possibility that X is infinity because INF/INF is NaN.

https://github.com/llvm/llvm-project/blob/6cebd3577245a687947506ff423ea726ccd80849/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.
cpp#L1885

Since PR86428, foldPowiReassoc is called by both FMul and FDiv, as the optimization of FDiv is placed after the FMul, so now it is correct we don't add the checking of FDiv for powi(X, Y) / X. But, we may add more matching scenarios later, so add the checking opcode explicitly is easier to understand.

foldFDivPowDivisor can address A / powi(x, y) to A * powi(x, -y), while for small const value y, for example y=2, the instcombine will transform powi(x, 2) to fmul x, x, so it is not optimal for A / powi(x, 2). Fix llvm#77171

vfdff requested review from dtcxzyw, goldsteinn and jcranmer-intel March 29, 2024 08:59

vfdff requested a review from nikic as a code owner March 29, 2024 08:59

llvmbot added the llvm:transforms label Mar 29, 2024

dtcxzyw requested a review from arsenm March 29, 2024 09:02

vfdff force-pushed the PR77171 branch from 6612fd1 to 5cc375b Compare March 29, 2024 09:05

arsenm reviewed Mar 29, 2024

View reviewed changes

vfdff force-pushed the PR77171 branch 2 times, most recently from d05ae38 to 141fa63 Compare March 29, 2024 12:10

vfdff requested a review from arsenm April 1, 2024 09:13

nikic removed their request for review April 15, 2024 00:52

arsenm approved these changes Apr 17, 2024

View reviewed changes

vfdff added 2 commits April 21, 2024 12:07

[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast

56ca5ec

foldFDivPowDivisor can address A / powi(x, y) to A * powi(x, -y), while for small const value y, for example y=2, the instcombine will transform powi(x, 2) to fmul x, x, so it is not optimal for A / powi(x, 2). Fix llvm#77171

vfdff force-pushed the PR77171 branch from 0e9cbb0 to 56ca5ec Compare April 21, 2024 04:10

vfdff merged commit 56ca5ec into llvm:main Apr 21, 2024
3 of 4 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast #87047

[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast #87047

vfdff commented Mar 29, 2024

llvmbot commented Mar 29, 2024

arsenm Mar 29, 2024

vfdff Mar 29, 2024

arsenm Mar 29, 2024

vfdff Mar 29, 2024

arsenm Mar 29, 2024

vfdff Mar 29, 2024 •

edited

arsenm Apr 3, 2024

vfdff Apr 9, 2024

vfdff commented Apr 14, 2024

arsenm Apr 17, 2024

vfdff Apr 18, 2024

vfdff Apr 21, 2024

[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast #87047

[InstCombine] Optimize powi(X, Y)/ (X * Z) with Ofast #87047

Conversation

vfdff commented Mar 29, 2024

llvmbot commented Mar 29, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

vfdff Mar 29, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

vfdff commented Apr 14, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

vfdff Mar 29, 2024 •

edited