[SimplifyLibCalls] powf(x, sitofp(n)) -> powi(x, n)

Summary: Partially solves https://bugs.llvm.org/show_bug.cgi?id=42190 Reviewers: spatel, nikic, efriedma Reviewed By: efriedma Subscribers: efriedma, nikic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63038 llvm-svn: 364940
llvm · Jul 2, 2019 · cb1a5a7 · cb1a5a7
1 parent 893bbc9
commit cb1a5a7
Show file tree

Hide file tree

Showing 3 changed files with 390 additions and 88 deletions.
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1322,12 +1322,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
-    bool IsInteger    = BaseF->isInteger(),
-         IsReciprocal = BaseR.isInteger();
+    bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
     APSInt NI(64, false);
     if ((IsInteger || IsReciprocal) &&
-        !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+        NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK &&
         NI > 1 && NI.isPowerOf2()) {
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1410,12 +1410,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
   return Sqrt;
 }
 
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+                                           IRBuilder<> &B) {
+  Value *Args[] = {Base, Expo};
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  return B.CreateCall(F, Args);
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
-  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Value *Base = Pow->getArgOperand(0);
+  Value *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
+  Module *M = Pow->getModule();
   Value *Shrunk = nullptr;
+  bool AllowApprox = Pow->hasApproxFunc();
   bool Ignored;
 
   // Bail out if simplifying libcalls to pow() is disabled.
@@ -1428,8 +1438,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
   // Shrink pow() to powf() if the arguments are single precision,
   // unless the result is expected to be double precision.
-  if (UnsafeFPShrink &&
-      Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+  if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+      hasFloatVersion(Name))
     Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
 
   // Evaluate special cases related to the base.
@@ -1438,6 +1448,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   if (match(Base, m_FPOne()))
     return Base;
 
+  // powf(x, sitofp(e)) -> powi(x, e)
+  // powf(x, uitofp(e)) -> powi(x, e)
+  if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+    Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
+    Value *NewExpo = nullptr;
+    unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
+    if (isa<SIToFPInst>(Expo) && BitWidth == 32)
+      NewExpo = IntExpo;
+    else if (BitWidth < 32)
+      NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
+                                      : B.CreateZExt(IntExpo, B.getInt32Ty());
+    if (NewExpo)
+      return createPowWithIntegerExponent(Base, NewExpo, M, B);
+  }
+
   if (Value *Exp = replacePowWithExp(Pow, B))
     return Exp;
 
@@ -1449,7 +1474,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
   // pow(x, 0.0) -> 1.0
   if (match(Expo, m_SpecificFP(0.0)))
-      return ConstantFP::get(Ty, 1.0);
+    return ConstantFP::get(Ty, 1.0);
 
   // pow(x, 1.0) -> x
   if (match(Expo, m_FPOne()))
@@ -1462,9 +1487,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
 
+  if (!AllowApprox)
+    return Shrunk;
+
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
@@ -1488,9 +1516,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
         if (!Expo2.isInteger())
           return nullptr;
 
-        Sqrt =
-            getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                        Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+        Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                           Pow->doesNotAccessMemory(), M, B, TLI);
       }
 
       // We will memoize intermediate products of the Addition Chain.
@@ -1513,6 +1540,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
       return FMul;
     }
+
+    APSInt IntExpo(32, /*isUnsigned=*/false);
+    // powf(x, C) -> powi(x, C) iff C is a constant signed integer value
+    if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+        APFloat::opOK) {
+      return createPowWithIntegerExponent(
+          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+    }
   }
 
   return Shrunk;
@@ -3101,4 +3136,4 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
 
 FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
     const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
-    : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
+    : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll
@@ -11,9 +11,9 @@ declare double @pow(double, double)
 ; pow(x, 3.0)
 define double @test_simplify_3(double %x) {
 ; CHECK-LABEL: @test_simplify_3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]]
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
   ret double %1
@@ -22,9 +22,9 @@ define double @test_simplify_3(double %x) {
 ; powf(x, 4.0)
 define float @test_simplify_4f(float %x) {
 ; CHECK-LABEL: @test_simplify_4f(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
   ret float %1
@@ -33,9 +33,9 @@ define float @test_simplify_4f(float %x) {
 ; pow(x, 4.0)
 define double @test_simplify_4(double %x) {
 ; CHECK-LABEL: @test_simplify_4(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    ret double [[TMP2]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
   ret double %1
@@ -44,12 +44,12 @@ define double @test_simplify_4(double %x) {
 ; powf(x, <15.0, 15.0>)
 define <2 x float> @test_simplify_15(<2 x float> %x) {
 ; CHECK-LABEL: @test_simplify_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret <2 x float> [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret <2 x float> [[TMP4]]
 ;
   %1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.500000e+01, float 1.500000e+01>)
   ret <2 x float> %1
@@ -58,12 +58,12 @@ define <2 x float> @test_simplify_15(<2 x float> %x) {
 ; pow(x, -7.0)
 define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
 ; CHECK-LABEL: @test_simplify_neg_7(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
-; CHECK-NEXT:    ret <2 x double> [[TMP5]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP3]]
+; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
 ;
   %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -7.000000e+00, double -7.000000e+00>)
   ret <2 x double> %1
@@ -72,14 +72,14 @@ define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
 ; powf(x, -19.0)
 define float @test_simplify_neg_19(float %x) {
 ; CHECK-LABEL: @test_simplify_neg_19(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
-; CHECK-NEXT:    ret float [[TMP7]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]]
+; CHECK-NEXT:    ret float [[RECIPROCAL]]
 ;
   %1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01)
   ret float %1
@@ -98,12 +98,12 @@ define double @test_simplify_11_23(double %x) {
 ; powf(x, 32.0)
 define float @test_simplify_32(float %x) {
 ; CHECK-LABEL: @test_simplify_32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]]
-; CHECK-NEXT:    ret float [[TMP5]]
+; CHECK-NEXT:    ret float [[TMP4]]
 ;
   %1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01)
   ret float %1
@@ -112,7 +112,7 @@ define float @test_simplify_32(float %x) {
 ; pow(x, 33.0)
 define double @test_simplify_33(double %x) {
 ; CHECK-LABEL: @test_simplify_33(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 3.300000e+01)
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[X:%.*]], i32 33)
 ; CHECK-NEXT:    ret double [[TMP1]]
 ;
   %1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
@@ -122,8 +122,8 @@ define double @test_simplify_33(double %x) {
 ; pow(x, 16.5) with double
 define double @test_simplify_16_5(double %x) {
 ; CHECK-LABEL: @test_simplify_16_5(
-; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
@@ -137,8 +137,8 @@ define double @test_simplify_16_5(double %x) {
 ; pow(x, -16.5) with double
 define double @test_simplify_neg_16_5(double %x) {
 ; CHECK-LABEL: @test_simplify_neg_16_5(
-; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
@@ -214,10 +214,10 @@ define <2 x double> @test_simplify_7_5(<2 x double> %x) {
 define <4 x float> @test_simplify_3_5(<4 x float> %x) {
 ; CHECK-LABEL: @test_simplify_3_5(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <4 x float> [[X]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[X]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[SQRT]]
-; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+; CHECK-NEXT:    [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]]
+; CHECK-NEXT:    ret <4 x float> [[TMP2]]
 ;
   %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 3.500000e+00, float 3.500000e+00, float 3.500000e+00, float 3.500000e+00>)
   ret <4 x float> %1