Skip to content

Commit

Permalink
[SimplifyLibCalls] powf(x, sitofp(n)) -> powi(x, n)
Browse files Browse the repository at this point in the history
Summary:
Partially solves https://bugs.llvm.org/show_bug.cgi?id=42190



Reviewers: spatel, nikic, efriedma

Reviewed By: efriedma

Subscribers: efriedma, nikic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63038

llvm-svn: 364940
  • Loading branch information
davidbolvansky committed Jul 2, 2019
1 parent 893bbc9 commit cb1a5a7
Show file tree
Hide file tree
Showing 3 changed files with 390 additions and 88 deletions.
59 changes: 47 additions & 12 deletions llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
Expand Up @@ -1322,12 +1322,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
bool IsInteger = BaseF->isInteger(),
IsReciprocal = BaseR.isInteger();
bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
APSInt NI(64, false);
if ((IsInteger || IsReciprocal) &&
!NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK &&
NI > 1 && NI.isPowerOf2()) {
double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
Expand Down Expand Up @@ -1410,12 +1410,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
return Sqrt;
}

/// Build a call to the llvm.powi intrinsic.
///
/// The intrinsic declaration is looked up in (or added to) module \p M and is
/// overloaded on the type of \p Base. The emitted call takes \p Base and
/// \p Expo, in that order, and is created through builder \p B.
///
/// \returns the newly created call instruction.
static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
                                           IRBuilder<> &B) {
  // powi is overloaded only on its floating-point operand type.
  Function *PowiDecl =
      Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
  Value *CallOperands[] = {Base, Expo};
  return B.CreateCall(PowiDecl, CallOperands);
}

Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
Value *Base = Pow->getArgOperand(0);
Value *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
StringRef Name = Callee->getName();
Type *Ty = Pow->getType();
Module *M = Pow->getModule();
Value *Shrunk = nullptr;
bool AllowApprox = Pow->hasApproxFunc();
bool Ignored;

// Bail out if simplifying libcalls to pow() is disabled.
Expand All @@ -1428,8 +1438,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {

// Shrink pow() to powf() if the arguments are single precision,
// unless the result is expected to be double precision.
if (UnsafeFPShrink &&
Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
hasFloatVersion(Name))
Shrunk = optimizeBinaryDoubleFP(Pow, B, true);

// Evaluate special cases related to the base.
Expand All @@ -1438,6 +1448,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (match(Base, m_FPOne()))
return Base;

// powf(x, sitofp(e)) -> powi(x, e)
// powf(x, uitofp(e)) -> powi(x, e)
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
Value *NewExpo = nullptr;
unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
if (isa<SIToFPInst>(Expo) && BitWidth == 32)
NewExpo = IntExpo;
else if (BitWidth < 32)
NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
: B.CreateZExt(IntExpo, B.getInt32Ty());
if (NewExpo)
return createPowWithIntegerExponent(Base, NewExpo, M, B);
}

if (Value *Exp = replacePowWithExp(Pow, B))
return Exp;

Expand All @@ -1449,7 +1474,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {

// pow(x, 0.0) -> 1.0
if (match(Expo, m_SpecificFP(0.0)))
return ConstantFP::get(Ty, 1.0);
return ConstantFP::get(Ty, 1.0);

// pow(x, 1.0) -> x
if (match(Expo, m_FPOne()))
Expand All @@ -1462,9 +1487,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;

if (!AllowApprox)
return Shrunk;

// pow(x, n) -> x * x * x * ...
const APFloat *ExpoF;
if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
if (match(Expo, m_APFloat(ExpoF))) {
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
// If the exponent is an integer+0.5 we generate a call to sqrt and an
// additional fmul.
Expand All @@ -1488,9 +1516,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (!Expo2.isInteger())
return nullptr;

Sqrt =
getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
Pow->doesNotAccessMemory(), M, B, TLI);
}

// We will memoize intermediate products of the Addition Chain.
Expand All @@ -1513,6 +1540,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {

return FMul;
}

APSInt IntExpo(32, /*isUnsigned=*/false);
// powf(x, C) -> powi(x, C) iff C is a constant signed integer value
if (ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK) {
return createPowWithIntegerExponent(
Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
}
}

return Shrunk;
Expand Down Expand Up @@ -3101,4 +3136,4 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {

FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
: TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
: TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
76 changes: 38 additions & 38 deletions llvm/test/Transforms/InstCombine/pow-4.ll
Expand Up @@ -11,9 +11,9 @@ declare double @pow(double, double)
; pow(x, 3.0)
define double @test_simplify_3(double %x) {
; CHECK-LABEL: @test_simplify_3(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]]
; CHECK-NEXT: ret double [[TMP2]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
ret double %1
Expand All @@ -22,9 +22,9 @@ define double @test_simplify_3(double %x) {
; powf(x, 4.0)
define float @test_simplify_4f(float %x) {
; CHECK-LABEL: @test_simplify_4f(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: ret float [[TMP1]]
;
%1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
ret float %1
Expand All @@ -33,9 +33,9 @@ define float @test_simplify_4f(float %x) {
; pow(x, 4.0)
define double @test_simplify_4(double %x) {
; CHECK-LABEL: @test_simplify_4(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret double [[TMP2]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
ret double %1
Expand All @@ -44,12 +44,12 @@ define double @test_simplify_4(double %x) {
; powf(x, <15.0, 15.0>)
define <2 x float> @test_simplify_15(<2 x float> %x) {
; CHECK-LABEL: @test_simplify_15(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: ret <2 x float> [[TMP5]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: ret <2 x float> [[TMP4]]
;
%1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.500000e+01, float 1.500000e+01>)
ret <2 x float> %1
Expand All @@ -58,12 +58,12 @@ define <2 x float> @test_simplify_15(<2 x float> %x) {
; pow(x, -7.0)
define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
; CHECK-LABEL: @test_simplify_neg_7(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; CHECK-NEXT: ret <2 x double> [[TMP5]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP3]]
; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -7.000000e+00, double -7.000000e+00>)
ret <2 x double> %1
Expand All @@ -72,14 +72,14 @@ define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
; powf(x, -19.0)
define float @test_simplify_neg_19(float %x) {
; CHECK-LABEL: @test_simplify_neg_19(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]]
; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
; CHECK-NEXT: ret float [[TMP7]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]]
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]]
; CHECK-NEXT: ret float [[RECIPROCAL]]
;
%1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01)
ret float %1
Expand All @@ -98,12 +98,12 @@ define double @test_simplify_11_23(double %x) {
; powf(x, 32.0)
define float @test_simplify_32(float %x) {
; CHECK-LABEL: @test_simplify_32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]]
; CHECK-NEXT: ret float [[TMP5]]
; CHECK-NEXT: ret float [[TMP4]]
;
%1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01)
ret float %1
Expand All @@ -112,7 +112,7 @@ define float @test_simplify_32(float %x) {
; pow(x, 33.0)
define double @test_simplify_33(double %x) {
; CHECK-LABEL: @test_simplify_33(
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 3.300000e+01)
; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64(double [[X:%.*]], i32 33)
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
Expand All @@ -122,8 +122,8 @@ define double @test_simplify_33(double %x) {
; pow(x, 16.5) with double
define double @test_simplify_16_5(double %x) {
; CHECK-LABEL: @test_simplify_16_5(
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
Expand All @@ -137,8 +137,8 @@ define double @test_simplify_16_5(double %x) {
; pow(x, -16.5) with double
define double @test_simplify_neg_16_5(double %x) {
; CHECK-LABEL: @test_simplify_neg_16_5(
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
Expand Down Expand Up @@ -214,10 +214,10 @@ define <2 x double> @test_simplify_7_5(<2 x double> %x) {
define <4 x float> @test_simplify_3_5(<4 x float> %x) {
; CHECK-LABEL: @test_simplify_3_5(
; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[X]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[SQRT]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]]
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 3.500000e+00, float 3.500000e+00, float 3.500000e+00, float 3.500000e+00>)
ret <4 x float> %1
Expand Down

0 comments on commit cb1a5a7

Please sign in to comment.