Skip to content

Commit

Permalink
Revert "[AggressiveInstCombine] Fold strcmp for short string literals"
Browse files Browse the repository at this point in the history
This reverts commit 8981520.
  • Loading branch information
nikic authored and tru committed Aug 14, 2023
1 parent c925b9b commit 04b4914
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 267 deletions.
4 changes: 0 additions & 4 deletions llvm/include/llvm/Analysis/ValueTracking.h
Expand Up @@ -124,10 +124,6 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);

/// Return true if the given instruction is only used in zero comparison
bool isOnlyUsedInZeroComparison(const Instruction *CxtI);

/// Return true if the given instruction is only used in zero equality comparison
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);

/// Return true if the given value is known to be non-zero when defined. For
Expand Down
Expand Up @@ -8,7 +8,7 @@
/// \file
///
/// AggressiveInstCombiner - Combine expression patterns to form expressions
/// with fewer, simple instructions.
/// with fewer, simple instructions. This pass does not modify the CFG.
///
//===----------------------------------------------------------------------===//

Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Analysis/ValueTracking.cpp
Expand Up @@ -261,13 +261,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
}

bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
return !I->user_empty() && all_of(I->users(), [](const User *U) {
ICmpInst::Predicate P;
return match(U, m_ICmp(P, m_Value(), m_Zero()));
});
}

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
return !I->user_empty() && all_of(I->users(), [](const User *U) {
ICmpInst::Predicate P;
Expand Down
217 changes: 54 additions & 163 deletions llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Expand Up @@ -19,7 +19,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
Expand All @@ -29,7 +28,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"

Expand Down Expand Up @@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
return true;
}

/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT) {
// Match a call to sqrt mathlib function.
auto *Call = dyn_cast<CallInst>(&I);
if (!Call)
return false;

Module *M = Call->getModule();
LibFunc Func;
if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
return false;

if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
return false;

// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
// would not end up lowering to a libcall anyway (which could change the value
// of errno), then:
// (1) errno won't be set.
// (2) it is safe to convert this to an intrinsic call.
Type *Ty = Call->getType();
Value *Arg = Call->getArgOperand(0);
if (TTI.haveFastSqrt(Ty) &&
(Call->hasNoNaNs() ||
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
&DT))) {
IRBuilder<> Builder(&I);
IRBuilderBase::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(Call->getFastMathFlags());

Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
I.replaceAllUsesWith(NewSqrt);

// Explicitly erase the old call because a call with side effects is not
// trivially dead.
I.eraseFromParent();
return true;
}

return false;
}

// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
Expand Down Expand Up @@ -869,159 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
return true;
}

/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT) {
Module *M = Call->getModule();

// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
// would not end up lowering to a libcall anyway (which could change the value
// of errno), then:
// (1) errno won't be set.
// (2) it is safe to convert this to an intrinsic call.
Type *Ty = Call->getType();
Value *Arg = Call->getArgOperand(0);
if (TTI.haveFastSqrt(Ty) &&
(Call->hasNoNaNs() ||
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
&DT))) {
IRBuilder<> Builder(Call);
IRBuilderBase::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(Call->getFastMathFlags());

Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
Call->replaceAllUsesWith(NewSqrt);

// Explicitly erase the old call because a call with side effects is not
// trivially dead.
Call->eraseFromParent();
return true;
}

return false;
}

/// Try to expand strcmp(P, "x") calls.
static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);

// Trivial cases are optimized during inst combine
if (Str1P == Str2P)
return false;

StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
bool HasStr2 = getConstantStringInfo(Str2P, Str2);

Value *NonConstantP = nullptr;
StringRef ConstantStr;

if (!HasStr1 && HasStr2 && Str2.size() == 1) {
NonConstantP = Str1P;
ConstantStr = Str2;
} else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
NonConstantP = Str2P;
ConstantStr = Str1;
} else {
return false;
}

// Check if strcmp result is only used in a comparison with zero
if (!isOnlyUsedInZeroComparison(CI))
return false;

// For strcmp(P, "x") do the following transformation:
//
// (before)
// dst = strcmp(P, "x")
//
// (after)
// v0 = P[0] - 'x'
// [if v0 == 0]
// v1 = P[1]
// dst = phi(v0, v1)
//

IRBuilder<> B(CI->getParent());
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

Type *RetType = CI->getType();

B.SetInsertPoint(CI);
BasicBlock *InitialBB = B.GetInsertBlock();
Value *Str1FirstCharacterValue =
B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
Value *Str2FirstCharacterValue =
ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
Value *FirstCharacterSub =
B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
Value *IsFirstCharacterSubZero =
B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
/*BranchWeights*/ nullptr, &DTU);

B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
Value *Str1SecondCharacterValue = B.CreateZExt(
B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
B.getInt8Ty(), NonConstantP, 1)),
RetType);

B.SetInsertPoint(CI);
B.GetInsertBlock()->setName("strcmp_expand_sub_join");

PHINode *Result = B.CreatePHI(RetType, 2);
Result->addIncoming(FirstCharacterSub, InitialBB);
Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);

CI->replaceAllUsesWith(Result);
CI->eraseFromParent();

MadeCFGChange = true;

return true;
}

static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
AssumptionCache &AC, bool &MadeCFGChange) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
return false;

LibFunc Func;
Module *M = I.getModule();
if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
return false;

switch (Func) {
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
return foldSqrt(CI, TTI, TLI, AC, DT);
case LibFunc_strcmp:
return expandStrcmp(CI, DT, MadeCFGChange);
default:
break;
}

return false;
}

/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AliasAnalysis &AA,
AssumptionCache &AC, bool &MadeCFGChange) {
AssumptionCache &AC) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
Expand All @@ -1046,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
}
}

Expand All @@ -1062,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
AliasAnalysis &AA, bool &ChangedCFG) {
AliasAnalysis &AA) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
return MadeChange;
}

Expand All @@ -1078,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);

bool MadeCFGChange = false;

if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}

// Mark all the analyses that instcombine updates as preserved.
PreservedAnalyses PA;

if (MadeCFGChange)
PA.preserve<DominatorTreeAnalysis>();
else
PA.preserveSet<CFGAnalyses>();

PA.preserveSet<CFGAnalyses>();
return PA;
}
14 changes: 13 additions & 1 deletion llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
Expand Up @@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
return ConstantInt::get(RetTy, Result);
}

static bool isOnlyUsedInComparisonWithZero(Value *V) {
for (User *U : V->users()) {
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
continue;
// Unknown instruction.
return false;
}
return true;
}

static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
const DataLayout &DL) {
if (!isOnlyUsedInZeroComparison(CI))
if (!isOnlyUsedInComparisonWithZero(CI))
return false;

if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
Expand Down

0 comments on commit 04b4914

Please sign in to comment.