//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

const unsigned MaxDepth = 6;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}
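
// Illustrative examples for getBitWidth above (added; not in the original
// source): for Ty == i32 this returns 32; for Ty == <4 x i16> it returns the
// element width, 16; for a pointer type getScalarSizeInBits() is 0, so we
// fall back to the DataLayout's pointer size (e.g. 64 on a 64-bit target).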
namespace {

// Simplifying using an assume can only be done in a particular control-flow
// context (the context instruction provides that context). If an assume and
// the context instruction are not in the same block then the DT helps in
// figuring out if we can use it.
struct Query {
  const DataLayout &DL;
  AssumptionCache *AC;
  const Instruction *CxtI;
  const DominatorTree *DT;

  // Unlike the other analyses, this may be a nullptr because not all clients
  // provide it currently.
  OptimizationRemarkEmitter *ORE;

  /// Set of assumptions that should be excluded from further queries.
  /// This is because of the potential for mutual recursion to cause
  /// computeKnownBits to repeatedly visit the same assume intrinsic. The
  /// classic case of this is assume(x = y), which will attempt to determine
  /// bits in x from bits in y, which will attempt to determine bits in y from
  /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
  /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
  /// (all of which can call computeKnownBits), and so on.
  std::array<const Value *, MaxDepth> Excluded;

  /// If true, it is safe to use metadata during simplification.
  InstrInfoQuery IIQ;

  unsigned NumExcluded = 0;

  Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
        const DominatorTree *DT, bool UseInstrInfo,
        OptimizationRemarkEmitter *ORE = nullptr)
      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}

  Query(const Query &Q, const Value *NewExcl)
      : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
        NumExcluded(Q.NumExcluded) {
    Excluded = Q.Excluded;
    Excluded[NumExcluded++] = NewExcl;
    assert(NumExcluded <= Excluded.size());
  }

  bool isExcluded(const Value *Value) const {
    if (NumExcluded == 0)
      return false;

    auto End = Excluded.begin() + NumExcluded;
    return std::find(Excluded.begin(), End, Value) != End;
  }
};

} // end anonymous namespace

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  // The length of scalable vectors is unknown at compile time, thus we
  // cannot check their values.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return false;

  int NumElts =
      cast<VectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  int NumMaskElts = Shuf->getType()->getNumElements();
  DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
  if (DemandedElts.isNullValue())
    return true;
  // Simple case of a shuffle with zeroinitializer.
  if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) {
    DemandedLHS.setBit(0);
    return true;
  }
  for (int i = 0; i != NumMaskElts; ++i) {
    if (!DemandedElts[i])
      continue;
    int M = Shuf->getMaskValue(i);
    assert(M < (NumElts * 2) && "Invalid shuffle mask constant");

    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (M == -1)
      return false;
    if (M < NumElts)
      DemandedLHS.setBit(M % NumElts);
    else
      DemandedRHS.setBit(M % NumElts);
  }

  return true;
}
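
// Worked example for getShuffleDemandedElts above (added for illustration):
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// Demanding result elements {0, 1} demands element 0 of %a (mask value 0 is
// < NumElts) and element 1 of %b (mask value 5 maps to 5 % 4 == 1 on the RHS).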
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth, const Query &Q);

static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                             const Query &Q) {
  // FIXME: We currently have no way to represent the DemandedElts of a
  // scalable vector.
  if (isa<ScalableVectorType>(V->getType())) {
    Known.resetAll();
    return;
  }

  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
  computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT,
                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
  ::computeKnownBits(V, Known, Depth,
                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                            KnownBits &Known, const DataLayout &DL,
                            unsigned Depth, AssumptionCache *AC,
                            const Instruction *CxtI, const DominatorTree *DT,
                            OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
  ::computeKnownBits(V, DemandedElts, Known, Depth,
                     Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
                                  unsigned Depth, const Query &Q);

static KnownBits computeKnownBits(const Value *V, unsigned Depth,
                                  const Query &Q);

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT,
                                 OptimizationRemarkEmitter *ORE,
                                 bool UseInstrInfo) {
  return ::computeKnownBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT,
                                 OptimizationRemarkEmitter *ORE,
                                 bool UseInstrInfo) {
  return ::computeKnownBits(
      V, DemandedElts, Depth,
      Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}

bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
                               const DataLayout &DL, AssumptionCache *AC,
                               const Instruction *CxtI, const DominatorTree *DT,
                               bool UseInstrInfo) {
  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");
  // Look for an inverted mask: (X & ~M) op (Y & M).
  Value *M;
  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(RHS, m_c_And(m_Specific(M), m_Value())))
    return true;
  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
      match(LHS, m_c_And(m_Specific(M), m_Value())))
    return true;
  IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
  KnownBits LHSKnown(IT->getBitWidth());
  KnownBits RHSKnown(IT->getBitWidth());
  computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
  return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
  for (const User *U : CxtI->users()) {
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
      if (IC->isEquality())
        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
          if (C->isNullValue())
            continue;
    return false;
  }
  return true;
}
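
// Example (added for illustration): this returns true for a call such as
//   %r = call i32 @memcmp(i8* %p, i8* %q, i64 %n)
// whose only users are of the form icmp eq/ne i32 %r, 0, i.e. callers that
// only care whether the result is zero, not its sign or magnitude.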
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const Query &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           unsigned Depth, const Query &Q);

static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(V, Depth,
                          Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
                              unsigned Depth, AssumptionCache *AC,
                              const Instruction *CxtI, const DominatorTree *DT,
                              bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();
  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
         isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}

bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
                           AssumptionCache *AC, const Instruction *CxtI,
                           const DominatorTree *DT, bool UseInstrInfo) {
  KnownBits Known =
      computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
  return Known.isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(V1, V2,
                           Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
                                 UseInstrInfo, /*ORE=*/nullptr));
}

static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
                              const Query &Q);

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const DataLayout &DL, unsigned Depth,
                             AssumptionCache *AC, const Instruction *CxtI,
                             const DominatorTree *DT, bool UseInstrInfo) {
  return ::MaskedValueIsZero(
      V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const Query &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const Query &Q) {
  // FIXME: We currently have no way to represent the DemandedElts of a
  // scalable vector.
  if (isa<ScalableVectorType>(V->getType()))
    return 1;

  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW, const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const Query &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
                           isKnownNonZero(Op0, Depth, Q)) ||
                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
                           isKnownNonZero(Op1, Depth, Q));
    }
  }

  assert(!Known.hasConflict() && !Known2.hasConflict());
  // Compute a conservative estimate for high known-0 bits.
  unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
                                Known2.countMinLeadingZeros(),
                            BitWidth) - BitWidth;
  LeadZ = std::min(LeadZ, BitWidth);

  // The result of the bottom bits of an integer multiply can be
  // inferred by looking at the bottom bits of both operands and
  // multiplying them together.
  // We can infer at least the minimum number of known trailing bits
  // of both operands. Depending on number of trailing zeros, we can
  // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming
  // a and b are divisible by m and n respectively.
  // We then calculate how many of those bits are inferrable and set
  // the output. For example, the i8 mul:
  //  a = XXXX1100 (12)
  //  b = XXXX1110 (14)
  // We know the bottom 3 bits are zero since the first can be divided by
  // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4).
  // Applying the multiplication to the trimmed arguments gets:
  //    XX11 (3)
  //    X111 (7)
  // -------
  //    XX11
  //   XX11
  //  XX11
  // XX11
  // -------
  // XXXXX01
  // Which allows us to infer the 2 LSBs. Since we're multiplying the result
  // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits.
  // The proof for this can be described as:
  // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) &&
  //      (C7 == (1 << (umin(countTrailingZeros(C1), C5) +
  //                    umin(countTrailingZeros(C2), C6) +
  //                    umin(C5 - umin(countTrailingZeros(C1), C5),
  //                         C6 - umin(countTrailingZeros(C2), C6)))) - 1)
  // %aa = shl i8 %a, C5
  // %bb = shl i8 %b, C6
  // %aaa = or i8 %aa, C1
  // %bbb = or i8 %bb, C2
  // %mul = mul i8 %aaa, %bbb
  // %mask = and i8 %mul, C7
  //   =>
  // %mask = i8 ((C1*C2)&C7)
  // Where C5, C6 describe the known bits of %a, %b
  // C1, C2 describe the known bottom bits of %a, %b.
  // C7 describes the mask of the known bits of the result.
  APInt Bottom0 = Known.One;
  APInt Bottom1 = Known2.One;

  // How many times we'd be able to divide each argument by 2 (shr by 1).
  // This gives us the number of trailing zeros on the multiplication result.
  unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes();
  unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes();
  unsigned TrailZero0 = Known.countMinTrailingZeros();
  unsigned TrailZero1 = Known2.countMinTrailingZeros();
  unsigned TrailZ = TrailZero0 + TrailZero1;

  // Figure out the fewest known-bits operand.
  unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0,
                                      TrailBitsKnown1 - TrailZero1);
  unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth);

  APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) *
                      Bottom1.getLoBits(TrailBitsKnown1);

  Known.resetAll();
  Known.Zero.setHighBits(LeadZ);
  Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
  Known.One |= BottomKnown.getLoBits(ResultBitsKnown);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    Known.One &= Range.getUnsignedMax() & Mask;
    Known.Zero &= ~Range.getUnsignedMax() & Mask;
  }
}
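
// Worked example for the function above (added; not from the original
// source): an i32 load annotated with
//   !range !{i32 0, i32 8}
// has getUnsignedMin() == 0 and getUnsignedMax() == 7, so their XOR is 7 and
// CommonPrefixBits == 29. The mask covers the top 29 bits, which all become
// known zero; the low 3 bits stay unknown.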
static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
          return EphValues.count(U);
        })) {
      if (V == E)
        return true;

      if (V == I || isSafeToSpeculativelyExecute(V)) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
               J != JE; ++J)
            WorkSet.push_back(*J);
      }
    }
  }

  return false;
}
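
// Example (added for illustration): in
//   %c = icmp eq i32 %x, 0
//   call void @llvm.assume(i1 %c)
// %c is ephemeral to the assume: it exists only to feed the assumption, so
// using the assume to simplify %c itself would be circular reasoning.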
// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (Function *F = CI->getCalledFunction())
      switch (F->getIntrinsicID()) {
      default: break;
      // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
      case Intrinsic::assume:
      case Intrinsic::sideeffect:
      case Intrinsic::dbg_declare:
      case Intrinsic::dbg_value:
      case Intrinsic::dbg_label:
      case Intrinsic::invariant_start:
      case Intrinsic::invariant_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
      case Intrinsic::objectsize:
      case Intrinsic::ptr_annotation:
      case Intrinsic::var_annotation:
        return true;
      }

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CtxI are in the same block, check if the assume (Inv) is first
    // in the BB.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I)
      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
        return false;

    return !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}
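
// Illustration for isValidAssumeForContext above (added): within a single
// block, an assume placed after the context is only valid when every
// instruction between them is guaranteed to transfer execution to its
// successor; e.g. an intervening call that may throw or not return
// invalidates it. Across blocks, validity reduces to dominance.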
static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.
  auto CmpExcludesZero = [V](ICmpInst *Cmp) {
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));

    Value *RHS;
    CmpInst::Predicate Pred;
    if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS))))
      return false;
    // assume(v u> y) -> assume(v != 0)
    if (Pred == ICmpInst::ICMP_UGT)
      return true;

    // assume(v != 0)
    // We special-case this one to ensure that we handle `assume(v != null)`.
    if (Pred == ICmpInst::ICMP_NE)
      return match(RHS, m_Zero());

    // All other predicates - rely on generic ConstantRange handling.
    ConstantInt *CI;
    if (!match(RHS, m_ConstantInt(CI)))
      return false;
    ConstantRange RHSRange(CI->getValue());
    ConstantRange TrueValues =
        ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
    return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth()));
  };

  if (Q.CxtI && V->getType()->isPointerTy()) {
    SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull};
    if (!NullPointerIsDefined(Q.CxtI->getFunction(),
                              V->getType()->getPointerAddressSpace()))
      AttrKinds.push_back(Attribute::Dereferenceable);

    if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC))
      return true;
  }

  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");
    if (Q.isExcluded(I))
      continue;
    // Warning: This loop can end up being somewhat performance sensitive.
    // We run it once for each value queried, resulting in a runtime of
    // ~O(#assumes * #values).
    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    Value *Arg = I->getArgOperand(0);
    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (CmpExcludesZero(Cmp) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}
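
// Example (added for illustration): given
//   %cmp = icmp ugt i32 %v, %y
//   call void @llvm.assume(i1 %cmp)
// %v is known non-zero at any context the assume is valid for, because
// %v u> %y forces %v >= 1 regardless of %y.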
static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
                                       unsigned Depth, const Query &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.
  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    if (Q.isExcluded(I))
      continue;
    // Warning: This loop can end up being somewhat performance sensitive.
    // We run it once for each value queried, resulting in a runtime of
    // ~O(#assumes * #values).
    assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    // Note that ptrtoint may change the bitwidth.
    Value *A, *B;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));

    CmpInst::Predicate Pred;
    uint64_t C;
    switch (Cmp->getPredicate()) {
    default:
      break;
    case ICmpInst::ICMP_EQ:
      // assume(v = a)
      if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        Known.Zero |= RHSKnown.Zero;
        Known.One |= RHSKnown.One;
        // assume(v & b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits MaskKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in the mask that are known to be one, we can propagate
        // known bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & MaskKnown.One;
        Known.One |= RHSKnown.One & MaskKnown.One;
        // assume(~(v & b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits MaskKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in the mask that are known to be one, we can propagate
        // inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.One & MaskKnown.One;
        Known.One |= RHSKnown.Zero & MaskKnown.One;
        // assume(v | b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits BKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in B that are known to be zero, we can propagate known
        // bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
        Known.One |= RHSKnown.One & BKnown.Zero;
        // assume(~(v | b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits BKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in B that are known to be zero, we can propagate
        // inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.One & BKnown.Zero;
        Known.One |= RHSKnown.Zero & BKnown.Zero;
        // assume(v ^ b = a)
      } else if (match(Cmp,
                       m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits BKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in B that are known to be zero, we can propagate known
        // bits from the RHS to V. For those bits in B that are known to be one,
        // we can propagate inverted known bits from the RHS to V.
        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
        Known.One |= RHSKnown.One & BKnown.Zero;
        Known.Zero |= RHSKnown.One & BKnown.One;
        Known.One |= RHSKnown.Zero & BKnown.One;
        // assume(~(v ^ b) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        KnownBits BKnown =
            computeKnownBits(B, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in B that are known to be zero, we can propagate
        // inverted known bits from the RHS to V. For those bits in B that are
        // known to be one, we can propagate known bits from the RHS to V.
        Known.Zero |= RHSKnown.One & BKnown.Zero;
        Known.One |= RHSKnown.Zero & BKnown.Zero;
        Known.Zero |= RHSKnown.Zero & BKnown.One;
        Known.One |= RHSKnown.One & BKnown.One;
        // assume(v << c = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // For those bits in RHS that are known, we can propagate them to known
        // bits in V shifted to the right by C.
        RHSKnown.Zero.lshrInPlace(C);
        Known.Zero |= RHSKnown.Zero;
        RHSKnown.One.lshrInPlace(C);
        Known.One |= RHSKnown.One;
        // assume(~(v << c) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        // For those bits in RHS that are known, we can propagate them inverted
        // to known bits in V shifted to the right by C.
        RHSKnown.One.lshrInPlace(C);
        Known.Zero |= RHSKnown.One;
        RHSKnown.Zero.lshrInPlace(C);
        Known.One |= RHSKnown.Zero;
        // assume(v >> c = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        // For those bits in RHS that are known, we can propagate them to known
        // bits in V shifted to the right by C.
        Known.Zero |= RHSKnown.Zero << C;
        Known.One |= RHSKnown.One << C;
        // assume(~(v >> c) = a)
      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
                                     m_Value(A))) &&
                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);
        // For those bits in RHS that are known, we can propagate them inverted
        // to known bits in V shifted to the right by C.
        Known.Zero |= RHSKnown.One << C;
        Known.One |= RHSKnown.Zero << C;
      }
      break;
    case ICmpInst::ICMP_SGE:
      // assume(v >=_s c) where c is non-negative
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth);

        if (RHSKnown.isNonNegative()) {
          // We know that the sign bit is zero.
          Known.makeNonNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SGT:
      // assume(v >_s c) where c is at least -1.
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth);

        if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
          // We know that the sign bit is zero.
          Known.makeNonNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SLE:
      // assume(v <=_s c) where c is negative
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth + 1, Query(Q, I)).anyextOrTrunc(BitWidth);

        if (RHSKnown.isNegative()) {
          // We know that the sign bit is one.
          Known.makeNegative();
        }
      }
      break;
    case ICmpInst::ICMP_SLT:
      // assume(v <_s c) where c is non-positive
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        if (RHSKnown.isZero() || RHSKnown.isNegative()) {
          // We know that the sign bit is one.
          Known.makeNegative();
        }
      }
      break;
    case ICmpInst::ICMP_ULE:
      // assume(v <=_u c)
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // Whatever high bits in c are zero are known to be zero.
        Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
      }
      break;
    case ICmpInst::ICMP_ULT:
      // assume(v <_u c)
      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
        KnownBits RHSKnown =
            computeKnownBits(A, Depth+1, Query(Q, I)).anyextOrTrunc(BitWidth);

        // If the RHS is known zero, then this assumption must be wrong (nothing
        // is unsigned less than zero). Signal a conflict and get out of here.
        if (RHSKnown.isZero()) {
          Known.Zero.setAllBits();
          Known.One.setAllBits();
          break;
        }

        // Whatever high bits in c are zero are known to be zero (if c is a power
        // of 2, then one more).
        if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
        else
          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
      }
      break;
    }
  }

  // If assumptions conflict with each other or previous known bits, then we
  // have a logical fallacy. It's possible that the assumption is not reachable,
  // so this isn't a real bug. On the other hand, the program may have undefined
  // behavior, or we might have a bug in the compiler. We can't assert/crash, so
  // clear out the known bits, try to warn the user, and hope for the best.
  if (Known.Zero.intersects(Known.One)) {
    Known.resetAll();

    if (Q.ORE)
      Q.ORE->emit([&]() {
        auto *CxtI = const_cast<Instruction *>(Q.CxtI);
        return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
                                          CxtI)
               << "Detected conflicting code assumptions. Program may "
                  "have undefined behavior, or compiler may have "
                  "internal error.";
      });
  }
}
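
// Worked example for the function above (added; not from the original
// source): from
//   %m = and i32 %v, 12
//   %c = icmp eq i32 %m, 12
//   call void @llvm.assume(i1 %c)
// the "assume(v & b = a)" arm computes RHSKnown.One & MaskKnown.One == 12,
// so bits 2 and 3 of %v become known one; all other bits stay unknown.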
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
/// operator-specific functions that, given the known-zero or known-one bits
/// respectively, and a shift amount, compute the implied known-zero or
/// known-one bits of the shift operator's result respectively for that shift
/// amount. The results from calling KZF and KOF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const Query &Q,
    function_ref<APInt(const APInt &, unsigned)> KZF,
    function_ref<APInt(const APInt &, unsigned)> KOF) {
  unsigned BitWidth = Known.getBitWidth();
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  if (Known.isConstant()) {
    unsigned ShiftAmt = Known.getConstant().getLimitedValue(BitWidth - 1);

    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    Known.Zero = KZF(Known.Zero, ShiftAmt);
    Known.One = KOF(Known.One, ShiftAmt);
    // If the known bits conflict, this must be an overflowing left shift, so
    // the shift result is poison. We can return anything we want. Choose 0 for
    // the best folding opportunity.
    if (Known.hasConflict())
      Known.setAllZero();

    return;
  }

  // If the shift amount could be greater than or equal to the bit-width of the
  // LHS, the value could be poison, but bail out because the check below is
  // expensive.
  // TODO: Should we just carry on?
  if (Known.getMaxValue().uge(BitWidth)) {
    Known.resetAll();
    return;
  }

  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
  // BitWidth > 64 and any upper bits are known, we'll end up returning the
  // limit value (which implies all bits are known).
  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
  // It would be more clearly correct to use two temporaries for this
  // calculation, but we reuse the APInts here to avoid unnecessary
  // allocations.
  Known.resetAll();

  // If we know the shifter operand is nonzero, we can sometimes infer more
  // known bits. However this is expensive to compute, so be lazy about it and
  // only compute it when absolutely necessary.
  Optional<bool> ShifterOperandIsNonZero;

  // Early exit if we can't constrain any well-defined shift amount.
  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
      !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
    ShifterOperandIsNonZero =
        isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
    if (!*ShifterOperandIsNonZero)
      return;
  }

  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

  Known.Zero.setAllBits();
  Known.One.setAllBits();
  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
    // Combine the shifted known input bits only for those shift amounts
    // compatible with its known constraints.
    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
      continue;
    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
      continue;
    // If we know the shifter is nonzero, we may be able to infer more known
    // bits. This check is sunk down as far as possible to avoid the expensive
    // call to isKnownNonZero if the cheaper checks above fail.
    if (ShiftAmt == 0) {
      if (!ShifterOperandIsNonZero.hasValue())
        ShifterOperandIsNonZero =
            isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
      if (*ShifterOperandIsNonZero)
        continue;
    }

    Known.Zero &= KZF(Known2.Zero, ShiftAmt);
    Known.One &= KOF(Known2.One, ShiftAmt);
  }

  // If the known bits conflict, the result is poison. Return a 0 and hope the
  // caller can further optimize that.
  if (Known.hasConflict())
    Known.setAllZero();
}
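
// Illustration for the function above (added; not from the original source):
// for an i8 shift whose amount has its low bit known zero, only the amounts
// {0, 2, 4, 6} survive the two compatibility checks; the loop intersects
// KZF/KOF across those amounts, so only bits that hold for every feasible
// shift remain known. Amount 0 is additionally skipped when the shift amount
// is known non-zero.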
static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known &= Known2;

    // and(x, add (x, -1)) is a common idiom that always clears the low bit;
    // here we handle the more general case of adding any odd number by
    // matching the form add(x, add(x, y)) where y is odd.
    // TODO: This could be generalized to clearing any bit set in y where the
    // following bit is known to be unset in y.
    Value *X = nullptr, *Y = nullptr;
    if (!Known.Zero[0] && !Known.One[0] &&
        match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) {
      Known2.resetAll();
      computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q);
      if (Known2.countMinTrailingOnes() > 0)
        Known.Zero.setBit(0);
    }
    break;
  }
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known |= Known2;
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known ^= Known2;
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    // For the purposes of computing leading zeros we can conservatively
    // treat a udiv as a logical right shift by the power of 2 known to
    // be less than the denominator.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    unsigned LeadZ = Known2.countMinLeadingZeros();

    Known2.resetAll();
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
    if (RHSMaxLeadingZeros != BitWidth)
      LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);

    Known.Zero.setHighBits(LeadZ);
    break;
  }
  case Instruction::Select: {
    const Value *LHS = nullptr, *RHS = nullptr;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SelectPatternResult::isMinOrMax(SPF)) {
      computeKnownBits(RHS, Known, Depth + 1, Q);
      computeKnownBits(LHS, Known2, Depth + 1, Q);
    } else {
      computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
      computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    }

    unsigned MaxHighOnes = 0;
    unsigned MaxHighZeros = 0;
    if (SPF == SPF_SMAX) {
      // If both sides are negative, the result is negative.
      if (Known.isNegative() && Known2.isNegative())
        // We can derive a lower bound on the result by taking the max of the
        // leading one bits.
        MaxHighOnes =
            std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
      // If either side is non-negative, the result is non-negative.
      else if (Known.isNonNegative() || Known2.isNonNegative())
        MaxHighZeros = 1;
    } else if (SPF == SPF_SMIN) {
      // If both sides are non-negative, the result is non-negative.
      if (Known.isNonNegative() && Known2.isNonNegative())
        // We can derive an upper bound on the result by taking the max of the
        // leading zero bits.
        MaxHighZeros = std::max(Known.countMinLeadingZeros(),
                                Known2.countMinLeadingZeros());
      // If either side is negative, the result is negative.
      else if (Known.isNegative() || Known2.isNegative())
        MaxHighOnes = 1;
    } else if (SPF == SPF_UMAX) {
      // We can derive a lower bound on the result by taking the max of the
      // leading one bits.
      MaxHighOnes =
          std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
    } else if (SPF == SPF_UMIN) {
      // We can derive an upper bound on the result by taking the max of the
      // leading zero bits.
      MaxHighZeros =
          std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    } else if (SPF == SPF_ABS) {
      // RHS from matchSelectPattern returns the negation part of abs pattern.
      // If the negate has an NSW flag we can assume the sign bit of the result
      // will be 0 because that makes abs(INT_MIN) undefined.
      if (match(RHS, m_Neg(m_Specific(LHS))) &&
          Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
        MaxHighZeros = 1;
    }

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    if (MaxHighOnes > 0)
      Known.One.setHighBits(MaxHighOnes);
    if (MaxHighZeros > 0)
      Known.Zero.setHighBits(MaxHighZeros);
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    LLVM_FALLTHROUGH;
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero << ShiftAmt;
      KZResult.setLowBits(ShiftAmt); // Low bits known 0.
      // If this shift has "nsw" keyword, then the result is either a poison
      // value or has the same sign bit as the first operand.
      if (NSW && KnownZero.isSignBitSet())
        KZResult.setSignBit();
      return KZResult;
    };

    auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
      APInt KOResult = KnownOne << ShiftAmt;
      if (NSW && KnownOne.isSignBitSet())
        KOResult.setSignBit();
      return KOResult;
    };

    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KZF, KOF);
    break;
  }
  case Instruction::LShr: {
    // (lshr X, C1) & C2 == 0   iff   (-1 >> C1) & C2 == 0
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero.lshr(ShiftAmt);
      // High bits known zero.
      KZResult.setHighBits(ShiftAmt);
      return KZResult;
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.lshr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KZF, KOF);
    break;
  }
  case Instruction::AShr: {
    // (ashr X, C1) & C2 == 0   iff   (-1 >> C1) & C2 == 0
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      return KnownZero.ashr(ShiftAmt);
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.ashr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KZF, KOF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        APInt LowBits = RA - 1;
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

        // The low bits of the first operand are unchanged by the srem.
        Known.Zero = Known2.Zero & LowBits;
        Known.One = Known2.One & LowBits;

        // If the first operand is non-negative or has all low bits zero, then
        // the upper bits are all zero.
        if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
          Known.Zero |= ~LowBits;

        // If the first operand is negative and not all low bits are zero, then
        // the upper bits are all one.
        if (Known2.isNegative() && LowBits.intersects(Known2.One))
          Known.One |= ~LowBits;

        assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    // If it's known zero, our sign bit is also zero.
    if (Known2.isNonNegative())
      Known.makeNonNegative();

    break;
  case Instruction::URem: {
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      const APInt &RA = Rem->getValue();
      if (RA.isPowerOf2()) {
        APInt LowBits = (RA - 1);
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        Known.Zero |= ~LowBits;
        Known.One &= LowBits;
        break;
      }
    }

    // Since the result is less than or equal to either operand, any leading
    // zero bits in either operand must also exist in the result.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    unsigned Leaders =
        std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    Known.resetAll();
    Known.Zero.setHighBits(Leaders);
    break;
  }
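  // Example for the Alloca case below (added for illustration): an
  // `alloca i32, align 16` produces an address that is a multiple of 16, so
  // Log2(16) == 4 low bits of the pointer are known zero.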
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
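  // Illustration for the GetElementPtr case below (added; not from the
  // original source): indexing an [N x i64] array scales the index by 8, so
  // an index with two known trailing zero bits contributes
  // countTrailingZeros(8) + 2 == 5 trailing zero bits; the result keeps the
  // minimum of that and the base pointer's known trailing zeros.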
case Instruction::GetElementPtr: { | |
// Analyze all of the subscripts of this getelementptr instruction | |
// to determine if we can prove known low zero bits. | |
KnownBits LocalKnown(BitWidth); | |
computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); | |
unsigned TrailZ = LocalKnown.countMinTrailingZeros(); | |
gep_type_iterator GTI = gep_type_begin(I); | |
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { | |
// TrailZ can only become smaller, short-circuit if we hit zero. | |
if (TrailZ == 0) | |
break; | |
Value *Index = I->getOperand(i); | |
if (StructType *STy = GTI.getStructTypeOrNull()) { | |
// Handle struct member offset arithmetic. | |
// Handle case when index is vector zeroinitializer | |
Constant *CIndex = cast<Constant>(Index); | |
if (CIndex->isZeroValue()) | |
continue; | |
if (CIndex->getType()->isVectorTy()) | |
Index = CIndex->getSplatValue(); | |
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); | |
const StructLayout *SL = Q.DL.getStructLayout(STy); | |
uint64_t Offset = SL->getElementOffset(Idx); | |
TrailZ = std::min<unsigned>(TrailZ, | |
countTrailingZeros(Offset)); | |
} else { | |
// Handle array index arithmetic. | |
Type *IndexedTy = GTI.getIndexedType(); | |
if (!IndexedTy->isSized()) { | |
TrailZ = 0; | |
break; | |
} | |
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); | |
uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize(); | |
LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0); | |
computeKnownBits(Index, LocalKnown, Depth + 1, Q); | |
TrailZ = std::min(TrailZ, | |
unsigned(countTrailingZeros(TypeSize) + | |
LocalKnown.countMinTrailingZeros())); | |
} | |
} | |
Known.Zero.setLowBits(TrailZ); | |
break; | |
} | |
case Instruction::PHI: { | |
const PHINode *P = cast<PHINode>(I); | |
// Handle the case of a simple two-predecessor recurrence PHI. | |
// There's a lot more that could theoretically be done here, but | |
// this is sufficient to catch some interesting cases. | |
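// e.g. for %i = phi [ 0, %entry ], [ %i.next, %loop ] with
// %i.next = add %i, 4, the start (0) and the step (4) each have at least
// two trailing zero bits, so %i does as well.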
if (P->getNumIncomingValues() == 2) { | |
for (unsigned i = 0; i != 2; ++i) { | |
Value *L = P->getIncomingValue(i); | |
Value *R = P->getIncomingValue(!i); | |
Instruction *RInst = P->getIncomingBlock(!i)->getTerminator(); | |
Instruction *LInst = P->getIncomingBlock(i)->getTerminator(); | |
Operator *LU = dyn_cast<Operator>(L); | |
if (!LU) | |
continue; | |
unsigned Opcode = LU->getOpcode(); | |
// Check for operations that have the property that if | |
// both their operands have low zero bits, the result | |
// will have low zero bits. | |
if (Opcode == Instruction::Add || | |
Opcode == Instruction::Sub || | |
Opcode == Instruction::And || | |
Opcode == Instruction::Or || | |
Opcode == Instruction::Mul) { | |
Value *LL = LU->getOperand(0); | |
Value *LR = LU->getOperand(1); | |
// Find a recurrence. | |
if (LL == I) | |
L = LR; | |
else if (LR == I) | |
L = LL; | |
else | |
continue; // Check for recurrence with L and R flipped. | |
// Change the context instruction to the "edge" that flows into the | |
// phi. This is important because that is where the value is actually | |
// "evaluated" even though it is used later somewhere else. (see also | |
// D69571). | |
Query RecQ = Q; | |
// Ok, we have a PHI of the form L op= R. Check for low | |
// zero bits. | |
RecQ.CxtI = RInst; | |
computeKnownBits(R, Known2, Depth + 1, RecQ); | |
// We need to take the minimum number of known bits.
KnownBits Known3(BitWidth); | |
RecQ.CxtI = LInst; | |
computeKnownBits(L, Known3, Depth + 1, RecQ); | |
Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), | |
Known3.countMinTrailingZeros())); | |
auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU); | |
if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) { | |
// If initial value of recurrence is nonnegative, and we are adding | |
// a nonnegative number with nsw, the result can only be nonnegative | |
// or poison value regardless of the number of times we execute the | |
// add in phi recurrence. If initial value is negative and we are | |
// adding a negative number with nsw, the result can only be | |
// negative or poison value. Similar arguments apply to sub and mul. | |
// | |
// (add non-negative, non-negative) --> non-negative | |
// (add negative, negative) --> negative | |
if (Opcode == Instruction::Add) { | |
if (Known2.isNonNegative() && Known3.isNonNegative()) | |
Known.makeNonNegative(); | |
else if (Known2.isNegative() && Known3.isNegative()) | |
Known.makeNegative(); | |
} | |
// (sub nsw non-negative, negative) --> non-negative | |
// (sub nsw negative, non-negative) --> negative | |
else if (Opcode == Instruction::Sub && LL == I) { | |
if (Known2.isNonNegative() && Known3.isNegative()) | |
Known.makeNonNegative(); | |
else if (Known2.isNegative() && Known3.isNonNegative()) | |
Known.makeNegative(); | |
} | |
// (mul nsw non-negative, non-negative) --> non-negative | |
else if (Opcode == Instruction::Mul && Known2.isNonNegative() && | |
Known3.isNonNegative()) | |
Known.makeNonNegative(); | |
} | |
break; | |
} | |
} | |
} | |
// Unreachable blocks may have zero-operand PHI nodes. | |
if (P->getNumIncomingValues() == 0) | |
break; | |
// Otherwise intersect the known bit sets of the incoming values, taking
// conservative care to avoid excessive recursion.
if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) { | |
// Skip if every incoming value refers back to the PHI itself.
if (dyn_cast_or_null<UndefValue>(P->hasConstantValue())) | |
break; | |
Known.Zero.setAllBits(); | |
Known.One.setAllBits(); | |
for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { | |
Value *IncValue = P->getIncomingValue(u); | |
// Skip direct self references. | |
if (IncValue == P) continue; | |
// Change the context instruction to the "edge" that flows into the | |
// phi. This is important because that is where the value is actually | |
// "evaluated" even though it is used later somewhere else. (see also | |
// D69571). | |
Query RecQ = Q; | |
RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); | |
Known2 = KnownBits(BitWidth); | |
// Recurse, but cap the recursion to one level, because we don't | |
// want to waste time spinning around in loops. | |
computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ); | |
Known.Zero &= Known2.Zero; | |
Known.One &= Known2.One; | |
// If all bits have been ruled out, there's no need to check | |
// more operands. | |
if (!Known.Zero && !Known.One) | |
break; | |
} | |
} | |
break; | |
} | |
case Instruction::Call: | |
case Instruction::Invoke: | |
// If range metadata is attached to this call, set known bits from that, | |
// and then intersect with known bits based on other properties of the | |
// function. | |
if (MDNode *MD = | |
Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) | |
computeKnownBitsFromRangeMetadata(*MD, Known); | |
if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { | |
computeKnownBits(RV, Known2, Depth + 1, Q); | |
Known.Zero |= Known2.Zero; | |
Known.One |= Known2.One; | |
} | |
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { | |
switch (II->getIntrinsicID()) { | |
default: break; | |
case Intrinsic::bitreverse: | |
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); | |
Known.Zero |= Known2.Zero.reverseBits(); | |
Known.One |= Known2.One.reverseBits(); | |
break; | |
case Intrinsic::bswap: | |
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); | |
Known.Zero |= Known2.Zero.byteSwap(); | |
Known.One |= Known2.One.byteSwap(); | |
break; | |
case Intrinsic::ctlz: { | |
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); | |
// If we have a known 1, its position is our upper bound. | |
unsigned PossibleLZ = Known2.One.countLeadingZeros(); | |
// If this call is undefined for a zero input, the result is at most
// BitWidth - 1.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) | |
PossibleLZ = std::min(PossibleLZ, BitWidth - 1); | |
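// e.g. if PossibleLZ is at most 10, the result fits in Log2_32(10) + 1 == 4
// bits, so all higher bits are known zero.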
unsigned LowBits = Log2_32(PossibleLZ)+1; | |
Known.Zero.setBitsFrom(LowBits); | |
break; | |
} | |
case Intrinsic::cttz: { | |
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); | |
// If we have a known 1, its position is our upper bound. | |
unsigned PossibleTZ = Known2.One.countTrailingZeros(); | |
// If this call is undefined for a zero input, the result is at most
// BitWidth - 1.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) | |
PossibleTZ = std::min(PossibleTZ, BitWidth - 1); | |
unsigned LowBits = Log2_32(PossibleTZ)+1; | |
Known.Zero.setBitsFrom(LowBits); | |
break; | |
} | |
case Intrinsic::ctpop: { | |
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); | |
// We can bound the space the count needs. Also, bits known to be zero | |
// can't contribute to the population. | |
unsigned BitsPossiblySet = Known2.countMaxPopulation(); | |
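// e.g. if at most 5 bits can be set, the count is at most 5, which fits in
// Log2_32(5) + 1 == 3 bits.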
unsigned LowBits = Log2_32(BitsPossiblySet)+1; | |
Known.Zero.setBitsFrom(LowBits); | |
// TODO: we could bound KnownOne using the lower bound on the number | |
// of bits which might be set provided by popcnt KnownOne2. | |
break; | |
} | |
case Intrinsic::fshr: | |
case Intrinsic::fshl: { | |
const APInt *SA; | |
if (!match(I->getOperand(2), m_APInt(SA))) | |
break; | |
// Normalize to funnel shift left. | |
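// e.g. on i8, fshr(X, Y, 3) == fshl(X, Y, 5) == (X << 5) | lshr(Y, 3).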
uint64_t ShiftAmt = SA->urem(BitWidth); | |
if (II->getIntrinsicID() == Intrinsic::fshr) | |
ShiftAmt = BitWidth - ShiftAmt; | |
KnownBits Known3(BitWidth); | |
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); | |
computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); | |
Known.Zero = | |
Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); | |
Known.One = | |
Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); | |
break; | |
} | |
case Intrinsic::uadd_sat: | |
case Intrinsic::usub_sat: { | |
bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat; | |
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); | |
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); | |
// Add: Leading ones of either operand are preserved. | |
// Sub: Leading zeros of LHS and leading ones of RHS are preserved | |
// as leading zeros in the result. | |
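// e.g. i8 uadd.sat(0b11xxxxxx, Y) is >= 0xC0 whether or not it saturates,
// so the two leading ones survive in the result.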
unsigned LeadingKnown; | |
if (IsAdd) | |
LeadingKnown = std::max(Known.countMinLeadingOnes(), | |
Known2.countMinLeadingOnes()); | |
else | |
LeadingKnown = std::max(Known.countMinLeadingZeros(), | |
Known2.countMinLeadingOnes()); | |
Known = KnownBits::computeForAddSub( | |
IsAdd, /* NSW */ false, Known, Known2); | |
// We select between the operation result and all-ones/zero | |
// respectively, so we can preserve known ones/zeros. | |
if (IsAdd) { | |
Known.One.setHighBits(LeadingKnown); | |
Known.Zero.clearAllBits(); | |
} else { | |
Known.Zero.setHighBits(LeadingKnown); | |
Known.One.clearAllBits(); | |
} | |
break; | |
} | |
case Intrinsic::x86_sse42_crc32_64_64: | |
Known.Zero.setBitsFrom(32); | |
break; | |
} | |
} | |
break; | |
case Instruction::ShuffleVector: { | |
auto *Shuf = dyn_cast<ShuffleVectorInst>(I); | |
// FIXME: Do we need to handle ConstantExpr involving shufflevectors? | |
if (!Shuf) { | |
Known.resetAll(); | |
return; | |
} | |
// For undef elements, we don't know anything about the common state of | |
// the shuffle result. | |
APInt DemandedLHS, DemandedRHS; | |
if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { | |
Known.resetAll(); | |
return; | |
} | |
Known.One.setAllBits(); | |
Known.Zero.setAllBits(); | |
if (!!DemandedLHS) { | |
const Value *LHS = Shuf->getOperand(0); | |
computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); | |
// If we don't know any bits, early out. | |
if (Known.isUnknown()) | |
break; | |
} | |
if (!!DemandedRHS) { | |
const Value *RHS = Shuf->getOperand(1); | |
computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); | |
Known.One &= Known2.One; | |
Known.Zero &= Known2.Zero; | |
} | |
break; | |
} | |
case Instruction::InsertElement: { | |
const Value *Vec = I->getOperand(0); | |
const Value *Elt = I->getOperand(1); | |
auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); | |
// Early out if the index is non-constant or out-of-range. | |
unsigned NumElts = DemandedElts.getBitWidth(); | |
if (!CIdx || CIdx->getValue().uge(NumElts)) { | |
Known.resetAll(); | |
return; | |
} | |
Known.One.setAllBits(); | |
Known.Zero.setAllBits(); | |
unsigned EltIdx = CIdx->getZExtValue(); | |
// Do we demand the inserted element? | |
if (DemandedElts[EltIdx]) { | |
computeKnownBits(Elt, Known, Depth + 1, Q); | |
// If we don't know any bits, early out. | |
if (Known.isUnknown()) | |
break; | |
} | |
// We don't need the base vector element that has been inserted. | |
APInt DemandedVecElts = DemandedElts; | |
DemandedVecElts.clearBit(EltIdx); | |
if (!!DemandedVecElts) { | |
computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); | |
Known.One &= Known2.One; | |
Known.Zero &= Known2.Zero; | |
} | |
break; | |
} | |
case Instruction::ExtractElement: { | |
// Look through extract element. If the index is non-constant or | |
// out-of-range demand all elements, otherwise just the extracted element. | |
const Value *Vec = I->getOperand(0); | |
const Value *Idx = I->getOperand(1); | |
auto *CIdx = dyn_cast<ConstantInt>(Idx); | |
if (isa<ScalableVectorType>(Vec->getType())) { | |
// FIXME: there's probably *something* we can do with scalable vectors | |
Known.resetAll(); | |
break; | |
} | |
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); | |
APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); | |
if (CIdx && CIdx->getValue().ult(NumElts)) | |
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); | |
computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); | |
break; | |
} | |
case Instruction::ExtractValue: | |
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { | |
const ExtractValueInst *EVI = cast<ExtractValueInst>(I); | |
if (EVI->getNumIndices() != 1) break; | |
if (EVI->getIndices()[0] == 0) { | |
switch (II->getIntrinsicID()) { | |
default: break; | |
case Intrinsic::uadd_with_overflow: | |
case Intrinsic::sadd_with_overflow: | |
computeKnownBitsAddSub(true, II->getArgOperand(0), | |
II->getArgOperand(1), false, DemandedElts, | |
Known, Known2, Depth, Q); | |
break; | |
case Intrinsic::usub_with_overflow: | |
case Intrinsic::ssub_with_overflow: | |
computeKnownBitsAddSub(false, II->getArgOperand(0), | |
II->getArgOperand(1), false, DemandedElts, | |
Known, Known2, Depth, Q); | |
break; | |
case Intrinsic::umul_with_overflow: | |
case Intrinsic::smul_with_overflow: | |
computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, | |
DemandedElts, Known, Known2, Depth, Q); | |
break; | |
} | |
} | |
} | |
break; | |
} | |
} | |
/// Determine which bits of V are known to be either zero or one and return | |
/// them. | |
KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, | |
unsigned Depth, const Query &Q) { | |
KnownBits Known(getBitWidth(V->getType(), Q.DL)); | |
computeKnownBits(V, DemandedElts, Known, Depth, Q); | |
return Known; | |
} | |
/// Determine which bits of V are known to be either zero or one and return | |
/// them. | |
KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { | |
KnownBits Known(getBitWidth(V->getType(), Q.DL)); | |
computeKnownBits(V, Known, Depth, Q); | |
return Known; | |
} | |
/// Determine which bits of V are known to be either zero or one and return | |
/// them in the Known bit set. | |
/// | |
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that | |
/// we cannot optimize based on the assumption that it is zero without changing | |
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway, | |
/// this won't lose us code quality. | |
/// | |
/// This function is defined on values with integer type, values with pointer | |
/// type, and vectors of integers. In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and a
/// bit is set only if it is true for all of the demanded elements in the
/// vector specified by DemandedElts.
void computeKnownBits(const Value *V, const APInt &DemandedElts, | |
KnownBits &Known, unsigned Depth, const Query &Q) { | |
if (!DemandedElts || isa<ScalableVectorType>(V->getType())) { | |
// No demanded elts or V is a scalable vector, better to assume we don't | |
// know anything. | |
Known.resetAll(); | |
return; | |
} | |
assert(V && "No Value?"); | |
assert(Depth <= MaxDepth && "Limit Search Depth"); | |
#ifndef NDEBUG | |
Type *Ty = V->getType(); | |
unsigned BitWidth = Known.getBitWidth(); | |
assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && | |
"Not integer or pointer type!"); | |
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { | |
assert( | |
FVTy->getNumElements() == DemandedElts.getBitWidth() && | |
"DemandedElt width should equal the fixed vector number of elements"); | |
} else { | |
assert(DemandedElts == APInt(1, 1) && | |
"DemandedElt width should be 1 for scalars"); | |
} | |
Type *ScalarTy = Ty->getScalarType(); | |
if (ScalarTy->isPointerTy()) { | |
assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && | |
"V and Known should have same BitWidth"); | |
} else { | |
assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && | |
"V and Known should have same BitWidth"); | |
} | |
#endif | |
const APInt *C; | |
if (match(V, m_APInt(C))) { | |
// We know all of the bits for a scalar constant or a splat vector constant! | |
Known.One = *C; | |
Known.Zero = ~Known.One; | |
return; | |
} | |
// Null and aggregate-zero are all-zeros. | |
if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { | |
Known.setAllZero(); | |
return; | |
} | |
// Handle a constant vector by taking the intersection of the known bits of | |
// each element. | |
if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { | |
// We know that CDV must be a vector of integers. Take the intersection of | |
// each element. | |
Known.Zero.setAllBits(); Known.One.setAllBits(); | |
for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { | |
if (!DemandedElts[i]) | |
continue; | |
APInt Elt = CDV->getElementAsAPInt(i); | |
Known.Zero &= ~Elt; | |
Known.One &= Elt; | |
} | |
return; | |
} | |
if (const auto *CV = dyn_cast<ConstantVector>(V)) { | |
// We know that CV must be a vector of integers. Take the intersection of | |
// each element. | |
Known.Zero.setAllBits(); Known.One.setAllBits(); | |
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { | |
if (!DemandedElts[i]) | |
continue; | |
Constant *Element = CV->getAggregateElement(i); | |
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); | |
if (!ElementCI) { | |
Known.resetAll(); | |
return; | |
} | |
const APInt &Elt = ElementCI->getValue(); | |
Known.Zero &= ~Elt; | |
Known.One &= Elt; | |
} | |
return; | |
} | |
// Start out not knowing anything. | |
Known.resetAll(); | |
// We can't imply anything about undefs. | |
if (isa<UndefValue>(V)) | |
return; | |
// There's no point in looking through other users of ConstantData for | |
// assumptions. Confirm that we've handled them all. | |
assert(!isa<ConstantData>(V) && "Unhandled constant data!"); | |
// Limit search depth. | |
// All recursive calls that increase depth must come after this. | |
if (Depth == MaxDepth) | |
return; | |
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has | |
// the bits of its aliasee. | |
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { | |
if (!GA->isInterposable()) | |
computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); | |
return; | |
} | |
if (const Operator *I = dyn_cast<Operator>(V)) | |
computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); | |
// Aligned pointers have trailing zeros - refine Known.Zero set | |
if (isa<PointerType>(V->getType())) { | |
Align Alignment = V->getPointerAlignment(Q.DL); | |
Known.Zero.setLowBits(countTrailingZeros(Alignment.value())); | |
} | |
// computeKnownBitsFromAssume strictly refines Known. | |
// Therefore, we run them after computeKnownBitsFromOperator. | |
// Check whether a nearby assume intrinsic can determine some known bits. | |
computeKnownBitsFromAssume(V, Known, Depth, Q); | |
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); | |
} | |
/// Return true if the given value is known to have exactly one | |
/// bit set when defined. For vectors return true if every element is known to | |
/// be a power of two when defined. Supports values with integer or pointer | |
/// types and vectors of integers. | |
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, | |
const Query &Q) { | |
assert(Depth <= MaxDepth && "Limit Search Depth"); | |
// Attempt to match against constants. | |
if (OrZero && match(V, m_Power2OrZero())) | |
return true; | |
if (match(V, m_Power2())) | |
return true; | |
// 1 << X is clearly a power of two if the one is not shifted off the end. If | |
// it is shifted off the end then the result is undefined. | |
if (match(V, m_Shl(m_One(), m_Value()))) | |
return true; | |
// (signmask) >>l X is clearly a power of two if the one is not shifted off | |
// the bottom. If it is shifted off the bottom then the result is undefined. | |
if (match(V, m_LShr(m_SignMask(), m_Value()))) | |
return true; | |
// The remaining tests are all recursive, so bail out if we hit the limit. | |
if (Depth++ == MaxDepth) | |
return false; | |
Value *X = nullptr, *Y = nullptr; | |
// A shift left or a logical shift right of a power of two is a power of two | |
// or zero. | |
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || | |
match(V, m_LShr(m_Value(X), m_Value())))) | |
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); | |
if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V)) | |
return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); | |
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) | |
return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && | |
isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); | |
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { | |
// A power of two and'd with anything is a power of two or zero. | |
if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) || | |
isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q)) | |
return true; | |
// X & (-X) is always a power of two or zero. | |
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) | |
return true; | |
return false; | |
} | |
// Adding a power-of-two or zero to the same power-of-two or zero yields | |
// either the original power-of-two, a larger power-of-two or zero. | |
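// e.g. 4 + 4 == 8 and 4 + 0 == 4, but i8 0x80 + 0x80 wraps to 0, which is
// why OrZero or a no-wrap flag is required below.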
if (match(V, m_Add(m_Value(X), m_Value(Y)))) { | |
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); | |
if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || | |
Q.IIQ.hasNoSignedWrap(VOBO)) { | |
if (match(X, m_And(m_Specific(Y), m_Value())) || | |
match(X, m_And(m_Value(), m_Specific(Y)))) | |
if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) | |
return true; | |
if (match(Y, m_And(m_Specific(X), m_Value())) || | |
match(Y, m_And(m_Value(), m_Specific(X)))) | |
if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) | |
return true; | |
unsigned BitWidth = V->getType()->getScalarSizeInBits(); | |
KnownBits LHSBits(BitWidth); | |
computeKnownBits(X, LHSBits, Depth, Q); | |
KnownBits RHSBits(BitWidth); | |
computeKnownBits(Y, RHSBits, Depth, Q); | |
// If i8 V is a power of two or zero: | |
// ZeroBits: 1 1 1 0 1 1 1 1 | |
// ~ZeroBits: 0 0 0 1 0 0 0 0 | |
if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) | |
// If OrZero isn't set, we cannot give back a zero result. | |
// Make sure either the LHS or RHS has a bit set. | |
if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) | |
return true; | |
} | |
} | |
// An exact divide or right shift can only shift off zero bits, so the result | |
// is a power of two only if the first operand is a power of two and not | |
// copying a sign bit (sdiv int_min, 2). | |
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || | |
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { | |
return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, | |
Depth, Q); | |
} | |
return false; | |
} | |
/// Test whether a GEP's result is known to be non-null. | |
/// | |
/// Uses properties inherent in a GEP to try to determine whether it is known | |
/// to be non-null. | |
/// | |
/// Currently this routine does not support vector GEPs. | |
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, | |
const Query &Q) { | |
const Function *F = nullptr; | |
if (const Instruction *I = dyn_cast<Instruction>(GEP)) | |
F = I->getFunction(); | |
if (!GEP->isInBounds() || | |
NullPointerIsDefined(F, GEP->getPointerAddressSpace())) | |
return false; | |
// FIXME: Support vector-GEPs. | |
assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); | |
// If the base pointer is non-null, we cannot walk to a null address with an | |
// inbounds GEP in address space zero. | |
if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) | |
return true; | |
// Walk the GEP operands and see if any operand introduces a non-zero offset. | |
// If so, then the GEP cannot produce a null pointer, as doing so would | |
// inherently violate the inbounds contract within address space zero. | |
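// e.g. an inbounds GEP that adds the non-zero offset of a struct field
// cannot yield null in address space zero, regardless of the base pointer.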
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); | |
GTI != GTE; ++GTI) { | |
// Struct types are easy -- they must always be indexed by a constant. | |
if (StructType *STy = GTI.getStructTypeOrNull()) { | |
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); | |
unsigned ElementIdx = OpC->getZExtValue(); | |
const StructLayout *SL = Q.DL.getStructLayout(STy); | |
uint64_t ElementOffset = SL->getElementOffset(ElementIdx); | |
if (ElementOffset > 0) | |
return true; | |
continue; | |
} | |
// If we have a zero-sized type, the index doesn't matter. Keep looping. | |
if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).getKnownMinSize() == 0) | |
continue; | |
// Fast path the constant operand case both for efficiency and so we don't | |
// increment Depth when just zipping down an all-constant GEP. | |
if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { | |
if (!OpC->isZero()) | |
return true; | |
continue; | |
} | |
// We post-increment Depth here because while isKnownNonZero increments it | |
// as well, when we pop back up that increment won't persist. We don't want | |
// to recurse 10k times just because we have 10k GEP operands. We don't | |
// bail completely out because we want to handle constant GEPs regardless | |
// of depth. | |
if (Depth++ >= MaxDepth) | |
continue; | |
if (isKnownNonZero(GTI.getOperand(), Depth, Q)) | |
return true; | |
} | |
return false; | |
} | |
static bool isKnownNonNullFromDominatingCondition(const Value *V, | |
const Instruction *CtxI, | |
const DominatorTree *DT) { | |
if (isa<Constant>(V)) | |
return false; | |
if (!CtxI || !DT) | |
return false; | |
unsigned NumUsesExplored = 0; | |
for (auto *U : V->users()) { | |
// Avoid massive lists | |
if (NumUsesExplored >= DomConditionsMaxUses) | |
break; | |
NumUsesExplored++; | |
// If the value is used as an argument to a call or invoke, then argument | |
// attributes may provide an answer about null-ness. | |
if (const auto *CB = dyn_cast<CallBase>(U)) | |
if (auto *CalledFunc = CB->getCalledFunction()) | |
for (const Argument &Arg : CalledFunc->args()) | |
if (CB->getArgOperand(Arg.getArgNo()) == V && | |
Arg.hasNonNullAttr() && DT->dominates(CB, CtxI)) | |
return true; | |
// If the value is used as a load/store pointer, it must be non-null.
if (V == getLoadStorePointerOperand(U)) { | |
const Instruction *I = cast<Instruction>(U); | |
if (!NullPointerIsDefined(I->getFunction(), | |
V->getType()->getPointerAddressSpace()) && | |
DT->dominates(I, CtxI)) | |
return true; | |
} | |
// Consider only compare instructions uniquely controlling a branch | |
CmpInst::Predicate Pred; | |
if (!match(const_cast<User *>(U), | |
m_c_ICmp(Pred, m_Specific(V), m_Zero())) || | |
(Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)) | |
continue; | |
SmallVector<const User *, 4> WorkList; | |
SmallPtrSet<const User *, 4> Visited; | |
for (auto *CmpU : U->users()) { | |
assert(WorkList.empty() && "Should be!"); | |
if (Visited.insert(CmpU).second) | |
WorkList.push_back(CmpU); | |
while (!WorkList.empty()) { | |
auto *Curr = WorkList.pop_back_val(); | |
// If a user is an AND, add all its users to the work list. We only
// propagate the "pred != null" condition through AND because it is only
// correct to assume that all conditions of AND are met in the true branch.
// TODO: Support similar logic of OR and EQ predicate? | |
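// e.g. given 'br (and (icmp ne %p, null), %c), label %bb1, label %bb2',
// both operands of the 'and' are known true on the edge into %bb1.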
if (Pred == ICmpInst::ICMP_NE) | |
if (auto *BO = dyn_cast<BinaryOperator>(Curr)) | |
if (BO->getOpcode() == Instruction::And) { | |
for (auto *BOU : BO->users()) | |
if (Visited.insert(BOU).second) | |
WorkList.push_back(BOU); | |
continue; | |
} | |
if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { | |
assert(BI->isConditional() && "uses a comparison!"); | |
BasicBlock *NonNullSuccessor = | |
BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0); | |
BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); | |
if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) | |
return true; | |
} else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) && | |
DT->dominates(cast<Instruction>(Curr), CtxI)) { | |
return true; | |
} | |
} | |
} | |
} | |
return false; | |
} | |
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never equal to Value? The bit
/// width of Value must match the width of the range's bounds.
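/// For example, !range !{i8 1, i8 5} describes the half-open interval
/// [1, 5), which excludes 0.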
static bool rangeMetadataExcludesValue(const MDNode *Ranges, const APInt &Value) {
const unsigned NumRanges = Ranges->getNumOperands() / 2; | |
assert(NumRanges >= 1); | |
for (unsigned i = 0; i < NumRanges; ++i) { | |
ConstantInt *Lower = | |
mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0)); | |
ConstantInt *Upper = | |
mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1)); | |
ConstantRange Range(Lower->getValue(), Upper->getValue()); | |
if (Range.contains(Value)) | |
return false; | |
} | |
return true; | |
} | |
/// Return true if the given value is known to be non-zero when defined. For | |
/// vectors, return true if every demanded element is known to be non-zero when | |
/// defined. For pointers, if the context instruction and dominator tree are | |
/// specified, perform context-sensitive analysis and return true if the | |
/// pointer couldn't possibly be null at the specified instruction. | |
/// Supports values with integer or pointer type and vectors of integers. | |
bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, | |
const Query &Q) { | |
// FIXME: We currently have no way to represent the DemandedElts of a scalable | |
// vector | |
if (isa<ScalableVectorType>(V->getType())) | |
return false; | |
if (auto *C = dyn_cast<Constant>(V)) { | |
if (C->isNullValue()) | |
return false; | |
if (isa<ConstantInt>(C)) | |
// Must be non-zero due to null test above. | |
return true; | |
if (auto *CE = dyn_cast<ConstantExpr>(C)) { | |
// See the comment for IntToPtr/PtrToInt instructions below. | |
if (CE->getOpcode() == Instruction::IntToPtr || | |
CE->getOpcode() == Instruction::PtrToInt) | |
if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <= | |
Q.DL.getTypeSizeInBits(CE->getType())) | |
return isKnownNonZero(CE->getOperand(0), Depth, Q); | |
} | |
// For constant vectors, check that all elements are undefined or known | |
// non-zero to determine that the whole vector is known non-zero. | |
if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { | |
for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { | |
if (!DemandedElts[i]) | |
continue; | |
Constant *Elt = C->getAggregateElement(i); | |
if (!Elt || Elt->isNullValue()) | |
return false; | |
if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) | |
return false; | |
} | |
return true; | |
} | |
// A global variable in address space 0 is non-null unless it is extern weak
// or an absolute symbol reference. Other address spaces may have null as a | |
// valid address for a global, so we can't assume anything. | |
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { | |
if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && | |
GV->getType()->getAddressSpace() == 0) | |
return true; | |
} else | |
return false; | |
} | |
if (auto *I = dyn_cast<Instruction>(V)) { | |
if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) { | |
// If the possible ranges don't contain zero, then the value is | |
// definitely non-zero. | |
if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { | |
const APInt ZeroValue(Ty->getBitWidth(), 0); | |
if (rangeMetadataExcludesValue(Ranges, ZeroValue)) | |
return true; | |
} | |
} | |
} | |
if (isKnownNonZeroFromAssume(V, Q)) | |
return true; | |
// Some of the tests below are recursive, so bail out if we hit the limit. | |
if (Depth++ >= MaxDepth) | |
return false; | |
// Check for pointer simplifications. | |
if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { | |
// Alloca never returns null, malloc might. | |
if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0) | |
return true; | |
// A byval, inalloca, or preallocated argument is never null unless null is
// a defined (valid) pointer in its address space. A nonnull argument is
// assumed never to be 0.
if (const Argument *A = dyn_cast<Argument>(V)) { | |
if (((A->hasPassPointeeByValueCopyAttr() && | |
!NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || | |
A->hasNonNullAttr())) | |
return true; | |
} | |
// A Load tagged with nonnull metadata is never null. | |
if (const LoadInst *LI = dyn_cast<LoadInst>(V)) | |
if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull)) | |
return true; | |
if (const auto *Call = dyn_cast<CallBase>(V)) { | |
if (Call->isReturnNonNull()) | |
return true; | |
if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) | |
return isKnownNonZero(RP, Depth, Q); | |
} | |
} | |
if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) | |
return true; | |
// Check for recursive pointer simplifications. | |
if (V->getType()->isPointerTy()) { | |
// Look through bitcast operations, GEPs, and int2ptr instructions as they | |
// do not alter the value, or at least not the nullness property of the | |
// value, e.g., int2ptr is allowed to zero/sign extend the value. | |
// | |
// Note that we have to take special care to avoid looking through | |
// truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well | |
// as casts that can alter the value, e.g., AddrSpaceCasts. | |
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) | |
if (isGEPKnownNonNull(GEP, Depth, Q)) | |
return true; | |
if (auto *BCO = dyn_cast<BitCastOperator>(V)) | |
return isKnownNonZero(BCO->getOperand(0), Depth, Q); | |
if (auto *I2P = dyn_cast<IntToPtrInst>(V)) | |
if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <= | |
Q.DL.getTypeSizeInBits(I2P->getDestTy())) | |
return isKnownNonZero(I2P->getOperand(0), Depth, Q); | |
} | |
// Similar to int2ptr above, we can look through ptr2int here if the cast | |
// is a no-op or an extend and not a truncate. | |
if (auto *P2I = dyn_cast<PtrToIntInst>(V)) | |
if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <= | |
Q.DL.getTypeSizeInBits(P2I->getDestTy())) | |
return isKnownNonZero(P2I->getOperand(0), Depth, Q); | |
unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); | |
// X | Y != 0 if X != 0 or Y != 0. | |
Value *X = nullptr, *Y = nullptr; | |
if (match(V, m_Or(m_Value(X), m_Value(Y)))) | |
return isKnownNonZero(X, DemandedElts, Depth, Q) || | |
isKnownNonZero(Y, DemandedElts, Depth, Q); | |
// ext X != 0 if X != 0. | |
if (isa<SExtInst>(V) || isa<ZExtInst>(V)) | |
return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q); | |
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined | |
// if the lowest bit is shifted off the end. | |
if (match(V, m_Shl(m_Value(X), m_Value(Y)))) { | |
// shl nuw can't remove any non-zero bits. | |
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); | |
if (Q.IIQ.hasNoUnsignedWrap(BO)) | |
return isKnownNonZero(X, Depth, Q); | |
KnownBits Known(BitWidth); | |
computeKnownBits(X, DemandedElts, Known, Depth, Q); | |
if (Known.One[0]) | |
return true; | |
} | |
// shr X, Y != 0 if X is negative. Note that the value of the shift is not | |
// defined if the sign bit is shifted off the end. | |
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { | |
// shr exact can only shift out zero bits. | |
const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V); | |
if (BO->isExact()) | |
return isKnownNonZero(X, Depth, Q); | |
KnownBits Known = computeKnownBits(X, DemandedElts, Depth, Q); | |
if (Known.isNegative()) | |
return true; | |
// If the shifter operand is a constant, and all of the bits shifted | |
// out are known to be zero, and X is known non-zero then at least one | |
// non-zero bit must remain. | |
if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { | |
auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); | |
// Is there a known one in the portion not shifted out? | |
if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal) | |
return true; | |
// Are all the bits to be shifted out known zero? | |
if (Known.countMinTrailingZeros() >= ShiftVal) | |
return isKnownNonZero(X, DemandedElts, Depth, Q); | |
} | |
} | |
// div exact can only produce a zero if the dividend is zero. | |
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { | |
return isKnownNonZero(X, DemandedElts, Depth, Q); | |
} | |
// X + Y. | |
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { | |
KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); | |
KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); | |
// If X and Y are both non-negative (as signed values) then their sum is not | |
// zero unless both X and Y are zero. | |
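// (If the sum were zero with X != 0, Y would have to equal -X, and -X of a
// non-zero non-negative X always has its sign bit set.)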
if (XKnown.isNonNegative() && YKnown.isNonNegative()) | |
if (isKnownNonZero(X, DemandedElts, Depth, Q) || | |
isKnownNonZero(Y, DemandedElts, Depth, Q)) | |
return true; | |
// If X and Y are both negative (as signed values) then their sum is not | |
// zero unless both X and Y equal INT_MIN. | |
if (XKnown.isNegative() && YKnown.isNegative()) { | |
APInt Mask = APInt::getSignedMaxValue(BitWidth); | |
// The sign bit of X is set. If some other bit is set then X is not equal | |
// to INT_MIN. | |
if (XKnown.One.intersects(Mask)) | |
return true; | |
// The sign bit of Y is set. If some other bit is set then Y is not equal | |
// to INT_MIN. | |
if (YKnown.One.intersects(Mask)) | |
return true; | |
} | |
// The sum of a non-negative number and a power of two is not zero. | |
if (XKnown.isNonNegative() && | |
isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) | |
return true; | |
if (YKnown.isNonNegative() && | |
isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) | |
return true; | |
} | |
// X * Y. | |
else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { | |
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V); | |
// If X and Y are non-zero then so is X * Y as long as the multiplication | |
// does not overflow. | |
if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) && | |
isKnownNonZero(X, DemandedElts, Depth, Q) && | |
isKnownNonZero(Y, DemandedElts, Depth, Q)) | |
return true; | |
} | |
// (C ? X : Y) != 0 if X != 0 and Y != 0. | |
else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { | |
if (isKnownNonZero(SI->getTrueValue(), DemandedElts, Depth, Q) && | |
isKnownNonZero(SI->getFalseValue(), DemandedElts, Depth, Q)) | |
return true; | |
} | |
// PHI | |
else if (const PHINode *PN = dyn_cast<PHINode>(V)) { | |
// Try and detect a recurrence that monotonically increases from a | |
// starting value, as these are common as induction variables. | |
if (PN->getNumIncomingValues() == 2) { | |
Value *Start = PN->getIncomingValue(0); | |
Value *Induction = PN->getIncomingValue(1); | |
if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start)) | |
std::swap(Start, Induction); | |
if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) { | |
if (!C->isZero() && !C->isNegative()) { | |
ConstantInt *X; | |
if (Q.IIQ.UseInstrInfo && | |
(match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || | |
match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && | |
!X->isNegative()) | |
return true; | |
} | |
} | |
} | |
// Check if all incoming values are non-zero constants.
bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) { | |
return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero(); | |
}); | |
if (AllNonZeroConstants) | |
return true; | |
} | |
// ExtractElement | |
else if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) { | |
const Value *Vec = EEI->getVectorOperand(); | |
const Value *Idx = EEI->getIndexOperand(); | |
auto *CIdx = dyn_cast<ConstantInt>(Idx); | |
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); | |
APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); | |
if (CIdx && CIdx->getValue().ult(NumElts)) | |
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); | |
return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); | |
} | |
KnownBits Known(BitWidth); | |
computeKnownBits(V, DemandedElts, Known, Depth, Q); | |
return Known.One != 0; | |
} | |
bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// FIXME: We currently have no way to represent the DemandedElts of a scalable | |
// vector | |
if (isa<ScalableVectorType>(V->getType())) | |
return false; | |
auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); | |
APInt DemandedElts = | |
FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1); | |
return isKnownNonZero(V, DemandedElts, Depth, Q); | |
} | |
/// Return true if V2 == V1 + X, where X is known non-zero. | |
static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { | |
const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); | |
if (!BO || BO->getOpcode() != Instruction::Add) | |
return false; | |
Value *Op = nullptr; | |
if (V2 == BO->getOperand(0)) | |
Op = BO->getOperand(1); | |
else if (V2 == BO->getOperand(1)) | |
Op = BO->getOperand(0); | |
else | |
return false; | |
return isKnownNonZero(Op, 0, Q); | |
} | |
/// Return true if it is known that V1 != V2. | |
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { | |
if (V1 == V2) | |
return false; | |
if (V1->getType() != V2->getType()) | |
// We can't look through casts yet. | |
return false; | |
if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) | |
return true; | |
if (V1->getType()->isIntOrIntVectorTy()) { | |
// Are any known bits in V1 contradictory to known bits in V2? If V1 | |
// has a known zero where V2 has a known one, they must not be equal. | |
KnownBits Known1 = computeKnownBits(V1, 0, Q); | |
KnownBits Known2 = computeKnownBits(V2, 0, Q); | |
if (Known1.Zero.intersects(Known2.One) || | |
Known2.Zero.intersects(Known1.One)) | |
return true; | |
} | |
return false; | |
} | |
/// Return true if 'V & Mask' is known to be zero. We use this predicate to | |
/// simplify operations downstream. Mask is known to be zero for bits that V | |
/// cannot have. | |
/// | |
/// This function is defined on values with integer type, values with pointer | |
/// type, and vectors of integers. In the case | |
/// where V is a vector, the mask, known zero, and known one values are the | |
/// same width as the vector element, and the bit is set only if it is true | |
/// for all of the elements in the vector. | |
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, | |
const Query &Q) { | |
KnownBits Known(Mask.getBitWidth()); | |
computeKnownBits(V, Known, Depth, Q); | |
return Mask.isSubsetOf(Known.Zero); | |
} | |
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). | |
// Returns the input and lower/upper bounds. | |
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, | |
const APInt *&CLow, const APInt *&CHigh) { | |
assert(isa<Operator>(Select) && | |
cast<Operator>(Select)->getOpcode() == Instruction::Select && | |
"Input should be a Select!"); | |
const Value *LHS = nullptr, *RHS = nullptr; | |
SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; | |
if (SPF != SPF_SMAX && SPF != SPF_SMIN) | |
return false; | |
if (!match(RHS, m_APInt(CLow))) | |
return false; | |
const Value *LHS2 = nullptr, *RHS2 = nullptr; | |
SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; | |
if (getInverseMinMaxFlavor(SPF) != SPF2) | |
return false; | |
if (!match(RHS2, m_APInt(CHigh))) | |
return false; | |
if (SPF == SPF_SMIN) | |
std::swap(CLow, CHigh); | |
In = LHS2; | |
return CLow->sle(*CHigh); | |
} | |
/// For vector constants, loop over the elements and find the constant with the | |
/// minimum number of sign bits. Return 0 if the value is not a vector constant | |
/// or if any element was not analyzed; otherwise, return the count for the | |
/// element with the minimum number of sign bits. | |
static unsigned computeNumSignBitsVectorConstant(const Value *V, | |
const APInt &DemandedElts, | |
unsigned TyBits) { | |
const auto *CV = dyn_cast<Constant>(V); | |
if (!CV || !isa<FixedVectorType>(CV->getType())) | |
return 0; | |
unsigned MinSignBits = TyBits; | |
unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); | |
for (unsigned i = 0; i != NumElts; ++i) { | |
if (!DemandedElts[i]) | |
continue; | |
// If we find a non-ConstantInt, bail out. | |
auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); | |
if (!Elt) | |
return 0; | |
MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); | |
} | |
return MinSignBits; | |
} | |
static unsigned ComputeNumSignBitsImpl(const Value *V, | |
const APInt &DemandedElts, | |
unsigned Depth, const Query &Q); | |
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, | |
unsigned Depth, const Query &Q) { | |
unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); | |
assert(Result > 0 && "At least one sign bit needs to be present!"); | |
return Result; | |
} | |
/// Return the number of times the sign bit of the register is replicated into | |
/// the other bits. We know that at least 1 bit is always equal to the sign bit | |
/// (itself), but other cases can give us information. For example, immediately | |
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each | |
/// other, so we return 3. For vectors, return the number of sign bits for the | |
/// vector element with the minimum number of known sign bits of the demanded | |
/// elements in the vector specified by DemandedElts. | |
static unsigned ComputeNumSignBitsImpl(const Value *V, | |
const APInt &DemandedElts, | |
unsigned Depth, const Query &Q) { | |
Type *Ty = V->getType(); | |
// FIXME: We currently have no way to represent the DemandedElts of a scalable | |
// vector | |
if (isa<ScalableVectorType>(Ty)) | |
return 1; | |
#ifndef NDEBUG | |
assert(Depth <= MaxDepth && "Limit Search Depth"); | |
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { | |
assert( | |
FVTy->getNumElements() == DemandedElts.getBitWidth() && | |
"DemandedElt width should equal the fixed vector number of elements"); | |
} else { | |
assert(DemandedElts == APInt(1, 1) && | |
"DemandedElt width should be 1 for scalars"); | |
} | |
#endif | |
// We return the minimum number of sign bits that are guaranteed to be present | |
// in V, so for undef we have to conservatively return 1. We don't have the | |
// same behavior for poison though -- that's a FIXME today. | |
Type *ScalarTy = Ty->getScalarType(); | |
unsigned TyBits = ScalarTy->isPointerTy() ? | |
Q.DL.getPointerTypeSizeInBits(ScalarTy) : | |
Q.DL.getTypeSizeInBits(ScalarTy); | |
unsigned Tmp, Tmp2; | |
unsigned FirstAnswer = 1; | |
// Note that ConstantInt is handled by the general computeKnownBits case | |
// below. | |
if (Depth == MaxDepth) | |
return 1; // Limit search depth. | |
if (auto *U = dyn_cast<Operator>(V)) { | |
switch (Operator::getOpcode(V)) { | |
default: break; | |
case Instruction::SExt: | |
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); | |
return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; | |
case Instruction::SDiv: { | |
const APInt *Denominator; | |
// sdiv X, C -> adds log(C) sign bits. | |
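// e.g. sdiv i16 X, 16: the quotient lies in [-2048, 2047], so it has at
// least 1 + 4 sign bits.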
if (match(U->getOperand(1), m_APInt(Denominator))) { | |
// Ignore non-positive denominator. | |
if (!Denominator->isStrictlyPositive()) | |
break; | |
// Calculate the incoming numerator bits. | |
unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
// Add floor(log(C)) bits to the numerator bits. | |
return std::min(TyBits, NumBits + Denominator->logBase2()); | |
} | |
break; | |
} | |
case Instruction::SRem: { | |
const APInt *Denominator; | |
// srem X, C -> we know that the result is within [-C+1,C) when C is a | |
// positive constant. This lets us put a lower bound on the number of sign
// bits. | |
if (match(U->getOperand(1), m_APInt(Denominator))) { | |
// Ignore non-positive denominator. | |
if (!Denominator->isStrictlyPositive()) | |
break; | |
// Calculate the incoming numerator bits. SRem by a positive constant | |
// can't lower the number of sign bits. | |
unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
// Calculate the leading sign bit constraints by examining the | |
// denominator. Given that the denominator is positive, there are two | |
// cases: | |
// | |
// 1. the numerator is positive. The result range is [0,C) and [0,C) u< | |
// (1 << ceilLogBase2(C)). | |
// | |
// 2. the numerator is negative. Then the result range is (-C,0] and | |
// integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). | |
// | |
// Thus a lower bound on the number of sign bits is `TyBits - | |
// ceilLogBase2(C)`. | |
unsigned ResBits = TyBits - Denominator->ceilLogBase2(); | |
return std::max(NumrBits, ResBits); | |
} | |
break; | |
} | |
case Instruction::AShr: { | |
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
// ashr X, C -> adds C sign bits. Vectors too. | |
const APInt *ShAmt; | |
if (match(U->getOperand(1), m_APInt(ShAmt))) { | |
if (ShAmt->uge(TyBits)) | |
break; // Bad shift. | |
unsigned ShAmtLimited = ShAmt->getZExtValue(); | |
Tmp += ShAmtLimited; | |
if (Tmp > TyBits) Tmp = TyBits; | |
} | |
return Tmp; | |
} | |
case Instruction::Shl: { | |
const APInt *ShAmt; | |
if (match(U->getOperand(1), m_APInt(ShAmt))) { | |
// shl destroys sign bits. | |
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
if (ShAmt->uge(TyBits) || // Bad shift. | |
ShAmt->uge(Tmp)) break; // Shifted all sign bits out. | |
Tmp2 = ShAmt->getZExtValue(); | |
return Tmp - Tmp2; | |
} | |
break; | |
} | |
case Instruction::And: | |
case Instruction::Or: | |
case Instruction::Xor: // NOT is handled here. | |
// Logical binary ops preserve the number of sign bits at worst.
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
if (Tmp != 1) { | |
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); | |
FirstAnswer = std::min(Tmp, Tmp2); | |
// We computed what we know about the sign bits as our first | |
// answer. Now proceed to the generic code that uses | |
// computeKnownBits, and pick whichever answer is better. | |
} | |
break; | |
case Instruction::Select: { | |
// If we have a clamp pattern, we know that the number of sign bits will | |
// be the minimum of the clamp min/max range. | |
const Value *X; | |
const APInt *CLow, *CHigh; | |
if (isSignedMinMaxClamp(U, X, CLow, CHigh)) | |
return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); | |
Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); | |
if (Tmp == 1) break; | |
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); | |
return std::min(Tmp, Tmp2); | |
} | |
case Instruction::Add: | |
// Add can have at most one carry bit. Thus we know that the output | |
// is, at worst, one more bit than the inputs. | |
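// e.g. two i8 values with 3 sign bits each lie in [-32, 32); their sum lies
// in [-64, 64) and therefore keeps at least min(3, 3) - 1 == 2 sign bits.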
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); | |
if (Tmp == 1) break; | |
// Special case decrementing a value (ADD X, -1): | |
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) | |
if (CRHS->isAllOnesValue()) { | |
KnownBits Known(TyBits); | |
computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); | |
// If the input is known to be 0 or 1, the output is 0/-1, which is | |
// all sign bits set. | |
if ((Known.Zero | 1).isAllOnesValue()) | |
return TyBits; | |
// If we are subtracting one from a positive number, there is no carry | |
// out of the result. | |
if (Known.isNonNegative()) | |
return Tmp; | |
} | |
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); | |
if (Tmp2 == 1) break; | |
return std::min(Tmp, Tmp2) - 1; | |
case Instruction::Sub: | |
Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); | |
if (Tmp2 == 1) break; | |
// Handle NEG. | |
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) | |
if (CLHS->isNullValue()) { | |
KnownBits Known(TyBits); | |
computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); | |
// If the input is known to be 0 or 1, the output is 0/-1, which is | |
// all sign bits set. | |
if ((Known.Zero | 1).isAllOnesValue()) | |
return TyBits; | |
// If the input is known to be positive (the sign bit is known clear), | |
// the output of the NEG has the same number of sign bits as the | |
// input. | |
if (Known.isNonNegative()) | |
return Tmp2; | |