Skip to content

Commit

Permalink
[Attributor] AAUndefinedBehavior: Check for branches on undef value.
Browse files Browse the repository at this point in the history
A branch is considered UB if it depends on an undefined / uninitialized value.
At this point this handles simple UB branches of the form `br i1 undef, ...`.
Since we query `AAValueSimplify` for a value for the branch condition, the
condition may be more complicated than a literal `undef` as long as it
simplifies to one.

Patch By: Stefanos Baziotis (@baziotis)

Reviewers: jdoerfert, sstefan1, uenoku

Reviewed By: uenoku

Differential Revision: https://reviews.llvm.org/D71799
  • Loading branch information
uenoku committed Dec 29, 2019
1 parent a5c96e3 commit ef4febd
Show file tree
Hide file tree
Showing 6 changed files with 333 additions and 65 deletions.
35 changes: 35 additions & 0 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Expand Up @@ -823,6 +823,38 @@ struct Attributor {
return true;
}

/// Return the pointer operand of the memory-accessing instruction \p I,
/// or nullptr if \p I does not access memory through a pointer operand
/// (i.e., it is not a load, store, cmpxchg, or atomicrmw). When
/// \p AllowVolatile is false, volatile accesses also yield nullptr.
static const Value *getPointerOperand(const Instruction *I,
                                      bool AllowVolatile) {
  if (const auto *Load = dyn_cast<LoadInst>(I))
    return (AllowVolatile || !Load->isVolatile()) ? Load->getPointerOperand()
                                                  : nullptr;

  if (const auto *Store = dyn_cast<StoreInst>(I))
    return (AllowVolatile || !Store->isVolatile()) ? Store->getPointerOperand()
                                                   : nullptr;

  if (const auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(I))
    return (AllowVolatile || !CmpXchg->isVolatile())
               ? CmpXchg->getPointerOperand()
               : nullptr;

  if (const auto *RMW = dyn_cast<AtomicRMWInst>(I))
    return (AllowVolatile || !RMW->isVolatile()) ? RMW->getPointerOperand()
                                                 : nullptr;

  return nullptr;
}

/// Record that \p I is to be replaced with `unreachable` after information
/// was manifested.
void changeToUnreachableAfterManifest(Instruction *I) {
Expand Down Expand Up @@ -1710,6 +1742,9 @@ struct AAUndefinedBehavior
/// Return true if "undefined behavior" is known for the whole associated
/// scope (thin wrapper around the state's getKnown()).
bool isKnownToCauseUB() const { return getKnown(); }

/// Return true if instruction \p I is known to trigger "undefined behavior"
/// (implemented by the concrete attribute, which tracks per-instruction UB).
virtual bool isKnownToCauseUB(Instruction *I) const = 0;

/// Return an IR position, see struct IRPosition.
const IRPosition &getIRPosition() const override { return *this; }

Expand Down
192 changes: 139 additions & 53 deletions llvm/lib/Transforms/IPO/Attributor.cpp
Expand Up @@ -332,30 +332,13 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,

llvm_unreachable("Expected enum or string attribute!");
}
/// Return the pointer operand of the memory-accessing instruction \p I, or
/// nullptr if \p I is not a load, store, cmpxchg, or atomicrmw, or if the
/// access is volatile (volatile accesses are never looked through here).
static const Value *getPointerOperand(const Instruction *I) {
if (auto *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
return LI->getPointerOperand();

if (auto *SI = dyn_cast<StoreInst>(I))
if (!SI->isVolatile())
return SI->getPointerOperand();

if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I))
if (!CXI->isVolatile())
return CXI->getPointerOperand();

if (auto *RMWI = dyn_cast<AtomicRMWInst>(I))
if (!RMWI->isVolatile())
return RMWI->getPointerOperand();

return nullptr;
}
static const Value *
getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
const DataLayout &DL,
bool AllowNonInbounds = false) {
const Value *Ptr = getPointerOperand(I);
const Value *Ptr =
Attributor::getPointerOperand(I, /* AllowVolatile */ false);
if (!Ptr)
return nullptr;

Expand Down Expand Up @@ -1734,7 +1717,8 @@ static int64_t getKnownNonNullAndDerefBytesForUse(

int64_t Offset;
if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
if (Base == &AssociatedValue && getPointerOperand(I) == UseV) {
if (Base == &AssociatedValue &&
Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
int64_t DerefBytes =
(int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset;

Expand All @@ -1747,7 +1731,7 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
if (const Value *Base = getBasePointerOfAccessPointerOperand(
I, Offset, DL, /*AllowNonInbounds*/ true)) {
if (Offset == 0 && Base == &AssociatedValue &&
getPointerOperand(I) == UseV) {
Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
int64_t DerefBytes =
(int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
IsNonNull |= !NullPointerIsDefined;
Expand Down Expand Up @@ -1993,28 +1977,29 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
AAUndefinedBehaviorImpl(const IRPosition &IRP) : AAUndefinedBehavior(IRP) {}

/// See AbstractAttribute::updateImpl(...).
// TODO: We should not only check instructions that access memory
// through a pointer (i.e. also branches etc.)
ChangeStatus updateImpl(Attributor &A) override {
const size_t PrevSize = NoUBMemAccessInsts.size();
const size_t UBPrevSize = KnownUBInsts.size();
const size_t NoUBPrevSize = AssumedNoUBInsts.size();

auto InspectMemAccessInstForUB = [&](Instruction &I) {
// Skip instructions that are already saved.
if (NoUBMemAccessInsts.count(&I) || UBMemAccessInsts.count(&I))
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;

// `InspectMemAccessInstForUB` is only called on instructions
// for which getPointerOperand() should give us their
// pointer operand unless they're volatile.
const Value *PtrOp = getPointerOperand(&I);
if (!PtrOp)
return true;
// If we reach here, we know we have an instruction
// that accesses memory through a pointer operand,
// for which getPointerOperand() should give it to us.
const Value *PtrOp =
Attributor::getPointerOperand(&I, /* AllowVolatile */ true);
assert(PtrOp &&
"Expected pointer operand of memory accessing instruction");

// A memory access through a pointer is considered UB
// only if the pointer has constant null value.
// TODO: Expand it to not only check constant values.
if (!isa<ConstantPointerNull>(PtrOp)) {
NoUBMemAccessInsts.insert(&I);
AssumedNoUBInsts.insert(&I);
return true;
}
const Type *PtrTy = PtrOp->getType();
Expand All @@ -2025,30 +2010,82 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {

// A memory access using constant null pointer is only considered UB
// if null pointer is _not_ defined for the target platform.
if (!llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
UBMemAccessInsts.insert(&I);
if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
AssumedNoUBInsts.insert(&I);
else
NoUBMemAccessInsts.insert(&I);
KnownUBInsts.insert(&I);
return true;
};

auto InspectBrInstForUB = [&](Instruction &I) {
// A conditional branch instruction is considered UB if it has `undef`
// condition.

// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;

// We know we have a branch instruction.
auto BrInst = cast<BranchInst>(&I);

// Unconditional branches are never considered UB.
if (BrInst->isUnconditional())
return true;

// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedCond =
stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
if (!SimplifiedCond.hasValue())
return true;
AssumedNoUBInsts.insert(&I);
return true;
};

A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
{Instruction::Load, Instruction::Store,
Instruction::AtomicCmpXchg,
Instruction::AtomicRMW});
if (PrevSize != NoUBMemAccessInsts.size())
A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br});
if (NoUBPrevSize != AssumedNoUBInsts.size() ||
UBPrevSize != KnownUBInsts.size())
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}

/// See AAUndefinedBehavior::isKnownToCauseUB(Instruction *).
bool isKnownToCauseUB(Instruction *I) const override {
  // An instruction is known UB exactly when it was recorded as such.
  return KnownUBInsts.find(I) != KnownUBInsts.end();
}

/// Return true if instruction \p I is assumed to trigger UB.
bool isAssumedToCauseUB(Instruction *I) const override {
  // In simple words: if an instruction is not in the set of instructions
  // assumed to _not_ cause UB, then it is assumed to cause UB (that
  // includes those in the KnownUBInsts set). The switch below is
  // boilerplate to ensure that \p I is one of the instruction kinds we
  // actually inspect for UB; everything else is conservatively "no".
  switch (I->getOpcode()) {
  case Instruction::Load:
  case Instruction::Store:
  case Instruction::AtomicCmpXchg:
  case Instruction::AtomicRMW:
    return !AssumedNoUBInsts.count(I);
  case Instruction::Br: {
    auto *BrInst = cast<BranchInst>(I);
    // Unconditional branches are never considered UB.
    if (BrInst->isUnconditional())
      return false;
    return !AssumedNoUBInsts.count(I);
  }
  default:
    return false;
  }
}

/// See AbstractAttribute::manifest(...).
///
/// Replace every instruction _known_ to cause UB with `unreachable`;
/// merely-assumed UB instructions are not touched.
ChangeStatus manifest(Attributor &A) override {
  if (KnownUBInsts.empty())
    return ChangeStatus::UNCHANGED;
  for (Instruction *I : KnownUBInsts)
    A.changeToUnreachableAfterManifest(I);
  return ChangeStatus::CHANGED;
}
Expand All @@ -2058,22 +2095,69 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
return getAssumed() ? "undefined-behavior" : "no-ub";
}

/// Note: The correctness of this analysis depends on the fact that the
/// following 2 sets will stop changing after some point.
/// "Change" here means that their size changes.
/// The size of each set is monotonically increasing
/// (we only add items to them) and it is upper bounded by the number of
/// instructions in the processed function (we can never save more
/// elements in either set than this number). Hence, at some point,
/// they will stop increasing.
/// Consequently, at some point, both sets will have stopped
/// changing, effectively making the analysis reach a fixpoint.

/// Note: These 2 sets are disjoint and an instruction can be considered
/// one of 3 things:
/// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
/// the KnownUBInsts set.
/// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
/// has a reason to assume it).
/// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior
/// could not find a reason to assume or prove that it can cause UB,
/// hence it assumes it doesn't. We have a set for these instructions
/// so that we don't reprocess them in every update.
/// Note however that instructions in this set may cause UB.

protected:
// A set of all the (live) memory accessing instructions that _are_ assumed to
// cause UB.
SmallPtrSet<Instruction *, 8> UBMemAccessInsts;
/// A set of all live instructions _known_ to cause UB.
SmallPtrSet<Instruction *, 8> KnownUBInsts;

private:
// A set of all the (live) memory accessing instructions
// that are _not_ assumed to cause UB.
// Note: The correctness of the procedure depends on the fact that this
// set stops changing after some point. "Change" here means that the size
// of the set changes. The size of this set is monotonically increasing
// (we only add items to it) and is upper bounded by the number of memory
// accessing instructions in the processed function (we can never save more
// elements in this set than this number). Hence, the size of this set, at
// some point, will stop increasing, effectively reaching a fixpoint.
SmallPtrSet<Instruction *, 8> NoUBMemAccessInsts;
/// A set of all the (live) instructions that are assumed to _not_ cause UB.
SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;

// Helper for updates that process an instruction \p I which depends on a
// value \p V. Exactly one of the following happens:
// - The simplified value is merely assumed (not known): return None so the
//   caller takes no action and stops.
// - The value is known but has no value, or is known to be `undef`: record
//   \p I as known UB and return None so the caller stops.
// - Otherwise: return the simplified value for the caller's specific
//   processing.
Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
                                       Instruction *I) {
  const auto &ValueSimplifyAA =
      A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
  Optional<Value *> SimplifiedV =
      ValueSimplifyAA.getAssumedSimplifiedValue(A);
  // Don't depend on assumed values; wait until they are known.
  if (!ValueSimplifyAA.isKnown())
    return llvm::None;
  // A known simplification that carries no value may be assumed `undef`;
  // either way (value-less or literally undef) the instruction is UB.
  if (!SimplifiedV.hasValue() || isa<UndefValue>(SimplifiedV.getValue())) {
    KnownUBInsts.insert(I);
    return llvm::None;
  }
  return SimplifiedV.getValue();
}
};

struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
Expand All @@ -2085,7 +2169,7 @@ struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
STATS_DECL(UndefinedBehaviorInstruction, Instruction,
"Number of instructions known to have UB");
BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
UBMemAccessInsts.size();
KnownUBInsts.size();
}
};

Expand Down Expand Up @@ -3101,7 +3185,8 @@ struct AADereferenceableImpl : AADereferenceable {
int64_t Offset;
if (const Value *Base = getBasePointerOfAccessPointerOperand(
I, Offset, DL, /*AllowNonInbounds*/ true)) {
if (Base == &getAssociatedValue() && getPointerOperand(I) == UseV) {
if (Base == &getAssociatedValue() &&
Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType());
addAccessedBytes(Offset, Size);
}
Expand Down Expand Up @@ -5592,6 +5677,7 @@ void Attributor::initializeInformationCache(Function &F) {
case Instruction::CatchSwitch:
case Instruction::AtomicRMW:
case Instruction::AtomicCmpXchg:
case Instruction::Br:
case Instruction::Resume:
case Instruction::Ret:
IsInterestingOpcode = true;
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
Expand Up @@ -9,13 +9,12 @@ define void @fn2(i32* %P) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: for.cond1:
; CHECK-NEXT: br i1 undef, label [[IF_END]], label [[IF_END]]
; CHECK-NEXT: unreachable
; CHECK: if.end:
; CHECK-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ], [ null, [[FOR_COND1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]])
; CHECK-NEXT: store i32 [[CALL]], i32* [[P]]
; CHECK-NEXT: br label [[FOR_COND1]]
; CHECK-NEXT: br label %for.cond1
;
entry:
br label %if.end
Expand Down Expand Up @@ -51,13 +50,12 @@ define void @fn_no_null_opt(i32* %P) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: for.cond1:
; CHECK-NEXT: br i1 undef, label [[IF_END]], label [[IF_END]]
; CHECK-NEXT: unreachable
; CHECK: if.end:
; CHECK-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ], [ null, [[FOR_COND1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4
; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]])
; CHECK-NEXT: store i32 [[CALL]], i32* [[P]]
; CHECK-NEXT: br label [[FOR_COND1]]
; CHECK-NEXT: br label %for.cond1
;
entry:
br label %if.end
Expand Down
Expand Up @@ -7,7 +7,7 @@ define void @test(i32 signext %n) {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32 signext [[N:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK-NEXT: unreachable
; CHECK: if.then:
; CHECK-NEXT: unreachable
; CHECK: if.end:
Expand Down
Expand Up @@ -7,7 +7,7 @@ define internal i32 @testf(i1 %c) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C]], label [[IF_COND:%.*]], label [[IF_END:%.*]]
; CHECK: if.cond:
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END]]
; CHECK-NEXT: unreachable
; CHECK: if.then:
; CHECK-NEXT: unreachable
; CHECK: if.end:
Expand Down

0 comments on commit ef4febd

Please sign in to comment.