Skip to content

Commit

Permalink
[Verifier] Add verification for TBAA metadata
Browse files Browse the repository at this point in the history
Summary:
This change adds some verification in the IR verifier around struct path
TBAA metadata.

Other than some basic sanity checks (e.g. we get constant integers where
we expect constant integers), this checks:

 - That by the time an struct access tuple `(base-type, offset)` is
   "reduced" to a scalar base type, the offset is `0`.  For instance, in
   C++ you can't start from, say `("struct-a", 16)`, and end up with
   `("int", 4)` -- by the time the base type is `"int"`, the offset
   better be zero.  In particular, a variant of this invariant is needed
   for `llvm::getMostGenericTBAA` to be correct.

 - That there are no cycles in a struct path.

 - That struct type nodes have their offsets listed in an ascending
   order.

 - That when generating the struct access path, you eventually reach the
   access type listed in the tbaa tag node.

Reviewers: dexonsmith, chandlerc, reames, mehdi_amini, manmanren

Subscribers: mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D26438

llvm-svn: 289402
  • Loading branch information
sanjoy committed Dec 11, 2016
1 parent 81ed349 commit 3336f68
Show file tree
Hide file tree
Showing 36 changed files with 454 additions and 63 deletions.
267 changes: 265 additions & 2 deletions llvm/lib/IR/Verifier.cpp
Expand Up @@ -187,6 +187,14 @@ struct VerifierSupport {
*OS << *C;
}

void Write(const APInt *AI) {
if (!AI)
return;
*OS << *AI << '\n';
}

void Write(const unsigned i) { *OS << i << '\n'; }

template <typename T> void Write(ArrayRef<T> Vs) {
for (const T &V : Vs)
Write(V);
Expand Down Expand Up @@ -285,6 +293,17 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
// constant expressions, we can arrive at a particular user many times.
SmallPtrSet<const Value *, 32> GlobalValueVisited;

/// Cache of TBAA base nodes that have already been visited. This cachce maps
/// a node that has been visited to a pair (IsInvalid, BitWidth) where
///
/// \c IsInvalid is true iff the node is invalid.
/// \c BitWidth, if non-zero, is the bitwidth of the integer used to denoting
/// the offset of the access. If zero, only a zero offset is allowed.
///
/// \c BitWidth has no meaning if \c IsInvalid is true.
typedef std::pair<bool, unsigned> TBAABaseNodeSummary;
DenseMap<MDNode *, TBAABaseNodeSummary> TBAABaseNodes;

void checkAtomicMemAccessSize(Type *Ty, const Instruction *I);

public:
Expand Down Expand Up @@ -393,6 +412,14 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitTBAAMetadata(Instruction &I, MDNode *MD);

/// \name Helper functions used by \c visitTBAAMetadata.
/// @{
MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, MDNode *BaseNode,
APInt &Offset);
TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I, MDNode *BaseNode);
TBAABaseNodeSummary verifyTBAABaseNodeImpl(Instruction &I, MDNode *BaseNode);
/// @}

template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
#include "llvm/IR/Metadata.def"
Expand Down Expand Up @@ -3657,13 +3684,245 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
"dereferenceable_or_null metadata value must be an i64!", &I);
}

/// Verify that \p BaseNode can be used as the "base type" in the struct-path
/// TBAA scheme. This means \p BaseNode is either a scalar node, or a
/// struct-type node describing an aggregate data structure (like a struct).
Verifier::TBAABaseNodeSummary Verifier::verifyTBAABaseNode(Instruction &I,
MDNode *BaseNode) {
if (BaseNode->getNumOperands() < 2) {
CheckFailed("Base nodes must have at least two operands", &I, BaseNode);
return {true, ~0u};
}

auto Itr = TBAABaseNodes.find(BaseNode);
if (Itr != TBAABaseNodes.end())
return Itr->second;

auto Result = verifyTBAABaseNodeImpl(I, BaseNode);
auto InsertResult = TBAABaseNodes.insert({BaseNode, Result});
(void)InsertResult;
assert(InsertResult.second && "We just checked!");
return Result;
}

Verifier::TBAABaseNodeSummary
Verifier::verifyTBAABaseNodeImpl(Instruction &I, MDNode *BaseNode) {
const Verifier::TBAABaseNodeSummary InvalidNode = {true, ~0u};

if (BaseNode->getNumOperands() == 2) {
// This is a scalar base node.
if (!BaseNode->getOperand(0) || !BaseNode->getOperand(1)) {
CheckFailed("Null operands in scalar type nodes!", &I, BaseNode);
return InvalidNode;
}
if (!isa<MDNode>(BaseNode->getOperand(1))) {
CheckFailed("Invalid parent operand in scalar TBAA node", &I, BaseNode);
return InvalidNode;
}
if (!isa<MDString>(BaseNode->getOperand(0))) {
CheckFailed("Invalid name operand in scalar TBAA node", &I, BaseNode);
return InvalidNode;
}

// Scalar nodes can only be accessed at offset 0.
return {false, 0};
}

if (BaseNode->getNumOperands() % 2 != 1) {
CheckFailed("Struct tag nodes must have an odd number of operands!",
BaseNode);
return InvalidNode;
}

bool Failed = false;

Optional<APInt> PrevOffset;
unsigned BitWidth = ~0u;

// We've already checked that BaseNode is not a degenerate root node with one
// operand in \c verifyTBAABaseNode, so this loop should run at least once.
for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) {
const MDOperand &FieldTy = BaseNode->getOperand(Idx);
const MDOperand &FieldOffset = BaseNode->getOperand(Idx + 1);
if (!isa<MDNode>(FieldTy)) {
CheckFailed("Incorrect field entry in struct type node!", &I, BaseNode);
Failed = true;
continue;
}

auto *OffsetEntryCI =
mdconst::dyn_extract_or_null<ConstantInt>(FieldOffset);
if (!OffsetEntryCI) {
CheckFailed("Offset entries must be constants!", &I, BaseNode);
Failed = true;
continue;
}

if (BitWidth == ~0u)
BitWidth = OffsetEntryCI->getBitWidth();

if (OffsetEntryCI->getBitWidth() != BitWidth) {
CheckFailed(
"Bitwidth between the offsets and struct type entries must match", &I,
BaseNode);
Failed = true;
continue;
}

// NB! As far as I can tell, we generate a non-strictly increasing offset
// sequence only from structs that have zero size bit fields. When
// recursing into a contained struct in \c getFieldNodeFromTBAABaseNode we
// pick the field lexically the latest in struct type metadata node. This
// mirrors the actual behavior of the alias analysis implementation.
bool IsAscending =
!PrevOffset || PrevOffset->ule(OffsetEntryCI->getValue());

if (!IsAscending) {
CheckFailed("Offsets must be increasing!", &I, BaseNode);
Failed = true;
}

PrevOffset = OffsetEntryCI->getValue();
}

return Failed ? InvalidNode : Verifier::TBAABaseNodeSummary(false, BitWidth);
}

static bool IsRootTBAANode(const MDNode *MD) {
return MD->getNumOperands() < 2;
}

static bool IsScalarTBAANodeImpl(const MDNode *MD,
SmallPtrSetImpl<const MDNode *> &Visited) {
if (MD->getNumOperands() == 2)
return true;

if (MD->getNumOperands() != 3)
return false;

auto *Offset = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
if (!(Offset && Offset->isZero() && isa<MDString>(MD->getOperand(0))))
return false;

auto *Parent = dyn_cast<MDNode>(MD->getOperand(1));
return Visited.insert(Parent).second &&
(IsRootTBAANode(Parent) || IsScalarTBAANodeImpl(Parent, Visited));
}

static bool IsScalarTBAANode(const MDNode *MD) {
SmallPtrSet<const MDNode *, 4> Visited;
return IsScalarTBAANodeImpl(MD, Visited);
}

/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
/// Offset in place to be the offset within the field node returned.
///
/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
MDNode *Verifier::getFieldNodeFromTBAABaseNode(Instruction &I, MDNode *BaseNode,
APInt &Offset) {
assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");

// Scalar nodes have only one possible "field" -- their parent in the access
// hierarchy. Offset must be zero at this point, but our caller is supposed
// to Assert that.
if (BaseNode->getNumOperands() == 2)
return cast<MDNode>(BaseNode->getOperand(1));

for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) {
auto *OffsetEntryCI =
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
if (OffsetEntryCI->getValue().ugt(Offset)) {
if (Idx == 1) {
CheckFailed("Could not find TBAA parent in struct type node", &I,
BaseNode, &Offset);
return nullptr;
}

auto *PrevOffsetEntryCI =
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx - 1));
Offset -= PrevOffsetEntryCI->getValue();
return cast<MDNode>(BaseNode->getOperand(Idx - 2));
}
}

auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
BaseNode->getOperand(BaseNode->getNumOperands() - 1));

Offset -= LastOffsetEntryCI->getValue();
return cast<MDNode>(BaseNode->getOperand(BaseNode->getNumOperands() - 2));
}

void Verifier::visitTBAAMetadata(Instruction &I, MDNode *MD) {
bool IsStructPathTBAA =
isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;

Assert(IsStructPathTBAA,
"Old-style TBAA is no longer allowed, use struct-path TBAA instead",
&I);

Assert(MD->getNumOperands() < 5,
"Struct tag metadata must have either 3 or 4 operands", &I, MD);

MDNode *BaseNode = dyn_cast_or_null<MDNode>(MD->getOperand(0));
MDNode *AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1));

if (MD->getNumOperands() == 4) {
auto *IsImmutableCI =
mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(3));
Assert(IsImmutableCI,
"Immutability tag on struct tag metadata must be a constant", &I,
MD);
Assert(IsImmutableCI->isZero() || IsImmutableCI->isOne(),
"Immutability part of the struct tag metadata must be either 0 or 1",
&I, MD);
}

Assert(BaseNode && AccessType,
"Malformed struct tag metadata: base and access-type "
"should be non-null and point to Metadata nodes",
&I, MD, BaseNode, AccessType);

Assert(IsScalarTBAANode(AccessType), "Access type node must be scalar", &I,
MD, AccessType);

auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
Assert(OffsetCI, "Offset must be constant integer", &I, MD);

APInt Offset = OffsetCI->getValue();
bool SeenAccessTypeInPath = false;

SmallPtrSet<MDNode *, 4> StructPath;

for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset)) {
if (!StructPath.insert(BaseNode).second) {
CheckFailed("Cycle detected in struct path", &I, MD);
return;
}

bool Invalid;
unsigned BaseNodeBitWidth;
std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode);

// If the base node is invalid in itself, then we've already printed all the
// errors we wanted to print.
if (Invalid)
return;

SeenAccessTypeInPath |= BaseNode == AccessType;

if (IsScalarTBAANode(BaseNode) || BaseNode == AccessType)
Assert(Offset == 0, "Offset not zero at the point of scalar access", &I,
MD, &Offset);

Assert(BaseNodeBitWidth == Offset.getBitWidth() ||
(BaseNodeBitWidth == 0 && Offset == 0),
"Access bit-width not the same as description bit-width", &I, MD,
BaseNodeBitWidth, Offset.getBitWidth());
}

Assert(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
MD);
}

/// verifyInstruction - Verify that an instruction is well formed.
Expand Down Expand Up @@ -3801,8 +4060,12 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null))
visitDereferenceableMetadata(I, MD);

if (MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa))
visitTBAAMetadata(I, MD);
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa)) {
Assert(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<VAArgInst>(I),
"TBAA is only for loads, stores and calls!", &I);
visitTBAAMetadata(I, TBAA);
}

if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/BasicAA/full-store-partial-alias.ll
Expand Up @@ -31,7 +31,7 @@ entry:

!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!5, !5, i64 0}
!4 = !{!"double", !1}
!5 = !{!"int", !1}
Expand Up @@ -33,7 +33,7 @@ entry:

!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!5, !5, i64 0}
!4 = !{!"double", !1}
!5 = !{!"int", !1}
3 changes: 2 additions & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
Expand Up @@ -63,5 +63,6 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
!7 = !{ !"foo", !0 }
!8 = !{ !"bar", !0 }
!9 = !{ !"foo", !0 }
!10 = !{ !"bar", !"different" }
!10 = !{ !"bar", !12 }
!11 = !{ !"qux", !0}
!12 = !{!"different"}
2 changes: 1 addition & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/cyclic.ll
@@ -1,5 +1,5 @@
; RUN: not opt -instcombine < %s 2>&1 | FileCheck %s
; CHECK: Cycle found in TBAA metadata.
; CHECK: Access type node must be scalar

define void @test6(i32* %gi) #0 {
entry:
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
Expand Up @@ -68,5 +68,6 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
!7 = !{ !"foo", !0 }
!8 = !{ !"bar", !0 }
!9 = !{ !"foo", !0 }
!10 = !{ !"bar", !"different" }
!10 = !{ !"bar", !12}
!11 = !{ !"qux", !0}
!12 = !{!"different"}
Expand Up @@ -127,7 +127,7 @@ for.end: ; preds = %for.body
; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}}
!0 = !{!6, !6, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!7, !7, i64 0}
!4 = !{!8, !8, i64 0}
!5 = !{!9, !9, i64 0}
Expand Down
Expand Up @@ -91,5 +91,6 @@ if.else:
!4 = !{!8, !8, i64 0}
!5 = !{!"red", !0}
!6 = !{!"blu", !0}
!7 = !{!"outer space"}
!7 = !{!"outer space", !9}
!8 = !{!"brick red", !5}
!9 = !{!"observable universe"}
2 changes: 1 addition & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
Expand Up @@ -26,7 +26,7 @@ declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
; CHECK: attributes #1 = { argmemonly nounwind }
; CHECK: attributes [[NUW]] = { nounwind }

!0 = !{!"tbaa root", null}
!0 = !{!"tbaa root"}
!1 = !{!3, !3, i64 0}
!2 = !{!4, !4, i64 0}
!3 = !{!"A", !0}
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/Analysis/TypeBasedAliasAnalysis/licm.ll
Expand Up @@ -29,7 +29,7 @@ for.end: ; preds = %for.body, %entry
ret void
}

!0 = !{!"root", null}
!0 = !{!"root"}
!1 = !{!6, !6, i64 0}
!2 = !{!7, !7, i64 0}

Expand Down Expand Up @@ -58,8 +58,9 @@ loop:

!3 = !{!"pointer", !8}
!4 = !{!8, !8, i64 0}
!5 = !{!9, !9, i64 0}
!5 = !{!10, !10, i64 0}
!6 = !{!"pointer", !0}
!7 = !{!"double", !0}
!8 = !{!"char", !9}
!9 = !{!"root", null}
!9 = !{!"root"}
!10 = !{!"scalar-type", !9}

0 comments on commit 3336f68

Please sign in to comment.