From 015b059569eef883e9313035cd98c72f947477ab Mon Sep 17 00:00:00 2001 From: Kristof Umann Date: Mon, 13 Aug 2018 18:43:08 +0000 Subject: [PATCH] [analyzer][UninitializedObjectChecker] Refactoring p4.: Wrap FieldRegions and reduce weight on FieldChainInfo Before this patch, FieldChainInfo used a spaghetti: it took care of way too many cases, even though it was always meant as a lightweight wrapper around ImmutableList. This problem is solved by introducing a lightweight polymorphic wrapper around const FieldRegion *, FieldNode. It is an interface that abstracts away special cases like pointers/references, objects that need to be casted to another type for a proper note messages. Changes to FieldChainInfo: * Now wraps ImmutableList. * Any pointer/reference related fields and methods were removed * Got a new add method. This replaces it's former constructors as a way to create a new FieldChainInfo objects with a new element. Changes to FindUninitializedField: * In order not to deal with dynamic memory management, when an uninitialized field is found, the note message for it is constructed and is stored instead of a FieldChainInfo object. (see doc around addFieldToUninits). Some of the test files are changed too, from now on uninitialized pointees of references always print "uninitialized pointee" instead of "uninitialized field" (which should've really been like this from the beginning). I also updated every comment according to these changes. Differential Revision: https://reviews.llvm.org/D50506 llvm-svn: 339599 --- .../UninitializedObject/UninitializedObject.h | 141 ++++++++++++------ .../UninitializedObjectChecker.cpp | 130 ++++++++-------- .../UninitializedPointee.cpp | 43 +++++- .../Analysis/cxx-uninitialized-object.cpp | 8 +- .../Analysis/objcpp-uninitialized-object.mm | 2 +- 5 files changed, 206 insertions(+), 118 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h index 15faf4bfa5474..095a1e607d808 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h @@ -26,58 +26,85 @@ namespace clang { namespace ento { +/// Represent a single field. This is only an interface to abstract away special +/// cases like pointers/references. +class FieldNode { +protected: + const FieldRegion *FR; + +public: + FieldNode(const FieldRegion *FR) : FR(FR) { assert(FR); } + + FieldNode() = delete; + FieldNode(const FieldNode &) = delete; + FieldNode(FieldNode &&) = delete; + FieldNode &operator=(const FieldNode &) = delete; + FieldNode &operator=(const FieldNode &&) = delete; + + /// Profile - Used to profile the contents of this object for inclusion in a + /// FoldingSet. + void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddPointer(this); } + + bool operator<(const FieldNode &Other) const { return FR < Other.FR; } + bool isSameRegion(const FieldRegion *OtherFR) const { return FR == OtherFR; } + + const FieldRegion *getRegion() const { return FR; } + const FieldDecl *getDecl() const { return FR->getDecl(); } + + // When a fieldchain is printed (a list of FieldNode objects), it will have + // the following format: + // 'this->...' + + /// If this is the last element of the fieldchain, this method will be called. + /// The note message should state something like "uninitialized field" or + /// "uninitialized pointee" etc. + virtual void printNoteMsg(llvm::raw_ostream &Out) const = 0; + + /// Print any prefixes before the fieldchain. + virtual void printPrefix(llvm::raw_ostream &Out) const = 0; + + /// Print the node. Should contain the name of the field stored in getRegion. + virtual void printNode(llvm::raw_ostream &Out) const = 0; + + /// Print the separator. For example, fields may be separated with '.' or + /// "->". + virtual void printSeparator(llvm::raw_ostream &Out) const = 0; +}; + +/// Returns with Field's name. This is a helper function to get the correct name +/// even if Field is a captured lambda variable. +StringRef getVariableName(const FieldDecl *Field); + /// Represents a field chain. A field chain is a vector of fields where the /// first element of the chain is the object under checking (not stored), and /// every other element is a field, and the element that precedes it is the /// object that contains it. /// -/// Note that this class is immutable, and new fields may only be added through -/// constructor calls. +/// Note that this class is immutable (essentially a wrapper around an +/// ImmutableList), and new elements can only be added by creating new +/// FieldChainInfo objects through add(). class FieldChainInfo { public: - using FieldChainImpl = llvm::ImmutableListImpl; - using FieldChain = llvm::ImmutableList; + using FieldChainImpl = llvm::ImmutableListImpl; + using FieldChain = llvm::ImmutableList; private: - FieldChain::Factory &Factory; + FieldChain::Factory &ChainFactory; FieldChain Chain; - const bool IsDereferenced = false; - public: FieldChainInfo() = delete; - FieldChainInfo(FieldChain::Factory &F) : Factory(F) {} + FieldChainInfo(FieldChain::Factory &F) : ChainFactory(F) {} + FieldChainInfo(const FieldChainInfo &Other) = default; - FieldChainInfo(const FieldChainInfo &Other, const bool IsDereferenced) - : Factory(Other.Factory), Chain(Other.Chain), - IsDereferenced(IsDereferenced) {} + template FieldChainInfo add(const FieldNodeT &FN); - FieldChainInfo(const FieldChainInfo &Other, const FieldRegion *FR, - const bool IsDereferenced = false); - - bool contains(const FieldRegion *FR) const { return Chain.contains(FR); } - bool isPointer() const; - - /// If this is a fieldchain whose last element is an uninitialized region of a - /// pointer type, `IsDereferenced` will store whether the pointer itself or - /// the pointee is uninitialized. - bool isDereferenced() const; - const FieldDecl *getEndOfChain() const; - void print(llvm::raw_ostream &Out) const; - -private: - friend struct FieldChainInfoComparator; + bool contains(const FieldRegion *FR) const; + const FieldRegion *getUninitRegion() const; + void printNoteMsg(llvm::raw_ostream &Out) const; }; -struct FieldChainInfoComparator { - bool operator()(const FieldChainInfo &lhs, const FieldChainInfo &rhs) const { - assert(!lhs.Chain.isEmpty() && !rhs.Chain.isEmpty() && - "Attempted to store an empty fieldchain!"); - return *lhs.Chain.begin() < *rhs.Chain.begin(); - } -}; - -using UninitFieldSet = std::set; +using UninitFieldMap = std::map>; /// Searches for and stores uninitialized fields in a non-union object. class FindUninitializedFields { @@ -89,20 +116,27 @@ class FindUninitializedFields { bool IsAnyFieldInitialized = false; - FieldChainInfo::FieldChain::Factory Factory; - UninitFieldSet UninitFields; + FieldChainInfo::FieldChain::Factory ChainFactory; + + /// A map for assigning uninitialized regions to note messages. For example, + /// + /// struct A { + /// int x; + /// }; + /// + /// A a; + /// + /// After analyzing `a`, the map will contain a pair for `a.x`'s region and + /// the note message "uninitialized field 'this->x'. + UninitFieldMap UninitFields; public: FindUninitializedFields(ProgramStateRef State, const TypedValueRegion *const R, bool IsPedantic, bool CheckPointeeInitialization); - const UninitFieldSet &getUninitFields(); + const UninitFieldMap &getUninitFields(); private: - /// Adds a FieldChainInfo object to UninitFields. Return true if an insertion - /// took place. - bool addFieldToUninits(FieldChainInfo LocalChain); - // For the purposes of this checker, we'll regard the object under checking as // a directed tree, where // * the root is the object under checking @@ -175,6 +209,16 @@ class FindUninitializedFields { // often left uninitialized intentionally even when it is of a C++ record // type, so we'll assume that an array is always initialized. // TODO: Add a support for nonloc::LocAsInteger. + + /// Processes LocalChain and attempts to insert it into UninitFields. Returns + /// true on success. + /// + /// Since this class analyzes regions with recursion, we'll only store + /// references to temporary FieldNode objects created on the stack. This means + /// that after analyzing a leaf of the directed tree described above, the + /// elements LocalChain references will be destructed, so we can't store it + /// directly. + bool addFieldToUninits(FieldChainInfo LocalChain); }; /// Returns true if T is a primitive type. We defined this type so that for @@ -186,6 +230,19 @@ static bool isPrimitiveType(const QualType &T) { return T->isBuiltinType() || T->isEnumeralType() || T->isMemberPointerType(); } +// Template method definitions. + +template +inline FieldChainInfo FieldChainInfo::add(const FieldNodeT &FN) { + assert(!contains(FN.getRegion()) && + "Can't add a field that is already a part of the " + "fieldchain! Is this a cyclic reference?"); + + FieldChainInfo NewChain = *this; + NewChain.Chain = ChainFactory.add(FN, Chain); + return NewChain; +} + } // end of namespace ento } // end of namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp index ad1a89e71bfb1..c2f4b053fdd82 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp @@ -72,6 +72,27 @@ class UninitializedObjectChecker : public Checker { void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; }; +/// A basic field type, that is not a pointer or a reference, it's dynamic and +/// static type is the same. +class RegularField : public FieldNode { +public: + RegularField(const FieldRegion *FR) : FieldNode(FR) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + Out << "uninitialized field "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const { + Out << getVariableName(getDecl()); + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + Out << '.'; + } +}; + } // end of anonymous namespace // Utility function declarations. @@ -89,14 +110,6 @@ getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context); static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor, CheckerContext &Context); -/// Constructs a note message for a given FieldChainInfo object. -static void printNoteMessage(llvm::raw_ostream &Out, - const FieldChainInfo &Chain); - -/// Returns with Field's name. This is a helper function to get the correct name -/// even if Field is a captured lambda variable. -static StringRef getVariableName(const FieldDecl *Field); - //===----------------------------------------------------------------------===// // Methods for UninitializedObjectChecker. //===----------------------------------------------------------------------===// @@ -126,7 +139,7 @@ void UninitializedObjectChecker::checkEndFunction( FindUninitializedFields F(Context.getState(), Object->getRegion(), IsPedantic, CheckPointeeInitialization); - const UninitFieldSet &UninitFields = F.getUninitFields(); + const UninitFieldMap &UninitFields = F.getUninitFields(); if (UninitFields.empty()) return; @@ -146,14 +159,10 @@ void UninitializedObjectChecker::checkEndFunction( // For Plist consumers that don't support notes just yet, we'll convert notes // to warnings. if (ShouldConvertNotesToWarnings) { - for (const auto &Chain : UninitFields) { - SmallString<100> WarningBuf; - llvm::raw_svector_ostream WarningOS(WarningBuf); - - printNoteMessage(WarningOS, Chain); + for (const auto &Pair : UninitFields) { auto Report = llvm::make_unique( - *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing, + *BT_uninitField, Pair.second, Node, LocUsedForUniqueing, Node->getLocationContext()->getDecl()); Context.emitReport(std::move(Report)); } @@ -170,14 +179,9 @@ void UninitializedObjectChecker::checkEndFunction( *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing, Node->getLocationContext()->getDecl()); - for (const auto &Chain : UninitFields) { - SmallString<200> NoteBuf; - llvm::raw_svector_ostream NoteOS(NoteBuf); - - printNoteMessage(NoteOS, Chain); - - Report->addNote(NoteOS.str(), - PathDiagnosticLocation::create(Chain.getEndOfChain(), + for (const auto &Pair : UninitFields) { + Report->addNote(Pair.second, + PathDiagnosticLocation::create(Pair.first->getDecl(), Context.getSourceManager())); } Context.emitReport(std::move(Report)); @@ -193,8 +197,8 @@ FindUninitializedFields::FindUninitializedFields( : State(State), ObjectR(R), IsPedantic(IsPedantic), CheckPointeeInitialization(CheckPointeeInitialization) {} -const UninitFieldSet &FindUninitializedFields::getUninitFields() { - isNonUnionUninit(ObjectR, FieldChainInfo(Factory)); +const UninitFieldMap &FindUninitializedFields::getUninitFields() { + isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory)); if (!IsPedantic && !IsAnyFieldInitialized) UninitFields.clear(); @@ -204,10 +208,15 @@ const UninitFieldSet &FindUninitializedFields::getUninitFields() { bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain) { if (State->getStateManager().getContext().getSourceManager().isInSystemHeader( - Chain.getEndOfChain()->getLocation())) + Chain.getUninitRegion()->getDecl()->getLocation())) return false; - return UninitFields.insert(Chain).second; + UninitFieldMap::mapped_type NoteMsgBuf; + llvm::raw_svector_ostream OS(NoteMsgBuf); + Chain.printNoteMsg(OS); + return UninitFields + .insert(std::make_pair(Chain.getUninitRegion(), std::move(NoteMsgBuf))) + .second; } bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R, @@ -237,14 +246,14 @@ bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R, return false; if (T->isStructureOrClassType()) { - if (isNonUnionUninit(FR, {LocalChain, FR})) + if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR)))) ContainsUninitField = true; continue; } if (T->isUnionType()) { if (isUnionUninit(FR)) { - if (addFieldToUninits({LocalChain, FR})) + if (addFieldToUninits(LocalChain.add(RegularField(FR)))) ContainsUninitField = true; } else IsAnyFieldInitialized = true; @@ -266,7 +275,7 @@ bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R, SVal V = State->getSVal(FieldVal); if (isPrimitiveUninit(V)) { - if (addFieldToUninits({LocalChain, FR})) + if (addFieldToUninits(LocalChain.add(RegularField(FR)))) ContainsUninitField = true; } continue; @@ -323,27 +332,17 @@ bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) { // Methods for FieldChainInfo. //===----------------------------------------------------------------------===// -FieldChainInfo::FieldChainInfo(const FieldChainInfo &Other, - const FieldRegion *FR, const bool IsDereferenced) - : FieldChainInfo(Other, IsDereferenced) { - assert(!contains(FR) && "Can't add a field that is already a part of the " - "fieldchain! Is this a cyclic reference?"); - Chain = Factory.add(FR, Other.Chain); -} - -bool FieldChainInfo::isPointer() const { +const FieldRegion *FieldChainInfo::getUninitRegion() const { assert(!Chain.isEmpty() && "Empty fieldchain!"); - return (*Chain.begin())->getDecl()->getType()->isPointerType(); -} - -bool FieldChainInfo::isDereferenced() const { - assert(isPointer() && "Only pointers may or may not be dereferenced!"); - return IsDereferenced; + return (*Chain.begin()).getRegion(); } -const FieldDecl *FieldChainInfo::getEndOfChain() const { - assert(!Chain.isEmpty() && "Empty fieldchain!"); - return (*Chain.begin())->getDecl(); +bool FieldChainInfo::contains(const FieldRegion *FR) const { + for (const FieldNode &Node : Chain) { + if (Node.isSameRegion(FR)) + return true; + } + return false; } /// Prints every element except the last to `Out`. Since ImmutableLists store @@ -386,13 +385,23 @@ static void printTail(llvm::raw_ostream &Out, // "uninitialized field 'this->x'", but we can't refer to 'x' directly, // we need an explicit namespace resolution whether the uninit field was // 'D1::x' or 'D2::x'. -void FieldChainInfo::print(llvm::raw_ostream &Out) const { +void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const { if (Chain.isEmpty()) return; const FieldChainImpl *L = Chain.getInternalPointer(); + const FieldNode &LastField = L->getHead(); + + LastField.printNoteMsg(Out); + Out << '\''; + + for (const FieldNode &Node : Chain) + Node.printPrefix(Out); + + Out << "this->"; printTail(Out, L->getTail()); - Out << getVariableName(L->getHead()->getDecl()); + LastField.printNode(Out); + Out << '\''; } static void printTail(llvm::raw_ostream &Out, @@ -401,9 +410,9 @@ static void printTail(llvm::raw_ostream &Out, return; printTail(Out, L->getTail()); - const FieldDecl *Field = L->getHead()->getDecl(); - Out << getVariableName(Field); - Out << (Field->getType()->isPointerType() ? "->" : "."); + + L->getHead().printNode(Out); + L->getHead().printSeparator(Out); } //===----------------------------------------------------------------------===// @@ -453,20 +462,7 @@ static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor, return false; } -static void printNoteMessage(llvm::raw_ostream &Out, - const FieldChainInfo &Chain) { - if (Chain.isPointer()) { - if (Chain.isDereferenced()) - Out << "uninitialized pointee 'this->"; - else - Out << "uninitialized pointer 'this->"; - } else - Out << "uninitialized field 'this->"; - Chain.print(Out); - Out << "'"; -} - -static StringRef getVariableName(const FieldDecl *Field) { +StringRef clang::ento::getVariableName(const FieldDecl *Field) { // If Field is a captured lambda variable, Field->getName() will return with // an empty string. We can however acquire it's name from the lambda's // captures. diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp index 90d2ff00510ec..61e96ef9ae7dd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp @@ -28,6 +28,40 @@ using namespace clang; using namespace clang::ento; +namespace { + +/// Represents a pointer or a reference field. +class LocField : public FieldNode { + /// We'll store whether the pointee or the pointer itself is uninitialited. + const bool IsDereferenced; + +public: + LocField(const FieldRegion *FR, const bool IsDereferenced = true) + : FieldNode(FR), IsDereferenced(IsDereferenced) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + if (IsDereferenced) + Out << "uninitialized pointee "; + else + Out << "uninitialized pointer "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << getVariableName(getDecl()); + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + if (getDecl()->getType()->isPointerType()) + Out << "->"; + else + Out << '.'; + } +}; + +} // end of anonymous namespace + // Utility function declarations. /// Returns whether T can be (transitively) dereferenced to a void pointer type @@ -57,7 +91,8 @@ bool FindUninitializedFields::isPointerOrReferenceUninit( } if (V.isUndef()) { - return addFieldToUninits({LocalChain, FR}); + return addFieldToUninits( + LocalChain.add(LocField(FR, /*IsDereferenced*/ false))); } if (!CheckPointeeInitialization) { @@ -126,11 +161,11 @@ bool FindUninitializedFields::isPointerOrReferenceUninit( const TypedValueRegion *R = RecordV->getRegion(); if (DynT->getPointeeType()->isStructureOrClassType()) - return isNonUnionUninit(R, {LocalChain, FR}); + return isNonUnionUninit(R, LocalChain.add(LocField(FR))); if (DynT->getPointeeType()->isUnionType()) { if (isUnionUninit(R)) { - return addFieldToUninits({LocalChain, FR, /*IsDereferenced*/ true}); + return addFieldToUninits(LocalChain.add(LocField(FR))); } else { IsAnyFieldInitialized = true; return false; @@ -151,7 +186,7 @@ bool FindUninitializedFields::isPointerOrReferenceUninit( "must be a null, undefined, unknown or concrete pointer!"); if (isPrimitiveUninit(DerefdV)) - return addFieldToUninits({LocalChain, FR, /*IsDereferenced*/ true}); + return addFieldToUninits(LocalChain.add(LocField(FR))); IsAnyFieldInitialized = true; return false; diff --git a/clang/test/Analysis/cxx-uninitialized-object.cpp b/clang/test/Analysis/cxx-uninitialized-object.cpp index 4fc455fea8a79..e9079d99af3fd 100644 --- a/clang/test/Analysis/cxx-uninitialized-object.cpp +++ b/clang/test/Analysis/cxx-uninitialized-object.cpp @@ -781,7 +781,7 @@ struct LambdaTest2 { void fLambdaTest2() { int b; - auto equals = [&b](int a) { return a == b; }; // expected-note{{uninitialized field 'this->functor.b'}} + auto equals = [&b](int a) { return a == b; }; // expected-note{{uninitialized pointee 'this->functor.b'}} LambdaTest2(equals, int()); } #else @@ -857,8 +857,8 @@ struct MultipleLambdaCapturesTest1 { void fMultipleLambdaCapturesTest1() { int b1, b2 = 3, b3; - auto equals = [&b1, &b2, &b3](int a) { return a == b1 == b2 == b3; }; // expected-note{{uninitialized field 'this->functor.b1'}} - // expected-note@-1{{uninitialized field 'this->functor.b3'}} + auto equals = [&b1, &b2, &b3](int a) { return a == b1 == b2 == b3; }; // expected-note{{uninitialized pointee 'this->functor.b1'}} + // expected-note@-1{{uninitialized pointee 'this->functor.b3'}} MultipleLambdaCapturesTest1(equals, int()); } @@ -872,7 +872,7 @@ struct MultipleLambdaCapturesTest2 { void fMultipleLambdaCapturesTest2() { int b1, b2 = 3, b3; - auto equals = [b1, &b2, &b3](int a) { return a == b1 == b2 == b3; }; // expected-note{{uninitialized field 'this->functor.b3'}} + auto equals = [b1, &b2, &b3](int a) { return a == b1 == b2 == b3; }; // expected-note{{uninitialized pointee 'this->functor.b3'}} MultipleLambdaCapturesTest2(equals, int()); } diff --git a/clang/test/Analysis/objcpp-uninitialized-object.mm b/clang/test/Analysis/objcpp-uninitialized-object.mm index 7f2177e5791ce..3ec1eb75182ef 100644 --- a/clang/test/Analysis/objcpp-uninitialized-object.mm +++ b/clang/test/Analysis/objcpp-uninitialized-object.mm @@ -4,7 +4,7 @@ struct StructWithBlock { int a; - myBlock z; // expected-note{{uninitialized field 'this->z'}} + myBlock z; // expected-note{{uninitialized pointer 'this->z'}} StructWithBlock() : a(0), z(^{}) {}