From 9035307bd5090505e8a6aecc527abf5baeebf870 Mon Sep 17 00:00:00 2001 From: zrlk Date: Tue, 4 May 2021 21:36:48 -0700 Subject: [PATCH] fix(cxx_indexer): refactor lvalue handling for dataflow (#4907) * fix(cxx_indexer): refactor lvalue handling for dataflow Tag particular AST nodes as writes. This is cleaner than the previous approach, which tries to scramble up the tree to figure out if a node is in a write context. This PR also unifies lvalue interpretation and influence target identification. We can probably turn dataflow edges on by default, but that will be a separate PR. --- kythe/cxx/indexer/cxx/IndexerASTHooks.cc | 31 ++++------------- kythe/cxx/indexer/cxx/IndexerASTHooks.h | 15 ++++++++ kythe/cxx/indexer/cxx/clang_utils.cc | 44 +++++++++++++----------- kythe/cxx/indexer/cxx/clang_utils.h | 13 ++++--- kythe/cxx/indexer/cxx/testdata/BUILD | 8 +++++ 5 files changed, 61 insertions(+), 50 deletions(-) diff --git a/kythe/cxx/indexer/cxx/IndexerASTHooks.cc b/kythe/cxx/indexer/cxx/IndexerASTHooks.cc index abd9ed1a6f..f4708d96d7 100644 --- a/kythe/cxx/indexer/cxx/IndexerASTHooks.cc +++ b/kythe/cxx/indexer/cxx/IndexerASTHooks.cc @@ -328,23 +328,6 @@ clang::InitListExpr* GetSyntacticForm(clang::InitListExpr* ILE) { const clang::InitListExpr* GetSyntacticForm(const clang::InitListExpr* ILE) { return (ILE->isSyntacticForm() ? ILE : ILE->getSyntacticForm()); } - -clang::Decl* GetInfluencedDeclFromLExpression(clang::Expr* lhs) { - if (auto* expr = llvm::dyn_cast_or_null(lhs); - expr != nullptr && expr->getFoundDecl() != nullptr && - (expr->getFoundDecl()->getKind() == clang::Decl::Kind::Var || - expr->getFoundDecl()->getKind() == clang::Decl::Kind::ParmVar)) { - return expr->getFoundDecl(); - } - if (auto* expr = llvm::dyn_cast_or_null(lhs); - expr != nullptr) { - if (auto* member = expr->getMemberDecl(); member != nullptr) { - return member; - } - } - return nullptr; -} - } // anonymous namespace bool IsClaimableForTraverse(const clang::Decl* decl) { @@ -1347,9 +1330,8 @@ bool IndexerASTVisitor::VisitMemberExpr(const clang::MemberExpr* E) { auto StmtId = BuildNodeIdForImplicitStmt(E); if (auto RCC = RangeInCurrentContext(StmtId, Range)) { RecordBlame(FieldDecl, *RCC); - auto semantic = IsUsedAsWrite(*getAllParents(), E) - ? GraphObserver::UseKind::kWrite - : GraphObserver::UseKind::kUnknown; + auto semantic = IsUsedAsWrite(E) ? GraphObserver::UseKind::kWrite + : GraphObserver::UseKind::kUnknown; Observer.recordSemanticDeclUseLocation( *RCC, BuildNodeIdForRefToDecl(FieldDecl), semantic, GraphObserver::Claimability::Unclaimable, IsImplicit(*RCC)); @@ -2167,16 +2149,16 @@ bool IndexerASTVisitor::TraverseBinaryOperator(clang::BinaryOperator* BO) { } if (BO->getOpcode() != clang::BO_Assign) return Base::TraverseBinaryOperator(BO); - if (auto rhs = BO->getRHS(), lhs = BO->getLHS(); lhs != nullptr && rhs != nullptr) { if (!WalkUpFromBinaryOperator(BO)) return false; + auto* lvhead = UsedAsWrite(FindLValueHead(lhs)); if (!TraverseStmt(lhs)) return false; auto scope_guard = PushScope(Job->InfluenceSets, {}); if (!TraverseStmt(rhs)) { return false; } - if (auto* influenced = GetInfluencedDeclFromLExpression(lhs)) { + if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) { for (const auto* decl : Job->InfluenceSets.back()) { Observer.recordInfluences(BuildNodeIdForDecl(decl), BuildNodeIdForDecl(influenced)); @@ -2285,9 +2267,8 @@ bool IndexerASTVisitor::VisitDeclRefOrIvarRefExpr( if (auto RCC = RangeInCurrentContext(StmtId, Range)) { GraphObserver::NodeId DeclId = BuildNodeIdForRefToDecl(FoundDecl); RecordBlame(FoundDecl, *RCC); - auto semantic = IsUsedAsWrite(*getAllParents(), Expr) - ? GraphObserver::UseKind::kWrite - : GraphObserver::UseKind::kUnknown; + auto semantic = IsUsedAsWrite(Expr) ? GraphObserver::UseKind::kWrite + : GraphObserver::UseKind::kUnknown; if (DataflowEdges) { if (!Job->InfluenceSets.empty() && (FoundDecl->getKind() == clang::Decl::Kind::Var || diff --git a/kythe/cxx/indexer/cxx/IndexerASTHooks.h b/kythe/cxx/indexer/cxx/IndexerASTHooks.h index f29a77aa8a..c14370e886 100644 --- a/kythe/cxx/indexer/cxx/IndexerASTHooks.h +++ b/kythe/cxx/indexer/cxx/IndexerASTHooks.h @@ -969,6 +969,18 @@ class IndexerASTVisitor : public RecursiveTypeVisitor { /// \brief Returns whether `Decl` should be indexed. bool ShouldIndex(const clang::Decl* Decl); + /// \brief Returns whether `stmt` is used as a write target. + bool IsUsedAsWrite(const clang::Stmt* stmt) { + return is_used_as_write_.find(stmt) != is_used_as_write_.end(); + } + + /// \brief Marks that `stmt` was used as a write target. + /// \return `stmt` as passed. + const clang::Stmt* UsedAsWrite(const clang::Stmt* stmt) { + if (stmt != nullptr) is_used_as_write_.insert(stmt); + return stmt; + } + /// \brief Maps known Decls to their NodeIds. llvm::DenseMap DeclToNodeId; @@ -1011,6 +1023,9 @@ class IndexerASTVisitor : public RecursiveTypeVisitor { /// \brief if nonempty, the pattern to match a path against to see whether /// it should be excluded from template instance indexing. std::shared_ptr TemplateInstanceExcludePathPattern = nullptr; + + /// \brief AST nodes we know are used in a write context. + absl::flat_hash_set is_used_as_write_; }; /// \brief An `ASTConsumer` that passes events to a `GraphObserver`. diff --git a/kythe/cxx/indexer/cxx/clang_utils.cc b/kythe/cxx/indexer/cxx/clang_utils.cc index ca1351c7ed..ad56d9a0e4 100644 --- a/kythe/cxx/indexer/cxx/clang_utils.cc +++ b/kythe/cxx/indexer/cxx/clang_utils.cc @@ -96,30 +96,32 @@ bool ShouldHaveBlameContext(const clang::Decl* decl) { } } -bool IsUsedAsWrite(const IndexedParentMap& map, const clang::Stmt* stmt) { - // TODO(zarko): Improve coverage (or get rid of this entirely and maintain - // traversal state in the AST walker; this would be more of a maintenance - // and correctness burden, but may be required for richer representations.) - if (stmt == nullptr) return false; - const auto* indexed_parent = map.GetIndexedParent(*stmt); - if (indexed_parent == nullptr) return false; - const auto* parent_stmt = indexed_parent->parent.get(); - while (llvm::isa_and_nonnull(parent_stmt)) { - indexed_parent = map.GetIndexedParent(*parent_stmt); - if (indexed_parent == nullptr) return false; - parent_stmt = indexed_parent->parent.get(); +const clang::Stmt* FindLValueHead(const clang::Stmt* stmt) { + if (stmt == nullptr) return nullptr; + switch (stmt->getStmtClass()) { + case clang::Stmt::StmtClass::DeclRefExprClass: + case clang::Stmt::StmtClass::ObjCIvarRefExprClass: + case clang::Stmt::StmtClass::MemberExprClass: + return stmt; + default: + return nullptr; } - if (parent_stmt == nullptr) return false; +} - switch (parent_stmt->getStmtClass()) { - case clang::Stmt::StmtClass::BinaryOperatorClass: { - const auto* binop = clang::dyn_cast(parent_stmt); - if (binop == nullptr) return false; - return binop->getOpcode() == clang::BinaryOperator::Opcode::BO_Assign && - binop->getLHS() == stmt; +const clang::Decl* GetInfluencedDeclFromLValueHead(const clang::Stmt* head) { + if (head == nullptr) return nullptr; + if (auto* expr = llvm::dyn_cast_or_null(head); + expr != nullptr && expr->getFoundDecl() != nullptr && + (expr->getFoundDecl()->getKind() == clang::Decl::Kind::Var || + expr->getFoundDecl()->getKind() == clang::Decl::Kind::ParmVar)) { + return expr->getFoundDecl(); + } + if (auto* expr = llvm::dyn_cast_or_null(head); + expr != nullptr) { + if (auto* member = expr->getMemberDecl(); member != nullptr) { + return member; } - default: - return false; } + return nullptr; } } // namespace kythe diff --git a/kythe/cxx/indexer/cxx/clang_utils.h b/kythe/cxx/indexer/cxx/clang_utils.h index f727f33ea3..46161fccec 100644 --- a/kythe/cxx/indexer/cxx/clang_utils.h +++ b/kythe/cxx/indexer/cxx/clang_utils.h @@ -35,10 +35,15 @@ const clang::Decl* FindSpecializedTemplate(const clang::Decl* decl); /// \return true if a reference to `decl` should be given blame context. bool ShouldHaveBlameContext(const clang::Decl* decl); -/// \return true if `stmt` is being used in a write position according to -/// `map`. -bool IsUsedAsWrite(const IndexedParentMap& map, const clang::Stmt* stmt); - +/// \return the `Stmt` that is at the lvalue head position of `stmt`, or +/// null otherwise. For example, in `foo[x].bar(y).z`, the member expression +/// for `z` is in the root position. +const clang::Stmt* FindLValueHead(const clang::Stmt* stmt); + +/// \return the `Decl` that is the target of influence by an lexpression with +/// head `head`, or null. For example, in `foo[x].bar(y).z`, the target of +/// influence is the member decl for `z`. +const clang::Decl* GetInfluencedDeclFromLValueHead(const clang::Stmt* head); } // namespace kythe #endif // KYTHE_CXX_INDEXER_CXX_CLANG_UTILS_H_ diff --git a/kythe/cxx/indexer/cxx/testdata/BUILD b/kythe/cxx/indexer/cxx/testdata/BUILD index 502fee2ded..2dc486dd8d 100644 --- a/kythe/cxx/indexer/cxx/testdata/BUILD +++ b/kythe/cxx/indexer/cxx/testdata/BUILD @@ -1828,6 +1828,7 @@ cc_indexer_test( name = "rec_class_member_ptr", srcs = ["rec/rec_class_member_ptr.cc"], check_for_singletons = True, + experimental_record_dataflow_edges = True, tags = ["rec"], ) @@ -2844,6 +2845,7 @@ objc_indexer_test( objc_indexer_test( name = "extension_property", srcs = ["objc/categories/extension_property.m"], + experimental_record_dataflow_edges = True, ignore_dups = True, tags = ["objc"], deps = ["objc/categories/extension_property.h"], @@ -3051,6 +3053,7 @@ objc_indexer_test( objc_indexer_test( name = "ivar_decl", srcs = ["objc/ivar_decl.m"], + experimental_record_dataflow_edges = True, tags = ["objc"], ) @@ -3144,6 +3147,7 @@ objc_indexer_test( objc_indexer_test( name = "property_decl_defn_synth", srcs = ["objc/property_decl_defn_synth.m"], + experimental_record_dataflow_edges = True, ignore_dups = True, tags = ["objc"], ) @@ -3244,6 +3248,7 @@ test_suite( cc_indexer_test( name = "df_field_ref_blame", srcs = ["df/df_field_ref_blame.cc"], + experimental_record_dataflow_edges = True, ignore_dups = True, tags = ["df"], ) @@ -3251,6 +3256,7 @@ cc_indexer_test( cc_indexer_test( name = "df_field_rw", srcs = ["df/df_field_rw.cc"], + experimental_record_dataflow_edges = True, tags = ["df"], ) @@ -3297,6 +3303,7 @@ cc_indexer_test( cc_indexer_test( name = "df_var_ref_blame", srcs = ["df/df_var_ref_blame.cc"], + experimental_record_dataflow_edges = True, tags = ["df"], ) @@ -3309,6 +3316,7 @@ cc_indexer_test( cc_indexer_test( name = "df_var_rw", srcs = ["df/df_var_rw.cc"], + experimental_record_dataflow_edges = True, tags = ["df"], )