Skip to content

Commit

Permalink
fix(cxx_indexer): refactor lvalue handling for dataflow (#4907)
Browse files Browse the repository at this point in the history
* fix(cxx_indexer): refactor lvalue handling for dataflow

Tag particular AST nodes as writes. This is cleaner than the previous
approach, which tries to scramble up the tree to figure out if a node
is in a write context. This PR also unifies lvalue interpretation
and influence target identification.

We can probably turn dataflow edges on by default, but that will be
a separate PR.
  • Loading branch information
zrlk committed May 5, 2021
1 parent 795ce80 commit 9035307
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 50 deletions.
31 changes: 6 additions & 25 deletions kythe/cxx/indexer/cxx/IndexerASTHooks.cc
Expand Up @@ -328,23 +328,6 @@ clang::InitListExpr* GetSyntacticForm(clang::InitListExpr* ILE) {
const clang::InitListExpr* GetSyntacticForm(const clang::InitListExpr* ILE) {
return (ILE->isSyntacticForm() ? ILE : ILE->getSyntacticForm());
}

clang::Decl* GetInfluencedDeclFromLExpression(clang::Expr* lhs) {
if (auto* expr = llvm::dyn_cast_or_null<clang::DeclRefExpr>(lhs);
expr != nullptr && expr->getFoundDecl() != nullptr &&
(expr->getFoundDecl()->getKind() == clang::Decl::Kind::Var ||
expr->getFoundDecl()->getKind() == clang::Decl::Kind::ParmVar)) {
return expr->getFoundDecl();
}
if (auto* expr = llvm::dyn_cast_or_null<clang::MemberExpr>(lhs);
expr != nullptr) {
if (auto* member = expr->getMemberDecl(); member != nullptr) {
return member;
}
}
return nullptr;
}

} // anonymous namespace

bool IsClaimableForTraverse(const clang::Decl* decl) {
Expand Down Expand Up @@ -1347,9 +1330,8 @@ bool IndexerASTVisitor::VisitMemberExpr(const clang::MemberExpr* E) {
auto StmtId = BuildNodeIdForImplicitStmt(E);
if (auto RCC = RangeInCurrentContext(StmtId, Range)) {
RecordBlame(FieldDecl, *RCC);
auto semantic = IsUsedAsWrite(*getAllParents(), E)
? GraphObserver::UseKind::kWrite
: GraphObserver::UseKind::kUnknown;
auto semantic = IsUsedAsWrite(E) ? GraphObserver::UseKind::kWrite
: GraphObserver::UseKind::kUnknown;
Observer.recordSemanticDeclUseLocation(
*RCC, BuildNodeIdForRefToDecl(FieldDecl), semantic,
GraphObserver::Claimability::Unclaimable, IsImplicit(*RCC));
Expand Down Expand Up @@ -2167,16 +2149,16 @@ bool IndexerASTVisitor::TraverseBinaryOperator(clang::BinaryOperator* BO) {
}
if (BO->getOpcode() != clang::BO_Assign)
return Base::TraverseBinaryOperator(BO);

if (auto rhs = BO->getRHS(), lhs = BO->getLHS();
lhs != nullptr && rhs != nullptr) {
if (!WalkUpFromBinaryOperator(BO)) return false;
auto* lvhead = UsedAsWrite(FindLValueHead(lhs));
if (!TraverseStmt(lhs)) return false;
auto scope_guard = PushScope(Job->InfluenceSets, {});
if (!TraverseStmt(rhs)) {
return false;
}
if (auto* influenced = GetInfluencedDeclFromLExpression(lhs)) {
if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) {
for (const auto* decl : Job->InfluenceSets.back()) {
Observer.recordInfluences(BuildNodeIdForDecl(decl),
BuildNodeIdForDecl(influenced));
Expand Down Expand Up @@ -2285,9 +2267,8 @@ bool IndexerASTVisitor::VisitDeclRefOrIvarRefExpr(
if (auto RCC = RangeInCurrentContext(StmtId, Range)) {
GraphObserver::NodeId DeclId = BuildNodeIdForRefToDecl(FoundDecl);
RecordBlame(FoundDecl, *RCC);
auto semantic = IsUsedAsWrite(*getAllParents(), Expr)
? GraphObserver::UseKind::kWrite
: GraphObserver::UseKind::kUnknown;
auto semantic = IsUsedAsWrite(Expr) ? GraphObserver::UseKind::kWrite
: GraphObserver::UseKind::kUnknown;
if (DataflowEdges) {
if (!Job->InfluenceSets.empty() &&
(FoundDecl->getKind() == clang::Decl::Kind::Var ||
Expand Down
15 changes: 15 additions & 0 deletions kythe/cxx/indexer/cxx/IndexerASTHooks.h
Expand Up @@ -969,6 +969,18 @@ class IndexerASTVisitor : public RecursiveTypeVisitor<IndexerASTVisitor> {
/// \brief Returns whether `Decl` should be indexed.
bool ShouldIndex(const clang::Decl* Decl);

/// \brief Returns whether `stmt` is used as a write target.
bool IsUsedAsWrite(const clang::Stmt* stmt) {
return is_used_as_write_.find(stmt) != is_used_as_write_.end();
}

/// \brief Marks that `stmt` was used as a write target.
/// \return `stmt` as passed.
const clang::Stmt* UsedAsWrite(const clang::Stmt* stmt) {
if (stmt != nullptr) is_used_as_write_.insert(stmt);
return stmt;
}

/// \brief Maps known Decls to their NodeIds.
llvm::DenseMap<const clang::Decl*, GraphObserver::NodeId> DeclToNodeId;

Expand Down Expand Up @@ -1011,6 +1023,9 @@ class IndexerASTVisitor : public RecursiveTypeVisitor<IndexerASTVisitor> {
/// \brief if nonempty, the pattern to match a path against to see whether
/// it should be excluded from template instance indexing.
std::shared_ptr<re2::RE2> TemplateInstanceExcludePathPattern = nullptr;

/// \brief AST nodes we know are used in a write context.
absl::flat_hash_set<const clang::Stmt*> is_used_as_write_;
};

/// \brief An `ASTConsumer` that passes events to a `GraphObserver`.
Expand Down
44 changes: 23 additions & 21 deletions kythe/cxx/indexer/cxx/clang_utils.cc
Expand Up @@ -96,30 +96,32 @@ bool ShouldHaveBlameContext(const clang::Decl* decl) {
}
}

bool IsUsedAsWrite(const IndexedParentMap& map, const clang::Stmt* stmt) {
// TODO(zarko): Improve coverage (or get rid of this entirely and maintain
// traversal state in the AST walker; this would be more of a maintenance
// and correctness burden, but may be required for richer representations.)
if (stmt == nullptr) return false;
const auto* indexed_parent = map.GetIndexedParent(*stmt);
if (indexed_parent == nullptr) return false;
const auto* parent_stmt = indexed_parent->parent.get<clang::Stmt>();
while (llvm::isa_and_nonnull<clang::MemberExpr>(parent_stmt)) {
indexed_parent = map.GetIndexedParent(*parent_stmt);
if (indexed_parent == nullptr) return false;
parent_stmt = indexed_parent->parent.get<clang::Stmt>();
const clang::Stmt* FindLValueHead(const clang::Stmt* stmt) {
if (stmt == nullptr) return nullptr;
switch (stmt->getStmtClass()) {
case clang::Stmt::StmtClass::DeclRefExprClass:
case clang::Stmt::StmtClass::ObjCIvarRefExprClass:
case clang::Stmt::StmtClass::MemberExprClass:
return stmt;
default:
return nullptr;
}
if (parent_stmt == nullptr) return false;
}

switch (parent_stmt->getStmtClass()) {
case clang::Stmt::StmtClass::BinaryOperatorClass: {
const auto* binop = clang::dyn_cast<clang::BinaryOperator>(parent_stmt);
if (binop == nullptr) return false;
return binop->getOpcode() == clang::BinaryOperator::Opcode::BO_Assign &&
binop->getLHS() == stmt;
const clang::Decl* GetInfluencedDeclFromLValueHead(const clang::Stmt* head) {
if (head == nullptr) return nullptr;
if (auto* expr = llvm::dyn_cast_or_null<clang::DeclRefExpr>(head);
expr != nullptr && expr->getFoundDecl() != nullptr &&
(expr->getFoundDecl()->getKind() == clang::Decl::Kind::Var ||
expr->getFoundDecl()->getKind() == clang::Decl::Kind::ParmVar)) {
return expr->getFoundDecl();
}
if (auto* expr = llvm::dyn_cast_or_null<clang::MemberExpr>(head);
expr != nullptr) {
if (auto* member = expr->getMemberDecl(); member != nullptr) {
return member;
}
default:
return false;
}
return nullptr;
}
} // namespace kythe
13 changes: 9 additions & 4 deletions kythe/cxx/indexer/cxx/clang_utils.h
Expand Up @@ -35,10 +35,15 @@ const clang::Decl* FindSpecializedTemplate(const clang::Decl* decl);
/// \return true if a reference to `decl` should be given blame context.
bool ShouldHaveBlameContext(const clang::Decl* decl);

/// \return true if `stmt` is being used in a write position according to
/// `map`.
bool IsUsedAsWrite(const IndexedParentMap& map, const clang::Stmt* stmt);

/// \return the `Stmt` that is at the lvalue head position of `stmt`, or
/// null otherwise. For example, in `foo[x].bar(y).z`, the member expression
/// for `z` is in the root position.
const clang::Stmt* FindLValueHead(const clang::Stmt* stmt);

/// \return the `Decl` that is the target of influence by an lexpression with
/// head `head`, or null. For example, in `foo[x].bar(y).z`, the target of
/// influence is the member decl for `z`.
const clang::Decl* GetInfluencedDeclFromLValueHead(const clang::Stmt* head);
} // namespace kythe

#endif // KYTHE_CXX_INDEXER_CXX_CLANG_UTILS_H_
8 changes: 8 additions & 0 deletions kythe/cxx/indexer/cxx/testdata/BUILD
Expand Up @@ -1828,6 +1828,7 @@ cc_indexer_test(
name = "rec_class_member_ptr",
srcs = ["rec/rec_class_member_ptr.cc"],
check_for_singletons = True,
experimental_record_dataflow_edges = True,
tags = ["rec"],
)

Expand Down Expand Up @@ -2844,6 +2845,7 @@ objc_indexer_test(
objc_indexer_test(
name = "extension_property",
srcs = ["objc/categories/extension_property.m"],
experimental_record_dataflow_edges = True,
ignore_dups = True,
tags = ["objc"],
deps = ["objc/categories/extension_property.h"],
Expand Down Expand Up @@ -3051,6 +3053,7 @@ objc_indexer_test(
objc_indexer_test(
name = "ivar_decl",
srcs = ["objc/ivar_decl.m"],
experimental_record_dataflow_edges = True,
tags = ["objc"],
)

Expand Down Expand Up @@ -3144,6 +3147,7 @@ objc_indexer_test(
objc_indexer_test(
name = "property_decl_defn_synth",
srcs = ["objc/property_decl_defn_synth.m"],
experimental_record_dataflow_edges = True,
ignore_dups = True,
tags = ["objc"],
)
Expand Down Expand Up @@ -3244,13 +3248,15 @@ test_suite(
cc_indexer_test(
name = "df_field_ref_blame",
srcs = ["df/df_field_ref_blame.cc"],
experimental_record_dataflow_edges = True,
ignore_dups = True,
tags = ["df"],
)

cc_indexer_test(
name = "df_field_rw",
srcs = ["df/df_field_rw.cc"],
experimental_record_dataflow_edges = True,
tags = ["df"],
)

Expand Down Expand Up @@ -3297,6 +3303,7 @@ cc_indexer_test(
cc_indexer_test(
name = "df_var_ref_blame",
srcs = ["df/df_var_ref_blame.cc"],
experimental_record_dataflow_edges = True,
tags = ["df"],
)

Expand All @@ -3309,6 +3316,7 @@ cc_indexer_test(
cc_indexer_test(
name = "df_var_rw",
srcs = ["df/df_var_rw.cc"],
experimental_record_dataflow_edges = True,
tags = ["df"],
)

Expand Down

0 comments on commit 9035307

Please sign in to comment.