Skip to content

Commit

Permalink
feat(cxx_indexer): support kTakeAlias semantics in simple alias exprs (
Browse files Browse the repository at this point in the history
…#5541)

* feat(cxx_indexer): support kTakeAlias semantics in simple alias exprs

This PR supports marking writes to decls marked with kTakeAlias
semantics (that may have indirect targets, as in proto fields).
Materially, this means that in:

`*m->mutable_foo() = 4;`

`mutable_foo` will ref/write the proto field `foo` directly.
In contrast, `m->mutable_foo()` on its own (e.g., not as the
head of an lexpression) will only be marked as a simple ref
of `foo`.

Handling decls with marked semantics and exprs with marked
semantics appears to be converging somewhat and could do with
a little refactoring. Part of the trouble is that different
information about the context is available at different times
(like when traversing through assignment versus visiting a
declref at a leaf). One wonders how much longer we have until
we end up with more than one and a half phases (maybe once
templates are overhauled?)

* fix: switch to structs with named members
  • Loading branch information
zrlk committed Mar 16, 2023
1 parent b056148 commit 5719e42
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 63 deletions.
3 changes: 1 addition & 2 deletions kythe/cxx/common/protobuf_metadata_file.cc
Expand Up @@ -100,8 +100,7 @@ std::unique_ptr<kythe::MetadataFile> ProtobufMetadataSupport::ParseFile(
absl::StartsWith(token, "release_")) {
rule.semantic = kythe::MetadataFile::Semantic::kWrite;
} else if (absl::StartsWith(token, "mutable_")) {
// TODO(zarko): kTakeAlias in a PR after #5538
rule.semantic = kythe::MetadataFile::Semantic::kReadWrite;
rule.semantic = kythe::MetadataFile::Semantic::kTakeAlias;
}
}
rules.push_back(rule);
Expand Down
94 changes: 60 additions & 34 deletions kythe/cxx/indexer/cxx/IndexerASTHooks.cc
Expand Up @@ -571,7 +571,7 @@ bool IndexerASTVisitor::declDominatesPrunableSubtree(const clang::Decl* Decl) {
const clang::Decl* IndexerASTVisitor::GetInfluencedDeclFromLValueHead(
const clang::Expr* head) {
if (head == nullptr) return nullptr;
head = SkipTrivialAliasing(head);
head = SkipTrivialAliasing(head).head;
if (auto* expr = llvm::dyn_cast_or_null<clang::DeclRefExpr>(head);
expr != nullptr && expr->getFoundDecl() != nullptr &&
(expr->getFoundDecl()->getKind() == clang::Decl::Kind::Var ||
Expand All @@ -587,10 +587,10 @@ const clang::Decl* IndexerASTVisitor::GetInfluencedDeclFromLValueHead(
return nullptr;
}

const clang::Expr* IndexerASTVisitor::SkipTrivialAliasing(
IndexerASTVisitor::TargetExpr IndexerASTVisitor::SkipTrivialAliasing(
const clang::Expr* expr) {
// TODO(zarko): calls with alternate semantics
if (expr == nullptr) return nullptr;
if (expr == nullptr) return {nullptr, std::nullopt};
if (const auto* star =
llvm::dyn_cast_or_null<clang::UnaryOperator>(expr->IgnoreParens());
star != nullptr && star->getOpcode() == clang::UO_Deref &&
Expand All @@ -603,11 +603,22 @@ const clang::Expr* IndexerASTVisitor::SkipTrivialAliasing(
// star := *&var
const auto* var = amp->getSubExpr()->IgnoreParens();
if (const auto* dre = llvm::dyn_cast_or_null<clang::DeclRefExpr>(var)) {
return dre;
return {dre, std::nullopt};
}
}
if (const auto* call = llvm::dyn_cast_or_null<clang::CallExpr>(body);
call != nullptr && call->getCallee() != nullptr &&
call->getCalleeDecl() != nullptr) {
// star := *foo() || *x->foo()
// TODO(zarko): here and above: trace down deref/call chains?
// we don't currently handle x.y.z->mutable_foo()
if (const auto* as = AlternateSemanticForDecl(call->getCalleeDecl());
as != nullptr && as->use_kind == GraphObserver::UseKind::kTakeAlias) {
return {call->getCallee(), as->node};
}
}
}
return expr;
return {expr, std::nullopt};
}

bool IndexerASTVisitor::IsDefinition(const clang::VarDecl* VD) {
Expand Down Expand Up @@ -1435,9 +1446,11 @@ bool IndexerASTVisitor::VisitMemberExpr(const clang::MemberExpr* E) {
auto StmtId = BuildNodeIdForImplicitStmt(E);
if (auto RCC = RangeInCurrentContext(StmtId, Range)) {
RecordBlame(FieldDecl, *RCC);
auto [use_kind, target] =
UseKindFor(E, BuildNodeIdForRefToDecl(FieldDecl));
Observer.recordSemanticDeclUseLocation(
*RCC, BuildNodeIdForRefToDecl(FieldDecl), UseKindFor(E),
GraphObserver::Claimability::Unclaimable, IsImplicit(*RCC));
*RCC, target, use_kind, GraphObserver::Claimability::Unclaimable,
IsImplicit(*RCC));
if (E->hasExplicitTemplateArgs()) {
// We still want to link the template args.
BuildTemplateArgumentList(E->template_arguments());
Expand Down Expand Up @@ -1713,25 +1726,26 @@ bool IndexerASTVisitor::TraverseCXXOperatorCallExpr(
auto arg_end = E->arg_end();
if (arg_begin != arg_end && *arg_begin != nullptr) {
// `this` is the first argument in the case of a member function.
auto* lvhead = FindLValueHead(*arg_begin);
if (E->getOperator() == clang::OO_Equal) {
UsedAsWrite(lvhead);
} else {
UsedAsReadWrite(lvhead);
}
auto* raw_lvhead = FindLValueHead(*arg_begin);
auto [lvhead, influenced] = (E->getOperator() == clang::OO_Equal)
? UsedAsWrite(raw_lvhead)
: UsedAsReadWrite(raw_lvhead);
if (!TraverseStmt(*arg_begin)) return false;
auto scope_guard = PushScope(Job->InfluenceSets, {});
for (++arg_begin; arg_begin != arg_end; ++arg_begin) {
if (*arg_begin != nullptr && !TraverseStmt(*arg_begin)) return false;
}
if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) {
if (!influenced) {
if (auto* from_expr = GetInfluencedDeclFromLValueHead(lvhead)) {
influenced = BuildNodeIdForDecl(from_expr);
}
}
if (influenced) {
for (const auto* decl : Job->InfluenceSets.back()) {
Observer.recordInfluences(BuildNodeIdForDecl(decl),
BuildNodeIdForDecl(influenced));
Observer.recordInfluences(BuildNodeIdForDecl(decl), *influenced);
}
if (E->getOperator() != clang::OO_Equal) {
Observer.recordInfluences(BuildNodeIdForDecl(influenced),
BuildNodeIdForDecl(influenced));
Observer.recordInfluences(*influenced, *influenced);
}
}
return true;
Expand Down Expand Up @@ -2354,16 +2368,20 @@ bool IndexerASTVisitor::TraverseBinaryOperator(clang::BinaryOperator* BO) {
if (auto rhs = BO->getRHS(), lhs = BO->getLHS();
lhs != nullptr && rhs != nullptr) {
if (!WalkUpFromBinaryOperator(BO)) return false;
auto* lvhead = UsedAsWrite(FindLValueHead(lhs));
auto [lvhead, influenced] = UsedAsWrite(FindLValueHead(lhs));
if (!TraverseStmt(lhs)) return false;
auto scope_guard = PushScope(Job->InfluenceSets, {});
if (!TraverseStmt(rhs)) {
return false;
}
if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) {
if (!influenced) {
if (auto* from_expr = GetInfluencedDeclFromLValueHead(lvhead)) {
influenced = BuildNodeIdForDecl(from_expr);
}
}
if (influenced) {
for (const auto* decl : Job->InfluenceSets.back()) {
Observer.recordInfluences(BuildNodeIdForDecl(decl),
BuildNodeIdForDecl(influenced));
Observer.recordInfluences(BuildNodeIdForDecl(decl), *influenced);
}
}
return true;
Expand All @@ -2379,19 +2397,22 @@ bool IndexerASTVisitor::TraverseCompoundAssignOperator(
if (auto rhs = CAO->getRHS(), lhs = CAO->getLHS();
lhs != nullptr && rhs != nullptr) {
if (!WalkUpFromCompoundAssignOperator(CAO)) return false;
auto* lvhead = UsedAsReadWrite(FindLValueHead(lhs));
auto [lvhead, influenced] = UsedAsReadWrite(FindLValueHead(lhs));
if (!TraverseStmt(lhs)) return false;
auto scope_guard = PushScope(Job->InfluenceSets, {});
if (!TraverseStmt(rhs)) {
return false;
}
if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) {
if (!influenced) {
if (auto* from_expr = GetInfluencedDeclFromLValueHead(lvhead)) {
influenced = BuildNodeIdForDecl(from_expr);
}
}
if (influenced) {
for (const auto* decl : Job->InfluenceSets.back()) {
Observer.recordInfluences(BuildNodeIdForDecl(decl),
BuildNodeIdForDecl(influenced));
Observer.recordInfluences(BuildNodeIdForDecl(decl), *influenced);
}
Observer.recordInfluences(BuildNodeIdForDecl(influenced),
BuildNodeIdForDecl(influenced));
Observer.recordInfluences(*influenced, *influenced);
}
return true;
}
Expand All @@ -2410,11 +2431,15 @@ bool IndexerASTVisitor::TraverseUnaryOperator(clang::UnaryOperator* UO) {
}
if (auto* lval = UO->getSubExpr(); lval != nullptr) {
if (!WalkUpFromUnaryOperator(UO)) return false;
auto* lvhead = UsedAsReadWrite(FindLValueHead(lval));
auto [lvhead, influenced] = UsedAsReadWrite(FindLValueHead(lval));
if (!TraverseStmt(lval)) return false;
if (auto* influenced = GetInfluencedDeclFromLValueHead(lvhead)) {
Observer.recordInfluences(BuildNodeIdForDecl(influenced),
BuildNodeIdForDecl(influenced));
if (!influenced) {
if (auto* from_expr = GetInfluencedDeclFromLValueHead(lvhead)) {
influenced = BuildNodeIdForDecl(from_expr);
}
}
if (influenced) {
Observer.recordInfluences(*influenced, *influenced);
}
return true;
}
Expand Down Expand Up @@ -2540,9 +2565,10 @@ bool IndexerASTVisitor::VisitDeclRefOrIvarRefExpr(
this->IsImplicit(RCC.value()));
}
}
auto [use_kind, target] = UseKindFor(Expr, DeclId);
Observer.recordSemanticDeclUseLocation(
*RCC, DeclId, UseKindFor(Expr),
GraphObserver::Claimability::Unclaimable, this->IsImplicit(*RCC));
*RCC, target, use_kind, GraphObserver::Claimability::Unclaimable,
this->IsImplicit(*RCC));
for (const auto& S : Supports) {
S->InspectDeclRef(*this, SL, *RCC, DeclId, FoundDecl);
}
Expand Down
92 changes: 66 additions & 26 deletions kythe/cxx/indexer/cxx/IndexerASTHooks.h
Expand Up @@ -745,16 +745,36 @@ class IndexerASTVisitor : public RecursiveTypeVisitor<IndexerASTVisitor> {
/// influence is the member decl for `z`.
const clang::Decl* GetInfluencedDeclFromLValueHead(const clang::Expr* head);

/// \brief Describes a target (possibly sub-)expression resulting from some
/// operation.
///
/// Often parts of an expression are especially interesting, like
/// lvalue heads (`z` in `x.y.z`) or the results of eliminating simple aliases
/// (`x` in `*&x`). The `head` field will contain that subexpression. If
/// the entire expression is relevant, or if no subexpression could be found,
/// `head` will contain the input expression.
///
/// It may also be the case that the indexer has more information about
/// a particular target, e.g. that it should semantically refer to some
/// remote node rather than the one indicated by the (sub)expression. In this
/// case the `alt` field will contain that remote node's ID; this should be
/// preferred to `head`. For example, `alt` will be set to the ID of the
/// `foo` field in `*x->mutable_foo()`.
struct TargetExpr {
const clang::Expr* head; ///< The target (sub)expression.
std::optional<GraphObserver::NodeId> alt; ///< The preferred target.
};

/// \brief Eliminates the trivial introduction of aliasing.
///
/// In some situations (and modulo undefined behavior), the expression
/// `*(&foo)` can be reduced to `foo`. `foo` may be a simple DeclRefExpr, but
/// it could also be a MemberExpr that has alias semantics tied to another
/// field or some piece of generated code.
///
/// \return the head of a trivial alias/dealias operation or `expr` if the
/// operation failed to find one.
const clang::Expr* SkipTrivialAliasing(const clang::Expr* expr);
/// \return a TargetExpr with the alias eliminated, or with the original
/// `expr` set if no alias could be eliminated.
TargetExpr SkipTrivialAliasing(const clang::Expr* expr);

/// \brief The result of calling into the lexer.
enum class LexerResult {
Expand Down Expand Up @@ -998,35 +1018,44 @@ class IndexerASTVisitor : public RecursiveTypeVisitor<IndexerASTVisitor> {
/// \brief Returns whether `Decl` should be indexed.
bool ShouldIndex(const clang::Decl* Decl);

GraphObserver::UseKind UseKindFor(const clang::Expr* expr) {
/// \brief Binds a particular `target` with a semantic `kind` (e.g., this
/// expr is a use that is a write to some node).
struct KindWithTarget {
GraphObserver::UseKind kind; ///< The way `target` is being used.
GraphObserver::NodeId target; ///< The node being used.
};

KindWithTarget UseKindFor(const clang::Expr* expr,
const GraphObserver::NodeId& default_target) {
auto i = use_kinds_.find(expr);
return i == use_kinds_.end() ? GraphObserver::UseKind::kUnknown : i->second;
return i == use_kinds_.end()
? KindWithTarget{GraphObserver::UseKind::kUnknown,
default_target}
: KindWithTarget{i->second.kind, i->second.target
? *i->second.target
: default_target};
}

/// \brief Marks that `expr` was used as a write target.
/// \return `expr` as passed.
const clang::Expr* UsedAsWrite(const clang::Expr* expr) {
if (expr != nullptr)
use_kinds_[SkipTrivialAliasing(expr)] = GraphObserver::UseKind::kWrite;
return expr;
/// \return `expr` as passed, as well as an optional indirect target.
TargetExpr UsedAsWrite(const clang::Expr* expr) {
if (expr != nullptr) {
auto [head, alt] = SkipTrivialAliasing(expr);
use_kinds_[head] = {GraphObserver::UseKind::kWrite, alt};
return {expr, alt};
}
return {expr, std::nullopt};
}

/// \brief Marks that `expr` was used as a read+write target.
/// \return `expr` as passed.
const clang::Expr* UsedAsReadWrite(const clang::Expr* expr) {
if (expr != nullptr)
use_kinds_[SkipTrivialAliasing(expr)] =
GraphObserver::UseKind::kReadWrite;
return expr;
}

/// \brief Marks that `expr` was used as an alias target.
/// \return `expr` as passed.
const clang::Expr* UsedAsAlias(const clang::Expr* expr) {
if (expr != nullptr)
use_kinds_[SkipTrivialAliasing(expr)] =
GraphObserver::UseKind::kTakeAlias;
return expr;
/// \return `expr` as passed, as well as an optional indirect target.
TargetExpr UsedAsReadWrite(const clang::Expr* expr) {
if (expr != nullptr) {
auto [head, alt] = SkipTrivialAliasing(expr);
use_kinds_[head] = {GraphObserver::UseKind::kReadWrite, alt};
return {expr, alt};
}
return {expr, std::nullopt};
}

/// \brief Maps known Decls to their NodeIds.
Expand All @@ -1050,8 +1079,19 @@ class IndexerASTVisitor : public RecursiveTypeVisitor<IndexerASTVisitor> {
/// \brief Comments we've already visited.
std::unordered_set<const clang::RawComment*> VisitedComments;

/// \brief Binds a particular implicit `expr` with a semantic `kind` (e.g.,
/// this expr is a use that is a write to some node). May provide an
/// overriding `target` if there is a more specific node than the one
/// implied by the `expr`.
struct KindWithOptionalTarget {
GraphObserver::UseKind kind; ///< The way `target` is being used.
std::optional<GraphObserver::NodeId>
target; ///< The node being used, if different from
///< the implicit `expr`.
};

/// \brief AST nodes we know are used in specific ways.
absl::flat_hash_map<const clang::Expr*, GraphObserver::UseKind> use_kinds_;
absl::flat_hash_map<const clang::Expr*, KindWithOptionalTarget> use_kinds_;

struct AlternateSemantic {
GraphObserver::UseKind use_kind;
Expand Down
3 changes: 2 additions & 1 deletion kythe/cxx/indexer/cxx/KytheGraphObserver.cc
Expand Up @@ -1119,7 +1119,8 @@ void KytheGraphObserver::recordSemanticDeclUseLocation(
const GraphObserver::Range& source_range, const NodeId& node, UseKind kind,
Claimability claimability, Implicit i) {
if (kind == GraphObserver::UseKind::kUnknown ||
kind == GraphObserver::UseKind::kReadWrite) {
kind == GraphObserver::UseKind::kReadWrite ||
kind == GraphObserver::UseKind::kTakeAlias) {
auto out_kind =
(i == GraphObserver::Implicit::Yes ? EdgeKindID::kRefImplicit
: EdgeKindID::kRef);
Expand Down
3 changes: 3 additions & 0 deletions kythe/cxx/indexer/cxx/testdata/proto/proto_semantic.cc
Expand Up @@ -33,6 +33,9 @@ void fn() {

//- @mutable_nested_message ref/writes NestedMessageField
*msg.mutable_nested_message() = nested;
//- @mutable_nested_message ref NestedMessageField
//- !{ @mutable_nested_message ref/writes NestedMessageField }
msg.mutable_nested_message();
//- @nested_message ref CxxGetNestedMessageField
msg.nested_message();

Expand Down

0 comments on commit 5719e42

Please sign in to comment.