Skip to content

Commit

Permalink
[clangd] Filter out private proto symbols in SymbolCollector.
Browse files Browse the repository at this point in the history
Summary:
This uses heuristics to identify private proto symbols. For example,
top-level symbols whose name contains "_" are considered private. These symbols
are not expected to be used by users.

Reviewers: ilya-biryukov, malaperle

Reviewed By: ilya-biryukov

Subscribers: sammccall, klimek, MaskRay, jkorous, cfe-commits

Differential Revision: https://reviews.llvm.org/D46751

llvm-svn: 332456
  • Loading branch information
Eric Liu committed May 16, 2018
1 parent c922e07 commit d67ec24
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
43 changes: 43 additions & 0 deletions clang-tools-extra/clangd/index/SymbolCollector.cpp
Expand Up @@ -89,6 +89,46 @@ llvm::Optional<std::string> toURI(const SourceManager &SM, StringRef Path,
return llvm::None;
}

// All proto generated headers should start with this line.
static const char *PROTO_HEADER_COMMENT =
"// Generated by the protocol buffer compiler. DO NOT EDIT!";

// Checks whether the decl is a private symbol in a header generated by
// protobuf compiler.
// To identify whether a proto header is actually generated by proto compiler,
// we check whether it starts with PROTO_HEADER_COMMENT.
// FIXME: make filtering extensible when there are more use cases for symbol
// filters.
bool isPrivateProtoDecl(const NamedDecl &ND) {
const auto &SM = ND.getASTContext().getSourceManager();
auto Loc = findNameLoc(&ND);
auto FileName = SM.getFilename(Loc);
if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
return false;
auto FID = SM.getFileID(Loc);
// Double check that this is an actual protobuf header.
if (!SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT))
return false;

// ND without identifier can be operators.
if (ND.getIdentifier() == nullptr)
return false;
auto Name = ND.getIdentifier()->getName();
if (!Name.contains('_'))
return false;
// Nested proto entities (e.g. Message::Nested) have top-level decls
// that shouldn't be used (Message_Nested). Ignore them completely.
// The nested entities are dangling type aliases, we may want to reconsider
// including them in the future.
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
// will include OUTER_INNER and exclude some_enum_constant.
// FIXME: the heuristic relies on naming style (i.e. no underscore in
// user-defined names) and can be improved.
return (ND.getKind() != Decl::EnumConstant) ||
std::any_of(Name.begin(), Name.end(), islower);
}

bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx,
const SymbolCollector::Options &Opts) {
using namespace clang::ast_matchers;
Expand Down Expand Up @@ -129,6 +169,9 @@ bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx,
.empty())
return true;

// Avoid indexing internal symbols in protobuf generated headers.
if (isPrivateProtoDecl(*ND))
return true;
return false;
}

Expand Down
35 changes: 35 additions & 0 deletions clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp
Expand Up @@ -697,6 +697,41 @@ TEST_F(SymbolCollectorTest, UTF16Character) {
AllOf(QName("pörk"), DeclRange(Header.range()))));
}

TEST_F(SymbolCollectorTest, FilterPrivateProtoSymbols) {
TestHeaderName = testPath("x.proto.h");
const std::string Header =
R"(// Generated by the protocol buffer compiler. DO NOT EDIT!
namespace nx {
class Top_Level {};
class TopLevel {};
enum Kind {
KIND_OK,
Kind_Not_Ok,
};
bool operator<(const TopLevel &, const TopLevel &);
})";
runSymbolCollector(Header, /*Main=*/"");
EXPECT_THAT(Symbols,
UnorderedElementsAre(QName("nx"), QName("nx::TopLevel"),
QName("nx::Kind"), QName("nx::KIND_OK"),
QName("nx::operator<")));
}

TEST_F(SymbolCollectorTest, DoubleCheckProtoHeaderComment) {
TestHeaderName = testPath("x.proto.h");
const std::string Header = R"(
namespace nx {
class Top_Level {};
enum Kind {
Kind_Fine
};
}
)";
runSymbolCollector(Header, /*Main=*/"");
EXPECT_THAT(Symbols,
UnorderedElementsAre(QName("nx"), QName("nx::Top_Level"),
QName("nx::Kind"), QName("nx::Kind_Fine")));
}

} // namespace
} // namespace clangd
Expand Down

0 comments on commit d67ec24

Please sign in to comment.