Skip to content

Commit

Permalink
[clangd] Collect symbol occurrences in SymbolCollector.
Browse files Browse the repository at this point in the history
SymbolCollector will be used for two cases:
 - collect Symbol type only, used for indexing preamble AST.
 - collect Symbol and SymbolOccurrences, used for indexing main AST.

For finding local references from the AST, we will implement it in other ways.

llvm-svn: 341208
  • Loading branch information
hokein committed Aug 31, 2018
1 parent d2c2c57 commit d81e314
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 27 deletions.
43 changes: 43 additions & 0 deletions clang-tools-extra/clangd/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,48 @@ SymbolSlab SymbolSlab::Builder::build() && {
return SymbolSlab(std::move(NewArena), std::move(Symbols));
}

// Prints the kinds set in \p K as a comma-separated list of short names
// ("Decl", "Def", "Ref"). SymbolOccurrenceKind::Unknown prints as "Unknown".
raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) {
  if (K == SymbolOccurrenceKind::Unknown)
    return OS << "Unknown";
  // Messages[I] names the kind stored in bit I of the enum value.
  static const std::vector<const char *> Messages = {"Decl", "Def", "Ref"};
  const auto Bits = static_cast<uint8_t>(K);
  // Empty before the first printed name, ", " afterwards.
  const char *Separator = "";
  unsigned Bit = 0;
  for (const char *Name : Messages) {
    if (Bits & (1u << Bit)) {
      OS << Separator << Name;
      Separator = ", ";
    }
    ++Bit;
  }
  return OS;
}

// Prints \p Occurrence as its location, a colon, then its kind.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const SymbolOccurrence &Occurrence) {
  return OS << Occurrence.Location << ":" << Occurrence.Kind;
}

void SymbolOccurrenceSlab::insert(const SymbolID &SymID,
const SymbolOccurrence &Occurrence) {
assert(!Frozen &&
"Can't insert a symbol occurrence after the slab has been frozen!");
auto &SymOccurrences = Occurrences[SymID];
SymOccurrences.push_back(Occurrence);
SymOccurrences.back().Location.FileURI =
UniqueStrings.save(Occurrence.Location.FileURI);
}

void SymbolOccurrenceSlab::freeze() {
// Deduplicate symbol occurrenes.
for (auto &IDAndOccurrence : Occurrences) {
auto &Occurrence = IDAndOccurrence.getSecond();
std::sort(Occurrence.begin(), Occurrence.end());
Occurrence.erase(std::unique(Occurrence.begin(), Occurrence.end()),
Occurrence.end());
}
Frozen = true;
}

} // namespace clangd
} // namespace clang
81 changes: 74 additions & 7 deletions clang-tools-extra/clangd/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ struct SymbolLocation {
uint32_t Line = 0; // 0-based
// Using UTF-16 code units.
uint32_t Column = 0; // 0-based
bool operator==(const Position& P) const {
return Line == P.Line && Column == P.Column;
}
};

// The URI of the source file where a symbol occurs.
Expand All @@ -45,11 +42,23 @@ struct SymbolLocation {
Position End;

explicit operator bool() const { return !FileURI.empty(); }
bool operator==(const SymbolLocation& Loc) const {
return std::tie(FileURI, Start, End) ==
std::tie(Loc.FileURI, Loc.Start, Loc.End);
}
};
// Two positions are equal when both their line and column match.
inline bool operator==(const SymbolLocation::Position &L,
                       const SymbolLocation::Position &R) {
  return L.Line == R.Line && L.Column == R.Column;
}
// Orders positions by line first, then by column within the same line.
inline bool operator<(const SymbolLocation::Position &L,
                      const SymbolLocation::Position &R) {
  if (L.Line != R.Line)
    return L.Line < R.Line;
  return L.Column < R.Column;
}
// Two locations are equal when file URI, start and end all match.
inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) {
  return L.FileURI == R.FileURI && L.Start == R.Start && L.End == R.End;
}
// Lexicographic order over (FileURI, Start, End), expressed with operator<
// only: a later field is consulted only when the earlier ones tie.
inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
  if (L.FileURI < R.FileURI)
    return true;
  if (R.FileURI < L.FileURI)
    return false;
  if (L.Start < R.Start)
    return true;
  if (R.Start < L.Start)
    return false;
  return L.End < R.End;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);

// The class identifies a particular C++ symbol (class, function, method, etc).
Expand Down Expand Up @@ -314,6 +323,9 @@ inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A,
return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(A) &
static_cast<uint8_t>(B));
}
// Combination of all occurrence kinds: declaration, definition and reference.
static const SymbolOccurrenceKind AllOccurrenceKinds =
SymbolOccurrenceKind::Declaration | SymbolOccurrenceKind::Definition |
SymbolOccurrenceKind::Reference;

// Represents a symbol occurrence in the source file. It could be a
// declaration/definition/reference occurrence.
Expand All @@ -324,6 +336,61 @@ struct SymbolOccurrence {
SymbolLocation Location;
SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown;
};
// Orders occurrences by location; occurrences at the same location are
// ordered by kind.
inline bool operator<(const SymbolOccurrence &L, const SymbolOccurrence &R) {
  if (L.Location < R.Location)
    return true;
  if (R.Location < L.Location)
    return false;
  return L.Kind < R.Kind;
}
// Two occurrences are equal when both location and kind match.
inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R) {
  return L.Location == R.Location && L.Kind == R.Kind;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const SymbolOccurrence &Occurrence);

// An efficient structure of storing large set of symbol occurrences in memory.
// Filenames are deduplicated.
class SymbolOccurrenceSlab {
public:
using const_iterator =
llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>>::const_iterator;
using iterator = const_iterator;

SymbolOccurrenceSlab() : UniqueStrings(Arena) {}

// Define move semantics for the slab, allowing assignment from an rvalue.
// Implicit move assignment is deleted by the compiler because
// StringSaver has a reference type member.
SymbolOccurrenceSlab(SymbolOccurrenceSlab &&Slab) = default;
SymbolOccurrenceSlab &operator=(SymbolOccurrenceSlab &&RHS) {
assert(RHS.Frozen &&
"SymbolOcucrrenceSlab must be frozen when move assigned!");
Arena = std::move(RHS.Arena);
Frozen = true;
Occurrences = std::move(RHS.Occurrences);
return *this;
}

const_iterator begin() const { return Occurrences.begin(); }
const_iterator end() const { return Occurrences.end(); }

// Adds a symbol occurrence.
// This is a deep copy: underlying FileURI will be owned by the slab.
void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence);

llvm::ArrayRef<SymbolOccurrence> find(const SymbolID &ID) const {
assert(Frozen && "SymbolOccurrenceSlab must be frozen before looking up!");
auto It = Occurrences.find(ID);
if (It == Occurrences.end())
return {};
return It->second;
}

void freeze();

private:
bool Frozen = false;
llvm::BumpPtrAllocator Arena;
llvm::UniqueStringSaver UniqueStrings;
llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences;
};

struct FuzzyFindRequest {
/// \brief A query string for the fuzzy find. This is matched against symbols'
Expand Down
74 changes: 59 additions & 15 deletions clang-tools-extra/clangd/index/SymbolCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,24 @@ getIncludeHeader(llvm::StringRef QName, const SourceManager &SM,
return toURI(SM, Header, Opts);
}

// Return the symbol location of the token at \p Loc.
// Return the symbol range of the token at \p TokLoc.
// The first element is the position of \p TokLoc itself; the second is the
// position reached by advancing \p TokLoc by the measured token length.
std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
              const LangOptions &LangOpts) {
  // Converts a SourceLocation into a SymbolLocation::Position by way of
  // sourceLocToPosition.
  auto ToPosition = [&SM](SourceLocation Loc) {
    const auto LSPPos = sourceLocToPosition(SM, Loc);
    SymbolLocation::Position Result;
    Result.Line = LSPPos.line;
    Result.Column = LSPPos.character;
    return Result;
  };

  const auto Length = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
  const SourceLocation EndLoc = TokLoc.getLocWithOffset(Length);
  auto Begin = ToPosition(TokLoc);
  auto End = ToPosition(EndLoc);
  return {Begin, End};
}

// Return the symbol location of the token at \p TokLoc.
llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
const SymbolCollector::Options &Opts,
Expand All @@ -194,19 +211,9 @@ getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
FileURIStorage = std::move(*U);
SymbolLocation Result;
Result.FileURI = FileURIStorage;
auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);

auto CreatePosition = [&SM](SourceLocation Loc) {
auto LSPLoc = sourceLocToPosition(SM, Loc);
SymbolLocation::Position Pos;
Pos.Line = LSPLoc.line;
Pos.Column = LSPLoc.character;
return Pos;
};

Result.Start = CreatePosition(TokLoc);
auto EndLoc = TokLoc.getLocWithOffset(TokenLength);
Result.End = CreatePosition(EndLoc);
auto Range = getTokenRange(TokLoc, SM, LangOpts);
Result.Start = Range.first;
Result.End = Range.second;

return std::move(Result);
}
Expand All @@ -224,6 +231,11 @@ bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
match(decl(isExpansionInMainFile()), ND, ND.getASTContext()).empty();
}

// Keeps only the bits of \p Roles that are also set in AllOccurrenceKinds and
// reinterprets the result as a SymbolOccurrenceKind.
SymbolOccurrenceKind toOccurrenceKind(index::SymbolRoleSet Roles) {
  const auto Mask = static_cast<unsigned>(AllOccurrenceKinds);
  return static_cast<SymbolOccurrenceKind>(Mask & Roles);
}

} // namespace

SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
Expand Down Expand Up @@ -308,11 +320,16 @@ bool SymbolCollector::handleDeclOccurence(
// Mark D as referenced if this is a reference coming from the main file.
// D may not be an interesting symbol, but it's cheaper to check at the end.
auto &SM = ASTCtx->getSourceManager();
auto SpellingLoc = SM.getSpellingLoc(Loc);
if (Opts.CountReferences &&
(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
SM.getFileID(SpellingLoc) == SM.getMainFileID())
ReferencedDecls.insert(ND);

if ((static_cast<unsigned>(Opts.OccurrenceFilter) & Roles) &&
SM.getFileID(SpellingLoc) == SM.getMainFileID())
DeclOccurrences[ND].emplace_back(SpellingLoc, Roles);

// Don't continue indexing if this is a mere reference.
if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
Expand Down Expand Up @@ -436,8 +453,35 @@ void SymbolCollector::finish() {
IncRef(SymbolID(USR));
}
}

const auto &SM = ASTCtx->getSourceManager();
auto* MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());

if (auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts)) {
std::string MainURI = *MainFileURI;
for (const auto &It : DeclOccurrences) {
if (auto ID = getSymbolID(It.first)) {
if (Symbols.find(*ID)) {
for (const auto &LocAndRole : It.second) {
SymbolOccurrence Occurrence;
auto Range =
getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts());
Occurrence.Location.Start = Range.first;
Occurrence.Location.End = Range.second;
Occurrence.Location.FileURI = MainURI;
Occurrence.Kind = toOccurrenceKind(LocAndRole.second);
SymbolOccurrences.insert(*ID, Occurrence);
}
}
}
}
} else {
log("Failed to create URI for main file: {0}", MainFileEntry->getName());
}

ReferencedDecls.clear();
ReferencedMacros.clear();
DeclOccurrences.clear();
}

const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND,
Expand Down
16 changes: 16 additions & 0 deletions clang-tools-extra/clangd/index/SymbolCollector.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ class SymbolCollector : public index::IndexDataConsumer {
const CanonicalIncludes *Includes = nullptr;
// Populate the Symbol.References field.
bool CountReferences = false;
/// The symbol occurrence kind that will be collected.
/// If not set (Unknown), SymbolCollector will not collect any symbol
/// occurrences.
SymbolOccurrenceKind OccurrenceFilter = SymbolOccurrenceKind::Unknown;
// Every symbol collected will be stamped with this origin.
SymbolOrigin Origin = SymbolOrigin::Unknown;
/// Collect macros.
Expand Down Expand Up @@ -86,6 +90,11 @@ class SymbolCollector : public index::IndexDataConsumer {

/// Consumes the symbol builder and returns the SymbolSlab it builds.
SymbolSlab takeSymbols() { return std::move(Symbols).build(); }

/// Freezes the collected occurrence slab and moves it out of the collector.
SymbolOccurrenceSlab takeOccurrences() {
SymbolOccurrences.freeze();
return std::move(SymbolOccurrences);
}

void finish() override;

private:
Expand All @@ -94,14 +103,21 @@ class SymbolCollector : public index::IndexDataConsumer {

// All Symbols collected from the AST.
SymbolSlab::Builder Symbols;
// All symbol occurrences collected from the AST.
// Only symbols declared in preamble (from #includes) and references from the
// main file will be included.
SymbolOccurrenceSlab SymbolOccurrences;
ASTContext *ASTCtx;
std::shared_ptr<Preprocessor> PP;
std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo;
Options Opts;
using DeclOccurrence = std::pair<SourceLocation, index::SymbolRoleSet>;
// Symbols referenced from the current TU, flushed on finish().
llvm::DenseSet<const NamedDecl *> ReferencedDecls;
llvm::DenseSet<const IdentifierInfo *> ReferencedMacros;
llvm::DenseMap<const NamedDecl *, std::vector<DeclOccurrence>>
DeclOccurrences;
// Maps canonical declaration provided by clang to canonical declaration for
// an index symbol, if clangd prefers a different declaration than that
// provided by clang. For example, friend declaration might be considered
Expand Down
Loading

0 comments on commit d81e314

Please sign in to comment.