Skip to content
This repository was archived by the owner on Apr 25, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 11 additions & 135 deletions src/index/Merge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,139 +22,10 @@
namespace clang {
namespace clangd {

// FIXME: Deleted symbols in dirty files are still returned (from Static).
// To identify these eliminate these, we should:
// - find the generating file from each Symbol which is Static-only
// - ask Dynamic if it has that file (needs new SymbolIndex method)
// - if so, drop the Symbol.
bool MergedIndex::fuzzyFind(
const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
// We can't step through both sources in parallel. So:
// 1) query all dynamic symbols, slurping results into a slab
// 2) query the static symbols, for each one:
// a) if it's not in the dynamic slab, yield it directly
// b) if it's in the dynamic slab, merge it and yield the result
// 3) now yield all the dynamic symbols we haven't processed.
trace::Span Tracer("MergedIndex fuzzyFind");
bool More = false; // We'll be incomplete if either source was.
SymbolSlab::Builder DynB;
unsigned DynamicCount = 0;
unsigned StaticCount = 0;
unsigned MergedCount = 0;
More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
++DynamicCount;
DynB.insert(S);
});
SymbolSlab Dyn = std::move(DynB).build();

llvm::DenseSet<SymbolID> SeenDynamicSymbols;
More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
auto DynS = Dyn.find(S.ID);
++StaticCount;
if (DynS == Dyn.end())
return Callback(S);
++MergedCount;
SeenDynamicSymbols.insert(S.ID);
Callback(mergeSymbol(*DynS, S));
});
SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
SPAN_ATTACH(Tracer, "static", StaticCount);
SPAN_ATTACH(Tracer, "merged", MergedCount);
for (const Symbol &S : Dyn)
if (!SeenDynamicSymbols.count(S.ID))
Callback(S);
return More;
}

void MergedIndex::lookup(
const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
trace::Span Tracer("MergedIndex lookup");
SymbolSlab::Builder B;

Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); });

auto RemainingIDs = Req.IDs;
Static->lookup(Req, [&](const Symbol &S) {
const Symbol *Sym = B.find(S.ID);
RemainingIDs.erase(S.ID);
if (!Sym)
Callback(S);
else
Callback(mergeSymbol(*Sym, S));
});
for (const auto &ID : RemainingIDs)
if (const Symbol *Sym = B.find(ID))
Callback(*Sym);
}

bool MergedIndex::refs(const RefsRequest &Req,
llvm::function_ref<void(const Ref &)> Callback) const {
trace::Span Tracer("MergedIndex refs");
bool More = false;
uint32_t Remaining =
Req.Limit.getValueOr(std::numeric_limits<uint32_t>::max());
// We don't want duplicated refs from the static/dynamic indexes,
// and we can't reliably deduplicate them because offsets may differ slightly.
// We consider the dynamic index authoritative and report all its refs,
// and only report static index refs from other files.
//
// FIXME: The heuristic fails if the dynamic index contains a file, but all
// refs were removed (we will report stale ones from the static index).
// Ultimately we should explicit check which index has the file instead.
llvm::StringSet<> DynamicIndexFileURIs;
More |= Dynamic->refs(Req, [&](const Ref &O) {
DynamicIndexFileURIs.insert(O.Location.FileURI);
Callback(O);
assert(Remaining != 0);
--Remaining;
});
if (Remaining == 0 && More)
return More;
// We return less than Req.Limit if static index returns more refs for dirty
// files.
bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
if (DynamicIndexFileURIs.count(O.Location.FileURI))
return; // ignore refs that have been seen from dynamic index.
if (Remaining == 0) {
More = true;
return;
}
--Remaining;
Callback(O);
});
return More || StaticHadMore;
}

void MergedIndex::relations(
const RelationsRequest &Req,
llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {
uint32_t Remaining =
Req.Limit.getValueOr(std::numeric_limits<uint32_t>::max());
// Return results from both indexes but avoid duplicates.
// We might return stale relations from the static index;
// we don't currently have a good way of identifying them.
llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations;
Dynamic->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
Callback(Subject, Object);
SeenRelations.insert(std::make_pair(Subject, Object.ID));
--Remaining;
});
if (Remaining == 0)
return;
Static->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
if (Remaining > 0 &&
!SeenRelations.count(std::make_pair(Subject, Object.ID))) {
--Remaining;
Callback(Subject, Object);
}
});
}

// Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If
// neither is preferred, this returns false.
bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
bool prefer(const SymbolLocation &L, const SymbolLocation &R,
const std::string &ProjectRoot) {
if (!L)
return false;
if (!R)
Expand All @@ -166,10 +37,15 @@ bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
return llvm::StringRef(Loc.FileURI).endswith(Suffix);
});
};
return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R);
auto InProject = [ProjectRoot](const SymbolLocation &Loc) {
return Loc.FileURI && llvm::StringRef(Loc.FileURI).startswith(ProjectRoot);
};

return (HasCodeGenSuffix(L) && !HasCodeGenSuffix(R)) || (InProject(L) && !InProject(R));
}

Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
Symbol mergeSymbol(const Symbol &L, const Symbol &R,
const std::string &ProjectRoot) {
assert(L.ID == R.ID);
// We prefer information from TUs that saw the definition.
// Classes: this is the def itself. Functions: hopefully the header decl.
Expand All @@ -184,9 +60,9 @@ Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.

// Only use locations in \p O if it's (strictly) preferred.
if (prefer(O.CanonicalDeclaration, S.CanonicalDeclaration))
if (prefer(O.CanonicalDeclaration, S.CanonicalDeclaration, ProjectRoot))
S.CanonicalDeclaration = O.CanonicalDeclaration;
if (prefer(O.Definition, S.Definition))
if (prefer(O.Definition, S.Definition, ProjectRoot))
S.Definition = O.Definition;
S.References += O.References;
if (S.Signature == "")
Expand Down
34 changes: 2 additions & 32 deletions src/index/Merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,8 @@ namespace clangd {
// Merge symbols L and R, preferring data from L in case of conflict.
// The two symbols must have the same ID.
// Returned symbol may contain data owned by either source.
Symbol mergeSymbol(const Symbol &L, const Symbol &R);

// MergedIndex is a composite index based on two provided Indexes:
// - the Dynamic index covers few files, but is relatively up-to-date.
// - the Static index covers a bigger set of files, but is relatively stale.
// The returned index attempts to combine results, and avoid duplicates.
//
// FIXME: We don't have a mechanism in Index to track deleted symbols and
// refs in dirty files, so the merged index may return stale symbols
// and refs from Static index.
class MergedIndex : public SymbolIndex {
const SymbolIndex *Dynamic, *Static;

public:
// The constructor does not access the symbols.
// It's safe to inherit from this class and pass pointers to derived members.
MergedIndex(const SymbolIndex *Dynamic, const SymbolIndex *Static)
: Dynamic(Dynamic), Static(Static) {}

bool fuzzyFind(const FuzzyFindRequest &,
llvm::function_ref<void(const Symbol &)>) const override;
void lookup(const LookupRequest &,
llvm::function_ref<void(const Symbol &)>) const override;
bool refs(const RefsRequest &,
llvm::function_ref<void(const Ref &)>) const override;
void relations(const RelationsRequest &,
llvm::function_ref<void(const SymbolID &, const Symbol &)>)
const override;
size_t estimateMemoryUsage() const override {
return Dynamic->estimateMemoryUsage() + Static->estimateMemoryUsage();
}
};
Symbol mergeSymbol(const Symbol &L, const Symbol &R,
const std::string &ProjectRoot);

} // namespace clangd
} // namespace clang
Expand Down
7 changes: 4 additions & 3 deletions src/indexer/IndexerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static cl::opt<bool> DebugArg("debug", cl::desc("Enable verbose debug output."),

class IndexActionFactory : public FrontendActionFactory {
public:
IndexActionFactory(clang::clangd::IndexFileIn &Result) : Result(Result) {}
IndexActionFactory(clang::clangd::IndexFileIn &Result, std::string &ProjectRoot) : Result(Result), ProjectRoot(ProjectRoot) {}

std::unique_ptr<clang::FrontendAction> create() override {
clang::clangd::SymbolCollector::Options Opts;
Expand All @@ -76,7 +76,7 @@ class IndexActionFactory : public FrontendActionFactory {
std::lock_guard<std::mutex> Lock(SymbolsMu);
for (const auto &Sym : S) {
if (const auto *Existing = Symbols.find(Sym.ID))
Symbols.insert(mergeSymbol(*Existing, Sym));
Symbols.insert(mergeSymbol(*Existing, Sym, ProjectRoot));
else
Symbols.insert(Sym);
}
Expand Down Expand Up @@ -112,6 +112,7 @@ class IndexActionFactory : public FrontendActionFactory {
clang::clangd::SymbolSlab::Builder Symbols;
clang::clangd::RefSlab::Builder Refs;
clang::clangd::RelationSlab::Builder Relations;
std::string &ProjectRoot;
};

int main(int argc, const char **argv) {
Expand Down Expand Up @@ -141,7 +142,7 @@ int main(int argc, const char **argv) {

AllTUsToolExecutor Executor(OptionsParser.getCompilations(), 0);
auto Err =
Executor.execute(std::make_unique<IndexActionFactory>(Data), Adjuster);
Executor.execute(std::make_unique<IndexActionFactory>(Data, ProjectRoot), Adjuster);
if (Err) {
}

Expand Down