Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[clangd] Minimal implementation of automatic static index (not enabled).
Summary: See tinyurl.com/clangd-automatic-index for design and goals. Lots of limitations to keep this patch smallish, TODOs everywhere: - no serialization to disk - no changes to dynamic index, which now has a much simpler job - no partitioning of symbols by file to avoid duplication of header symbols - no reindexing of edited files - only a single worker thread - compilation database is slurped synchronously (doesn't scale) - uses memindex, rebuilds after every file (should be dex, periodically) It's not hooked up to ClangdServer/ClangdLSPServer yet: the layering isn't clear (it should really be in ClangdServer, but ClangdLSPServer has all the CDB interactions). Reviewers: ioeric Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, jfb, cfe-commits Differential Revision: https://reviews.llvm.org/D53032 llvm-svn: 344513
- Loading branch information
1 parent
5abb607
commit 8dc9dbb
Showing
7 changed files
with
325 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
//===-- Background.cpp - Build an index in a background thread ------------===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "index/Background.h" | ||
#include "ClangdUnit.h" | ||
#include "Compiler.h" | ||
#include "Logger.h" | ||
#include "Trace.h" | ||
#include "index/IndexAction.h" | ||
#include "index/MemIndex.h" | ||
#include "index/Serialization.h" | ||
#include "llvm/Support/SHA1.h" | ||
#include <random> | ||
|
||
using namespace llvm; | ||
namespace clang { | ||
namespace clangd { | ||
|
||
// Creates an index backed by an empty MemIndex and starts the single worker
// thread, which executes run().
//
// BackgroundContext: context installed on the worker thread by run().
// ResourceDir: appended to each command as "-resource-dir=<ResourceDir>".
// FSProvider: supplies the file system used to read and parse sources; it is
//             stored by reference.
//
// The worker is launched from the constructor body instead of the
// member-initializer list. Members are initialized in declaration order, and
// starting the thread from the initializer list lets run() use QueueMu,
// QueueCV, ShouldStop and Queue before they are constructed whenever Thread
// is declared ahead of them. By the time the body runs, every member exists.
BackgroundIndex::BackgroundIndex(Context BackgroundContext,
                                 StringRef ResourceDir,
                                 const FileSystemProvider &FSProvider)
    : SwapIndex(llvm::make_unique<MemIndex>()), ResourceDir(ResourceDir),
      FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)) {
  Thread = std::thread([this] { run(); });
}
|
||
// Asks the worker to shut down, then waits for it to exit. Because run() only
// checks the stop flag between tasks, this blocks until the task that is
// currently executing (if any) has finished.
BackgroundIndex::~BackgroundIndex() {
  this->stop();
  this->Thread.join();
}
|
||
void BackgroundIndex::stop() { | ||
{ | ||
std::lock_guard<std::mutex> Lock(QueueMu); | ||
ShouldStop = true; | ||
} | ||
QueueCV.notify_all(); | ||
} | ||
|
||
void BackgroundIndex::run() { | ||
WithContext Background(std::move(BackgroundContext)); | ||
while (true) { | ||
llvm::Optional<Task> Task; | ||
{ | ||
std::unique_lock<std::mutex> Lock(QueueMu); | ||
QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); }); | ||
if (ShouldStop) { | ||
Queue.clear(); | ||
QueueCV.notify_all(); | ||
return; | ||
} | ||
++NumActiveTasks; | ||
Task = std::move(Queue.front()); | ||
Queue.pop_front(); | ||
} | ||
(*Task)(); | ||
{ | ||
std::unique_lock<std::mutex> Lock(QueueMu); | ||
assert(NumActiveTasks > 0 && "before decrementing"); | ||
--NumActiveTasks; | ||
} | ||
QueueCV.notify_all(); | ||
} | ||
} | ||
|
||
void BackgroundIndex::blockUntilIdleForTest() { | ||
std::unique_lock<std::mutex> Lock(QueueMu); | ||
QueueCV.wait(Lock, [&] { return Queue.empty() && NumActiveTasks == 0; }); | ||
} | ||
|
||
// Queues a single compile command for background indexing.
//
// Directory: currently unused by this function.
// Cmd: the command to index; it is moved into the queued task.
//
// The worker thread sleeps on QueueCV while the queue is empty, so it must be
// notified after the task is pushed; otherwise a worker that is already
// waiting would never pick the task up. The lock is released before notifying
// so the woken thread can acquire it immediately.
void BackgroundIndex::enqueue(StringRef Directory,
                              tooling::CompileCommand Cmd) {
  {
    std::lock_guard<std::mutex> Lock(QueueMu);
    enqueueLocked(std::move(Cmd));
  }
  QueueCV.notify_all();
}
|
||
// Queues every compile command found in CDB for background indexing, in a
// randomized order, then wakes the worker.
//
// Directory: only used in the log message.
// CDB: compilation database whose commands are read synchronously here.
void BackgroundIndex::enqueueAll(StringRef Directory,
                                 const tooling::CompilationDatabase &CDB) {
  trace::Span Tracer("BackgroundIndexEnqueueCDB");
  // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
  // from disk and enqueue the commands asynchronously?
  std::vector<tooling::CompileCommand> Commands = CDB.getAllCompileCommands();
  SPAN_ATTACH(Tracer, "commands", static_cast<int64_t>(Commands.size()));

  // Randomize the order in which the commands will be indexed.
  std::random_device Seed;
  std::mt19937 Generator(Seed());
  std::shuffle(Commands.begin(), Commands.end(), Generator);

  log("Enqueueing {0} commands for indexing from {1}", Commands.size(),
      Directory);
  {
    std::lock_guard<std::mutex> Guard(QueueMu);
    for (tooling::CompileCommand &Command : Commands)
      enqueueLocked(std::move(Command));
  }
  QueueCV.notify_all();
}
|
||
// Appends a task to Queue that indexes Cmd and logs any failure.
// Both callers in this file hold QueueMu while calling this; no notification
// is sent from here.
void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
  auto IndexOne = [this](tooling::CompileCommand Command) {
    // Remember the name first: the command is moved into index() below.
    std::string Filename = Command.Filename;
    Command.CommandLine.push_back("-resource-dir=" + ResourceDir);
    if (auto Err = index(std::move(Command)))
      log("Indexing {0} failed: {1}", Filename, std::move(Err));
  };
  Queue.push_back(Bind(std::move(IndexOne), std::move(Cmd)));
}
|
||
llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { | ||
trace::Span Tracer("BackgroundIndex"); | ||
SPAN_ATTACH(Tracer, "file", Cmd.Filename); | ||
SmallString<128> AbsolutePath; | ||
if (llvm::sys::path::is_absolute(Cmd.Filename)) { | ||
AbsolutePath = Cmd.Filename; | ||
} else { | ||
AbsolutePath = Cmd.Directory; | ||
llvm::sys::path::append(AbsolutePath, Cmd.Filename); | ||
} | ||
|
||
auto FS = FSProvider.getFileSystem(); | ||
auto Buf = FS->getBufferForFile(AbsolutePath); | ||
if (!Buf) | ||
return errorCodeToError(Buf.getError()); | ||
StringRef Contents = Buf->get()->getBuffer(); | ||
auto Hash = SHA1::hash({(const uint8_t *)Contents.data(), Contents.size()}); | ||
|
||
if (FileHash.lookup(AbsolutePath) == Hash) { | ||
vlog("No need to index {0}, already up to date", AbsolutePath); | ||
return Error::success(); | ||
} | ||
|
||
log("Indexing {0}", Cmd.Filename, toHex(Hash)); | ||
ParseInputs Inputs; | ||
Inputs.FS = std::move(FS); | ||
Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory); | ||
Inputs.CompileCommand = std::move(Cmd); | ||
auto CI = buildCompilerInvocation(Inputs); | ||
if (!CI) | ||
return createStringError(llvm::inconvertibleErrorCode(), | ||
"Couldn't build compiler invocation"); | ||
IgnoreDiagnostics IgnoreDiags; | ||
auto Clang = prepareCompilerInstance( | ||
std::move(CI), /*Preamble=*/nullptr, std::move(*Buf), | ||
std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags); | ||
if (!Clang) | ||
return createStringError(llvm::inconvertibleErrorCode(), | ||
"Couldn't build compiler instance"); | ||
|
||
SymbolCollector::Options IndexOpts; | ||
SymbolSlab Symbols; | ||
RefSlab Refs; | ||
IndexFileIn IndexData; | ||
auto Action = createStaticIndexingAction( | ||
IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, | ||
[&](RefSlab R) { Refs = std::move(R); }); | ||
|
||
// We're going to run clang here, and it could potentially crash. | ||
// We could use CrashRecoveryContext to try to make indexing crashes nonfatal, | ||
// but the leaky "recovery" is pretty scary too in a long-running process. | ||
// If crashes are a real problem, maybe we should fork a child process. | ||
|
||
const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); | ||
if (!Action->BeginSourceFile(*Clang, Input)) | ||
return createStringError(llvm::inconvertibleErrorCode(), | ||
"BeginSourceFile() failed"); | ||
if (!Action->Execute()) | ||
return createStringError(llvm::inconvertibleErrorCode(), | ||
"Execute() failed"); | ||
Action->EndSourceFile(); | ||
|
||
log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename, | ||
Symbols.size(), Refs.size()); | ||
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); | ||
SPAN_ATTACH(Tracer, "refs", int(Refs.size())); | ||
// FIXME: partition the symbols by file rather than TU, to avoid duplication. | ||
IndexedSymbols.update(AbsolutePath, | ||
llvm::make_unique<SymbolSlab>(std::move(Symbols)), | ||
llvm::make_unique<RefSlab>(std::move(Refs))); | ||
FileHash[AbsolutePath] = Hash; | ||
|
||
// FIXME: this should rebuild once-in-a-while, not after every file. | ||
// At that point we should use Dex, too. | ||
vlog("Rebuilding automatic index"); | ||
reset(IndexedSymbols.buildMemIndex()); | ||
return Error::success(); | ||
} | ||
|
||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
//===--- Background.h - Build an index in a background thread ----*- C++-*-===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H | ||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H | ||
|
||
#include "Context.h" | ||
#include "FSProvider.h" | ||
#include "index/FileIndex.h" | ||
#include "index/Index.h" | ||
#include "clang/Tooling/CompilationDatabase.h" | ||
#include "llvm/Support/SHA1.h" | ||
#include <condition_variable> | ||
#include <deque> | ||
#include <thread> | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
// Builds an in-memory index by by running the static indexer action over | ||
// all commands in a compilation database. Indexing happens in the background. | ||
// FIXME: it should also persist its state on disk for fast start. | ||
// FIXME: it should watch for changes to files on disk. | ||
class BackgroundIndex : public SwapIndex { | ||
public: | ||
// FIXME: resource-dir injection should be hoisted somewhere common. | ||
BackgroundIndex(Context BackgroundContext, StringRef ResourceDir, | ||
const FileSystemProvider &); | ||
~BackgroundIndex(); // Blocks while the current task finishes. | ||
|
||
// Enqueue a translation unit for indexing. | ||
// The indexing happens in a background thread, so the symbols will be | ||
// available sometime later. | ||
void enqueue(llvm::StringRef Directory, tooling::CompileCommand); | ||
// Index all TUs described in the compilation database. | ||
void enqueueAll(llvm::StringRef Directory, | ||
const tooling::CompilationDatabase &); | ||
|
||
// Cause background threads to stop after ther current task, any remaining | ||
// tasks will be discarded. | ||
void stop(); | ||
|
||
// Wait until the queue is empty, to allow deterministic testing. | ||
void blockUntilIdleForTest(); | ||
|
||
private: | ||
// configuration | ||
std::string ResourceDir; | ||
const FileSystemProvider &FSProvider; | ||
Context BackgroundContext; | ||
|
||
// index state | ||
llvm::Error index(tooling::CompileCommand); | ||
FileSymbols IndexedSymbols; // Index contents. | ||
using Hash = decltype(llvm::SHA1::hash({})); | ||
llvm::StringMap<Hash> FileHash; // Digest of indexed file. | ||
|
||
// queue management | ||
using Task = std::function<void()>; // FIXME: use multiple worker threads. | ||
void run(); // Main loop executed by Thread. Runs tasks from Queue. | ||
void enqueueLocked(tooling::CompileCommand Cmd); | ||
std::thread Thread; | ||
std::mutex QueueMu; | ||
unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks. | ||
std::condition_variable QueueCV; | ||
bool ShouldStop = false; | ||
std::deque<Task> Queue; | ||
}; | ||
|
||
} // namespace clangd | ||
} // namespace clang | ||
|
||
#endif |
37 changes: 37 additions & 0 deletions
37
clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#include "SyncAPI.h" | ||
#include "TestFS.h" | ||
#include "index/Background.h" | ||
#include "gmock/gmock.h" | ||
#include "gtest/gtest.h" | ||
|
||
using testing::UnorderedElementsAre; | ||
|
||
namespace clang { | ||
namespace clangd { | ||
|
||
// Matches any value whose Name member compares equal to N.
MATCHER_P(Named, N, "") {
  return arg.Name == N;
}
|
||
// Indexes two translation units that include the same header and checks that
// the symbols from both end up in the index.
TEST(BackgroundIndexTest, IndexTwoFiles) {
  MockFSProvider FS;
  // A.h yields different symbols when included by A.cc vs B.cc.
  // Currently we store symbols for each TU, so we get both.
  FS.Files[testPath("root/A.h")] = "void a_h(); void NAME(){}";
  FS.Files[testPath("root/A.cc")] = "#include \"A.h\"";
  FS.Files[testPath("root/B.cc")] = "#define NAME bar\n#include \"A.h\"";
  BackgroundIndex Idx(Context::empty(), "", FS);

  const std::string Root = testPath("root");
  const std::string SourceA = testPath("root/A.cc");
  const std::string SourceB = testPath("root/B.cc");

  // First TU: NAME comes from the command line.
  tooling::CompileCommand CmdA;
  CmdA.Filename = SourceA;
  CmdA.Directory = Root;
  CmdA.CommandLine = {"clang++", "-DNAME=foo", SourceA};
  Idx.enqueue(Root, CmdA);

  // Second TU: same flags, different input file.
  tooling::CompileCommand CmdB = CmdA;
  CmdB.Filename = SourceB;
  CmdB.CommandLine.back() = SourceB;
  Idx.enqueue(Root, CmdB);

  Idx.blockUntilIdleForTest();
  EXPECT_THAT(runFuzzyFind(Idx, ""),
              UnorderedElementsAre(Named("a_h"), Named("foo"), Named("bar")));
}
|
||
} // namespace clangd | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters