diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 1ccfb448563838..143e7686377739 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -266,11 +266,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, std::vector> SymbolSlabs; std::vector> RefSlabs; std::vector> RelationSlabs; + llvm::StringSet<> Files; std::vector MainFileRefs; { std::lock_guard Lock(Mutex); - for (const auto &FileAndSymbols : SymbolsSnapshot) + for (const auto &FileAndSymbols : SymbolsSnapshot) { SymbolSlabs.push_back(FileAndSymbols.second); + Files.insert(FileAndSymbols.first()); + } for (const auto &FileAndRefs : RefsSnapshot) { RefSlabs.push_back(FileAndRefs.second.Slab); if (FileAndRefs.second.CountReferences) @@ -372,14 +375,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, case IndexType::Light: return std::make_unique( llvm::make_pointee_range(AllSymbols), std::move(AllRefs), - std::move(AllRelations), + std::move(AllRelations), std::move(Files), std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), std::move(RefsStorage), std::move(SymsStorage)), StorageSize); case IndexType::Heavy: return std::make_unique( llvm::make_pointee_range(AllSymbols), std::move(AllRefs), - std::move(AllRelations), + std::move(AllRelations), std::move(Files), std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs), std::move(RefsStorage), std::move(SymsStorage)), StorageSize); diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index b309053972eb4b..5da06f36ffe4f6 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -76,6 +76,11 @@ void SwapIndex::relations( return snapshot()->relations(R, CB); } +llvm::unique_function +SwapIndex::indexedFiles() const { + return snapshot()->indexedFiles(); +} + size_t SwapIndex::estimateMemoryUsage() const { return snapshot()->estimateMemoryUsage(); } diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h index f0959e71d50f36..c961aa9d8bd951 100644 --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -14,6 +14,7 @@ #include "Symbol.h" #include "SymbolID.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/JSON.h" @@ -121,6 +122,11 @@ class SymbolIndex { llvm::function_ref Callback) const = 0; + /// Returns function which checks if the specified file was used to build this + /// index or not. The function must only be called while the index is alive. + virtual llvm::unique_function + indexedFiles() const = 0; + /// Returns estimated size of index (in bytes). virtual size_t estimateMemoryUsage() const = 0; }; @@ -145,6 +151,9 @@ class SwapIndex : public SymbolIndex { llvm::function_ref) const override; + llvm::unique_function + indexedFiles() const override; + size_t estimateMemoryUsage() const override; private: diff --git a/clang-tools-extra/clangd/index/MemIndex.cpp b/clang-tools-extra/clangd/index/MemIndex.cpp index 46e9c0a8ee45eb..2352e801d1fcfa 100644 --- a/clang-tools-extra/clangd/index/MemIndex.cpp +++ b/clang-tools-extra/clangd/index/MemIndex.cpp @@ -109,6 +109,18 @@ void MemIndex::relations( } } +llvm::unique_function +MemIndex::indexedFiles() const { + return [this](llvm::StringRef FileURI) { + auto Path = URI::resolve(FileURI); + if (!Path) { + llvm::consumeError(Path.takeError()); + return false; + } + return Files.contains(*Path); + }; +} + size_t MemIndex::estimateMemoryUsage() const { return Index.getMemorySize() + Refs.getMemorySize() + Relations.getMemorySize() + BackingDataSize; diff --git a/clang-tools-extra/clangd/index/MemIndex.h b/clang-tools-extra/clangd/index/MemIndex.h index 10ecd79e7c547e..7855630fda0243 100644 --- a/clang-tools-extra/clangd/index/MemIndex.h +++ b/clang-tools-extra/clangd/index/MemIndex.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H #include "Index.h" +#include "llvm/ADT/StringSet.h" #include namespace clang { @@ -44,6 +45,17 @@ class MemIndex : public SymbolIndex { this->BackingDataSize = BackingDataSize; } + template + MemIndex(SymbolRange &&Symbols, RefRange &&Refs, RelationRange &&Relations, + FileRange &&Files, Payload &&BackingData, size_t BackingDataSize) + : MemIndex(std::forward(Symbols), + std::forward(Refs), + std::forward(Relations), + std::forward(BackingData), BackingDataSize) { + this->Files = std::forward(Files); + } + /// Builds an index from slabs. The index takes ownership of the data. static std::unique_ptr build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations); @@ -62,6 +74,9 @@ class MemIndex : public SymbolIndex { llvm::function_ref Callback) const override; + llvm::unique_function + indexedFiles() const override; + size_t estimateMemoryUsage() const override; private: @@ -73,6 +88,8 @@ class MemIndex : public SymbolIndex { static_assert(sizeof(RelationKind) == sizeof(uint8_t), "RelationKind should be of same size as a uint8_t"); llvm::DenseMap, std::vector> Relations; + // Set of files which were used during this index build. + llvm::StringSet<> Files; std::shared_ptr KeepAlive; // poor man's move-only std::any // Size of memory retained by KeepAlive. size_t BackingDataSize = 0; diff --git a/clang-tools-extra/clangd/index/Merge.cpp b/clang-tools-extra/clangd/index/Merge.cpp index a93aa204e18fb4..97babacf2b38ef 100644 --- a/clang-tools-extra/clangd/index/Merge.cpp +++ b/clang-tools-extra/clangd/index/Merge.cpp @@ -99,23 +99,18 @@ bool MergedIndex::refs(const RefsRequest &Req, // and we can't reliably deduplicate them because offsets may differ slightly. // We consider the dynamic index authoritative and report all its refs, // and only report static index refs from other files. - // - // FIXME: The heuristic fails if the dynamic index contains a file, but all - // refs were removed (we will report stale ones from the static index). - // Ultimately we should explicit check which index has the file instead. - llvm::StringSet<> DynamicIndexFileURIs; More |= Dynamic->refs(Req, [&](const Ref &O) { - DynamicIndexFileURIs.insert(O.Location.FileURI); Callback(O); assert(Remaining != 0); --Remaining; }); if (Remaining == 0 && More) return More; + auto DynamicContainsFile = Dynamic->indexedFiles(); // We return less than Req.Limit if static index returns more refs for dirty // files. - bool StaticHadMore = Static->refs(Req, [&](const Ref &O) { - if (DynamicIndexFileURIs.count(O.Location.FileURI)) + bool StaticHadMore = Static->refs(Req, [&](const Ref &O) { + if (DynamicContainsFile(O.Location.FileURI)) return; // ignore refs that have been seen from dynamic index. if (Remaining == 0) { More = true; @@ -127,6 +122,14 @@ bool MergedIndex::refs(const RefsRequest &Req, return More || StaticHadMore; } +llvm::unique_function +MergedIndex::indexedFiles() const { + return [DynamicContainsFile{Dynamic->indexedFiles()}, + StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) { + return DynamicContainsFile(FileURI) || StaticContainsFile(FileURI); + }; +} + void MergedIndex::relations( const RelationsRequest &Req, llvm::function_ref Callback) const { diff --git a/clang-tools-extra/clangd/index/Merge.h b/clang-tools-extra/clangd/index/Merge.h index 3288aef120fc16..0cdff38f06786c 100644 --- a/clang-tools-extra/clangd/index/Merge.h +++ b/clang-tools-extra/clangd/index/Merge.h @@ -45,6 +45,8 @@ class MergedIndex : public SymbolIndex { void relations(const RelationsRequest &, llvm::function_ref) const override; + llvm::unique_function + indexedFiles() const override; size_t estimateMemoryUsage() const override { return Dynamic->estimateMemoryUsage() + Static->estimateMemoryUsage(); } diff --git a/clang-tools-extra/clangd/index/ProjectAware.cpp b/clang-tools-extra/clangd/index/ProjectAware.cpp index 63f8f823f3a726..bafe5550f605e2 100644 --- a/clang-tools-extra/clangd/index/ProjectAware.cpp +++ b/clang-tools-extra/clangd/index/ProjectAware.cpp @@ -54,6 +54,9 @@ class ProjectAwareIndex : public SymbolIndex { llvm::function_ref Callback) const override; + llvm::unique_function + indexedFiles() const override; + ProjectAwareIndex(IndexFactory Gen) : Gen(std::move(Gen)) {} private: @@ -112,6 +115,14 @@ void ProjectAwareIndex::relations( return Idx->relations(Req, Callback); } +llvm::unique_function +ProjectAwareIndex::indexedFiles() const { + trace::Span Tracer("ProjectAwareIndex::indexedFiles"); + if (auto *Idx = getIndex()) + return Idx->indexedFiles(); + return [](llvm::StringRef) { return false; }; +} + SymbolIndex *ProjectAwareIndex::getIndex() const { const auto &C = Config::current(); if (!C.Index.External) diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index 29aea353e07cf0..dc072accb05b7b 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -313,6 +313,17 @@ void Dex::relations( } } +llvm::unique_function Dex::indexedFiles() const { + return [this](llvm::StringRef FileURI) { + auto Path = URI::resolve(FileURI); + if (!Path) { + llvm::consumeError(Path.takeError()); + return false; + } + return Files.contains(*Path); + }; +} + size_t Dex::estimateMemoryUsage() const { size_t Bytes = Symbols.size() * sizeof(const Symbol *); Bytes += SymbolQuality.size() * sizeof(float); diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h index 3a66031a89d07d..318a74951379c0 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.h +++ b/clang-tools-extra/clangd/index/dex/Dex.h @@ -67,6 +67,16 @@ class Dex : public SymbolIndex { this->BackingDataSize = BackingDataSize; } + template + Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, + FileRange &&Files, Payload &&BackingData, size_t BackingDataSize) + : Dex(std::forward(Symbols), std::forward(Refs), + std::forward(Relations), + std::forward(BackingData), BackingDataSize) { + this->Files = std::forward(Files); + } + /// Builds an index from slabs. The index takes ownership of the slab. static std::unique_ptr build(SymbolSlab, RefSlab, RelationSlab); @@ -84,6 +94,9 @@ class Dex : public SymbolIndex { llvm::function_ref Callback) const override; + llvm::unique_function + indexedFiles() const override; + size_t estimateMemoryUsage() const override; private: @@ -112,6 +125,8 @@ class Dex : public SymbolIndex { "RelationKind should be of same size as a uint8_t"); llvm::DenseMap, std::vector> Relations; std::shared_ptr KeepAlive; // poor man's move-only std::any + // Set of files which were used during this index build. + llvm::StringSet<> Files; // Size of memory retained by KeepAlive. size_t BackingDataSize = 0; }; diff --git a/clang-tools-extra/clangd/index/remote/Client.cpp b/clang-tools-extra/clangd/index/remote/Client.cpp index bda3a971f418fb..b09dbf915e462e 100644 --- a/clang-tools-extra/clangd/index/remote/Client.cpp +++ b/clang-tools-extra/clangd/index/remote/Client.cpp @@ -152,6 +152,14 @@ class IndexClient : public clangd::SymbolIndex { }); } + llvm::unique_function indexedFiles() const { + // FIXME: For now we always return "false" regardless of whether the file + // was indexed or not. A possible implementation could be based on + // the idea that we do not want to send a request at every + // call of a function returned by IndexClient::indexedFiles(). + return [](llvm::StringRef) { return false; }; + } + // IndexClient does not take any space since the data is stored on the // server. size_t estimateMemoryUsage() const override { return 0; } diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index a19c6a83e95460..76b193b4979162 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1349,6 +1349,11 @@ class IndexRequestCollector : public SymbolIndex { llvm::function_ref) const override {} + llvm::unique_function + indexedFiles() const override { + return [](llvm::StringRef) { return false; }; + } + // This is incorrect, but IndexRequestCollector is not an actual index and it // isn't used in production code. size_t estimateMemoryUsage() const override { return 0; } diff --git a/clang-tools-extra/clangd/unittests/DexTests.cpp b/clang-tools-extra/clangd/unittests/DexTests.cpp index e83cd3052b6540..d2cd22df1e36c0 100644 --- a/clang-tools-extra/clangd/unittests/DexTests.cpp +++ b/clang-tools-extra/clangd/unittests/DexTests.cpp @@ -732,6 +732,20 @@ TEST(DexTests, Relations) { EXPECT_THAT(Results, UnorderedElementsAre(Child1.ID, Child2.ID)); } +TEST(DexIndex, IndexedFiles) { + SymbolSlab Symbols; + RefSlab Refs; + auto Size = Symbols.bytes() + Refs.bytes(); + auto Data = std::make_pair(std::move(Symbols), std::move(Refs)); + llvm::StringSet<> Files = {testPath("foo.cc"), testPath("bar.cc")}; + Dex I(std::move(Data.first), std::move(Data.second), RelationSlab(), + std::move(Files), std::move(Data), Size); + auto ContainsFile = I.indexedFiles(); + EXPECT_TRUE(ContainsFile("unittest:///foo.cc")); + EXPECT_TRUE(ContainsFile("unittest:///bar.cc")); + EXPECT_FALSE(ContainsFile("unittest:///foobar.cc")); +} + TEST(DexTest, PreferredTypesBoosting) { auto Sym1 = symbol("t1"); Sym1.Type = "T1"; diff --git a/clang-tools-extra/clangd/unittests/IndexTests.cpp b/clang-tools-extra/clangd/unittests/IndexTests.cpp index f04ac8a455a798..8efc637d1250b7 100644 --- a/clang-tools-extra/clangd/unittests/IndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/IndexTests.cpp @@ -224,6 +224,20 @@ TEST(MemIndexTest, Lookup) { EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre()); } +TEST(MemIndexTest, IndexedFiles) { + SymbolSlab Symbols; + RefSlab Refs; + auto Size = Symbols.bytes() + Refs.bytes(); + auto Data = std::make_pair(std::move(Symbols), std::move(Refs)); + llvm::StringSet<> Files = {testPath("foo.cc"), testPath("bar.cc")}; + MemIndex I(std::move(Data.first), std::move(Data.second), RelationSlab(), + std::move(Files), std::move(Data), Size); + auto ContainsFile = I.indexedFiles(); + EXPECT_TRUE(ContainsFile("unittest:///foo.cc")); + EXPECT_TRUE(ContainsFile("unittest:///bar.cc")); + EXPECT_FALSE(ContainsFile("unittest:///foobar.cc")); +} + TEST(MemIndexTest, TemplateSpecialization) { SymbolSlab::Builder B; @@ -367,7 +381,7 @@ TEST(MergeIndexTest, Refs) { Test.Code = std::string(Test1Code.code()); Test.Filename = "test.cc"; auto AST = Test.build(); - Dyn.updateMain(Test.Filename, AST); + Dyn.updateMain(testPath(Test.Filename), AST); // Build static index for test.cc. Test.HeaderCode = HeaderCode; @@ -375,7 +389,7 @@ TEST(MergeIndexTest, Refs) { Test.Filename = "test.cc"; auto StaticAST = Test.build(); // Add stale refs for test.cc. - StaticIndex.updateMain(Test.Filename, StaticAST); + StaticIndex.updateMain(testPath(Test.Filename), StaticAST); // Add refs for test2.cc Annotations Test2Code(R"(class $Foo[[Foo]] {};)"); @@ -384,7 +398,7 @@ TEST(MergeIndexTest, Refs) { Test2.Code = std::string(Test2Code.code()); Test2.Filename = "test2.cc"; StaticAST = Test2.build(); - StaticIndex.updateMain(Test2.Filename, StaticAST); + StaticIndex.updateMain(testPath(Test2.Filename), StaticAST); RefsRequest Request; Request.IDs = {Foo.ID}; @@ -403,10 +417,47 @@ TEST(MergeIndexTest, Refs) { RefSlab::Builder Results2; EXPECT_TRUE( Merge.refs(Request, [&](const Ref &O) { Results2.insert(Foo.ID, O); })); - EXPECT_THAT(std::move(Results2).build(), - ElementsAre(Pair( - _, ElementsAre(AnyOf(FileURI("unittest:///test.cc"), - FileURI("unittest:///test2.cc")))))); + + // Remove all refs for test.cc from dynamic index, + // merged index should not return results from static index for test.cc. + Test.Code = ""; + AST = Test.build(); + Dyn.updateMain(testPath(Test.Filename), AST); + + Request.Limit = llvm::None; + RefSlab::Builder Results3; + EXPECT_FALSE( + Merge.refs(Request, [&](const Ref &O) { Results3.insert(Foo.ID, O); })); + EXPECT_THAT(std::move(Results3).build(), + ElementsAre(Pair(_, UnorderedElementsAre(AllOf( + RefRange(Test2Code.range("Foo")), + FileURI("unittest:///test2.cc")))))); +} + +TEST(MergeIndexTest, IndexedFiles) { + SymbolSlab DynSymbols; + RefSlab DynRefs; + auto DynSize = DynSymbols.bytes() + DynRefs.bytes(); + auto DynData = std::make_pair(std::move(DynSymbols), std::move(DynRefs)); + llvm::StringSet<> DynFiles = {testPath("foo.cc")}; + MemIndex DynIndex(std::move(DynData.first), std::move(DynData.second), + RelationSlab(), std::move(DynFiles), std::move(DynData), + DynSize); + SymbolSlab StaticSymbols; + RefSlab StaticRefs; + auto StaticData = + std::make_pair(std::move(StaticSymbols), std::move(StaticRefs)); + llvm::StringSet<> StaticFiles = {testPath("bar.cc")}; + MemIndex StaticIndex(std::move(StaticData.first), + std::move(StaticData.second), RelationSlab(), + std::move(StaticFiles), std::move(StaticData), + StaticSymbols.bytes() + StaticRefs.bytes()); + MergedIndex Merge(&DynIndex, &StaticIndex); + + auto ContainsFile = Merge.indexedFiles(); + EXPECT_TRUE(ContainsFile("unittest:///foo.cc")); + EXPECT_TRUE(ContainsFile("unittest:///bar.cc")); + EXPECT_FALSE(ContainsFile("unittest:///foobar.cc")); } TEST(MergeIndexTest, NonDocumentation) { diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 0aa87c61baebc7..6402f4eac6c659 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -1237,6 +1237,12 @@ TEST(CrossFileRenameTests, DirtyBuffer) { void relations(const RelationsRequest &Req, llvm::function_ref Callback) const override {} + + llvm::unique_function + indexedFiles() const override { + return [](llvm::StringRef) { return false; }; + } + size_t estimateMemoryUsage() const override { return 0; } } PIndex; Results = rename({MainCode.point(), @@ -1285,6 +1291,12 @@ TEST(CrossFileRenameTests, DeduplicateRefsFromIndex) { void relations(const RelationsRequest &, llvm::function_ref) const override {} + + llvm::unique_function + indexedFiles() const override { + return [](llvm::StringRef) { return false; }; + } + size_t estimateMemoryUsage() const override { return 0; } Ref ReturnedRef; } DIndex(XRefInBarCC); diff --git a/clang-tools-extra/clangd/unittests/TestFS.cpp b/clang-tools-extra/clangd/unittests/TestFS.cpp index ba4010cb45817d..0926c6d72a8a3c 100644 --- a/clang-tools-extra/clangd/unittests/TestFS.cpp +++ b/clang-tools-extra/clangd/unittests/TestFS.cpp @@ -99,8 +99,9 @@ class TestScheme : public URIScheme { llvm::Expected getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, llvm::StringRef HintPath) const override { - if (!HintPath.startswith(testRoot())) - return error("Hint path doesn't start with test root: {0}", HintPath); + if (!HintPath.empty() && !HintPath.startswith(testRoot())) + return error("Hint path is not empty and doesn't start with {0}: {1}", + testRoot(), HintPath); if (!Body.consume_front("/")) return error("Body of an unittest: URI must start with '/'"); llvm::SmallString<16> Path(Body.begin(), Body.end());