diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 770477220e9be7..6153b35edcfdb9 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -21,11 +21,14 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/DeclarationName.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -171,6 +174,22 @@ const Decl *getRefContainer(const Decl *Enclosing, return Enclosing; } +// Check if there is an exact spelling of \p ND at \p Loc. +bool isSpelled(SourceLocation Loc, const NamedDecl &ND) { + auto Name = ND.getDeclName(); + const auto NameKind = Name.getNameKind(); + if (NameKind != DeclarationName::Identifier && + NameKind != DeclarationName::CXXConstructorName) + return false; + const auto &AST = ND.getASTContext(); + const auto &SM = AST.getSourceManager(); + const auto &LO = AST.getLangOpts(); + clang::Token Tok; + if (clang::Lexer::getRawToken(Loc, Tok, SM, LO)) + return false; + auto StrName = Name.getAsString(); + return clang::Lexer::getSpelling(Tok, SM, LO) == StrName; +} } // namespace // Encapsulates decisions about how to record header paths in the index, @@ -545,17 +564,17 @@ bool SymbolCollector::handleDeclOccurrence( if (!ND) return true; + auto ID = getSymbolIDCached(ND); + if (!ID) + return true; + // Mark D as referenced if this is a reference coming from the main file. // D may not be an interesting symbol, but it's cheaper to check at the end. auto &SM = ASTCtx->getSourceManager(); if (Opts.CountReferences && (Roles & static_cast(index::SymbolRole::Reference)) && SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID()) - ReferencedDecls.insert(ND); - - auto ID = getSymbolID(ND); - if (!ID) - return true; + ReferencedSymbols.insert(ID); // ND is the canonical (i.e. first) declaration. If it's in the main file // (which is not a header), then no public declaration was visible, so assume @@ -576,13 +595,6 @@ bool SymbolCollector::handleDeclOccurrence( processRelations(*ND, ID, Relations); bool CollectRef = static_cast(Opts.RefFilter & toRefKind(Roles)); - bool IsOnlyRef = - !(Roles & (static_cast(index::SymbolRole::Declaration) | - static_cast(index::SymbolRole::Definition))); - - if (IsOnlyRef && !CollectRef) - return true; - // Unlike other fields, e.g. Symbols (which use spelling locations), we use // file locations for references (as it aligns the behavior of clangd's // AST-based xref). @@ -590,13 +602,18 @@ bool SymbolCollector::handleDeclOccurrence( if (CollectRef && (!IsMainFileOnly || Opts.CollectMainFileRefs || ND->isExternallyVisible()) && - !isa(ND) && - (Opts.RefsInHeaders || - SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID())) - DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles, - getRefContainer(ASTNode.Parent, Opts)}); + !isa(ND)) { + auto FileLoc = SM.getFileLoc(Loc); + auto FID = SM.getFileID(FileLoc); + if (Opts.RefsInHeaders || FID == SM.getMainFileID()) { + addRef(ID, SymbolRef{FileLoc, FID, Roles, + getRefContainer(ASTNode.Parent, Opts), + isSpelled(FileLoc, *ND)}); + } + } // Don't continue indexing if this is a mere reference. - if (IsOnlyRef) + if (!(Roles & (static_cast(index::SymbolRole::Declaration) | + static_cast(index::SymbolRole::Definition)))) return true; // FIXME: ObjCPropertyDecl are not properly indexed here: @@ -682,7 +699,7 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM") return true; - auto ID = getSymbolID(Name->getName(), MI, SM); + auto ID = getSymbolIDCached(Name->getName(), MI, SM); if (!ID) return true; @@ -693,9 +710,13 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, ASTCtx->getLangOpts()); // Do not store references to main-file macros. if ((static_cast(Opts.RefFilter) & Roles) && !IsMainFileOnly && - (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) + (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) { // FIXME: Populate container information for macro references. - MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr}); + // FIXME: All MacroRefs are marked as Spelled now, but this should be + // checked. + addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr, + /*Spelled=*/true}); + } // Collect symbols. if (!Opts.CollectMacro) @@ -711,7 +732,7 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, if (Opts.CountReferences && (Roles & static_cast(index::SymbolRole::Reference)) && SM.getFileID(SpellingLoc) == SM.getMainFileID()) - ReferencedMacros.insert(Name); + ReferencedSymbols.insert(ID); // Don't continue indexing if this is a mere reference. // FIXME: remove macro with ID if it is undefined. @@ -761,7 +782,7 @@ void SymbolCollector::processRelations( continue; const Decl *Object = R.RelatedSymbol; - auto ObjectID = getSymbolID(Object); + auto ObjectID = getSymbolIDCached(Object); if (!ObjectID) continue; @@ -792,16 +813,13 @@ void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) { void SymbolCollector::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. - auto IncRef = [this](const SymbolID &ID) { + for (const auto &ID : ReferencedSymbols) { if (const auto *S = Symbols.find(ID)) { - Symbol Inc = *S; - ++Inc.References; - Symbols.insert(Inc); - } - }; - for (const NamedDecl *ND : ReferencedDecls) { - if (auto ID = getSymbolID(ND)) { - IncRef(ID); + // SymbolSlab::Builder returns const symbols because strings are interned + // and modifying returned symbols without inserting again wouldn't go + // well. const_cast is safe here as we're modifying a data owned by the + // Symbol. This reduces time spent in SymbolCollector by ~1%. + ++const_cast(S)->References; } } if (Opts.CollectMacro) { @@ -809,16 +827,11 @@ void SymbolCollector::finish() { // First, drop header guards. We can't identify these until EOF. for (const IdentifierInfo *II : IndexedMacros) { if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) - if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager())) + if (auto ID = + getSymbolIDCached(II->getName(), MI, PP->getSourceManager())) if (MI->isUsedForHeaderGuard()) Symbols.erase(ID); } - // Now increment refcounts. - for (const IdentifierInfo *II : ReferencedMacros) { - if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) - if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager())) - IncRef(ID); - } } // Fill in IncludeHeaders. // We delay this until end of TU so header guards are all resolved. @@ -852,58 +865,7 @@ void SymbolCollector::finish() { } } - const auto &SM = ASTCtx->getSourceManager(); - auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole, - bool Spelled = false) { - auto FileID = SM.getFileID(LocAndRole.Loc); - // FIXME: use the result to filter out references. - shouldIndexFile(FileID); - if (const auto *FE = SM.getFileEntryForID(FileID)) { - auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts()); - Ref R; - R.Location.Start = Range.first; - R.Location.End = Range.second; - R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str(); - R.Kind = toRefKind(LocAndRole.Roles, Spelled); - R.Container = getSymbolID(LocAndRole.Container); - Refs.insert(ID, R); - } - }; - // Populate Refs slab from MacroRefs. - // FIXME: All MacroRefs are marked as Spelled now, but this should be checked. - for (const auto &IDAndRefs : MacroRefs) - for (const auto &LocAndRole : IDAndRefs.second) - CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true); - // Populate Refs slab from DeclRefs. - llvm::DenseMap> FilesToTokensCache; - for (auto &DeclAndRef : DeclRefs) { - if (auto ID = getSymbolID(DeclAndRef.first)) { - for (auto &LocAndRole : DeclAndRef.second) { - const auto FileID = SM.getFileID(LocAndRole.Loc); - // FIXME: It's better to use TokenBuffer by passing spelled tokens from - // the caller of SymbolCollector. - if (!FilesToTokensCache.count(FileID)) - FilesToTokensCache[FileID] = - syntax::tokenize(FileID, SM, ASTCtx->getLangOpts()); - llvm::ArrayRef Tokens = FilesToTokensCache[FileID]; - // Check if the referenced symbol is spelled exactly the same way the - // corresponding NamedDecl is. If it is, mark this reference as spelled. - const auto *IdentifierToken = - spelledIdentifierTouching(LocAndRole.Loc, Tokens); - DeclarationName Name = DeclAndRef.first->getDeclName(); - const auto NameKind = Name.getNameKind(); - bool IsTargetKind = NameKind == DeclarationName::Identifier || - NameKind == DeclarationName::CXXConstructorName; - bool Spelled = IdentifierToken && IsTargetKind && - Name.getAsString() == IdentifierToken->text(SM); - CollectRef(ID, LocAndRole, Spelled); - } - } - } - - ReferencedDecls.clear(); - ReferencedMacros.clear(); - DeclRefs.clear(); + ReferencedSymbols.clear(); IncludeFiles.clear(); } @@ -983,16 +945,18 @@ void SymbolCollector::addDefinition(const NamedDecl &ND, const Symbol &DeclSym) { if (DeclSym.Definition) return; + const auto &SM = ND.getASTContext().getSourceManager(); + auto Loc = nameLocation(ND, SM); + shouldIndexFile(SM.getFileID(Loc)); + auto DefLoc = getTokenLocation(Loc); // If we saw some forward declaration, we end up copying the symbol. // This is not ideal, but avoids duplicating the "is this a definition" check // in clang::index. We should only see one definition. + if (!DefLoc) + return; Symbol S = DeclSym; - const auto &SM = ND.getASTContext().getSourceManager(); - auto Loc = nameLocation(ND, SM); // FIXME: use the result to filter out symbols. - shouldIndexFile(SM.getFileID(Loc)); - if (auto DefLoc = getTokenLocation(Loc)) - S.Definition = *DefLoc; + S.Definition = *DefLoc; Symbols.insert(S); } @@ -1005,5 +969,36 @@ bool SymbolCollector::shouldIndexFile(FileID FID) { return I.first->second; } +void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { + const auto &SM = ASTCtx->getSourceManager(); + // FIXME: use the result to filter out references. + shouldIndexFile(SR.FID); + if (const auto *FE = SM.getFileEntryForID(SR.FID)) { + auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts()); + Ref R; + R.Location.Start = Range.first; + R.Location.End = Range.second; + R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str(); + R.Kind = toRefKind(SR.Roles, SR.Spelled); + R.Container = getSymbolIDCached(SR.Container); + Refs.insert(ID, R); + } +} + +SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) { + auto It = DeclToIDCache.try_emplace(D, SymbolID{}); + if (It.second) + It.first->second = getSymbolID(D); + return It.first->second; +} + +SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName, + const MacroInfo *MI, + const SourceManager &SM) { + auto It = MacroToIDCache.try_emplace(MI, SymbolID{}); + if (It.second) + It.first->second = getSymbolID(MacroName, MI, SM); + return It.first->second; +} } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h index 4e1b563a8831e0..1c6205a4022cac 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.h +++ b/clang-tools-extra/clangd/index/SymbolCollector.h @@ -8,11 +8,12 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H -#include "index/CanonicalIncludes.h" #include "CollectMacros.h" +#include "index/CanonicalIncludes.h" #include "index/Ref.h" #include "index/Relation.h" #include "index/Symbol.h" +#include "index/SymbolID.h" #include "index/SymbolOrigin.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -21,6 +22,7 @@ #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include @@ -142,6 +144,10 @@ class SymbolCollector : public index::IndexDataConsumer { llvm::Optional getIncludeHeader(const Symbol &S, FileID); + SymbolID getSymbolIDCached(const Decl *D); + SymbolID getSymbolIDCached(const llvm::StringRef MacroName, + const MacroInfo *MI, const SourceManager &SM); + // All Symbols collected from the AST. SymbolSlab::Builder Symbols; // File IDs for Symbol.IncludeHeaders. @@ -164,14 +170,14 @@ class SymbolCollector : public index::IndexDataConsumer { Options Opts; struct SymbolRef { SourceLocation Loc; + FileID FID; index::SymbolRoleSet Roles; const Decl *Container; + bool Spelled; }; + void addRef(SymbolID ID, const SymbolRef &SR); // Symbols referenced from the current TU, flushed on finish(). - llvm::DenseSet ReferencedDecls; - llvm::DenseSet ReferencedMacros; - llvm::DenseMap> DeclRefs; - llvm::DenseMap> MacroRefs; + llvm::DenseSet ReferencedSymbols; // Maps canonical declaration provided by clang to canonical declaration for // an index symbol, if clangd prefers a different declaration than that // provided by clang. For example, friend declaration might be considered @@ -184,6 +190,8 @@ class SymbolCollector : public index::IndexDataConsumer { // to insert for which symbol, etc. class HeaderFileURICache; std::unique_ptr HeaderFileURIs; + llvm::DenseMap DeclToIDCache; + llvm::DenseMap MacroToIDCache; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/index/SymbolID.cpp b/clang-tools-extra/clangd/index/SymbolID.cpp index 2bb3d4f0b6a0da..d6686d2b1084a7 100644 --- a/clang-tools-extra/clangd/index/SymbolID.cpp +++ b/clang-tools-extra/clangd/index/SymbolID.cpp @@ -46,14 +46,5 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID) { return OS << llvm::toHex(ID.raw()); } -llvm::hash_code hash_value(const SymbolID &ID) { - // We already have a good hash, just return the first bytes. - static_assert(sizeof(size_t) <= SymbolID::RawSize, - "size_t longer than SHA1!"); - size_t Result; - memcpy(&Result, ID.raw().data(), sizeof(size_t)); - return llvm::hash_code(Result); -} - } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolID.h b/clang-tools-extra/clangd/index/SymbolID.h index 989818ef8bd7f8..d1512080edbfb7 100644 --- a/clang-tools-extra/clangd/index/SymbolID.h +++ b/clang-tools-extra/clangd/index/SymbolID.h @@ -14,6 +14,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include @@ -36,9 +37,7 @@ class SymbolID { bool operator==(const SymbolID &Sym) const { return HashValue == Sym.HashValue; } - bool operator!=(const SymbolID &Sym) const { - return !(*this == Sym); - } + bool operator!=(const SymbolID &Sym) const { return !(*this == Sym); } bool operator<(const SymbolID &Sym) const { return HashValue < Sym.HashValue; } @@ -60,7 +59,14 @@ class SymbolID { std::array HashValue{}; }; -llvm::hash_code hash_value(const SymbolID &ID); +inline llvm::hash_code hash_value(const SymbolID &ID) { + // We already have a good hash, just return the first bytes. + static_assert(sizeof(size_t) <= SymbolID::RawSize, + "size_t longer than SHA1!"); + size_t Result; + memcpy(&Result, ID.raw().data(), sizeof(size_t)); + return llvm::hash_code(Result); +} // Write SymbolID into the given stream. SymbolID is encoded as ID.str(). llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID); diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 93d324d1de4462..f8a891d12f5a07 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1014,10 +1014,21 @@ TEST_F(SymbolCollectorTest, SpelledReferences) { )cpp", "Foo::Foo" /// constructor. }, + { // Unclean identifiers + R"cpp( + struct Foo {}; + )cpp", + R"cpp( + $spelled[[Fo\ +o]] f{}; + )cpp", + "Foo", + }, }; CollectorOpts.RefFilter = RefKind::All; CollectorOpts.RefsInHeaders = false; for (const auto& T : TestCases) { + SCOPED_TRACE(T.Header + "\n---\n" + T.Main); Annotations Header(T.Header); Annotations Main(T.Main); // Reset the file system. @@ -1040,10 +1051,14 @@ TEST_F(SymbolCollectorTest, SpelledReferences) { } const auto SpelledRefs = std::move(SpelledSlabBuilder).build(), ImplicitRefs = std::move(ImplicitSlabBuilder).build(); - EXPECT_THAT(SpelledRefs, - Contains(Pair(TargetID, haveRanges(SpelledRanges)))); - EXPECT_THAT(ImplicitRefs, - Contains(Pair(TargetID, haveRanges(ImplicitRanges)))); + EXPECT_EQ(SpelledRanges.empty(), SpelledRefs.empty()); + EXPECT_EQ(ImplicitRanges.empty(), ImplicitRefs.empty()); + if (!SpelledRanges.empty()) + EXPECT_THAT(SpelledRefs, + Contains(Pair(TargetID, haveRanges(SpelledRanges)))); + if (!ImplicitRanges.empty()) + EXPECT_THAT(ImplicitRefs, + Contains(Pair(TargetID, haveRanges(ImplicitRanges)))); } }