Skip to content

Commit

Permalink
[clangd] Use pre-populated mappings for standard symbols
Browse files Browse the repository at this point in the history
Summary:
This takes ~5% of time when running clangd unit tests.

To achieve this, move mapping of system includes out of CanonicalIncludes
and into a separate class

Reviewers: sammccall, hokein

Reviewed By: sammccall

Subscribers: MaskRay, jkorous, arphaman, kadircet, jfb, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D67172

llvm-svn: 371408
  • Loading branch information
ilya-biryukov committed Sep 9, 2019
1 parent 8e3bc9b commit 8b76709
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 76 deletions.
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/ParsedAST.cpp
Expand Up @@ -367,7 +367,7 @@ ParsedAST::build(std::unique_ptr<clang::CompilerInvocation> CI,
if (Preamble)
CanonIncludes = Preamble->CanonIncludes;
else
addSystemHeadersMapping(&CanonIncludes, Clang->getLangOpts());
CanonIncludes.addSystemHeadersMapping(Clang->getLangOpts());
std::unique_ptr<CommentHandler> IWYUHandler =
collectIWYUHeaderMaps(&CanonIncludes);
Clang->getPreprocessor().addCommentHandler(IWYUHandler.get());
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/Preamble.cpp
Expand Up @@ -78,7 +78,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks {
}

void BeforeExecute(CompilerInstance &CI) override {
addSystemHeadersMapping(&CanonIncludes, CI.getLangOpts());
CanonIncludes.addSystemHeadersMapping(CI.getLangOpts());
SourceMgr = &CI.getSourceManager();
}

Expand Down
81 changes: 44 additions & 37 deletions clang-tools-extra/clangd/index/CanonicalIncludes.cpp
Expand Up @@ -9,6 +9,7 @@
#include "CanonicalIncludes.h"
#include "Headers.h"
#include "clang/Driver/Types.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <algorithm>

Expand All @@ -18,43 +19,40 @@ namespace {
const char IWYUPragma[] = "// IWYU pragma: private, include ";
} // namespace

void CanonicalIncludes::addPathSuffixMapping(llvm::StringRef Suffix,
llvm::StringRef CanonicalPath) {
int Components = std::distance(llvm::sys::path::begin(Suffix),
llvm::sys::path::end(Suffix));
MaxSuffixComponents = std::max(MaxSuffixComponents, Components);
SuffixHeaderMapping[Suffix] = CanonicalPath;
}

void CanonicalIncludes::addMapping(llvm::StringRef Path,
llvm::StringRef CanonicalPath) {
FullPathMapping[Path] = CanonicalPath;
}

void CanonicalIncludes::addSymbolMapping(llvm::StringRef QualifiedName,
llvm::StringRef CanonicalPath) {
this->SymbolMapping[QualifiedName] = CanonicalPath;
}
/// The maximum number of path components in a key from StdSuffixHeaderMapping.
/// Used to minimize the number of lookups in suffix path mappings.
constexpr int MaxSuffixComponents = 3;

llvm::StringRef
CanonicalIncludes::mapHeader(llvm::StringRef Header,
llvm::StringRef QualifiedName) const {
assert(!Header.empty());
auto SE = SymbolMapping.find(QualifiedName);
if (SE != SymbolMapping.end())
return SE->second;
if (StdSymbolMapping) {
auto SE = StdSymbolMapping->find(QualifiedName);
if (SE != StdSymbolMapping->end())
return SE->second;
}

auto MapIt = FullPathMapping.find(Header);
if (MapIt != FullPathMapping.end())
return MapIt->second;

if (!StdSuffixHeaderMapping)
return Header;

int Components = 1;

for (auto It = llvm::sys::path::rbegin(Header),
End = llvm::sys::path::rend(Header);
It != End && Components <= MaxSuffixComponents; ++It, ++Components) {
auto SubPath = Header.substr(It->data() - Header.begin());
auto MappingIt = SuffixHeaderMapping.find(SubPath);
if (MappingIt != SuffixHeaderMapping.end())
auto MappingIt = StdSuffixHeaderMapping->find(SubPath);
if (MappingIt != StdSuffixHeaderMapping->end())
return MappingIt->second;
}
return Header;
Expand Down Expand Up @@ -86,29 +84,27 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
return std::make_unique<PragmaCommentHandler>(Includes);
}

void addSystemHeadersMapping(CanonicalIncludes *Includes,
const LangOptions &Language) {
static constexpr std::pair<const char *, const char *> SymbolMap[] = {
#define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header },
#include "StdSymbolMap.inc"
#undef SYMBOL
};
static constexpr std::pair<const char *, const char *> CSymbolMap[] = {
#define SYMBOL(Name, NameSpace, Header) { #Name, #Header },
#include "CSymbolMap.inc"
#undef SYMBOL
};
void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) {
if (Language.CPlusPlus) {
for (const auto &Pair : SymbolMap)
Includes->addSymbolMapping(Pair.first, Pair.second);
static const auto *Symbols = new llvm::StringMap<llvm::StringRef>({
#define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header},
#include "StdSymbolMap.inc"
#undef SYMBOL
});
StdSymbolMapping = Symbols;
} else if (Language.C11) {
for (const auto &Pair : CSymbolMap)
Includes->addSymbolMapping(Pair.first, Pair.second);
static const auto *CSymbols = new llvm::StringMap<llvm::StringRef>({
#define SYMBOL(Name, NameSpace, Header) {#Name, #Header},
#include "CSymbolMap.inc"
#undef SYMBOL
});
StdSymbolMapping = CSymbols;
}

// FIXME: remove the std header mapping once we support ambiguous symbols, now
// it serves as a fallback to disambiguate:
// - symbols with multiple headers (e.g. std::move)
static constexpr std::pair<const char *, const char *> SystemHeaderMap[] = {
static const auto *SystemHeaderMap = new llvm::StringMap<llvm::StringRef>({
{"include/__stddef_max_align_t.h", "<cstddef>"},
{"include/__wmmintrin_aes.h", "<wmmintrin.h>"},
{"include/__wmmintrin_pclmul.h", "<wmmintrin.h>"},
Expand Down Expand Up @@ -760,9 +756,20 @@ void addSystemHeadersMapping(CanonicalIncludes *Includes,
{"bits/waitflags.h", "<sys/wait.h>"},
{"bits/waitstatus.h", "<sys/wait.h>"},
{"bits/xtitypes.h", "<stropts.h>"},
};
for (const auto &Pair : SystemHeaderMap)
Includes->addPathSuffixMapping(Pair.first, Pair.second);
});
// Check MaxSuffixComponents constant is correct.
assert(llvm::all_of(SystemHeaderMap->keys(), [](llvm::StringRef Path) {
return std::distance(llvm::sys::path::begin(Path),
llvm::sys::path::end(Path)) <= MaxSuffixComponents;
}));
// ... and precise.
assert(llvm::find_if(SystemHeaderMap->keys(), [](llvm::StringRef Path) {
return std::distance(llvm::sys::path::begin(Path),
llvm::sys::path::end(Path)) ==
MaxSuffixComponents;
}) != SystemHeaderMap->keys().end());

StdSuffixHeaderMapping = SystemHeaderMap;
}

} // namespace clangd
Expand Down
43 changes: 15 additions & 28 deletions clang-tools-extra/clangd/index/CanonicalIncludes.h
Expand Up @@ -21,6 +21,7 @@

#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include <mutex>
#include <string>
Expand All @@ -34,37 +35,34 @@ namespace clangd {
/// Only const methods (i.e. mapHeader) in this class are thread safe.
class CanonicalIncludes {
public:
CanonicalIncludes() = default;

/// Adds a string-to-string mapping from \p Path to \p CanonicalPath.
void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath);

/// Maps files with last path components matching \p Suffix to \p
/// CanonicalPath.
void addPathSuffixMapping(llvm::StringRef Suffix,
llvm::StringRef CanonicalPath);

/// Sets the canonical include for any symbol with \p QualifiedName.
/// Symbol mappings take precedence over header mappings.
void addSymbolMapping(llvm::StringRef QualifiedName,
llvm::StringRef CanonicalPath);

/// Returns the canonical include for symbol with \p QualifiedName.
/// \p Header is the file the declaration was reachable from.
/// Header itself will be returned if there is no relevant mapping.
llvm::StringRef mapHeader(llvm::StringRef Header,
llvm::StringRef QualifiedName) const;

/// Adds mapping for system headers and some special symbols (e.g. STL symbols
/// in <iosfwd> need to be mapped individually). Approximately, the following
/// system headers are handled:
/// - C++ standard library e.g. bits/basic_string.h$ -> <string>
/// - Posix library e.g. bits/pthreadtypes.h$ -> <pthread.h>
/// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> <immintrin.h>
/// The mapping is hardcoded and hand-maintained, so it might not cover all
/// headers.
void addSystemHeadersMapping(const LangOptions &Language);

private:
/// A map from full include path to a canonical path.
llvm::StringMap<std::string> FullPathMapping;
/// A map from a suffix (one or components of a path) to a canonical path.
llvm::StringMap<std::string> SuffixHeaderMapping;
/// Maximum number of path components stored in a key of SuffixHeaderMapping.
/// Used to reduce the number of lookups into SuffixHeaderMapping.
int MaxSuffixComponents = 0;
/// Used only for mapping standard headers.
const llvm::StringMap<llvm::StringRef> *StdSuffixHeaderMapping = nullptr;
/// A map from fully qualified symbol names to header names.
llvm::StringMap<std::string> SymbolMapping;
/// Used only for mapping standard symbols.
const llvm::StringMap<llvm::StringRef> *StdSymbolMapping = nullptr;
};

/// Returns a CommentHandler that parses pragma comment on include files to
Expand All @@ -76,17 +74,6 @@ class CanonicalIncludes {
std::unique_ptr<CommentHandler>
collectIWYUHeaderMaps(CanonicalIncludes *Includes);

/// Adds mapping for system headers and some special symbols (e.g. STL symbols
/// in <iosfwd> need to be mapped individually). Approximately, the following
/// system headers are handled:
/// - C++ standard library e.g. bits/basic_string.h$ -> <string>
/// - Posix library e.g. bits/pthreadtypes.h$ -> <pthread.h>
/// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> <immintrin.h>
/// The mapping is hardcoded and hand-maintained, so it might not cover all
/// headers.
void addSystemHeadersMapping(CanonicalIncludes *Includes,
const LangOptions &Language);

} // namespace clangd
} // namespace clang

Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/index/IndexAction.cpp
Expand Up @@ -141,7 +141,7 @@ class IndexAction : public ASTFrontendAction {
std::unique_ptr<ASTConsumer>
CreateASTConsumer(CompilerInstance &CI, llvm::StringRef InFile) override {
CI.getPreprocessor().addCommentHandler(PragmaHandler.get());
addSystemHeadersMapping(Includes.get(), CI.getLangOpts());
Includes->addSystemHeadersMapping(CI.getLangOpts());
if (IncludeGraphCallback != nullptr)
CI.getPreprocessor().addPPCallbacks(
std::make_unique<IncludeGraphCollector>(CI.getSourceManager(), IG));
Expand Down
28 changes: 21 additions & 7 deletions clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "index/CanonicalIncludes.h"
#include "clang/Basic/LangOptions.h"
#include "gtest/gtest.h"

namespace clang {
Expand All @@ -17,7 +18,7 @@ TEST(CanonicalIncludesTest, CStandardLibrary) {
CanonicalIncludes CI;
auto Language = LangOptions();
Language.C11 = true;
addSystemHeadersMapping(&CI, Language);
CI.addSystemHeadersMapping(Language);
// Usual standard library symbols are mapped correctly.
EXPECT_EQ("<stdio.h>", CI.mapHeader("path/stdio.h", "printf"));
}
Expand All @@ -26,7 +27,7 @@ TEST(CanonicalIncludesTest, CXXStandardLibrary) {
CanonicalIncludes CI;
auto Language = LangOptions();
Language.CPlusPlus = true;
addSystemHeadersMapping(&CI, Language);
CI.addSystemHeadersMapping(Language);

// Usual standard library symbols are mapped correctly.
EXPECT_EQ("<vector>", CI.mapHeader("path/vector.h", "std::vector"));
Expand Down Expand Up @@ -54,19 +55,32 @@ TEST(CanonicalIncludesTest, PathMapping) {
TEST(CanonicalIncludesTest, SymbolMapping) {
// As used for standard library.
CanonicalIncludes CI;
CI.addSymbolMapping("some::symbol", "<baz>");
LangOptions Language;
Language.CPlusPlus = true;
// Ensures 'std::vector' is mapped to '<vector>'.
CI.addSystemHeadersMapping(Language);

EXPECT_EQ("<baz>", CI.mapHeader("foo/bar", "some::symbol"));
EXPECT_EQ("<vector>", CI.mapHeader("foo/bar", "std::vector"));
EXPECT_EQ("foo/bar", CI.mapHeader("foo/bar", "other::symbol"));
}

TEST(CanonicalIncludesTest, Precedence) {
CanonicalIncludes CI;
CI.addMapping("some/path", "<path>");
CI.addSymbolMapping("some::symbol", "<symbol>");
LangOptions Language;
Language.CPlusPlus = true;
CI.addSystemHeadersMapping(Language);

// We added a mapping from some/path to <path>.
ASSERT_EQ("<path>", CI.mapHeader("some/path", ""));
// We should have a path from 'bits/stl_vector.h' to '<vector>'.
ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h", ""));
// We should also have a symbol mapping from 'std::map' to '<map>'.
ASSERT_EQ("<map>", CI.mapHeader("some/header.h", "std::map"));

// Symbol mapping beats path mapping.
EXPECT_EQ("<symbol>", CI.mapHeader("some/path", "some::symbol"));
// And the symbol mapping should take precedence over paths mapping.
EXPECT_EQ("<map>", CI.mapHeader("bits/stl_vector.h", "std::map"));
EXPECT_EQ("<map>", CI.mapHeader("some/path", "std::map"));
}

} // namespace
Expand Down
Expand Up @@ -974,7 +974,7 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
CanonicalIncludes Includes;
auto Language = LangOptions();
Language.CPlusPlus = true;
addSystemHeadersMapping(&Includes, Language);
Includes.addSystemHeadersMapping(Language);
CollectorOpts.Includes = &Includes;
runSymbolCollector("namespace std { class string {}; }", /*Main=*/"");
EXPECT_THAT(Symbols,
Expand Down

0 comments on commit 8b76709

Please sign in to comment.