Skip to content

Commit

Permalink
[clangd] Loose include-cleaner matching for verbatim headers
Browse files Browse the repository at this point in the history
This updates clangd to take advantage of the APIs added in D155819.
The main difficulties here are around path normalization.

For layering and performance reasons Includes compares paths lexically, and so
we should have consistent paths that can be compared across addSearchPath() and
add(): symlinks resolved or not, relative or absolute.
This patch spells out that requirement, for most tools consistent use of
FileManager/HeaderSearch is enough.

For clangd this does not work: IncludeStructure doesn't hold FileEntrys due to
the preamble/main-file split. It records paths, but canonicalizes them first.
We choose to use this canonical form as our common representation, so we have
to canonicalize the directory entries too. This is done in preamble-build and
recorded in IncludeStructure, as canonicalization is quite expensive.

Differential Revision: https://reviews.llvm.org/D155878
  • Loading branch information
sam-mccall committed Jul 27, 2023
1 parent e5f0483 commit d97a341
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 87 deletions.
12 changes: 12 additions & 0 deletions clang-tools-extra/clangd/Headers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/DirectoryLookup.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
Expand Down Expand Up @@ -178,6 +179,17 @@ void IncludeStructure::collect(const CompilerInstance &CI) {
MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
auto Collector = std::make_unique<RecordHeaders>(CI, this);
CI.getPreprocessor().addPPCallbacks(std::move(Collector));

// If we're reusing a preamble, don't repopulate SearchPathsCanonical.
// The entries will be the same, but canonicalizing to find out is expensive!
if (SearchPathsCanonical.empty()) {
for (const auto &Dir :
CI.getPreprocessor().getHeaderSearchInfo().search_dir_range()) {
if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
SearchPathsCanonical.emplace_back(
SM.getFileManager().getCanonicalName(*Dir.getDirRef()));
}
}
}

std::optional<IncludeStructure::HeaderID>
Expand Down
5 changes: 5 additions & 0 deletions clang-tools-extra/clangd/Headers.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ class IncludeStructure {

std::vector<Inclusion> MainFileIncludes;

// The entries of the header search path. (HeaderSearch::search_dir_range())
// Only includes the plain-directory entries (not header maps or frameworks).
// All paths are canonical (FileManager::getCanonicalPath()).
std::vector<std::string> SearchPathsCanonical;

// We reserve HeaderID(0) for the main file and will manually check for that
// in getID and getOrCreateID because the UniqueID is not stable when the
// content of the main file changes.
Expand Down
15 changes: 4 additions & 11 deletions clang-tools-extra/clangd/Hover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1194,8 +1194,7 @@ void maybeAddSymbolProviders(ParsedAST &AST, HoverInfo &HI,
return;

std::string Result;
include_cleaner::Includes ConvertedIncludes =
convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes);
include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
for (const auto &P : RankedProviders) {
if (P.kind() == include_cleaner::Header::Physical &&
P.physical() == SM.getFileEntryForID(SM.getMainFileID()))
Expand Down Expand Up @@ -1247,9 +1246,7 @@ std::string getSymbolName(include_cleaner::Symbol Sym) {

void maybeAddUsedSymbols(ParsedAST &AST, HoverInfo &HI, const Inclusion &Inc) {
const SourceManager &SM = AST.getSourceManager();
const auto &ConvertedMainFileIncludes =
convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes);
const auto &HoveredInclude = convertIncludes(SM, llvm::ArrayRef{Inc});
auto Converted = convertIncludes(AST);
llvm::DenseSet<include_cleaner::Symbol> UsedSymbols;
include_cleaner::walkUsed(
AST.getLocalTopLevelDecls(), collectMacroReferences(AST),
Expand All @@ -1260,12 +1257,8 @@ void maybeAddUsedSymbols(ParsedAST &AST, HoverInfo &HI, const Inclusion &Inc) {
UsedSymbols.contains(Ref.Target))
return;

auto Provider =
firstMatchedProvider(ConvertedMainFileIncludes, Providers);
if (!Provider || HoveredInclude.match(*Provider).empty())
return;

UsedSymbols.insert(Ref.Target);
if (isPreferredProvider(Inc, Converted, Providers))
UsedSymbols.insert(Ref.Target);
});

for (const auto &UsedSymbolDecl : UsedSymbols)
Expand Down
47 changes: 29 additions & 18 deletions clang-tools-extra/clangd/IncludeCleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/DirectoryLookup.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Core/Replacement.h"
Expand Down Expand Up @@ -347,18 +348,25 @@ collectMacroReferences(ParsedAST &AST) {
return Macros;
}

include_cleaner::Includes
convertIncludes(const SourceManager &SM,
const llvm::ArrayRef<Inclusion> Includes) {
include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
auto &SM = AST.getSourceManager();

include_cleaner::Includes ConvertedIncludes;
for (const Inclusion &Inc : Includes) {
// We satisfy Includes's contract that search dirs and included files have
// matching path styles: both ultimately use FileManager::getCanonicalName().
for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
ConvertedIncludes.addSearchDirectory(Dir);

for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
include_cleaner::Include TransformedInc;
llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
TransformedInc.Spelled = WrittenRef.trim("\"<>");
TransformedInc.HashLocation =
SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
TransformedInc.Line = Inc.HashLine + 1;
TransformedInc.Angled = WrittenRef.starts_with("<");
// Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
// which is based on FileManager::getCanonicalName(ParentDir).
auto FE = SM.getFileManager().getFileRef(Inc.Resolved);
if (!FE) {
elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
Expand All @@ -376,9 +384,7 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
if (AST.getLangOpts().ObjC)
return {};
const auto &SM = AST.getSourceManager();
const auto &Includes = AST.getIncludeStructure();
include_cleaner::Includes ConvertedIncludes =
convertIncludes(SM, Includes.MainFileIncludes);
include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
auto *PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);

Expand All @@ -401,7 +407,8 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
}
for (auto *Inc : ConvertedIncludes.match(H)) {
Satisfied = true;
auto HeaderID = Includes.getID(&Inc->Resolved->getFileEntry());
auto HeaderID =
AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
assert(HeaderID.has_value() &&
"ConvertedIncludes only contains resolved includes.");
Used.insert(*HeaderID);
Expand Down Expand Up @@ -456,6 +463,20 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
return {std::move(UnusedIncludes), std::move(MissingIncludes)};
}

bool isPreferredProvider(const Inclusion &Inc,
const include_cleaner::Includes &Includes,
llvm::ArrayRef<include_cleaner::Header> Providers) {
for (const auto &H : Providers) {
auto Matches = Includes.match(H);
for (const include_cleaner::Include *Match : Matches)
if (Match->Line == unsigned(Inc.HashLine + 1))
return true; // this header is (equal) best
if (!Matches.empty())
return false; // another header is better
}
return false; // no header provides the symbol
}

std::vector<Diag>
issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
const IncludeCleanerFindings &Findings,
Expand Down Expand Up @@ -494,14 +515,4 @@ issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
return Result;
}

std::optional<include_cleaner::Header>
firstMatchedProvider(const include_cleaner::Includes &Includes,
llvm::ArrayRef<include_cleaner::Header> Providers) {
for (const auto &H : Providers) {
if (!Includes.match(H).empty())
return H;
}
// No match for this provider in the includes list.
return std::nullopt;
}
} // namespace clang::clangd
15 changes: 8 additions & 7 deletions clang-tools-extra/clangd/IncludeCleaner.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,18 @@ void setIncludeCleanerAnalyzesStdlib(bool B);

/// Converts the clangd include representation to include-cleaner
/// include representation.
include_cleaner::Includes
convertIncludes(const SourceManager &SM,
const llvm::ArrayRef<Inclusion> Includes);
include_cleaner::Includes convertIncludes(const ParsedAST &);

std::vector<include_cleaner::SymbolReference>
collectMacroReferences(ParsedAST &AST);

/// Find the first provider in the list that is matched by the includes.
std::optional<include_cleaner::Header>
firstMatchedProvider(const include_cleaner::Includes &Includes,
llvm::ArrayRef<include_cleaner::Header> Providers);
/// Whether this #include is considered to provide a particular symbol.
///
/// This means it satisfies the reference, and no other #include does better.
/// `Providers` is the symbol's candidate headers according to walkUsed().
bool isPreferredProvider(const Inclusion &, const include_cleaner::Includes &,
llvm::ArrayRef<include_cleaner::Header> Providers);

} // namespace clangd
} // namespace clang

Expand Down
16 changes: 5 additions & 11 deletions clang-tools-extra/clangd/XRefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1334,22 +1334,16 @@ maybeFindIncludeReferences(ParsedAST &AST, Position Pos,
if (IncludeOnLine == Includes.end())
return std::nullopt;

const auto &Inc = *IncludeOnLine;
const SourceManager &SM = AST.getSourceManager();
ReferencesResult Results;
auto ConvertedMainFileIncludes = convertIncludes(SM, Includes);
auto ReferencedInclude = convertIncludes(SM, Inc);
auto Converted = convertIncludes(AST);
include_cleaner::walkUsed(
AST.getLocalTopLevelDecls(), collectMacroReferences(AST),
AST.getPragmaIncludes().get(), SM,
[&](const include_cleaner::SymbolReference &Ref,
llvm::ArrayRef<include_cleaner::Header> Providers) {
if (Ref.RT != include_cleaner::RefType::Explicit)
return;

auto Provider =
firstMatchedProvider(ConvertedMainFileIncludes, Providers);
if (!Provider || ReferencedInclude.match(*Provider).empty())
if (Ref.RT != include_cleaner::RefType::Explicit ||
!isPreferredProvider(*IncludeOnLine, Converted, Providers))
return;

auto Loc = SM.getFileLoc(Ref.RefLocation);
Expand All @@ -1370,8 +1364,8 @@ maybeFindIncludeReferences(ParsedAST &AST, Position Pos,

// Add the #include line to the references list.
ReferencesResult::Reference Result;
Result.Loc.range =
rangeTillEOL(SM.getBufferData(SM.getMainFileID()), Inc.HashOffset);
Result.Loc.range = rangeTillEOL(SM.getBufferData(SM.getMainFileID()),
IncludeOnLine->HashOffset);
Result.Loc.uri = URIMainFile;
Results.References.push_back(std::move(Result));
return Results;
Expand Down
9 changes: 9 additions & 0 deletions clang-tools-extra/clangd/unittests/HeadersTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,15 @@ TEST_F(HeadersTest, IncludeDirective) {
directive(tok::pp_include_next)));
}

TEST_F(HeadersTest, SearchPath) {
FS.Files["foo/bar.h"] = "x";
FS.Files["foo/bar/baz.h"] = "y";
CDB.ExtraClangFlags.push_back("-Ifoo/bar");
CDB.ExtraClangFlags.push_back("-Ifoo/bar/..");
EXPECT_THAT(collectIncludes().SearchPathsCanonical,
ElementsAre(Subdir, testPath("foo/bar"), testPath("foo")));
}

TEST_F(HeadersTest, InsertInclude) {
std::string Path = testPath("sub/bar.h");
FS.Files[Path] = "";
Expand Down
92 changes: 52 additions & 40 deletions clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include "clang/AST/DeclBase.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
Expand Down Expand Up @@ -455,46 +457,30 @@ TEST(IncludeCleaner, NoCrash) {
MainCode.range());
}

TEST(IncludeCleaner, FirstMatchedProvider) {
struct {
const char *Code;
const std::vector<include_cleaner::Header> Providers;
const std::optional<include_cleaner::Header> ExpectedProvider;
} Cases[] = {
{R"cpp(
#include "bar.h"
#include "foo.h"
)cpp",
{include_cleaner::Header{"bar.h"}, include_cleaner::Header{"foo.h"}},
include_cleaner::Header{"bar.h"}},
{R"cpp(
#include "bar.h"
#include "foo.h"
)cpp",
{include_cleaner::Header{"foo.h"}, include_cleaner::Header{"bar.h"}},
include_cleaner::Header{"foo.h"}},
{"#include \"bar.h\"",
{include_cleaner::Header{"bar.h"}},
include_cleaner::Header{"bar.h"}},
{"#include \"bar.h\"", {include_cleaner::Header{"foo.h"}}, std::nullopt},
{"#include \"bar.h\"", {}, std::nullopt}};
for (const auto &Case : Cases) {
Annotations Code{Case.Code};
SCOPED_TRACE(Code.code());

TestTU TU;
TU.Code = Code.code();
TU.AdditionalFiles["bar.h"] = "";
TU.AdditionalFiles["foo.h"] = "";

auto AST = TU.build();
std::optional<include_cleaner::Header> MatchedProvider =
firstMatchedProvider(
convertIncludes(AST.getSourceManager(),
AST.getIncludeStructure().MainFileIncludes),
Case.Providers);
EXPECT_EQ(MatchedProvider, Case.ExpectedProvider);
}
TEST(IncludeCleaner, IsPreferredProvider) {
auto TU = TestTU::withCode(R"cpp(
#include "decl.h"
#include "def.h"
#include "def.h"
)cpp");
TU.AdditionalFiles["decl.h"] = "";
TU.AdditionalFiles["def.h"] = "";

auto AST = TU.build();
auto &IncludeDecl = AST.getIncludeStructure().MainFileIncludes[0];
auto &IncludeDef1 = AST.getIncludeStructure().MainFileIncludes[1];
auto &IncludeDef2 = AST.getIncludeStructure().MainFileIncludes[2];

auto &FM = AST.getSourceManager().getFileManager();
auto *DeclH = &FM.getOptionalFileRef("decl.h")->getFileEntry();
auto *DefH = &FM.getOptionalFileRef("def.h")->getFileEntry();

auto Includes = convertIncludes(AST);
std::vector<include_cleaner::Header> Providers = {
include_cleaner::Header(DefH), include_cleaner::Header(DeclH)};
EXPECT_FALSE(isPreferredProvider(IncludeDecl, Includes, Providers));
EXPECT_TRUE(isPreferredProvider(IncludeDef1, Includes, Providers));
EXPECT_TRUE(isPreferredProvider(IncludeDef2, Includes, Providers));
}

TEST(IncludeCleaner, BatchFix) {
Expand Down Expand Up @@ -560,6 +546,32 @@ TEST(IncludeCleaner, BatchFix) {
FixMessage("fix all includes")})));
}

// In the presence of IWYU pragma private, we should accept spellings other
// than the recommended one if they appear to name the same public header.
TEST(IncludeCleaner, VerbatimEquivalence) {
auto TU = TestTU::withCode(R"cpp(
#include "lib/rel/public.h"
int x = Public;
)cpp");
TU.AdditionalFiles["repo/lib/rel/private.h"] = R"cpp(
#pragma once
// IWYU pragma: private, include "rel/public.h"
int Public;
)cpp";
TU.AdditionalFiles["repo/lib/rel/public.h"] = R"cpp(
#pragma once
#include "rel/private.h"
)cpp";

TU.ExtraArgs.push_back("-Irepo");
TU.ExtraArgs.push_back("-Irepo/lib");

auto AST = TU.build();
auto Findings = computeIncludeCleanerFindings(AST);
EXPECT_THAT(Findings.MissingIncludes, IsEmpty());
EXPECT_THAT(Findings.UnusedIncludes, IsEmpty());
}

} // namespace
} // namespace clangd
} // namespace clang

0 comments on commit d97a341

Please sign in to comment.