-
Notifications
You must be signed in to change notification settings - Fork 11.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[include-cleaner] Include-cleaner library structure, and simplistic A…
…ST walking. Include-cleaner is a library that uses the clang AST and preprocessor to determine which headers are used. It will be used in clang-tidy, in clangd, in a standalone tool at least for testing, and in out-of-tree tools. Roughly, it walks the AST, finds referenced decls, maps these to used sourcelocations, then to FileEntrys, then matching these against #includes. However there are many wrinkles: dealing with macros, standard library symbols, umbrella headers, IWYU directives etc. It is not built on the C++20 modules concept of usage, to allow: - use with existing non-modules codebases - a flexible API embeddable in clang-tidy, clangd, and other tools - avoiding a chicken-and-egg problem where include cleanups are needed before modules can be adopted This library is based on existing functionality in clangd that provides an unused-include warning. However it has design changes: - it accommodates diagnosing missing includes too (this means tracking where references come from, not just the set of targets) - it more clearly separates the different mappings (symbol => location => header => include) for better testing - it handles special cases like standard library symbols and IWYU directives more elegantly by adding unified Location and Header types instead of side-tables - it will support some customization of policy where necessary (e.g. for style questions of what constitutes a use, or to allow both missing-include and unused-include modes to be conservative) This patch adds the basic directory structure under clang-tools-extra and a skeleton version of the AST traversal, which will be the central piece. A more end-to-end prototype is in https://reviews.llvm.org/D122677 RFC: https://discourse.llvm.org/t/rfc-lifting-include-cleaner-missing-unused-include-detection-out-of-clangd/61228 Differential Revision: https://reviews.llvm.org/D124164
- Loading branch information
1 parent
14869bd
commit 41ac245
Showing
15 changed files
with
336 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
add_subdirectory(lib) | ||
if(CLANG_INCLUDE_TESTS) | ||
add_subdirectory(test) | ||
add_subdirectory(unittests) | ||
endif() |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file provides smaller, testable pieces of the used-header analysis. | ||
// We find the headers by chaining together several mappings. | ||
// | ||
// AST => AST node => Symbol => Location => Header | ||
// / | ||
// Macro expansion => | ||
// | ||
// The individual steps are declared here. | ||
// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes). | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H | ||
#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H | ||
|
||
#include "clang/Basic/SourceLocation.h" | ||
#include "llvm/ADT/STLFunctionalExtras.h" | ||
|
||
namespace clang { | ||
class Decl; | ||
class NamedDecl; | ||
namespace include_cleaner { | ||
|
||
/// Traverses part of the AST from \p Root, finding uses of symbols. | ||
/// | ||
/// Each use is reported to the callback: | ||
/// - the SourceLocation describes where the symbol was used. This is usually | ||
/// the primary location of the AST node found under Root. | ||
/// - the NamedDecl is the symbol referenced. It is canonical, rather than e.g. | ||
/// the redecl actually found by lookup. | ||
/// | ||
/// walkAST is typically called once per top-level declaration in the file | ||
/// being analyzed, in order to find all references within it. | ||
void walkAST(Decl &Root, llvm::function_ref<void(SourceLocation, NamedDecl &)>); | ||
|
||
} // namespace include_cleaner | ||
} // namespace clang | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
set(LLVM_LINK_COMPONENTS Support) | ||
|
||
add_clang_library(clangIncludeCleaner | ||
WalkAST.cpp | ||
|
||
LINK_LIBS | ||
clangBasic | ||
clangAST | ||
) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
//===--- WalkAST.cpp - Find declaration references in the AST -------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AnalysisInternal.h" | ||
#include "clang/AST/RecursiveASTVisitor.h" | ||
|
||
namespace clang { | ||
namespace include_cleaner { | ||
namespace { | ||
using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>; | ||
|
||
class ASTWalker : public RecursiveASTVisitor<ASTWalker> { | ||
DeclCallback Callback; | ||
|
||
void report(SourceLocation Loc, NamedDecl *ND) { | ||
if (!ND || Loc.isInvalid()) | ||
return; | ||
Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl())); | ||
} | ||
|
||
public: | ||
ASTWalker(DeclCallback Callback) : Callback(Callback) {} | ||
|
||
bool VisitTagTypeLoc(TagTypeLoc TTL) { | ||
report(TTL.getNameLoc(), TTL.getDecl()); | ||
return true; | ||
} | ||
|
||
bool VisitDeclRefExpr(DeclRefExpr *DRE) { | ||
report(DRE->getLocation(), DRE->getFoundDecl()); | ||
return true; | ||
} | ||
}; | ||
|
||
} // namespace | ||
|
||
void walkAST(Decl &Root, DeclCallback Callback) { | ||
ASTWalker(Callback).TraverseDecl(&Root); | ||
} | ||
|
||
} // namespace include_cleaner | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
set(CLANG_INCLUDE_CLEANER_TEST_DEPS | ||
ClangIncludeCleanerTests | ||
) | ||
|
||
foreach (dep FileCheck not count) | ||
if(TARGET ${dep}) | ||
list(APPEND CLANG_INCLUDE_CLEANER_TEST_DEPS ${dep}) | ||
endif() | ||
endforeach() | ||
|
||
configure_lit_site_cfg( | ||
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in | ||
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py | ||
MAIN_CONFIG | ||
${CMAKE_CURRENT_BINARY_DIR}/lit.cfg.py) | ||
|
||
configure_lit_site_cfg( | ||
${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in | ||
${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py | ||
MAIN_CONFIG | ||
${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.cfg.py) | ||
|
||
add_lit_testsuite(check-clang-include-cleaner "Running the clang-include-cleaner regression tests" | ||
${CMAKE_CURRENT_BINARY_DIR} | ||
DEPENDS ${CLANG_INCLUDE_CLEANER_TEST_DEPS}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import lit.formats | ||
config.name = "clangIncludeCleaner Unit Tests" | ||
config.test_format = lit.formats.GoogleTest('.', 'Tests') | ||
config.test_source_root = config.clang_include_cleaner_binary_dir + "/unittests" | ||
config.test_exec_root = config.clang_include_cleaner_binary_dir + "/unittests" | ||
|
||
# Point the dynamic loader at dynamic libraries in 'lib'. | ||
# FIXME: it seems every project has a copy of this logic. Move it somewhere. | ||
import platform | ||
if platform.system() == 'Darwin': | ||
shlibpath_var = 'DYLD_LIBRARY_PATH' | ||
elif platform.system() == 'Windows': | ||
shlibpath_var = 'PATH' | ||
else: | ||
shlibpath_var = 'LD_LIBRARY_PATH' | ||
config.environment[shlibpath_var] = os.path.pathsep.join(( | ||
"@SHLIBDIR@", "@LLVM_LIBS_DIR@", | ||
config.environment.get(shlibpath_var,''))) |
10 changes: 10 additions & 0 deletions
10
clang-tools-extra/include-cleaner/test/Unit/lit.site.cfg.py.in
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
@LIT_SITE_CFG_IN_HEADER@ | ||
# This is a shim to run the gtest unittests in ../unittests using lit. | ||
|
||
config.llvm_libs_dir = path("@LLVM_LIBS_DIR@") | ||
config.shlibdir = path("@SHLIBDIR@") | ||
|
||
config.clang_include_cleaner_binary_dir = path("@CMAKE_CURRENT_BINARY_DIR@/..") | ||
|
||
# Delegate logic to lit.cfg.py. | ||
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/Unit/lit.cfg.py") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import lit.llvm | ||
|
||
lit.llvm.initialize(lit_config, config) | ||
lit.llvm.llvm_config.use_default_substitutions() | ||
|
||
config.name = 'ClangIncludeCleaner' | ||
config.suffixes = ['.test', '.c', '.cpp'] | ||
config.excludes = ['Inputs'] | ||
config.test_format = lit.formats.ShTest(not lit.llvm.llvm_config.use_lit_shell) | ||
config.test_source_root = config.clang_include_cleaner_source_dir + "/test" | ||
config.test_exec_root = config.clang_include_cleaner_binary_dir + "/test" | ||
|
||
config.environment['PATH'] = os.path.pathsep.join(( | ||
config.clang_tools_dir, | ||
config.llvm_tools_dir, | ||
config.environment['PATH'])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
@LIT_SITE_CFG_IN_HEADER@ | ||
|
||
# Variables needed for common llvm config. | ||
config.clang_tools_dir = path("@CURRENT_TOOLS_DIR@") | ||
config.lit_tools_dir = path("@LLVM_LIT_TOOLS_DIR@") | ||
config.llvm_tools_dir = path(lit_config.substitute("@LLVM_TOOLS_DIR@")) | ||
config.llvm_libs_dir = path(lit_config.substitute("@LLVM_LIBS_DIR@")) | ||
config.target_triple = "@TARGET_TRIPLE@" | ||
config.python_executable = "@Python3_EXECUTABLE@" | ||
|
||
config.clang_include_cleaner_source_dir = path("@CMAKE_CURRENT_SOURCE_DIR@/..") | ||
config.clang_include_cleaner_binary_dir = path("@CMAKE_CURRENT_BINARY_DIR@/..") | ||
# Delegate logic to lit.cfg.py. | ||
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg.py") |
25 changes: 25 additions & 0 deletions
25
clang-tools-extra/include-cleaner/unittests/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
set(LLVM_LINK_COMPONENTS | ||
Support | ||
TestingSupport | ||
) | ||
|
||
add_custom_target(ClangIncludeCleanerUnitTests) | ||
add_unittest(ClangIncludeCleanerUnitTests ClangIncludeCleanerTests | ||
WalkASTTest.cpp | ||
) | ||
|
||
target_include_directories(ClangIncludeCleanerTests | ||
PRIVATE | ||
${CMAKE_CURRENT_SOURCE_DIR}/../lib) | ||
|
||
clang_target_link_libraries(ClangIncludeCleanerTests | ||
PRIVATE | ||
clangBasic | ||
) | ||
|
||
target_link_libraries(ClangIncludeCleanerTests | ||
PRIVATE | ||
clangIncludeCleaner | ||
clangTesting | ||
) | ||
|
110 changes: 110 additions & 0 deletions
110
clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#include "AnalysisInternal.h" | ||
#include "clang/AST/ASTContext.h" | ||
#include "clang/Basic/FileManager.h" | ||
#include "clang/Frontend/TextDiagnostic.h" | ||
#include "clang/Testing/TestAST.h" | ||
#include "llvm/Support/Error.h" | ||
#include "llvm/Testing/Support/Annotations.h" | ||
#include "gtest/gtest.h" | ||
|
||
namespace clang { | ||
namespace include_cleaner { | ||
namespace { | ||
|
||
// Specifies a test of which symbols are referenced by a piece of code. | ||
// | ||
// Example: | ||
// Target: int ^foo(); | ||
// Referencing: int x = ^foo(); | ||
// There must be exactly one referencing location marked. | ||
void testWalk(llvm::StringRef TargetCode, llvm::StringRef ReferencingCode) { | ||
llvm::Annotations Target(TargetCode); | ||
llvm::Annotations Referencing(ReferencingCode); | ||
|
||
TestInputs Inputs(Referencing.code()); | ||
Inputs.ExtraFiles["target.h"] = Target.code().str(); | ||
Inputs.ExtraArgs.push_back("-include"); | ||
Inputs.ExtraArgs.push_back("target.h"); | ||
TestAST AST(Inputs); | ||
const auto &SM = AST.sourceManager(); | ||
|
||
// We're only going to record references from the nominated point, | ||
// to the target file. | ||
FileID ReferencingFile = SM.getMainFileID(); | ||
SourceLocation ReferencingLoc = | ||
SM.getComposedLoc(ReferencingFile, Referencing.point()); | ||
FileID TargetFile = SM.translateFile( | ||
llvm::cantFail(AST.fileManager().getFileRef("target.h"))); | ||
|
||
// Perform the walk, and capture the offsets of the referenced targets. | ||
std::vector<size_t> ReferencedOffsets; | ||
for (Decl *D : AST.context().getTranslationUnitDecl()->decls()) { | ||
if (ReferencingFile != SM.getDecomposedExpansionLoc(D->getLocation()).first) | ||
continue; | ||
walkAST(*D, [&](SourceLocation Loc, NamedDecl &ND) { | ||
if (SM.getFileLoc(Loc) != ReferencingLoc) | ||
return; | ||
auto NDLoc = SM.getDecomposedLoc(SM.getFileLoc(ND.getLocation())); | ||
if (NDLoc.first != TargetFile) | ||
return; | ||
ReferencedOffsets.push_back(NDLoc.second); | ||
}); | ||
} | ||
llvm::sort(ReferencedOffsets); | ||
|
||
// Compare results to the expected points. | ||
// For each difference, show the target point in context, like a diagnostic. | ||
std::string DiagBuf; | ||
llvm::raw_string_ostream DiagOS(DiagBuf); | ||
auto *DiagOpts = new DiagnosticOptions(); | ||
DiagOpts->ShowLevel = 0; | ||
DiagOpts->ShowNoteIncludeStack = 0; | ||
TextDiagnostic Diag(DiagOS, AST.context().getLangOpts(), DiagOpts); | ||
auto DiagnosePoint = [&](const char *Message, unsigned Offset) { | ||
Diag.emitDiagnostic( | ||
FullSourceLoc(SM.getComposedLoc(TargetFile, Offset), SM), | ||
DiagnosticsEngine::Note, Message, {}, {}); | ||
}; | ||
for (auto Expected : Target.points()) | ||
if (!llvm::is_contained(ReferencedOffsets, Expected)) | ||
DiagnosePoint("location not marked used", Expected); | ||
for (auto Actual : ReferencedOffsets) | ||
if (!llvm::is_contained(Target.points(), Actual)) | ||
DiagnosePoint("location unexpectedly used", Actual); | ||
|
||
// If there were any differences, we print the entire referencing code once. | ||
if (!DiagBuf.empty()) | ||
ADD_FAILURE() << DiagBuf << "\nfrom code:\n" << ReferencingCode; | ||
} | ||
|
||
TEST(WalkAST, DeclRef) { | ||
testWalk("int ^x;", "int y = ^x;"); | ||
testWalk("int ^foo();", "int y = ^foo();"); | ||
testWalk("namespace ns { int ^x; }", "int y = ns::^x;"); | ||
testWalk("struct S { static int ^x; };", "int y = S::^x;"); | ||
// Canonical declaration only. | ||
testWalk("extern int ^x; int x;", "int y = ^x;"); | ||
} | ||
|
||
TEST(WalkAST, TagType) { | ||
testWalk("struct ^S {};", "^S *y;"); | ||
testWalk("enum ^E {};", "^E *y;"); | ||
testWalk("struct ^S { static int x; };", "int y = ^S::x;"); | ||
} | ||
|
||
TEST(WalkAST, Alias) { | ||
testWalk(R"cpp( | ||
namespace ns { int x; } | ||
using ns::^x; | ||
)cpp", | ||
"int y = ^x;"); | ||
testWalk(R"cpp( | ||
namespace ns { struct S; } // Not used | ||
using ns::S; // FIXME: S should be used | ||
)cpp", | ||
"^S *x;"); | ||
} | ||
|
||
} // namespace | ||
} // namespace include_cleaner | ||
} // namespace clang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters