Skip to content

Commit

Permalink
[clang][ScanDeps] Canonicalize -D and -U flags (#82298)
Browse files Browse the repository at this point in the history
Canonicalize `-D` and `-U` flags by sorting them and only keeping the
last instance of a given name.

This optimization will only fire if all `-D` and `-U` flags start with a
simple identifier that we can guarantee a simple analysis of can
determine if two flags refer to the same identifier or not. See the
comment on `getSimpleMacroName()` for details of what the issues are.
  • Loading branch information
Bigcheese committed Feb 20, 2024
1 parent 54b014b commit 3ff8055
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ enum class ScanningOptimizations {
/// Remove unused -ivfsoverlay arguments.
VFS = 4,

DSS_LAST_BITMASK_ENUM(VFS),
/// Canonicalize -D and -U options.
Macros = 8,

DSS_LAST_BITMASK_ENUM(Macros),
Default = All
};

Expand Down
74 changes: 74 additions & 0 deletions clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
DiagOpts.IgnoreWarnings = true;
}

// Clang implements -D and -U by splatting text into a predefines buffer. This
// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
// define the same macro, or adding C++ style comments before the macro name.
//
// This function checks that the first non-space characters in the macro
// obviously form an identifier that can be uniqued on without lexing. Failing
// to do this could lead to changing the final definition of a macro.
//
// We could set up a preprocessor and actually lex the name, but that's very
// heavyweight for a situation that will almost never happen in practice.
static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
StringRef Name = Macro.split("=").first.ltrim(" \t");
std::size_t I = 0;

auto FinishName = [&]() -> std::optional<StringRef> {
StringRef SimpleName = Name.slice(0, I);
if (SimpleName.empty())
return std::nullopt;
return SimpleName;
};

for (; I != Name.size(); ++I) {
switch (Name[I]) {
case '(': // Start of macro parameter list
case ' ': // End of macro name
case '\t':
return FinishName();
case '_':
continue;
default:
if (llvm::isAlnum(Name[I]))
continue;
return std::nullopt;
}
}
return FinishName();
}

static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
using MacroOpt = std::pair<StringRef, std::size_t>;
std::vector<MacroOpt> SimpleNames;
SimpleNames.reserve(PPOpts.Macros.size());
std::size_t Index = 0;
for (const auto &M : PPOpts.Macros) {
auto SName = getSimpleMacroName(M.first);
// Skip optimizing if we can't guarantee we can preserve relative order.
if (!SName)
return;
SimpleNames.emplace_back(*SName, Index);
++Index;
}

llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
return A.first < B.first;
});
// Keep the last instance of each macro name by going in reverse
auto NewEnd = std::unique(
SimpleNames.rbegin(), SimpleNames.rend(),
[](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
SimpleNames.erase(SimpleNames.begin(), NewEnd.base());

// Apply permutation.
decltype(PPOpts.Macros) NewMacros;
NewMacros.reserve(SimpleNames.size());
for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
std::size_t OriginalIndex = SimpleNames[I].second;
// We still emit undefines here as they may be undefining a predefined macro
NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
}
std::swap(PPOpts.Macros, NewMacros);
}

/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
class DependencyScanningAction : public tooling::ToolAction {
Expand All @@ -203,6 +275,8 @@ class DependencyScanningAction : public tooling::ToolAction {
CompilerInvocation OriginalInvocation(*Invocation);
// Restore the value of DisableFree, which may be modified by Tooling.
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
if (any(OptimizeArgs & ScanningOptimizations::Macros))
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());

if (Scanned) {
// Scanning runs once for the first -cc1 invocation in a chain of driver
Expand Down
87 changes: 87 additions & 0 deletions clang/test/ClangScanDeps/optimize-canonicalize-macros.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// This test verifies that command lines with equivalent -D and -U arguments
// are canonicalized to the same module variant.

// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t

// Verify that there are only two variants and that the expected merges have
// happened.

// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK-NEXT: "clang-modulemap-file":
// CHECK-NEXT: "command-line": [
// CHECK-NOT: "J=1"
// CHECK-NOT: "J"
// CHECK-NOT: "K"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK: ],
// CHECK-NEXT: "name": "A"
// CHECK-NEXT: },
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK-NEXT: "clang-modulemap-file":
// CHECK-NEXT: "command-line": [
// CHECK: "Fඞ"
// CHECK: "F\\u{0D9E}"
// CHECK: "K"
// CHECK: "K"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK: ],
// CHECK-NEXT: "name": "A"
// CHECK-NEXT: }
// CHECK-NEXT: ],
// CHECK-NEXT: "translation-units": [
// CHECK: ]
// CHECK: }


//--- build/compile-commands.json.in

[
{
"directory": "DIR",
"command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu0.m"
},
{
"directory": "DIR",
"command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu1.m"
},
{
"directory": "DIR",
"command": "clang -c DIR/tu2.m -I modules/A -DFඞ '-DF\\u{0D9E}' -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
"file": "DIR/tu2.m"
}
]

//--- modules/A/module.modulemap

module A {
umbrella header "A.h"
}

//--- modules/A/A.h

//--- tu0.m

#include <A.h>

//--- tu1.m

#include <A.h>

//--- tu2.m

#include <A.h>
1 change: 1 addition & 0 deletions clang/tools/clang-scan-deps/ClangScanDeps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
.Case("header-search", ScanningOptimizations::HeaderSearch)
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
.Case("vfs", ScanningOptimizations::VFS)
.Case("canonicalize-macros", ScanningOptimizations::Macros)
.Case("all", ScanningOptimizations::All)
.Default(std::nullopt);
if (!Optimization) {
Expand Down

0 comments on commit 3ff8055

Please sign in to comment.