diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h index 05c9f56b4cf637..377dcd5a68feca 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -46,9 +46,11 @@ struct ModuleID { /// or a header-name for C++20 header units. std::string ModuleName; - /// The context hash of a module represents the set of compiler options that - /// may make one version of a module incompatible with another. This includes - /// things like language mode, predefined macros, header search paths, etc... + /// The context hash of a module represents the compiler options that affect + /// the resulting command-line invocation. + /// + /// Modules with the same name and ContextHash but different invocations could + /// cause non-deterministic build results. /// /// Modules with the same name but a different \c ContextHash should be /// treated as separate modules for the purpose of a build. diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 725bb2c318ac2c..8d58a53a5971c5 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -12,6 +12,7 @@ #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/StringSaver.h" using namespace clang; @@ -77,6 +78,19 @@ CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; + // Remove any macro definitions that are explicitly ignored. + if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) { + llvm::erase_if( + CI.getPreprocessorOpts().Macros, + [&CI](const std::pair &Def) { + StringRef MacroDef = Def.first; + return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains( + llvm::CachedHashString(MacroDef.split('=').first)); + }); + // Remove the now unused option. + CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear(); + } + // Report the prebuilt modules this module uses. for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); @@ -156,6 +170,50 @@ std::vector ModuleDeps::getCanonicalCommandLine( return serializeCompilerInvocation(CI); } +static std::string getModuleContextHash(const ModuleDeps &MD) { + llvm::HashBuilder, + llvm::support::endianness::native> + HashBuilder; + SmallString<32> Scratch; + + // Hash the compiler version and serialization version to ensure the module + // will be readable. + HashBuilder.add(getClangFullRepositoryVersion()); + HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR); + + // Hash the BuildInvocation without any input files. + SmallVector DummyArgs; + MD.BuildInvocation.generateCC1CommandLine(DummyArgs, [&](const Twine &Arg) { + Scratch.clear(); + StringRef Str = Arg.toStringRef(Scratch); + HashBuilder.add(Str); + return ""; + }); + + // Hash the input file paths and module dependencies. These paths may differ + // even if the invocation is identical if they depend on the contents of the + // files in the TU -- for example, case-insensitive paths to modulemap files. + // Usually such a case would indicate a missed optimization to canonicalize, + // but it may be difficult to canonicalize all cases when there is a VFS. + HashBuilder.add(MD.ClangModuleMapFile); + for (const auto &Dep : MD.PrebuiltModuleDeps) + HashBuilder.add(Dep.PCMFile); + for (const auto &ID : MD.ClangModuleDeps) { + HashBuilder.add(ID.ModuleName); + HashBuilder.add(ID.ContextHash); + } + + // Hash options that affect which callbacks are made for outputs. + HashBuilder.add(MD.HadDependencyFile); + HashBuilder.add(MD.HadSerializedDiagnostics); + + llvm::BLAKE3Result<16> Hash = HashBuilder.final(); + std::array Words; + static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words"); + std::memcpy(Words.data(), Hash.data(), sizeof(Hash)); + return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false); +} + std::vector ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { return serializeCompilerInvocation(BuildInvocation); @@ -346,13 +404,12 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { .DiagnosticSerializationFile.empty(); MD.HadDependencyFile = !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); - // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in - // the context hash since it can affect the command-line. - MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); llvm::DenseSet AddedModules; addAllSubmoduleDeps(M, MD, AddedModules); + // Do this last since it requires the dependencies. + MD.ID.ContextHash = getModuleContextHash(MD); return MD.ID; } diff --git a/clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c b/clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c new file mode 100644 index 00000000000000..982bf1e06b5192 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c @@ -0,0 +1,100 @@ +// Ensure '-DFOO -fmodules-ignore-macro=FOO' and '' both produce the same +// canonical module build command. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \ +// RUN: -format experimental-full > %t/deps.json +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK: { +// CHECK: "command-line": [ +// CHECK-NOT: "FOO" +// CHECK-NOT: "-fmodules-ignore-macro +// CHECK: ] +// CHECK: "context-hash": "[[HASH_NO_FOO:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "command-line": [ +// CHECK: "-D" +// CHECK-NEXT: "FOO" +// CHECK: ] +// CHECK: "context-hash": "[[HASH_FOO:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-DFOO" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu1.c" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_FOO]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK: "-DFOO" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu2.c" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH_NO_FOO]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK: "-DFOO" +// CHECK: "-fmodules-ignore-macro=FOO" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu3.c" + +//--- cdb.json.template +[ + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu1.c" + }, + { + "directory": "DIR", + "command": "clang -DFOO -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu2.c" + }, + { + "directory": "DIR", + "command": "clang -DFOO -fmodules-ignore-macro=FOO -fsyntax-only DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu3.c" + }, +] + +//--- module.modulemap +module Mod { header "Mod.h" } + +//--- Mod.h + +//--- tu1.c +#include "Mod.h" + +//--- tu2.c +#include "Mod.h" + +//--- tu3.c +#include "Mod.h" diff --git a/clang/test/ClangScanDeps/modules-context-hash-module-map-path.c b/clang/test/ClangScanDeps/modules-context-hash-module-map-path.c new file mode 100644 index 00000000000000..0fde558ab095d3 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-module-map-path.c @@ -0,0 +1,77 @@ +// Ensure the path to the modulemap input is included in the module context hash +// irrespective of other TU command-line arguments, as it effects the canonical +// module build command. In this test we use the difference in spelling between +// module.modulemap and module.map, but it also applies to situations such as +// differences in case-insensitive paths if they are not canonicalized away. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \ +// RUN: -format experimental-full > %t/deps.json + +// RUN: mv %t/module.modulemap %t/module.map +// RUN: echo 'AFTER_MOVE' >> %t/deps.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \ +// RUN: -format experimental-full >> %t/deps.json + +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK: { +// CHECK: "command-line": [ +// CHECK: "{{.*}}module.modulemap" +// CHECK: ] +// CHECK: "context-hash": "[[HASH1:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH1]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-LABEL: AFTER_MOVE +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK: { +// CHECK-NOT: [[HASH1]] +// CHECK: "command-line": [ +// CHECK: "{{.*}}module.map" +// CHECK: ] +// CHECK-NOT: [[HASH1]] +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": +// CHECK-NOT: [[HASH1]] +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] + +//--- cdb.json.template +[ + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu.c" + } +] + +//--- module.modulemap +module Mod { header "Mod.h" } + +//--- Mod.h + +//--- tu.c +#include "Mod.h" diff --git a/clang/test/ClangScanDeps/modules-context-hash-outputs.c b/clang/test/ClangScanDeps/modules-context-hash-outputs.c new file mode 100644 index 00000000000000..3eb7ec7020b7e5 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-outputs.c @@ -0,0 +1,77 @@ +// If secondary output files such as .d are enabled, ensure it affects the +// module context hash since it may impact the resulting module build commands. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \ +// RUN: -format experimental-full > %t/deps.json +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK: { +// CHECK: "command-line": [ +// CHECK: "-dependency-file" +// CHECK: ] +// CHECK: "context-hash": "[[HASH1:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "command-line": [ +// CHECK-NOT: "-dependency-file" +// CHECK: ] +// CHECK: "context-hash": "[[HASH2:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH1]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK: "-MF" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu1.c" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH2]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-MF" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu2.c" + +//--- cdb.json.template +[ + { + "directory": "DIR", + "command": "clang -MD -MF DIR/tu1.d -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu1.c" + }, + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu2.c" + }, +] + +//--- module.modulemap +module Mod { header "Mod.h" } + +//--- Mod.h + +//--- tu1.c +#include "Mod.h" + +//--- tu2.c +#include "Mod.h" diff --git a/clang/test/ClangScanDeps/modules-context-hash-warnings.c b/clang/test/ClangScanDeps/modules-context-hash-warnings.c new file mode 100644 index 00000000000000..8457c7db53fb09 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-warnings.c @@ -0,0 +1,77 @@ +// Differences in -W warning options should result in different canonical module +// build commands. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \ +// RUN: -format experimental-full > %t/deps.json +// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK: { +// CHECK: "command-line": [ +// CHECK: "-Wall" +// CHECK: ] +// CHECK: "context-hash": "[[HASH1:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "command-line": [ +// CHECK-NOT: "-Wall" +// CHECK: ] +// CHECK: "context-hash": "[[HASH2:.*]]" +// CHECK: "name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH1]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK: "-Wall" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu1.c" +// CHECK-NEXT: } +// CHECK-NEXT: { +// CHECK: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH2]]" +// CHECK-NEXT: "module-name": "Mod" +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-Wall" +// CHECK: ] +// CHECK: "input-file": "{{.*}}tu2.c" + +//--- cdb.json.template +[ + { + "directory": "DIR", + "command": "clang -Wall -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu1.c" + }, + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache", + "file": "DIR/tu2.c" + }, +] + +//--- module.modulemap +module Mod { header "Mod.h" } + +//--- Mod.h + +//--- tu1.c +#include "Mod.h" + +//--- tu2.c +#include "Mod.h"