Skip to content

Commit

Permalink
[clang][modules][deps] Create more efficient API for visitation of `M…
Browse files Browse the repository at this point in the history
…oduleFile` inputs

The current `ASTReader::visitInputFiles()` function calls into `FileManager` to create `FileEntryRef` objects. This ends up being fairly costly in `clang-scan-deps`, where we mostly only care about file paths.

This patch introduces a new `ASTReader` API that gives clients access to just the serialized paths. Since the scanner needs both the as-requested path and the on-disk one (and doesn't want to transform the former into the latter via `FileManager`), this patch starts serializing both of them into the PCM file if they differ.

This increases the size of scanning PCMs by 0.1% and speeds up scanning by 5%.

Reviewed By: benlangmuir, vsapsai

Differential Revision: https://reviews.llvm.org/D157066
  • Loading branch information
jansvoboda11 committed Aug 9, 2023
1 parent 6556e29 commit dcd3a0c
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 24 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
const unsigned VERSION_MAJOR = 27;
const unsigned VERSION_MAJOR = 28;

/// AST file minor version number supported by this version of
/// Clang.
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -2373,6 +2373,13 @@ class ASTReader
/// Loads comments ranges.
void ReadComments() override;

/// Visit all the input file infos of the given module file.
///
/// \param MF the module file whose recorded input files are enumerated.
/// \param IncludeSystem when false, only the first \c MF.NumUserInputFiles
/// entries (the user inputs) are visited; when true, every loaded input is.
/// \param Visitor callback invoked once per input with its deserialized
/// \c InputFileInfo and a flag telling whether the entry lies past the user
/// inputs (i.e. is a system input).
void visitInputFileInfos(
serialization::ModuleFile &MF, bool IncludeSystem,
llvm::function_ref<void(const serialization::InputFileInfo &IFI,
bool IsSystem)>
Visitor);

/// Visit all the input files of the given module file.
void visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,15 @@ enum ModuleKind {

/// The input file info that has been loaded from an AST file.
struct InputFileInfo {
/// Path taken from the leading part of the serialized name blob: the name
/// the file was requested by when the AST file was written.
std::string FilenameAsRequested;
/// Path taken from the trailing part of the serialized name blob
/// (presumably the on-disk name); the reader substitutes
/// FilenameAsRequested when that part is empty.
std::string Filename;
uint64_t ContentHash;
/// File size recorded by the writer.
off_t StoredSize;
/// Modification timestamp recorded by the writer.
time_t StoredTime;
/// Recorded from the writer's "buffer overridden" flag for this file.
bool Overridden;
/// Recorded from the writer's "transient" flag for this file.
bool Transient;
bool TopLevelModuleMap;
/// Set by the writer when the file has no valid include location.
bool TopLevel;
/// Set by the writer when the file's characteristic marks it as a module map.
bool ModuleMap;
};

/// The input file that has been loaded from this AST file, along with
Expand Down
45 changes: 37 additions & 8 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2342,9 +2342,22 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
R.StoredTime = static_cast<time_t>(Record[2]);
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
R.TopLevelModuleMap = static_cast<bool>(Record[5]);
R.Filename = std::string(Blob);
ResolveImportedPath(F, R.Filename);
R.TopLevel = static_cast<bool>(Record[5]);
R.ModuleMap = static_cast<bool>(Record[6]);
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
uint16_t AsRequestedLength = Record[7];

std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
std::string Name = Blob.substr(AsRequestedLength).str();

ResolveImportedPath(F, NameAsRequested);
ResolveImportedPath(F, Name);

if (Name.empty())
Name = NameAsRequested;

return std::make_pair(std::move(NameAsRequested), std::move(Name));
}();

Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) // FIXME this drops errors on the floor.
Expand Down Expand Up @@ -2395,7 +2408,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
time_t StoredTime = FI.StoredTime;
bool Overridden = FI.Overridden;
bool Transient = FI.Transient;
StringRef Filename = FI.Filename;
StringRef Filename = FI.FilenameAsRequested;
uint64_t StoredContentHash = FI.ContentHash;

// For standard C++ modules, we don't need to check the inputs.
Expand Down Expand Up @@ -2742,9 +2755,9 @@ ASTReader::ReadControlBlock(ModuleFile &F,
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFileInfo FI = getInputFileInfo(F, I + 1);
Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden,
F.Kind == MK_ExplicitModule ||
F.Kind == MK_PrebuiltModule);
Listener->visitInputFile(
FI.FilenameAsRequested, IsSystem, FI.Overridden,
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule);
}
}

Expand Down Expand Up @@ -9307,6 +9320,22 @@ void ASTReader::ReadComments() {
}
}

/// Calls \p Visitor for each input file info recorded in \p MF, in index
/// order. Entries at or past \c MF.NumUserInputFiles are reported as system
/// inputs and are only visited when \p IncludeSystem is set.
void ASTReader::visitInputFileInfos(
    serialization::ModuleFile &MF, bool IncludeSystem,
    llvm::function_ref<void(const serialization::InputFileInfo &IFI,
                            bool IsSystem)>
        Visitor) {
  const unsigned UserCount = MF.NumUserInputFiles;
  const unsigned TotalCount = MF.InputFilesLoaded.size();
  assert(UserCount <= TotalCount);

  // User inputs come first; stop after them unless system inputs are wanted.
  const unsigned Limit = IncludeSystem ? TotalCount : UserCount;

  for (unsigned Index = 0; Index != Limit; ++Index) {
    // getInputFileInfo() is handed the zero-based position plus one.
    InputFileInfo Info = getInputFileInfo(MF, Index + 1);
    const bool PastUserInputs = Index >= UserCount;
    Visitor(Info, PastUserInputs);
  }
}

void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
llvm::function_ref<void(const serialization::InputFile &IF,
Expand All @@ -9328,7 +9357,7 @@ void ASTReader::visitTopLevelModuleMaps(
unsigned NumInputs = MF.InputFilesLoaded.size();
for (unsigned I = 0; I < NumInputs; ++I) {
InputFileInfo IFI = getInputFileInfo(MF, I + 1);
if (IFI.TopLevelModuleMap)
if (IFI.TopLevel && IFI.ModuleMap)
if (auto FE = getInputFile(MF, I + 1).getFile())
Visitor(*FE);
}
Expand Down
27 changes: 21 additions & 6 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1525,7 +1525,8 @@ struct InputFileEntry {
bool IsSystemFile;
bool IsTransient;
bool BufferOverridden;
bool IsTopLevelModuleMap;
bool IsTopLevel;
bool IsModuleMap;
uint32_t ContentHash[2];

InputFileEntry(FileEntryRef File) : File(File) {}
Expand All @@ -1547,8 +1548,10 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name
unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));

// Create input file hash abbreviation.
Expand Down Expand Up @@ -1582,8 +1585,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
File.getIncludeLoc().isInvalid();
Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());

auto ContentHash = hash_code(-1);
if (PP->getHeaderSearchInfo()
Expand Down Expand Up @@ -1631,16 +1634,28 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
// Emit size/modification time for this file.
// And whether this file was overridden.
{
SmallString<128> NameAsRequested = Entry.File.getNameAsRequested();
SmallString<128> Name = Entry.File.getName();

PreparePathForOutput(NameAsRequested);
PreparePathForOutput(Name);

if (Name == NameAsRequested)
Name.clear();

RecordData::value_type Record[] = {
INPUT_FILE,
InputFileOffsets.size(),
(uint64_t)Entry.File.getSize(),
(uint64_t)getTimestampForOutput(Entry.File),
Entry.BufferOverridden,
Entry.IsTransient,
Entry.IsTopLevelModuleMap};
Entry.IsTopLevel,
Entry.IsModuleMap,
NameAsRequested.size()};

EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getNameAsRequested());
Stream.EmitRecordWithBlob(IFAbbrevCode, Record,
(NameAsRequested + Name).str());
}

// Emit content hash for this file.
Expand Down
21 changes: 13 additions & 8 deletions clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,30 +459,35 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
serialization::ModuleFile *MF =
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
M->getASTFile());
MDC.ScanInstance.getASTReader()->visitInputFiles(
*MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
*MF, /*IncludeSystem=*/true,
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
// __inferred_module.map is the result of the way in which an implicit
// module build handles inferred modules. It adds an overlay VFS with
// this file in the proper directory and relies on the rest of Clang to
// handle it like normal. With explicitly built modules we don't need
// to play VFS tricks, so replace it with the correct module map.
if (IF.getFile()->getName().endswith("__inferred_module.map")) {
if (StringRef(IFI.Filename).endswith("__inferred_module.map")) {
MDC.addFileDep(MD, ModuleMap->getName());
return;
}
MDC.addFileDep(MD, IF.getFile()->getName());
MDC.addFileDep(MD, IFI.Filename);
});

llvm::DenseSet<const Module *> SeenDeps;
addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
addAllSubmoduleDeps(M, MD, SeenDeps);
addAllAffectingClangModules(M, MD, SeenDeps);

MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
*MF, [&](FileEntryRef FE) {
if (FE.getNameAsRequested().endswith("__inferred_module.map"))
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
*MF, /*IncludeSystem=*/true,
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
if (!(IFI.TopLevel && IFI.ModuleMap))
return;
MD.ModuleMapFileDeps.emplace_back(FE.getNameAsRequested());
if (StringRef(IFI.FilenameAsRequested)
.endswith("__inferred_module.map"))
return;
MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
});

CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs(
Expand Down

0 comments on commit dcd3a0c

Please sign in to comment.