Skip to content

Commit

Permalink
[clang][DependencyScanner] Remove unused -ivfsoverlay files (#73734)
Browse files Browse the repository at this point in the history
`-ivfsoverlay` files are unused when building most modules. Enable
removing them by:
* Adding a way to visit the filesystem tree with extensible RTTI to
  access each `RedirectingFileSystem`.
* Adding tracking to `RedirectingFileSystem` to record when it
  actually redirects a file access.
* Storing this information in each PCM.

Usage tracking is only enabled while iterating over the source manager's
entries and the affecting modulemaps. There, each path is stat'ed to trigger
an access. During scanning, these stats all hit the cache.
  • Loading branch information
Bigcheese committed Jan 30, 2024
1 parent 16c15b5 commit 7847e44
Show file tree
Hide file tree
Showing 26 changed files with 947 additions and 79 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSerializationKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ def err_pch_diagopt_mismatch : Error<"%0 is currently enabled, but was not in "
"the PCH file">;
def err_pch_modulecache_mismatch : Error<"PCH was compiled with module cache "
"path '%0', but the path is currently '%1'">;
// Reported when the VFS overlay files recorded in a PCH differ from the ones
// currently in use; the notes list (or report the absence of) the overlays on
// each side, selected by %0 (0 = PCH, 1 = current translation unit).
def err_pch_vfsoverlay_mismatch : Error<"PCH was compiled with different VFS overlay files than are currently in use">;
def note_pch_vfsoverlay_files : Note<"%select{PCH|current translation unit}0 has the following VFS overlays:\n%1">;
def note_pch_vfsoverlay_empty : Note<"%select{PCH|current translation unit}0 has no VFS overlays">;

def err_pch_version_too_old : Error<
"PCH file uses an older PCH format that is no longer supported">;
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/FileManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ class FileManager : public RefCountedBase<FileManager> {
return FS;
}

/// Enable or disable tracking of VFS usage.
///
/// Tracking is meant to be switched on only around the lookups that should
/// count as usage, so that accesses made during full header search and
/// implicit modulemap lookup are not recorded.
///
/// \param Active true to start recording usage, false to stop.
void trackVFSUsage(bool Active);

void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
this->FS = std::move(FS);
}
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Lex/HeaderSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,13 @@ class HeaderSearch {
/// Note: implicit module maps don't contribute to entry usage.
std::vector<bool> computeUserEntryUsage() const;

/// Collect which HeaderSearchOptions::VFSOverlayFiles have been meaningfully
/// used so far and mark their index with 'true' in the resulting bit vector.
/// The recorded usage of every visited overlay is reset as a side effect.
///
/// The result is empty when HeaderSearchOptions::ModulesIncludeVFSUsage is
/// not set.
///
/// Note: this ignores VFSs that redirect non-affecting files such as unused
/// modulemaps.
std::vector<bool> collectVFSUsageAndClear() const;

/// This method returns a HeaderMap for the specified
/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
const HeaderMap *CreateHeaderMap(FileEntryRef FE);
Expand Down
6 changes: 5 additions & 1 deletion clang/include/clang/Lex/HeaderSearchOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ class HeaderSearchOptions {
LLVM_PREFERRED_TYPE(bool)
unsigned ModulesStrictContextHash : 1;

/// Whether to include ivfsoverlay usage information in written AST files.
LLVM_PREFERRED_TYPE(bool)
unsigned ModulesIncludeVFSUsage : 1;

HeaderSearchOptions(StringRef _Sysroot = "/")
: Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false),
ImplicitModuleMaps(false), ModuleMapFileHomeIsCwd(false),
Expand All @@ -277,7 +281,7 @@ class HeaderSearchOptions {
ModulesSkipDiagnosticOptions(false),
ModulesSkipHeaderSearchPaths(false),
ModulesSkipPragmaDiagnosticMappings(false), ModulesHashContent(false),
ModulesStrictContextHash(false) {}
ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false) {}

/// AddPath - Add the \p Path path to the specified \p Group list.
void AddPath(StringRef Path, frontend::IncludeDirGroup Group,
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ enum UnhashedControlBlockRecordTypes {

/// Record code for the indices of used header search entries.
HEADER_SEARCH_ENTRY_USAGE,

/// Record code for the indices of used VFSs.
VFS_USAGE,
};

/// Record code for extension blocks.
Expand Down
16 changes: 10 additions & 6 deletions clang/include/clang/Serialization/ASTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -1780,12 +1780,13 @@ class ASTReader
/// Read the control block for the named AST file.
///
/// \returns true if an error occurred, false otherwise.
static bool readASTFileControlBlock(StringRef Filename, FileManager &FileMgr,
const InMemoryModuleCache &ModuleCache,
const PCHContainerReader &PCHContainerRdr,
bool FindModuleFileExtensions,
ASTReaderListener &Listener,
bool ValidateDiagnosticOptions);
static bool readASTFileControlBlock(
StringRef Filename, FileManager &FileMgr,
const InMemoryModuleCache &ModuleCache,
const PCHContainerReader &PCHContainerRdr, bool FindModuleFileExtensions,
ASTReaderListener &Listener, bool ValidateDiagnosticOptions,
unsigned ClientLoadCapabilities = ARR_ConfigurationMismatch |
ARR_OutOfDate);

/// Determine whether the given AST file is acceptable to load into a
/// translation unit with the given language and target options.
Expand Down Expand Up @@ -2270,6 +2271,9 @@ class ASTReader
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record,
unsigned &Idx, LocSeq *Seq = nullptr);

/// Decode a bit vector stored as a record plus blob: the first element of
/// \p Record is the number of bits, and \p Blob holds the bits packed eight
/// per byte, lowest bit of each byte first.
static llvm::BitVector ReadBitVector(const RecordData &Record,
const StringRef Blob);

// Read a string
static std::string ReadString(const RecordDataImpl &Record, unsigned &Idx);

Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,10 +467,10 @@ class ASTWriter : public ASTDeserializationListener,
std::vector<SourceRange> NonAffectingRanges;
std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;

/// Collects input files that didn't affect compilation of the current module,
/// Computes input files that didn't affect compilation of the current module,
/// and initializes data structures necessary for leaving those files out
/// during \c SourceManager serialization.
void collectNonAffectingInputFiles();
void computeNonAffectingInputFiles();

/// Returns an adjusted \c FileID, accounting for any non-affecting input
/// files.
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ModuleFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ class ModuleFile {
/// The bit vector denoting usage of each header search entry (true = used).
llvm::BitVector SearchPathUsage;

/// The bit vector denoting usage of each VFS entry (true = used).
llvm::BitVector VFSUsage;

/// Whether this module has been directly imported by the
/// user.
bool DirectlyImported = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,12 @@ class EntryRef {
/// This is not a thread safe VFS. A single instance is meant to be used only in
/// one thread. Multiple instances are allowed to service multiple threads
/// running in parallel.
class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
class DependencyScanningWorkerFilesystem
: public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
llvm::vfs::ProxyFileSystem> {
public:
static const char ID;

DependencyScanningWorkerFilesystem(
DependencyScanningFilesystemSharedCache &SharedCache,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ enum class ScanningOutputFormat {
P1689,
};

// Helper for closing a bitmask enum: expands to the LLVM bitmask-enum marker
// for \p Id followed by an `All` enumerator equal to
// llvm::NextPowerOf2(Id) - 1.
// NOTE(review): presumably \p Id must be the highest single-bit enumerator so
// that `All` has every flag bit set — confirm against llvm::NextPowerOf2.
#define DSS_LAST_BITMASK_ENUM(Id) \
LLVM_MARK_AS_BITMASK_ENUM(Id), All = llvm::NextPowerOf2(Id) - 1

enum class ScanningOptimizations {
None = 0,

Expand All @@ -54,11 +57,15 @@ enum class ScanningOptimizations {
/// Remove warnings from system modules.
SystemWarnings = 2,

LLVM_MARK_AS_BITMASK_ENUM(SystemWarnings),
All = HeaderSearch | SystemWarnings,
/// Remove unused -ivfsoverlay arguments.
VFS = 4,

DSS_LAST_BITMASK_ENUM(VFS),
Default = All
};

#undef DSS_LAST_BITMASK_ENUM

/// The dependency scanning service contains shared configuration and state that
/// is used by the individual dependency scanning workers.
class DependencyScanningService {
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Basic/FileManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,13 @@ llvm::Expected<FileEntryRef> FileManager::getSTDIN() {
return *STDIN;
}

void FileManager::trackVFSUsage(bool Active) {
FS->visit([Active](llvm::vfs::FileSystem &FileSys) {
if (auto *RFS = dyn_cast<llvm::vfs::RedirectingFileSystem>(&FileSys))
RFS->setUsageTrackingActive(Active);
});
}

const FileEntry *FileManager::getVirtualFile(StringRef Filename, off_t Size,
time_t ModificationTime) {
return &getVirtualFileRef(Filename, Size, ModificationTime).getFileEntry();
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4763,6 +4763,7 @@ std::string CompilerInvocation::getModuleHash() const {
if (hsOpts.ModulesStrictContextHash) {
HBuilder.addRange(hsOpts.SystemHeaderPrefixes);
HBuilder.addRange(hsOpts.UserEntries);
HBuilder.addRange(hsOpts.VFSOverlayFiles);

const DiagnosticOptions &diagOpts = getDiagnosticOpts();
#define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name);
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/Lex/HeaderSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,28 @@ std::vector<bool> HeaderSearch::computeUserEntryUsage() const {
return UserEntryUsage;
}

std::vector<bool> HeaderSearch::collectVFSUsageAndClear() const {
std::vector<bool> VFSUsage;
if (!getHeaderSearchOpts().ModulesIncludeVFSUsage)
return VFSUsage;

llvm::vfs::FileSystem &RootFS = FileMgr.getVirtualFileSystem();
// TODO: This only works if the `RedirectingFileSystem`s were all created by
// `createVFSFromOverlayFiles`.
RootFS.visit([&](llvm::vfs::FileSystem &FS) {
if (auto *RFS = dyn_cast<llvm::vfs::RedirectingFileSystem>(&FS)) {
VFSUsage.push_back(RFS->hasBeenUsed());
RFS->clearHasBeenUsed();
}
});
assert(VFSUsage.size() == getHeaderSearchOpts().VFSOverlayFiles.size() &&
"A different number of RedirectingFileSystem's were present than "
"-ivfsoverlay options passed to Clang!");
// VFS visit order is the opposite of VFSOverlayFiles order.
std::reverse(VFSUsage.begin(), VFSUsage.end());
return VFSUsage;
}

/// CreateHeaderMap - This method returns a HeaderMap for the specified
/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
const HeaderMap *HeaderSearch::CreateHeaderMap(FileEntryRef FE) {
Expand Down
36 changes: 23 additions & 13 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4977,7 +4977,7 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
}
case HEADER_SEARCH_PATHS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
if (Listener && !AllowCompatibleConfigurationMismatch &&
ParseHeaderSearchPaths(Record, Complain, *Listener))
Result = ConfigurationMismatch;
break;
Expand All @@ -4992,15 +4992,12 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
Record.begin(), Record.end());
break;
case HEADER_SEARCH_ENTRY_USAGE:
if (!F)
break;
unsigned Count = Record[0];
const char *Byte = Blob.data();
F->SearchPathUsage = llvm::BitVector(Count, false);
for (unsigned I = 0; I < Count; ++Byte)
for (unsigned Bit = 0; Bit < 8 && I < Count; ++Bit, ++I)
if (*Byte & (1 << Bit))
F->SearchPathUsage[I] = true;
if (F)
F->SearchPathUsage = ReadBitVector(Record, Blob);
break;
case VFS_USAGE:
if (F)
F->VFSUsage = ReadBitVector(Record, Blob);
break;
}
}
Expand Down Expand Up @@ -5398,7 +5395,8 @@ bool ASTReader::readASTFileControlBlock(
StringRef Filename, FileManager &FileMgr,
const InMemoryModuleCache &ModuleCache,
const PCHContainerReader &PCHContainerRdr, bool FindModuleFileExtensions,
ASTReaderListener &Listener, bool ValidateDiagnosticOptions) {
ASTReaderListener &Listener, bool ValidateDiagnosticOptions,
unsigned ClientLoadCapabilities) {
// Open the AST file.
std::unique_ptr<llvm::MemoryBuffer> OwnedBuffer;
llvm::MemoryBuffer *Buffer = ModuleCache.lookupPCM(Filename);
Expand Down Expand Up @@ -5453,7 +5451,7 @@ bool ASTReader::readASTFileControlBlock(
switch (Entry.ID) {
case OPTIONS_BLOCK_ID: {
std::string IgnoredSuggestedPredefines;
if (ReadOptionsBlock(Stream, ARR_ConfigurationMismatch | ARR_OutOfDate,
if (ReadOptionsBlock(Stream, ClientLoadCapabilities,
/*AllowCompatibleConfigurationMismatch*/ false,
Listener, IgnoredSuggestedPredefines) != Success)
return true;
Expand Down Expand Up @@ -5679,7 +5677,7 @@ bool ASTReader::readASTFileControlBlock(

// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (readUnhashedControlBlockImpl(
nullptr, Bytes, ARR_ConfigurationMismatch | ARR_OutOfDate,
nullptr, Bytes, ClientLoadCapabilities,
/*AllowCompatibleConfigurationMismatch*/ false, &Listener,
ValidateDiagnosticOptions) != Success)
return true;
Expand Down Expand Up @@ -9316,6 +9314,18 @@ SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
return SourceRange(beg, end);
}

/// Decode a bit vector that was serialized as a bit count plus a packed blob.
///
/// \param Record record whose first element is the number of bits.
/// \param Blob the bits, packed eight per byte with the lowest bit of each
/// byte first.
/// \returns a vector with Record[0] bits. Bits whose byte lies beyond the end
/// of \p Blob are left false, and an empty \p Record yields an empty vector,
/// so a truncated or malformed record never causes an out-of-bounds read.
llvm::BitVector ASTReader::ReadBitVector(const RecordData &Record,
                                         const StringRef Blob) {
  // Without a bit count there is nothing to decode.
  if (Record.empty())
    return llvm::BitVector();

  const unsigned Count = Record[0];
  llvm::BitVector Ret(Count, false);
  for (unsigned I = 0; I < Count; ++I) {
    const size_t ByteIdx = I / 8;
    // The count comes from the file and may claim more bits than the blob
    // actually stores; stop instead of reading past the end of the blob.
    if (ByteIdx >= Blob.size())
      break;
    if (Blob[ByteIdx] & (1 << (I % 8)))
      Ret[I] = true;
  }
  return Ret;
}

/// Read a floating-point value
llvm::APFloat ASTRecordReader::readAPFloat(const llvm::fltSemantics &Sem) {
return llvm::APFloat(Sem, readAPInt());
Expand Down
52 changes: 44 additions & 8 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1270,18 +1270,30 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule);

// Header search entry usage.
auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage();
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector.
unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
{
auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage();
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector.
unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE,
HSEntryUsage.size()};
Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage));
}

// VFS usage.
{
auto VFSUsage = PP.getHeaderSearchInfo().collectVFSUsageAndClear();
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(VFS_USAGE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector.
unsigned VFSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {VFS_USAGE, VFSUsage.size()};
Stream.EmitRecordWithBlob(VFSUsageAbbrevCode, Record, bytes(VFSUsage));
}

// Leave the options block.
Stream.ExitBlock();
UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
Expand Down Expand Up @@ -4672,7 +4684,7 @@ static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec,
}
}

void ASTWriter::collectNonAffectingInputFiles() {
void ASTWriter::computeNonAffectingInputFiles() {
SourceManager &SrcMgr = PP->getSourceManager();
unsigned N = SrcMgr.local_sloc_entry_size();

Expand Down Expand Up @@ -4732,6 +4744,30 @@ void ASTWriter::collectNonAffectingInputFiles() {
NonAffectingFileIDAdjustments.push_back(FileIDAdjustment);
NonAffectingOffsetAdjustments.push_back(OffsetAdjustment);
}

if (!PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesIncludeVFSUsage)
return;

FileManager &FileMgr = PP->getFileManager();
FileMgr.trackVFSUsage(true);
// Lookup the paths in the VFS to trigger `-ivfsoverlay` usage tracking.
for (StringRef Path :
PP->getHeaderSearchInfo().getHeaderSearchOpts().VFSOverlayFiles)
FileMgr.getVirtualFileSystem().exists(Path);
for (unsigned I = 1; I != N; ++I) {
if (IsSLocAffecting[I]) {
const SrcMgr::SLocEntry *SLoc = &SrcMgr.getLocalSLocEntry(I);
if (!SLoc->isFile())
continue;
const SrcMgr::FileInfo &File = SLoc->getFile();
const SrcMgr::ContentCache *Cache = &File.getContentCache();
if (!Cache->OrigEntry)
continue;
FileMgr.getVirtualFileSystem().exists(
Cache->OrigEntry->getNameAsRequested());
}
}
FileMgr.trackVFSUsage(false);
}

ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
Expand All @@ -4749,7 +4785,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,

// This needs to be done very early, since everything that writes
// SourceLocations or FileIDs depends on it.
collectNonAffectingInputFiles();
computeNonAffectingInputFiles();

writeUnhashedControlBlock(PP, Context);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ static bool shouldCacheStatFailures(StringRef Filename) {
DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
DependencyScanningFilesystemSharedCache &SharedCache,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
: ProxyFileSystem(std::move(FS)), SharedCache(SharedCache),
: llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
llvm::vfs::ProxyFileSystem>(std::move(FS)),
SharedCache(SharedCache),
WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
updateWorkingDirForCacheLookup();
}
Expand Down Expand Up @@ -379,3 +381,5 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
assert(!WorkingDirForCacheLookup ||
llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
}

// Out-of-line definition of the static ID member declared in the class, which
// derives from llvm::RTTIExtends (LLVM's extensible RTTI).
// NOTE(review): presumably only the address of ID matters, not its value —
// confirm against the llvm::RTTIExtends documentation.
const char DependencyScanningWorkerFilesystem::ID = 0;

0 comments on commit 7847e44

Please sign in to comment.