Skip to content

Commit

Permalink
[clang][deps] Only cache files with specific extension
Browse files Browse the repository at this point in the history
In the scanner's VFS, we cache all files by default and only avoid caching stat failures for certain files. This tanks the performance of scanning with pre-populated module cache. When there is a stale PCM file, it gets cached by the scanner at the start and the rebuilt version never makes it through the VFS again. The TU invocation that rebuilds the PCM only sees the copy in its InMemoryModuleCache, which is invisible to other invocations. This means the PCM gets rebuilt for every TU given to the scanner.

This patch fixes the situation by flipping the default, only caching files that are known to be important, and letting everything else fall through to the underlying VFS.

rdar://106376153

Reviewed By: Bigcheese

Differential Revision: https://reviews.llvm.org/D146328
  • Loading branch information
jansvoboda11 committed Mar 20, 2023
1 parent 0c0468e commit d1e00b6
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,32 @@ class EntryRef {
}
};

enum class ScanFile { Yes, No };
enum class CacheStatFailure { Yes, No };

struct PathPolicy {
/// Implies caching of all open and stat results.
unsigned Enable : 1;
/// Controls whether a file will be scanned for dependency directives.
unsigned ScanFile : 1;
/// Explicitly disables stat failure caching when false.
unsigned CacheStatFailure : 1;

static PathPolicy fallThrough() { return {false, false, false}; }

static PathPolicy cache(enum ScanFile SF,
enum CacheStatFailure CSF = CacheStatFailure::Yes) {
return {true, SF == ScanFile::Yes, CSF == CacheStatFailure::Yes};
}

private:
PathPolicy(bool E, bool SF, bool CSF)
: Enable(E), ScanFile(SF), CacheStatFailure(CSF) {}
};

/// Determine caching and scanning behavior based on file extension.
PathPolicy getPolicy(StringRef Filename);

/// A virtual file system optimized for the dependency discovery.
///
/// It is primarily designed to work with source files whose contents was
Expand All @@ -293,24 +319,25 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
///
/// Attempts to use the local and shared caches first, then falls back to
/// using the underlying filesystem.
llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,
bool DisableDirectivesScanning = false);
llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename) {
return getOrCreateFileSystemEntry(Filename, getPolicy(Filename));
}

private:
/// Check whether the file should be scanned for preprocessor directives.
bool shouldScanForDirectives(StringRef Filename);
/// Same as the public version, but with explicit PathPolicy parameter.
llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename,
PathPolicy Policy);

/// For a filename that's not yet associated with any entry in the caches,
/// uses the underlying filesystem to either look up the entry based in the
/// shared cache indexed by unique ID, or creates new entry from scratch.
llvm::ErrorOr<const CachedFileSystemEntry &>
computeAndStoreResult(StringRef Filename);
computeAndStoreResult(StringRef Filename, PathPolicy Policy);

/// Scan for preprocessor directives for the given entry if necessary and
/// returns a wrapper object with reference semantics.
EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
StringRef Filename, bool Disable);
StringRef Filename, PathPolicy Policy);

/// Represents a filesystem entry that has been stat-ed (and potentially read)
/// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
}

EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
if (Entry.isError() || Entry.isDirectory() || Disable ||
!shouldScanForDirectives(Filename))
const CachedFileSystemEntry &Entry, StringRef Filename, PathPolicy Policy) {
if (Entry.isError() || Entry.isDirectory() || !Policy.ScanFile)
return EntryRef(Filename, Entry);

CachedFileContents *Contents = Entry.getCachedContents();
Expand Down Expand Up @@ -159,39 +158,22 @@ DependencyScanningFilesystemSharedCache::CacheShard::
return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
}

/// Whitelist file extensions that should be minimized, treating no extension as
/// a source file that should be minimized.
///
/// This is kinda hacky, it would be better if we knew what kind of file Clang
/// was expecting instead.
static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
PathPolicy clang::tooling::dependencies::getPolicy(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return true; // C++ standard library
return llvm::StringSwitch<bool>(Ext)
.CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
.CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
.CasesLower(".m", ".mm", true)
.CasesLower(".i", ".ii", ".mi", ".mmi", true)
.CasesLower(".def", ".inc", true)
.Default(false);
}

static bool shouldCacheStatFailures(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return false; // This may be the module cache directory.
// Only cache stat failures on files that are not expected to change during
// the build.
StringRef FName = llvm::sys::path::filename(Filename);
if (FName == "module.modulemap" || FName == "module.map")
return true;
return shouldScanForDirectivesBasedOnExtension(Filename);
}

bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
StringRef Filename) {
return shouldScanForDirectivesBasedOnExtension(Filename);
return PathPolicy::cache(ScanFile::Yes, CacheStatFailure::No);
// clang-format off
return llvm::StringSwitch<PathPolicy>(Ext)
.CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", PathPolicy::cache(ScanFile::Yes))
.CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", PathPolicy::cache(ScanFile::Yes))
.CasesLower(".m", ".mm", PathPolicy::cache(ScanFile::Yes))
.CasesLower(".i", ".ii", ".mi", ".mmi", PathPolicy::cache(ScanFile::Yes))
.CasesLower(".def", ".inc", PathPolicy::cache(ScanFile::Yes))
.CasesLower(".modulemap", ".map", PathPolicy::cache(ScanFile::No))
.CasesLower(".framework", ".apinotes", PathPolicy::cache(ScanFile::No))
.CasesLower(".yaml", ".json", ".hmap", PathPolicy::cache(ScanFile::No))
.Default(PathPolicy::fallThrough());
// clang-format on
}

const CachedFileSystemEntry &
Expand All @@ -215,10 +197,11 @@ DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
}

llvm::ErrorOr<const CachedFileSystemEntry &>
DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename,
PathPolicy Policy) {
llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
if (!Stat) {
if (!shouldCacheStatFailures(Filename))
if (!Policy.CacheStatFailure)
return Stat.getError();
const auto &Entry =
getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
Expand All @@ -244,25 +227,25 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {

llvm::ErrorOr<EntryRef>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
StringRef Filename, bool DisableDirectivesScanning) {
StringRef Filename, PathPolicy Policy) {
if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
return scanForDirectivesIfNecessary(*Entry, Filename,
DisableDirectivesScanning)
.unwrapError();
auto MaybeEntry = computeAndStoreResult(Filename);
return scanForDirectivesIfNecessary(*Entry, Filename, Policy).unwrapError();
auto MaybeEntry = computeAndStoreResult(Filename, Policy);
if (!MaybeEntry)
return MaybeEntry.getError();
return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
DisableDirectivesScanning)
return scanForDirectivesIfNecessary(*MaybeEntry, Filename, Policy)
.unwrapError();
}

llvm::ErrorOr<llvm::vfs::Status>
DependencyScanningWorkerFilesystem::status(const Twine &Path) {
SmallString<256> OwnedFilename;
StringRef Filename = Path.toStringRef(OwnedFilename);
PathPolicy Policy = getPolicy(Filename);
if (!Policy.Enable)
return getUnderlyingFS().status(Path);

llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename, Policy);
if (!Result)
return Result.getError();
return Result->getStatus();
Expand Down Expand Up @@ -318,8 +301,11 @@ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
SmallString<256> OwnedFilename;
StringRef Filename = Path.toStringRef(OwnedFilename);
PathPolicy Policy = getPolicy(Filename);
if (!Policy.Enable)
return getUnderlyingFS().openFileForRead(Path);

llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename, Policy);
if (!Result)
return Result.getError();
return DepScanFile::create(Result.get());
Expand Down
127 changes: 127 additions & 0 deletions clang/unittests/Tooling/DependencyScannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,130 @@ TEST(DependencyScanner, ScanDepsWithFS) {
EXPECT_EQ(convert_to_slash(DepFile),
"test.cpp.o: /root/test.cpp /root/header.h\n");
}

// Note: We want to test caching in DependencyScanningWorkerFilesystem. To do
// that, we need to be able to mutate the underlying file system. However,
// InMemoryFileSystem does not allow changing the contents of a file after it's
// been created.
// To simulate the behavior, we create two separate in-memory file systems, each
// containing different version of the same file. We pass those to two scanning
// file systems that share the same cache.

TEST(DependencyScanningFileSystemTest, CacheFileContentsEnabled) {
DependencyScanningFilesystemSharedCache SharedCache;

StringRef Path = "/root/source.c";
auto Contents1 = llvm::MemoryBuffer::getMemBuffer("contents1");
auto Contents2 = llvm::MemoryBuffer::getMemBuffer("contents2");

{
auto InMemoryFS =
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents1)));
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);
auto File = ScanningFS.openFileForRead(Path);
ASSERT_TRUE(File);
auto Buffer = (*File)->getBuffer("Buffer for /root/source.c.");
ASSERT_TRUE(Buffer);
auto Contents = (*Buffer)->getBuffer();
EXPECT_EQ(Contents, "contents1");
}

{
auto InMemoryFS =
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents2)));
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);
auto File = ScanningFS.openFileForRead(Path);
ASSERT_TRUE(File);
auto Buffer = (*File)->getBuffer("Buffer for /root/source.c.");
ASSERT_TRUE(Buffer);
auto Contents = (*Buffer)->getBuffer();
EXPECT_EQ(Contents, "contents1");
}
}

TEST(DependencyScanningFileSystemTest, CacheFileContentsDisabled) {
DependencyScanningFilesystemSharedCache SharedCache;

StringRef Path = "/root/module.pcm";
auto Contents1 = llvm::MemoryBuffer::getMemBuffer("contents1");
auto Contents2 = llvm::MemoryBuffer::getMemBuffer("contents2");

{
auto InMemoryFS =
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents1)));
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);
auto File = ScanningFS.openFileForRead(Path);
ASSERT_TRUE(File);
auto Buffer = (*File)->getBuffer("Buffer for /root/module.pcm.");
ASSERT_TRUE(Buffer);
auto Contents = (*Buffer)->getBuffer();
EXPECT_EQ(Contents, "contents1");
}

{
auto InMemoryFS =
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents2)));
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);
auto File = ScanningFS.openFileForRead(Path);
ASSERT_TRUE(File);
auto Buffer = (*File)->getBuffer("Buffer for /root/module.pcm.");
ASSERT_TRUE(Buffer);
auto Contents = (*Buffer)->getBuffer();
EXPECT_EQ(Contents, "contents2");
}
}

TEST(DependencyScanningFileSystemTest, CacheStatFailureEnabled) {
DependencyScanningFilesystemSharedCache SharedCache;
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);

StringRef Path = "/root/source.c";

auto Stat1 = ScanningFS.status(Path);
EXPECT_FALSE(Stat1);

auto Contents = llvm::MemoryBuffer::getMemBuffer("contents");
InMemoryFS->addFile(Path, 0, std::move(Contents));

auto Stat2 = ScanningFS.status(Path);
EXPECT_FALSE(Stat2);
}

TEST(DependencyScanningFileSystemTest, CacheStatFailureDisabledFile) {
DependencyScanningFilesystemSharedCache SharedCache;
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);

StringRef Path = "/root/vector";

auto Stat1 = ScanningFS.status(Path);
EXPECT_FALSE(Stat1);

auto Contents = llvm::MemoryBuffer::getMemBuffer("contents");
InMemoryFS->addFile(Path, 0, std::move(Contents));

auto Stat2 = ScanningFS.status(Path);
EXPECT_TRUE(Stat2);
}

TEST(DependencyScanningFileSystemTest, CacheStatFailureDisabledDirectory) {
DependencyScanningFilesystemSharedCache SharedCache;
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS);

StringRef Path = "/root/dir";

auto Stat1 = ScanningFS.status(Path);
EXPECT_FALSE(Stat1);

auto Contents = llvm::MemoryBuffer::getMemBuffer("contents");
InMemoryFS->addFile("/root/dir/file", 0, std::move(Contents));

auto Stat2 = ScanningFS.status(Path);
EXPECT_TRUE(Stat2);
}

0 comments on commit d1e00b6

Please sign in to comment.