Skip to content

Commit bc1a297

Browse files
committed
[clang][deps] Separate filesystem caches for minimized and original files
This patch separates the local and global caches of `DependencyScanningFilesystem` into two buckets: minimized files and original files. This is necessary to deal with precompiled modules/headers. Consider a single worker with its instance of filesystem: 1. Build system uses the worker to scan dependencies of module A => filesystem cache gets populated with minimized input files. 2. Build system uses the results to explicitly build module A => explicitly built module captures the state of the real filesystem (containing non-minimized input files). 3. Build system uses the prebuilt module A as an explicit precompiled dependency for another compile job B. 4. Build system uses the same worker to scan dependencies for job B => worker uses implicit modular build to discover dependencies, which validates the filesystem state embedded in the prebuilt module (non-minimized files) to the current view of the filesystem (minimized files), resulting in validation failures. This problem can be avoided in step 4 by collecting input files from the precompiled module and marking them as "ignored" in the minimizing filesystem. This way, the validation should succeed, since we should be always dealing with the original (non-minized) input files. However, the filesystem already minimized the input files in step 1 and put it in the cache, which gets used in step 4 as well even though it's marked ignored (do not minimize). This patch essentially fixes this oversight by making the `"file is minimized"` part of the cache key (from high level). Depends on D106064. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D106146
1 parent 9dc2636 commit bc1a297

File tree

4 files changed

+101
-36
lines changed

4 files changed

+101
-36
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ class CachedFileSystemEntry {
103103
};
104104

105105
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
106-
/// underlying real file system.
106+
/// underlying real file system. It distinguishes between minimized and original
107+
/// files.
107108
///
108109
/// It is sharded based on the hash of the key to reduce the lock contention for
109110
/// the worker threads.
@@ -114,21 +115,62 @@ class DependencyScanningFilesystemSharedCache {
114115
CachedFileSystemEntry Value;
115116
};
116117

117-
DependencyScanningFilesystemSharedCache();
118-
119118
/// Returns a cache entry for the corresponding key.
120119
///
121120
/// A new cache entry is created if the key is not in the cache. This is a
122121
/// thread safe call.
123-
SharedFileSystemEntry &get(StringRef Key);
122+
SharedFileSystemEntry &get(StringRef Key, bool Minimized);
124123

125124
private:
126-
struct CacheShard {
127-
std::mutex CacheLock;
128-
llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
125+
class SingleCache {
126+
public:
127+
SingleCache();
128+
129+
SharedFileSystemEntry &get(StringRef Key);
130+
131+
private:
132+
struct CacheShard {
133+
std::mutex CacheLock;
134+
llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
135+
};
136+
std::unique_ptr<CacheShard[]> CacheShards;
137+
unsigned NumShards;
129138
};
130-
std::unique_ptr<CacheShard[]> CacheShards;
131-
unsigned NumShards;
139+
140+
SingleCache CacheMinimized;
141+
SingleCache CacheOriginal;
142+
};
143+
144+
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
145+
/// underlying real file system. It distinguishes between minimized and original
146+
/// files.
147+
class DependencyScanningFilesystemLocalCache {
148+
private:
149+
using SingleCache =
150+
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>;
151+
152+
SingleCache CacheMinimized;
153+
SingleCache CacheOriginal;
154+
155+
SingleCache &selectCache(bool Minimized) {
156+
return Minimized ? CacheMinimized : CacheOriginal;
157+
}
158+
159+
public:
160+
void setCachedEntry(StringRef Filename, bool Minimized,
161+
const CachedFileSystemEntry *Entry) {
162+
SingleCache &Cache = selectCache(Minimized);
163+
bool IsInserted = Cache.try_emplace(Filename, Entry).second;
164+
(void)IsInserted;
165+
assert(IsInserted && "local cache is updated more than once");
166+
}
167+
168+
const CachedFileSystemEntry *getCachedEntry(StringRef Filename,
169+
bool Minimized) {
170+
SingleCache &Cache = selectCache(Minimized);
171+
auto It = Cache.find(Filename);
172+
return It == Cache.end() ? nullptr : It->getValue();
173+
}
132174
};
133175

134176
/// A virtual file system optimized for the dependency discovery.
@@ -159,24 +201,14 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
159201
private:
160202
bool shouldIgnoreFile(StringRef Filename);
161203

162-
void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
163-
bool IsInserted = Cache.try_emplace(Filename, Entry).second;
164-
(void)IsInserted;
165-
assert(IsInserted && "local cache is updated more than once");
166-
}
167-
168-
const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
169-
auto It = Cache.find(Filename);
170-
return It == Cache.end() ? nullptr : It->getValue();
171-
}
172-
173204
llvm::ErrorOr<const CachedFileSystemEntry *>
174205
getOrCreateFileSystemEntry(const StringRef Filename);
175206

207+
/// The global cache shared between worker threads.
176208
DependencyScanningFilesystemSharedCache &SharedCache;
177209
/// The local cache is used by the worker thread to cache file system queries
178210
/// locally instead of querying the global cache every time.
179-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
211+
DependencyScanningFilesystemLocalCache Cache;
180212
/// The optional mapping structure which records information about the
181213
/// excluded conditional directive skip mappings that are used by the
182214
/// currently active preprocessor.

clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,7 @@ CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status &&Stat) {
9999
return Result;
100100
}
101101

102-
DependencyScanningFilesystemSharedCache::
103-
DependencyScanningFilesystemSharedCache() {
102+
DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
104103
// This heuristic was chosen using a empirical testing on a
105104
// reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
106105
// sharding gives a performance edge by reducing the lock contention.
@@ -111,18 +110,20 @@ DependencyScanningFilesystemSharedCache::
111110
CacheShards = std::make_unique<CacheShard[]>(NumShards);
112111
}
113112

114-
/// Returns a cache entry for the corresponding key.
115-
///
116-
/// A new cache entry is created if the key is not in the cache. This is a
117-
/// thread safe call.
118113
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
119-
DependencyScanningFilesystemSharedCache::get(StringRef Key) {
114+
DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
120115
CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
121116
std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
122117
auto It = Shard.Cache.try_emplace(Key);
123118
return It.first->getValue();
124119
}
125120

121+
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
122+
DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
123+
SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
124+
return Cache.get(Key);
125+
}
126+
126127
/// Whitelist file extensions that should be minimized, treating no extension as
127128
/// a source file that should be minimized.
128129
///
@@ -165,17 +166,17 @@ bool DependencyScanningWorkerFilesystem::shouldIgnoreFile(
165166
llvm::ErrorOr<const CachedFileSystemEntry *>
166167
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
167168
const StringRef Filename) {
168-
if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) {
169+
bool ShouldMinimize =
170+
!IgnoredFiles.count(Filename) && shouldMinimize(Filename);
171+
172+
if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
169173
return Entry;
170-
}
171174

172175
// FIXME: Handle PCM/PCH files.
173176
// FIXME: Handle module map files.
174177

175-
bool KeepOriginalSource = shouldIgnoreFile(Filename) ||
176-
!shouldMinimize(Filename);
177178
DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
178-
&SharedCacheEntry = SharedCache.get(Filename);
179+
&SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
179180
const CachedFileSystemEntry *Result;
180181
{
181182
std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
@@ -197,15 +198,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
197198
CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
198199
std::move(*MaybeStatus));
199200
else
200-
CacheEntry = CachedFileSystemEntry::createFileEntry(
201-
Filename, FS, !KeepOriginalSource);
201+
CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS,
202+
ShouldMinimize);
202203
}
203204

204205
Result = &CacheEntry;
205206
}
206207

207208
// Store the result in the local cache.
208-
setCachedEntry(Filename, Result);
209+
Cache.setCachedEntry(Filename, ShouldMinimize, Result);
209210
return Result;
210211
}
211212

clang/unittests/Tooling/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ clang_target_link_libraries(ToolingTests
6868
clangAST
6969
clangASTMatchers
7070
clangBasic
71+
clangDependencyScanning
7172
clangFormat
7273
clangFrontend
7374
clangLex

clang/unittests/Tooling/DependencyScannerTest.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "clang/Frontend/FrontendActions.h"
1616
#include "clang/Tooling/CompilationDatabase.h"
1717
#include "clang/Tooling/Tooling.h"
18+
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
1819
#include "llvm/ADT/STLExtras.h"
1920
#include "llvm/Support/FormatVariadic.h"
2021
#include "llvm/Support/Path.h"
@@ -203,5 +204,35 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) {
203204
EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
204205
}
205206

207+
namespace dependencies {
208+
TEST(DependencyScanningFilesystem, IgnoredFilesHaveSeparateCache) {
209+
auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
210+
VFS->addFile("/mod.h", 0, llvm::MemoryBuffer::getMemBuffer("// hi there!\n"));
211+
212+
DependencyScanningFilesystemSharedCache SharedCache;
213+
auto Mappings = std::make_unique<ExcludedPreprocessorDirectiveSkipMapping>();
214+
DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get());
215+
216+
auto StatusMinimized0 = DepFS.status("/mod.h");
217+
DepFS.ignoreFile("/mod.h");
218+
auto StatusFull1 = DepFS.status("/mod.h");
219+
DepFS.clearIgnoredFiles();
220+
221+
auto StatusMinimized2 = DepFS.status("/mod.h");
222+
DepFS.ignoreFile("/mod.h");
223+
auto StatusFull3 = DepFS.status("/mod.h");
224+
225+
EXPECT_TRUE(StatusMinimized0);
226+
EXPECT_EQ(StatusMinimized0->getSize(), 0u);
227+
EXPECT_TRUE(StatusFull1);
228+
EXPECT_EQ(StatusFull1->getSize(), 13u);
229+
230+
EXPECT_TRUE(StatusMinimized2);
231+
EXPECT_EQ(StatusMinimized2->getSize(), 0u);
232+
EXPECT_TRUE(StatusFull3);
233+
EXPECT_EQ(StatusFull3->getSize(), 13u);
234+
}
235+
236+
} // end namespace dependencies
206237
} // end namespace tooling
207238
} // end namespace clang

0 commit comments

Comments
 (0)