diff --git a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h index f50332c964ced..8711226a4ef63 100644 --- a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h @@ -16,6 +16,8 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/VirtualFileSystem.h" +#include +#include #include #include #include @@ -152,19 +154,54 @@ using CachedRealPath = llvm::ErrorOr; /// the worker threads. class DependencyScanningFilesystemSharedCache { public: + /// Tracks a cache entry whose value is currently being computed by one + /// worker so that other workers arriving at the same key can wait for the + /// result rather than producing it in parallel. The producer publishes the + /// resolved entry into \c Result, sets \c Done, and notifies waiters via + /// \c CondVar. Waiters synchronize through the owning shard's \c CacheLock. + struct InProgressEntry { + std::condition_variable CondVar; + bool Done = false; + const CachedFileSystemEntry *Result = nullptr; + }; + + /// Outcome of attempting to claim a slot for a given key. Exactly one of + /// \c Resolved or \c Produce is non-null: + /// - \c Resolved: the cache (or another worker) has already produced the + /// entry; the caller should use it. + /// - \c Produce: the caller has the right to produce the entry and + /// must call the matching \c fulfil*Slot once finished. + struct SlotAcquisitionResult { + const CachedFileSystemEntry *Resolved; + std::shared_ptr Produce; + }; + struct CacheShard { /// The mutex that needs to be locked before mutation of any member. mutable std::mutex CacheLock; - /// Map from filenames to cached entries and real paths. - llvm::StringMap< - std::pair, - llvm::BumpPtrAllocator> - CacheByFilename; + /// Cache state stored per filename: the resolved entry, the + /// resolved real path, and the in-flight slot for an outstanding + /// query (if any). \c InProgress is reset to null once the producing + /// worker publishes its result. + struct FilenameCacheState { + const CachedFileSystemEntry *Entry = nullptr; + const CachedRealPath *RealPath = nullptr; + std::shared_ptr InProgress; + }; + + /// Cache state stored per unique ID; similar to + /// \c FilenameCacheState. + struct UIDCacheState { + const CachedFileSystemEntry *Entry = nullptr; + std::shared_ptr InProgress; + }; + + /// Map from filenames to their cached state. + llvm::StringMap CacheByFilename; - /// Map from unique IDs to cached entries. - llvm::DenseMap - EntriesByUID; + /// Map from unique IDs to their cached state. + llvm::DenseMap EntriesByUID; /// The backing storage for cached entries. llvm::SpecificBumpPtrAllocator EntryStorage; @@ -175,13 +212,6 @@ class DependencyScanningFilesystemSharedCache { /// The backing storage for cached real paths. llvm::SpecificBumpPtrAllocator RealPathStorage; - /// Returns entry associated with the filename or nullptr if none is found. - const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; - - /// Returns entry associated with the unique ID or nullptr if none is found. - const CachedFileSystemEntry * - findEntryByUID(llvm::sys::fs::UniqueID UID) const; - /// Returns entry associated with the filename if there is some. Otherwise, /// constructs new one with the given status, associates it with the /// filename and returns the result. @@ -202,6 +232,35 @@ class DependencyScanningFilesystemSharedCache { getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry); + /// Claims a slot for \p Filename. If a resolved entry already exists, + /// returns it. If another worker is currently producing a result for this + /// filename, blocks on its \c InProgressEntry until done and returns the + /// produced entry. Otherwise installs a fresh \c InProgressEntry and + /// returns it as a producer slot which the caller must complete via + /// \c fulfilFilenameSlot. + SlotAcquisitionResult acquireFilenameSlot(StringRef); + + /// Claims a slot for \p UID. Same semantics as \c acquireFilenameSlot but + /// keyed by unique ID; used after \c stat() has identified a file so that + /// concurrent \c readFile() calls for the same on-disk file (reached via + /// different filenames) collapse onto a single open. + SlotAcquisitionResult acquireUIDSlot(llvm::sys::fs::UniqueID); + + /// Completes a producer slot acquired via \c acquireFilenameSlot. + /// Publishes \p Result under \p Filename in \c CacheByFilename, + /// records it in the slot, marks the slot done, removes the + /// in-progress entry, and notifies waiters. + void fulfilFilenameSlot(StringRef, const std::shared_ptr &, + const CachedFileSystemEntry *); + + /// Completes a producer slot acquired via \c acquireUIDSlot. + /// Publishes \p Result under \p UID in \c EntriesByUID, records + /// it in the slot, marks the slot done, removes the in-progress + /// entry, and notifies waiters. + void fulfilUIDSlot(llvm::sys::fs::UniqueID, + const std::shared_ptr &, + const CachedFileSystemEntry *); + /// Returns the real path associated with the filename or nullptr if none is /// found. const CachedRealPath *findRealPathByFilename(StringRef Filename) const; @@ -409,14 +468,16 @@ class DependencyScanningWorkerFilesystem bool exists(const Twine &Path) override; private: - /// For a filename that's not yet associated with any entry in the caches, - /// uses the underlying filesystem to either look up the entry based in the - /// shared cache indexed by unique ID, or creates new entry from scratch. - /// \p FilenameForLookup will always be an absolute path, and different than - /// \p OriginalFilename if \p OriginalFilename is relative. + /// Resolves the cache entry for \p FilenameForLookup through the shared + /// cache: returns an entry already produced by another worker (a cache hit + /// or the result of an in-flight wait), or claims the producer slot and + /// computes the entry via the underlying + /// filesystem. The result is written through to the local cache. + /// \p FilenameForLookup is always absolute, and may differ from + /// \p OriginalFilename if the latter is relative. llvm::ErrorOr - computeAndStoreResult(StringRef OriginalFilename, - StringRef FilenameForLookup); + resolveFilenameThroughSharedCache(StringRef OriginalFilename, + StringRef FilenameForLookup); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. @@ -439,22 +500,6 @@ class DependencyScanningWorkerFilesystem const CachedFileSystemEntry & getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); - /// Returns entry associated with the filename or nullptr if none is found. - /// - /// Returns entry from local cache if there is some. Otherwise, if the entry - /// is found in the shared cache, writes it through the local cache and - /// returns it. Otherwise returns nullptr. - const CachedFileSystemEntry * - findEntryByFilenameWithWriteThrough(StringRef Filename); - - /// Returns entry associated with the unique ID in the shared cache or nullptr - /// if none is found. - const CachedFileSystemEntry * - findSharedEntryByUID(llvm::vfs::Status Stat) const { - return SharedCache.getShardForUID(Stat.getUniqueID()) - .findEntryByUID(Stat.getUniqueID()); - } - /// Associates the given entry with the filename in the local cache and /// returns it. const CachedFileSystemEntry & diff --git a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp index 49dad3758cf57..030b9a702697a 100644 --- a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp @@ -114,8 +114,12 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( for (unsigned i = 0; i < NumShards; i++) { const CacheShard &Shard = CacheShards[i]; std::lock_guard LockGuard(Shard.CacheLock); - for (const auto &[Path, CachedPair] : Shard.CacheByFilename) { - const CachedFileSystemEntry *Entry = CachedPair.first; + for (const auto &[Path, State] : Shard.CacheByFilename) { + const CachedFileSystemEntry *Entry = State.Entry; + // Cached state may carry only a real path or in-progress slot for a + // given filename; skip filenames whose entry has not been resolved. + if (!Entry) + continue; llvm::ErrorOr Status = UnderlyingFS.status(Path); if (Status) { if (Entry->getError()) { @@ -150,38 +154,112 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( return InvalidDiagInfo; } -const CachedFileSystemEntry * -DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( - StringRef Filename) const { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - std::lock_guard LockGuard(CacheLock); - auto It = CacheByFilename.find(Filename); - return It == CacheByFilename.end() ? nullptr : It->getValue().first; -} - -const CachedFileSystemEntry * -DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( - llvm::sys::fs::UniqueID UID) const { - std::lock_guard LockGuard(CacheLock); - auto It = EntriesByUID.find(UID); - return It == EntriesByUID.end() ? nullptr : It->getSecond(); -} - const CachedFileSystemEntry & DependencyScanningFilesystemSharedCache::CacheShard:: getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr Stat) { std::lock_guard LockGuard(CacheLock); - auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!CachedEntry) { - // The entry is not present in the shared cache. Either the cache doesn't - // know about the file at all, or it only knows about its real path. - assert((Inserted || CachedRealPath) && "existing file with empty pair"); - CachedEntry = + auto [It, Inserted] = CacheByFilename.try_emplace(Filename); + auto &State = It->getValue(); + if (!State.Entry) { + // The entry is not present in the shared cache. This method runs only + // from inside a producer slot held by the caller, so either the cache + // state was just freshly inserted or it already carries an in-flight + // slot. + assert((Inserted || State.InProgress) && + "cache state should be fresh or carry an in-flight slot held by " + "the caller"); + State.Entry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); } - return *CachedEntry; + return *State.Entry; +} + +DependencyScanningFilesystemSharedCache::SlotAcquisitionResult +DependencyScanningFilesystemSharedCache::CacheShard::acquireFilenameSlot( + StringRef Filename) { + assert(llvm::sys::path::is_absolute_gnu(Filename)); + std::unique_lock LockGuard(CacheLock); + auto &State = CacheByFilename[Filename]; + + // Cache hit. + if (State.Entry) + return SlotAcquisitionResult{State.Entry, nullptr}; + + // Another worker is producing for this filename, wait for it. Capture the + // shared_ptr by copy: the wait may release the lock and another insertion + // could invalidate the State reference. + if (State.InProgress) { + std::shared_ptr Pending = State.InProgress; + Pending->CondVar.wait(LockGuard, [&] { return Pending->Done; }); + assert(Pending->Result && + "in-progress filename slot fulfilled without an entry"); + return SlotAcquisitionResult{Pending->Result, nullptr}; + } + + // Install an in-progress entry and return it. + State.InProgress = std::make_shared(); + return SlotAcquisitionResult{nullptr, State.InProgress}; +} + +DependencyScanningFilesystemSharedCache::SlotAcquisitionResult +DependencyScanningFilesystemSharedCache::CacheShard::acquireUIDSlot( + llvm::sys::fs::UniqueID UID) { + std::unique_lock LockGuard(CacheLock); + auto &State = EntriesByUID[UID]; + + // Cache hit. + if (State.Entry) + return SlotAcquisitionResult{State.Entry, nullptr}; + + // Another worker is producing for this UID, wait for it. + if (State.InProgress) { + std::shared_ptr Pending = State.InProgress; + Pending->CondVar.wait(LockGuard, [&] { return Pending->Done; }); + assert(Pending->Result && + "in-progress UID slot fulfilled without an entry"); + return SlotAcquisitionResult{Pending->Result, nullptr}; + } + + // Install an in-progress entry and return it. + State.InProgress = std::make_shared(); + return SlotAcquisitionResult{nullptr, State.InProgress}; +} + +void DependencyScanningFilesystemSharedCache::CacheShard::fulfilFilenameSlot( + StringRef Filename, + const std::shared_ptr< + DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE, + const CachedFileSystemEntry *Result) { + { + std::lock_guard LockGuard(CacheLock); + auto &State = CacheByFilename[Filename]; + if (Result && !State.Entry) + State.Entry = Result; + IPE->Result = Result; + IPE->Done = true; + State.InProgress.reset(); + } + // Notify other workers awaiting this result. + IPE->CondVar.notify_all(); +} + +void DependencyScanningFilesystemSharedCache::CacheShard::fulfilUIDSlot( + llvm::sys::fs::UniqueID UID, + const std::shared_ptr< + DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE, + const CachedFileSystemEntry *Result) { + { + std::lock_guard LockGuard(CacheLock); + auto &State = EntriesByUID[UID]; + if (Result && !State.Entry) + State.Entry = Result; + IPE->Result = Result; + IPE->Done = true; + State.InProgress.reset(); + } + // Notify other workers awaiting this result. + IPE->CondVar.notify_all(); } const CachedFileSystemEntry & @@ -189,17 +267,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr Contents) { std::lock_guard LockGuard(CacheLock); - auto [It, Inserted] = EntriesByUID.try_emplace(UID); - auto &CachedEntry = It->getSecond(); - if (Inserted) { + auto &State = EntriesByUID[UID]; + if (!State.Entry) { CachedFileContents *StoredContents = nullptr; if (Contents) StoredContents = new (ContentsStorage.Allocate()) CachedFileContents(std::move(Contents)); - CachedEntry = new (EntryStorage.Allocate()) + State.Entry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat), StoredContents); } - return *CachedEntry; + return *State.Entry; } const CachedFileSystemEntry & @@ -207,11 +284,10 @@ DependencyScanningFilesystemSharedCache::CacheShard:: getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry) { std::lock_guard LockGuard(CacheLock); - auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!Inserted || !CachedEntry) - CachedEntry = &Entry; - return *CachedEntry; + auto &State = CacheByFilename[Filename]; + if (!State.Entry) + State.Entry = &Entry; + return *State.Entry; } const CachedRealPath * @@ -220,7 +296,7 @@ DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( assert(llvm::sys::path::is_absolute_gnu(Filename)); std::lock_guard LockGuard(CacheLock); auto It = CacheByFilename.find(Filename); - return It == CacheByFilename.end() ? nullptr : It->getValue().second; + return It == CacheByFilename.end() ? nullptr : It->getValue().RealPath; } const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: @@ -228,7 +304,7 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: llvm::ErrorOr RealPath) { std::lock_guard LockGuard(CacheLock); - const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; + const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].RealPath; if (!StoredRealPath) { auto OwnedRealPath = [&]() -> CachedRealPath { if (!RealPath) @@ -262,44 +338,64 @@ DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( std::move(TEntry.Contents)); } -const CachedFileSystemEntry * -DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( - StringRef Filename) { - if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) - return Entry; - auto &Shard = SharedCache.getShardForFilename(Filename); - if (const auto *Entry = Shard.findEntryByFilename(Filename)) - return &LocalCache.insertEntryForFilename(Filename, *Entry); - return nullptr; -} - llvm::ErrorOr -DependencyScanningWorkerFilesystem::computeAndStoreResult( +DependencyScanningWorkerFilesystem::resolveFilenameThroughSharedCache( StringRef OriginalFilename, StringRef FilenameForLookup) { - llvm::ErrorOr Stat = - getUnderlyingFS().status(OriginalFilename); - if (!Stat) { - const auto &Entry = - getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); - return insertLocalEntryForFilename(FilenameForLookup, Entry); - } - - if (const auto *Entry = findSharedEntryByUID(*Stat)) - return insertLocalEntryForFilename(FilenameForLookup, *Entry); - - auto TEntry = - Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); - - const CachedFileSystemEntry *SharedEntry = [&]() { - if (TEntry) { - const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); - return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); + auto &FilenameShard = SharedCache.getShardForFilename(FilenameForLookup); + + // Acquire a per-filename in-progress entry. If another worker has already + // produced an entry under this filename, or is currently producing one, + // adopt its result instead of duplicating the underlying filesystem. + auto FilenameSlot = FilenameShard.acquireFilenameSlot(FilenameForLookup); + if (FilenameSlot.Resolved) + return insertLocalEntryForFilename(FilenameForLookup, + *FilenameSlot.Resolved); + + // Compute the result, then publish it through the producer slot so any + // workers waiting on the same filename adopt it. + std::shared_ptr + ProducerSlot = std::move(FilenameSlot.Produce); + auto Result = [&]() -> llvm::ErrorOr { + llvm::ErrorOr Stat = + getUnderlyingFS().status(OriginalFilename); + if (!Stat) + return insertLocalEntryForFilename( + FilenameForLookup, getOrEmplaceSharedEntryForFilename( + FilenameForLookup, Stat.getError())); + + // Acquire a per-UID producer slot to dedup concurrent readFile() + // calls across workers that arrived under different filenames + // pointing at the same on-disk file. + auto &UIDShard = SharedCache.getShardForUID(Stat->getUniqueID()); + auto UIDSlot = UIDShard.acquireUIDSlot(Stat->getUniqueID()); + const CachedFileSystemEntry *SharedEntry = nullptr; + if (UIDSlot.Resolved) { + SharedEntry = UIDSlot.Resolved; + } else { + auto TEntry = Stat->isDirectory() ? TentativeEntry(*Stat) + : readFile(OriginalFilename); + if (TEntry) { + SharedEntry = &getOrEmplaceSharedEntryForUID(std::move(*TEntry)); + } else { + // `readFile` failed despite `stat` succeeding. Cache the failure under + // the filename, and publish that same entry under the UID so awaiting + // workers surface the error rather than racing to retry the open. + SharedEntry = &getOrEmplaceSharedEntryForFilename(FilenameForLookup, + TEntry.getError()); + } + UIDShard.fulfilUIDSlot(Stat->getUniqueID(), UIDSlot.Produce, SharedEntry); } - return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, - TEntry.getError()); + + // Bind the resolved/produced entry to this filename in the shared cache + // (idempotent if already there) and the local cache. + return insertLocalEntryForFilename( + FilenameForLookup, + getOrInsertSharedEntryForFilename(FilenameForLookup, *SharedEntry)); }(); - return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); + FilenameShard.fulfilFilenameSlot(FilenameForLookup, ProducerSlot, + Result ? &*Result : nullptr); + return Result; } llvm::ErrorOr @@ -310,10 +406,11 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( if (!FilenameForLookup) return FilenameForLookup.getError(); - if (const auto *Entry = - findEntryByFilenameWithWriteThrough(*FilenameForLookup)) + if (const auto *Entry = LocalCache.findEntryByFilename(*FilenameForLookup)) return EntryRef(OriginalFilename, *Entry).unwrapError(); - auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); + + auto MaybeEntry = + resolveFilenameThroughSharedCache(OriginalFilename, *FilenameForLookup); if (!MaybeEntry) return MaybeEntry.getError(); return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); diff --git a/clang/unittests/DependencyScanning/CMakeLists.txt b/clang/unittests/DependencyScanning/CMakeLists.txt index 4f2a36067209c..53d257a76784d 100644 --- a/clang/unittests/DependencyScanning/CMakeLists.txt +++ b/clang/unittests/DependencyScanning/CMakeLists.txt @@ -4,6 +4,8 @@ add_clang_unittest(ClangDependencyScanningTests CLANG_LIBS clangDependencyScanning clangFrontend # For TextDiagnosticPrinter. + LINK_LIBS + LLVMTestingSupport LLVM_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Option diff --git a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp index d9489a9bf27ca..e17db177fc550 100644 --- a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp +++ b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp @@ -9,10 +9,39 @@ #include "clang/DependencyScanning/DependencyScanningFilesystem.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" +#include +#include +#include using namespace clang::dependencies; +namespace { + +/// Releases all waiting threads simultaneously so that the worker logic can +/// be observed under maximal concurrency rather than a thread-spawn cascade. +struct StartBarrier { + std::mutex M; + std::condition_variable CV; + bool Released = false; + + void wait() { + std::unique_lock Lock(M); + CV.wait(Lock, [&] { return Released; }); + } + + void release() { + { + std::lock_guard Lock(M); + Released = true; + } + CV.notify_all(); + } +}; + +} // namespace + TEST(DependencyScanningFilesystem, OpenFileAndGetBufferRepeatedly) { auto InMemoryFS = llvm::makeIntrusiveRefCnt(); InMemoryFS->setCurrentWorkingDirectory("/"); @@ -286,3 +315,147 @@ TEST(DependencyScanningFilesystem, DoNotDiagnoseDirSizeChange) { auto InvalidEntries = SharedCache.getOutOfDateEntries(*FS); EXPECT_EQ(InvalidEntries.size(), 0u); } + +TEST(DependencyScanningWorkerFilesystem, ConcurrentSameFilenameDeduplicates) { + llvm::unittest::TempDir TmpDir("dswf-same-filename", /*Unique=*/true); + llvm::unittest::TempFile TmpFile(TmpDir.path("foo.c"), /*Suffix=*/"", + /*Contents=*/"hello"); + + auto TracingFS = + llvm::makeIntrusiveRefCnt( + llvm::vfs::getRealFileSystem()); + DependencyScanningFilesystemSharedCache SharedCache; + + constexpr unsigned NumWorkers = 16; + std::vector> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique( + SharedCache, TracingFS)); + + StartBarrier Barrier; + std::vector Threads; + std::vector> Results( + NumWorkers, llvm::ErrorOr(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + Threads.emplace_back([&, I] { + Barrier.wait(); + Results[I] = Workers[I]->getOrCreateFileSystemEntry(TmpFile.path()); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u); + + // All workers must have observed the same underlying entry. + ASSERT_TRUE(Results[0]); + llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID(); + const char *FirstContents = Results[0]->getContents().data(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_TRUE(Results[I]) << "worker " << I << " failed"; + EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID); + EXPECT_EQ(Results[I]->getContents().data(), FirstContents); + } +} + +TEST(DependencyScanningWorkerFilesystem, + ConcurrentSameUIDDifferentFilenamesDeduplicatesOpen) { + // Use a real on-disk file plus a hard link so the two filenames share a + // UniqueID, exercising the per-UID slot. + llvm::unittest::TempDir TmpDir("dswf-same-uid", /*Unique=*/true); + llvm::SmallString<128> RealPath = TmpDir.path("real.c"); + llvm::SmallString<128> AliasPath = TmpDir.path("alias.c"); + llvm::unittest::TempFile TmpFile(RealPath, /*Suffix=*/"", /*Contents=*/"hi"); + llvm::unittest::TempLink TmpLink(RealPath, AliasPath); + + auto TracingFS = + llvm::makeIntrusiveRefCnt( + llvm::vfs::getRealFileSystem()); + DependencyScanningFilesystemSharedCache SharedCache; + + constexpr unsigned NumWorkers = 16; + std::vector> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique( + SharedCache, TracingFS)); + + StartBarrier Barrier; + std::vector Threads; + std::vector> Results( + NumWorkers, llvm::ErrorOr(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + llvm::StringRef Path = + (I % 2 == 0) ? llvm::StringRef(RealPath) : llvm::StringRef(AliasPath); + Threads.emplace_back([&, I, Path] { + Barrier.wait(); + Results[I] = Workers[I]->getOrCreateFileSystemEntry(Path); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + // Each filename's slot dedupes its own stats; with two filenames we expect + // at most two stats. The UID slot then collapses the actual reads. + EXPECT_LE(TracingFS->NumStatusCalls.load(), 2u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u); + + ASSERT_TRUE(Results[0]); + llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_TRUE(Results[I]) << "worker " << I << " failed"; + EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID); + } +} + +TEST(DependencyScanningWorkerFilesystem, ConcurrentNegativeStatDeduplicates) { + // Construct a path inside a temporary directory but never create the + // file, so concurrent stat() calls land on the negative-stat path through + // the real filesystem. + llvm::unittest::TempDir TmpDir("dswf-negative", /*Unique=*/true); + llvm::SmallString<128> MissingPath = TmpDir.path("missing.h"); + + auto TracingFS = + llvm::makeIntrusiveRefCnt( + llvm::vfs::getRealFileSystem()); + DependencyScanningFilesystemSharedCache SharedCache; + + constexpr unsigned NumWorkers = 16; + std::vector> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique( + SharedCache, TracingFS)); + + StartBarrier Barrier; + std::vector Threads; + std::vector> Results( + NumWorkers, llvm::ErrorOr(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + Threads.emplace_back([&, I] { + Barrier.wait(); + Results[I] = Workers[I]->status(MissingPath); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 0u); + + // Every worker observed the same negative result. + ASSERT_FALSE(Results[0]); + std::error_code FirstError = Results[0].getError(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_FALSE(Results[I]) << "worker " << I << " unexpectedly succeeded"; + EXPECT_EQ(Results[I].getError(), FirstError); + } +} diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 5b8871b8f3db5..d22c534228331 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -27,6 +27,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" +#include #include #include #include @@ -1156,20 +1157,28 @@ class YAMLVFSWriter { /// File system that tracks the number of calls to the underlying file system. /// This is particularly useful when wrapped around \c RealFileSystem to add /// lightweight tracking of expensive syscalls. -class LLVM_ABI TracingFileSystem - : public llvm::RTTIExtends { +/// +/// Templated on the counter type so callers can choose between non-atomic +/// counters (suitable for single-threaded tracing) and atomic counters +/// (suitable for tracing under concurrent access). Use the +/// \c TracingFileSystem and \c AtomicTracingFileSystem aliases below. +template +class TracingFileSystemImpl + : public llvm::RTTIExtends, + ProxyFileSystem> { public: - static const char ID; + inline static const char ID = 0; - std::size_t NumStatusCalls = 0; - std::size_t NumOpenFileForReadCalls = 0; - std::size_t NumDirBeginCalls = 0; - std::size_t NumGetRealPathCalls = 0; - std::size_t NumExistsCalls = 0; - std::size_t NumIsLocalCalls = 0; + CounterT NumStatusCalls = 0; + CounterT NumOpenFileForReadCalls = 0; + CounterT NumDirBeginCalls = 0; + CounterT NumGetRealPathCalls = 0; + CounterT NumExistsCalls = 0; + CounterT NumIsLocalCalls = 0; - TracingFileSystem(llvm::IntrusiveRefCntPtr FS) - : RTTIExtends(std::move(FS)) {} + TracingFileSystemImpl(llvm::IntrusiveRefCntPtr FS) + : llvm::RTTIExtends, ProxyFileSystem>( + std::move(FS)) {} ErrorOr status(const Twine &Path) override { ++NumStatusCalls; @@ -1203,10 +1212,44 @@ class LLVM_ABI TracingFileSystem } protected: - void printImpl(raw_ostream &OS, PrintType Type, - unsigned IndentLevel) const override; + void printImpl(raw_ostream &OS, FileSystem::PrintType Type, + unsigned IndentLevel) const override { + FileSystem::printIndent(OS, IndentLevel); + OS << "TracingFileSystem\n"; + if (Type == FileSystem::PrintType::Summary) + return; + + FileSystem::printIndent(OS, IndentLevel); + OS << "NumStatusCalls=" << static_cast(NumStatusCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumOpenFileForReadCalls=" + << static_cast(NumOpenFileForReadCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumDirBeginCalls=" << static_cast(NumDirBeginCalls) + << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumGetRealPathCalls=" + << static_cast(NumGetRealPathCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumExistsCalls=" << static_cast(NumExistsCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumIsLocalCalls=" << static_cast(NumIsLocalCalls) + << "\n"; + + if (Type == FileSystem::PrintType::Contents) + Type = FileSystem::PrintType::Summary; + this->getUnderlyingFS().print(OS, Type, IndentLevel + 1); + } }; +/// Single-threaded tracing filesystem. Counters are plain \c std::size_t and +/// must not be incremented concurrently. +using TracingFileSystem = TracingFileSystemImpl; + +/// Concurrent-safe tracing filesystem. Counters are \c std::atomic +/// so the proxy can be shared across threads. +using AtomicTracingFileSystem = TracingFileSystemImpl>; + } // namespace vfs } // namespace llvm diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 69f3bb8582b87..42e8bb4f9958e 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -2991,34 +2991,8 @@ recursive_directory_iterator::increment(std::error_code &EC) { return *this; } -void TracingFileSystem::printImpl(raw_ostream &OS, PrintType Type, - unsigned IndentLevel) const { - printIndent(OS, IndentLevel); - OS << "TracingFileSystem\n"; - if (Type == PrintType::Summary) - return; - - printIndent(OS, IndentLevel); - OS << "NumStatusCalls=" << NumStatusCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumOpenFileForReadCalls=" << NumOpenFileForReadCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumDirBeginCalls=" << NumDirBeginCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumGetRealPathCalls=" << NumGetRealPathCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumExistsCalls=" << NumExistsCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumIsLocalCalls=" << NumIsLocalCalls << "\n"; - - if (Type == PrintType::Contents) - Type = PrintType::Summary; - getUnderlyingFS().print(OS, Type, IndentLevel + 1); -} - const char FileSystem::ID = 0; const char OverlayFileSystem::ID = 0; const char ProxyFileSystem::ID = 0; const char InMemoryFileSystem::ID = 0; const char RedirectingFileSystem::ID = 0; -const char TracingFileSystem::ID = 0; diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp index bb0172b28e373..507a0e4108a79 100644 --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -22,6 +22,8 @@ #include "gtest/gtest.h" #include #include +#include +#include using namespace llvm; using llvm::sys::fs::UniqueID; @@ -3732,3 +3734,31 @@ TEST(TracingFileSystemTest, PrintOutput) { " InMemoryFileSystem\n", Output); } + +TEST(TracingFileSystemTest, AtomicCountersUnderConcurrency) { + auto InMemoryFS = makeIntrusiveRefCnt(); + InMemoryFS->setCurrentWorkingDirectory("/"); + InMemoryFS->addFile("/foo", 0, MemoryBuffer::getMemBuffer("hello")); + + auto TracingFS = + makeIntrusiveRefCnt(std::move(InMemoryFS)); + + constexpr unsigned NumThreads = 8; + constexpr unsigned CallsPerThread = 100; + std::vector Threads; + Threads.reserve(NumThreads); + for (unsigned I = 0; I < NumThreads; ++I) { + Threads.emplace_back([&] { + for (unsigned J = 0; J < CallsPerThread; ++J) { + (void)TracingFS->status("/foo"); + (void)TracingFS->openFileForRead("/foo"); + } + }); + } + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), NumThreads * CallsPerThread); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), + NumThreads * CallsPerThread); +}