From ffe90c97fde0cd8badfc5f2785a1e2d006f0c08f Mon Sep 17 00:00:00 2001 From: Robert Widmann Date: Tue, 18 Aug 2020 19:48:11 -0700 Subject: [PATCH] Increase the Entropy of ModuleManager Map Keys The ModuleManager's use of FileEntry nodes as the keys for its map of loaded modules is less than ideal. Uniqueness for FileEntry nodes is maintained by FileManager, which in turn uses inode numbers on hosts that support that. When coupled with the module cache's proclivity for turning over and deleting stale PCMs, this means entries for different module files can wind up reusing the same underlying inode. When this happens, subsequent accesses to the Modules map will disagree on the ModuleFile associated with a given file. It's fine to use the file management utilities to guarantee the presence of module data, but we need a better source of key material that is invariant with respect to these OS-level details. Using file paths alone is a similarly frought solution because the ASTWriter performs a custom canonicalization step (that is not equivalent to path canonicalization) that renders keys from loaded AST cores useless for looking up cached entries. To mitigate the effects of inode reuse, increase the entropy of the key material by incorporating the modtime and size. This ultimately decreases the likelihood that a PCM that is swapped on disk will confuse the cache, but it does not eliminate the possibility of collisions. rdar://48443680 --- .../clang/Serialization/ModuleManager.h | 46 ++++++++++++++++++- clang/lib/Serialization/ModuleManager.cpp | 12 ++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 15ddb9875ff17..3c5ccb1d88fb0 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -59,8 +59,50 @@ class ModuleManager { // to implement short-circuiting logic when running DFS over the dependencies. SmallVector Roots; + /// An \c EntryKey is a thin wrapper around a \c FileEntry that implements + /// a richer notion of identity. + /// + /// A plain \c FileEntry has its identity tied to inode numbers. When the + /// module cache regenerates a PCM, some filesystem allocators may reuse + /// inode numbers for distinct modules, which can cause the cache to return + /// mismatched entries. An \c EntryKey ensures that the size and modification + /// time are taken into account when determining the identity of a key, which + /// significantly decreases - but does not eliminate - the chance of + /// a collision. + struct EntryKey { + const FileEntry *Entry; + + struct Info { + static inline EntryKey getEmptyKey() { + return EntryKey{llvm::DenseMapInfo::getEmptyKey()}; + } + static inline EntryKey getTombstoneKey() { + return EntryKey{ + llvm::DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const EntryKey &Val) { + return llvm::DenseMapInfo::getHashValue(Val.Entry); + } + static bool isEqual(const EntryKey &LHS, const EntryKey &RHS) { + if (LHS.Entry == getEmptyKey().Entry || + LHS.Entry == getTombstoneKey().Entry || + RHS.Entry == getEmptyKey().Entry || + RHS.Entry == getTombstoneKey().Entry) { + return LHS.Entry == RHS.Entry; + } + if (LHS.Entry == nullptr || RHS.Entry == nullptr) { + return LHS.Entry == RHS.Entry; + } + return LHS.Entry == RHS.Entry && + LHS.Entry->getSize() == RHS.Entry->getSize() && + LHS.Entry->getModificationTime() == + RHS.Entry->getModificationTime(); + } + }; + }; + /// All loaded modules, indexed by name. - llvm::DenseMap Modules; + llvm::DenseMap Modules; /// FileManager that handles translating between filenames and /// FileEntry *. @@ -76,7 +118,7 @@ class ModuleManager { const HeaderSearch &HeaderSearchInfo; /// A lookup of in-memory (virtual file) buffers - llvm::DenseMap> + llvm::DenseMap, EntryKey::Info> InMemoryBuffers; /// The visitation order. diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index a42ed2f3c179d..9520172a4bdc0 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -59,7 +59,7 @@ ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const { } ModuleFile *ModuleManager::lookup(const FileEntry *File) const { - auto Known = Modules.find(File); + auto Known = Modules.find(EntryKey{File}); if (Known == Modules.end()) return nullptr; @@ -72,7 +72,7 @@ ModuleManager::lookupBuffer(StringRef Name) { /*CacheFailure=*/false); if (!Entry) return nullptr; - return std::move(InMemoryBuffers[*Entry]); + return std::move(InMemoryBuffers[EntryKey{*Entry}]); } static bool checkSignature(ASTFileSignature Signature, @@ -133,7 +133,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, } // Check whether we already loaded this module, before - if (ModuleFile *ModuleEntry = Modules.lookup(Entry)) { + if (ModuleFile *ModuleEntry = Modules.lookup(EntryKey{Entry})) { // Check the stored signature. if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr)) return OutOfDate; @@ -213,7 +213,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, return OutOfDate; // We're keeping this module. Store it everywhere. - Module = Modules[Entry] = NewModule.get(); + Module = Modules[EntryKey{Entry}] = NewModule.get(); updateModuleImports(*NewModule, ImportedBy, ImportLoc); @@ -260,7 +260,7 @@ void ModuleManager::removeModules(ModuleIterator First, ModuleMap *modMap) { // Delete the modules and erase them from the various structures. for (ModuleIterator victim = First; victim != Last; ++victim) { - Modules.erase(victim->File); + Modules.erase(EntryKey{victim->File}); if (modMap) { StringRef ModuleName = victim->ModuleName; @@ -279,7 +279,7 @@ ModuleManager::addInMemoryBuffer(StringRef FileName, std::unique_ptr Buffer) { const FileEntry *Entry = FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0); - InMemoryBuffers[Entry] = std::move(Buffer); + InMemoryBuffers[EntryKey{Entry}] = std::move(Buffer); } ModuleManager::VisitState *ModuleManager::allocateVisitState() {