From 73988691058832028478e1fdb78da6c7bddb7f2c Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Tue, 12 Aug 2025 13:12:55 -0700 Subject: [PATCH 1/3] Reland "[NFC][lldb] Speed up lookup of shared modules" (229d860) (#152607) The previous commit was reverted because it broke a test on the bots. Original commit message: By profiling LLDB debugging a Swift application without a dSYM and a large number of .o files, I identified that querying shared modules was the biggest bottleneck when running "frame variable", and Clang types need to be searched. One of the reasons for that slowness is that the shared module list can grow very large, and the search through it is O(n). To solve this issue, this patch adds a new hashmap to the shared module list whose key is the name of the module, and the value is all the modules that share that name. This should speed up any search where the query contains the module name. rdar://156753350 (cherry picked from commit bd1b1a5e1a38b36eff8c1c926af7e025e97ab611) --- lldb/source/Core/ModuleList.cpp | 224 ++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 2f5428d27252f..3f50f62b66e51 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -1017,6 +1017,230 @@ size_t ModuleList::GetIndexForModule(const Module *module) const { } namespace { +/// A wrapper around ModuleList for shared modules. Provides fast lookups for +/// file-based ModuleSpec queries. +class SharedModuleList { +public: + /// Finds all the modules matching the module_spec, and adds them to \p + /// matching_module_list. + void FindModules(const ModuleSpec &module_spec, + ModuleList &matching_module_list) const { + std::lock_guard guard(GetMutex()); + // Try map first for performance - if found, skip expensive full list + // search. 
+ FindModulesInMap(module_spec, matching_module_list); + if (!matching_module_list.IsEmpty()) + return; + m_list.FindModules(module_spec, matching_module_list); + // Assert that if modules were found in the list but not in the map, it's + // because the module_spec has no filename or the found module has a + // different filename. For example, when searching by UUID and finding a + // module with an alias. + assert((matching_module_list.IsEmpty() || + module_spec.GetFileSpec().GetFilename().IsEmpty() || + module_spec.GetFileSpec().GetFilename() != + matching_module_list.GetModuleAtIndex(0) + ->GetFileSpec() + .GetFilename()) && + "Search by name not found in SharedModuleList's map"); + } + + ModuleSP FindModule(const Module &module) { + + std::lock_guard guard(GetMutex()); + if (ModuleSP result = FindModuleInMap(module)) + return result; + return m_list.FindModule(&module); + } + + // UUID searches bypass map since UUIDs aren't indexed by filename. + ModuleSP FindModule(const UUID &uuid) const { + return m_list.FindModule(uuid); + } + + void Append(const ModuleSP &module_sp, bool use_notifier) { + if (!module_sp) + return; + std::lock_guard guard(GetMutex()); + m_list.Append(module_sp, use_notifier); + AddToMap(module_sp); + } + + size_t RemoveOrphans(bool mandatory) { + std::unique_lock lock(GetMutex(), std::defer_lock); + if (mandatory) { + lock.lock(); + } else { + if (!lock.try_lock()) + return 0; + } + size_t total_count = 0; + size_t run_count; + do { + // Remove indexed orphans first, then remove non-indexed orphans. This + // order is important because the shared count will be different if a + // module is indexed or not. + run_count = RemoveOrphansFromMapAndList(); + run_count += m_list.RemoveOrphans(mandatory); + total_count += run_count; + // Because removing orphans might make new orphans, remove from both + // containers until a fixed-point is reached. 
+ } while (run_count != 0); + + return total_count; + } + + bool Remove(const ModuleSP &module_sp, bool use_notifier = true) { + if (!module_sp) + return false; + std::lock_guard guard(GetMutex()); + RemoveFromMap(*module_sp.get()); + return m_list.Remove(module_sp, use_notifier); + } + + void ReplaceEquivalent(const ModuleSP &module_sp, + llvm::SmallVectorImpl *old_modules) { + std::lock_guard guard(GetMutex()); + m_list.ReplaceEquivalent(module_sp, old_modules); + ReplaceEquivalentInMap(module_sp); + } + + bool RemoveIfOrphaned(const Module *module_ptr) { + std::lock_guard guard(GetMutex()); + RemoveFromMap(*module_ptr, /*if_orphaned=*/true); + return m_list.RemoveIfOrphaned(module_ptr); + } + + std::recursive_mutex &GetMutex() const { return m_list.GetMutex(); } + +private: + ModuleSP FindModuleInMap(const Module &module) const { + if (!module.GetFileSpec().GetFilename()) + return ModuleSP(); + ConstString name = module.GetFileSpec().GetFilename(); + auto it = m_name_to_modules.find(name); + if (it == m_name_to_modules.end()) + return ModuleSP(); + const llvm::SmallVectorImpl &vector = it->second; + for (const ModuleSP &module_sp : vector) { + if (module_sp.get() == &module) + return module_sp; + } + return ModuleSP(); + } + + void FindModulesInMap(const ModuleSpec &module_spec, + ModuleList &matching_module_list) const { + auto it = m_name_to_modules.find(module_spec.GetFileSpec().GetFilename()); + if (it == m_name_to_modules.end()) + return; + const llvm::SmallVectorImpl &vector = it->second; + for (const ModuleSP &module_sp : vector) { + if (module_sp->MatchesModuleSpec(module_spec)) + matching_module_list.Append(module_sp); + } + } + + void AddToMap(const ModuleSP &module_sp) { + ConstString name = module_sp->GetFileSpec().GetFilename(); + if (name.IsEmpty()) + return; + m_name_to_modules[name].push_back(module_sp); + } + + void RemoveFromMap(const Module &module, bool if_orphaned = false) { + ConstString name = module.GetFileSpec().GetFilename(); + if 
(!m_name_to_modules.contains(name)) + return; + llvm::SmallVectorImpl &vec = m_name_to_modules[name]; + for (auto *it = vec.begin(); it != vec.end(); ++it) { + if (it->get() == &module) { + if (!if_orphaned || it->use_count() == kUseCountOrphaned) { + vec.erase(it); + break; + } + } + } + } + + void ReplaceEquivalentInMap(const ModuleSP &module_sp) { + RemoveEquivalentModulesFromMap(module_sp); + AddToMap(module_sp); + } + + void RemoveEquivalentModulesFromMap(const ModuleSP &module_sp) { + ConstString name = module_sp->GetFileSpec().GetFilename(); + if (name.IsEmpty()) + return; + + auto it = m_name_to_modules.find(name); + if (it == m_name_to_modules.end()) + return; + + // First remove any equivalent modules. Equivalent modules are modules + // whose path, platform path and architecture match. + ModuleSpec equivalent_module_spec(module_sp->GetFileSpec(), + module_sp->GetArchitecture()); + equivalent_module_spec.GetPlatformFileSpec() = + module_sp->GetPlatformFileSpec(); + + llvm::SmallVectorImpl &vec = it->second; + llvm::erase_if(vec, [&equivalent_module_spec](ModuleSP &element) { + return element->MatchesModuleSpec(equivalent_module_spec); + }); + } + + /// Remove orphans from the vector and return the removed modules. + ModuleList RemoveOrphansFromVector(llvm::SmallVectorImpl &vec) { + // remove_if moves the elements that match the condition to the end of the + // container, and returns an iterator to the first element that was moved. + auto *to_remove_start = llvm::remove_if(vec, [](const ModuleSP &module) { + return module.use_count() == kUseCountOrphaned; + }); + + ModuleList to_remove; + for (ModuleSP *it = to_remove_start; it != vec.end(); ++it) + to_remove.Append(*it); + + vec.erase(to_remove_start, vec.end()); + return to_remove; + } + + /// Remove orphans that exist in both the map and list. This does not remove + /// any orphans that exist exclusively on the list. + /// + /// The mutex must be locked by the caller. 
+ int RemoveOrphansFromMapAndList() { + // Modules might hold shared pointers to other modules, so removing one + // module might orphan other modules. Keep removing modules until + // there are no further modules that can be removed. + int remove_count = 0; + int previous_remove_count; + do { + previous_remove_count = remove_count; + for (auto &[name, vec] : m_name_to_modules) { + if (vec.empty()) + continue; + ModuleList to_remove = RemoveOrphansFromVector(vec); + remove_count += to_remove.GetSize(); + m_list.Remove(to_remove); + } + // Break when fixed-point is reached. + } while (previous_remove_count != remove_count); + + return remove_count; + } + + ModuleList m_list; + + /// A hash map from a module's filename to all the modules that share that + /// filename, for fast module lookups by name. + llvm::DenseMap> m_name_to_modules; + + /// The use count of a module held only by m_list and m_name_to_modules. + static constexpr long kUseCountOrphaned = 2; +}; + struct SharedModuleListInfo { ModuleList module_list; ModuleListProperties module_list_properties; From 84d443509d53c2a0dfc7af6ef4a7dbea1e1234db Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Wed, 8 Oct 2025 15:35:24 -0700 Subject: [PATCH 2/3] [lldb] Fix use after free on ModuleList::RemoveSharedModuleIfOrphaned (#155331) This fixes a potential use after free where ModuleList::RemoveSharedModuleIfOrphaned -> SharedModuleList::RemoveIfOrphaned -> SharedModuleList::RemoveFromMap would potentially dereference a freed pointer. This fixes it by not calling ModuleList::RemoveSharedModuleIfOrphaned at all if the pointer was just freed. 
(cherry picked from commit 397181d5c191cf2f7ba3b4408383da6e5a149052) --- lldb/include/lldb/Core/ModuleList.h | 7 +++- lldb/source/Core/ModuleList.cpp | 59 +++++++++++++++++------------ lldb/source/Target/Target.cpp | 4 +- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h index 1109572e27d21..13d819ed690ed 100644 --- a/lldb/include/lldb/Core/ModuleList.h +++ b/lldb/include/lldb/Core/ModuleList.h @@ -477,7 +477,7 @@ class ModuleList { size_t Remove(ModuleList &module_list); - bool RemoveIfOrphaned(const Module *module_ptr); + bool RemoveIfOrphaned(const lldb::ModuleWP module_ptr); size_t RemoveOrphans(bool mandatory); @@ -531,7 +531,7 @@ class ModuleList { static size_t RemoveOrphanSharedModules(bool mandatory); - static bool RemoveSharedModuleIfOrphaned(const Module *module_ptr); + static bool RemoveSharedModuleIfOrphaned(const lldb::ModuleWP module_ptr); /// Applies 'callback' to each module in this ModuleList. /// If 'callback' returns false, iteration terminates. @@ -575,6 +575,9 @@ class ModuleList { Notifier *m_notifier = nullptr; + /// An orphaned module that lives only in the ModuleList has a count of 1. 
+ static constexpr long kUseCountModuleListOrphaned = 1; + public: typedef LockingAdaptedIterable ModuleIterable; diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 3f50f62b66e51..e1eaf510bc172 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -503,17 +503,20 @@ bool ModuleList::ReplaceModule(const lldb::ModuleSP &old_module_sp, return true; } -bool ModuleList::RemoveIfOrphaned(const Module *module_ptr) { - if (module_ptr) { +bool ModuleList::RemoveIfOrphaned(const ModuleWP module_wp) { + if (auto module_sp = module_wp.lock()) { std::lock_guard guard(m_modules_mutex); collection::iterator pos, end = m_modules.end(); for (pos = m_modules.begin(); pos != end; ++pos) { - if (pos->get() == module_ptr) { - if (pos->use_count() == 1) { + if (pos->get() == module_sp.get()) { + // Since module_sp increases the refcount by 1, the use count should be + // the regular use count + 1. + constexpr long kUseCountOrphaned = kUseCountModuleListOrphaned + 1; + if (pos->use_count() == kUseCountOrphaned) { pos = RemoveImpl(pos); return true; - } else - return false; + } + return false; } } } @@ -540,7 +543,7 @@ size_t ModuleList::RemoveOrphans(bool mandatory) { made_progress = false; collection::iterator pos = m_modules.begin(); while (pos != m_modules.end()) { - if (pos->use_count() == 1) { + if (pos->use_count() == kUseCountModuleListOrphaned) { pos = RemoveImpl(pos); ++remove_count; // We did make progress. 
@@ -1094,7 +1097,7 @@ class SharedModuleList { if (!module_sp) return false; std::lock_guard guard(GetMutex()); - RemoveFromMap(*module_sp.get()); + RemoveFromMap(module_sp); return m_list.Remove(module_sp, use_notifier); } @@ -1105,10 +1108,10 @@ class SharedModuleList { ReplaceEquivalentInMap(module_sp); } - bool RemoveIfOrphaned(const Module *module_ptr) { + bool RemoveIfOrphaned(const ModuleWP module_wp) { std::lock_guard guard(GetMutex()); - RemoveFromMap(*module_ptr, /*if_orphaned=*/true); - return m_list.RemoveIfOrphaned(module_ptr); + RemoveFromMap(module_wp, /*if_orphaned=*/true); + return m_list.RemoveIfOrphaned(module_wp); } std::recursive_mutex &GetMutex() const { return m_list.GetMutex(); } @@ -1148,16 +1151,22 @@ class SharedModuleList { m_name_to_modules[name].push_back(module_sp); } - void RemoveFromMap(const Module &module, bool if_orphaned = false) { - ConstString name = module.GetFileSpec().GetFilename(); - if (!m_name_to_modules.contains(name)) - return; - llvm::SmallVectorImpl &vec = m_name_to_modules[name]; - for (auto *it = vec.begin(); it != vec.end(); ++it) { - if (it->get() == &module) { - if (!if_orphaned || it->use_count() == kUseCountOrphaned) { - vec.erase(it); - break; + void RemoveFromMap(const ModuleWP module_wp, bool if_orphaned = false) { + if (auto module_sp = module_wp.lock()) { + ConstString name = module_sp->GetFileSpec().GetFilename(); + if (!m_name_to_modules.contains(name)) + return; + llvm::SmallVectorImpl &vec = m_name_to_modules[name]; + for (auto *it = vec.begin(); it != vec.end(); ++it) { + if (it->get() == module_sp.get()) { + // Since module_sp increases the refcount by 1, the use count should + // be the regular use count + 1. 
+ constexpr long kUseCountOrphaned = + kUseCountSharedModuleListOrphaned + 1; + if (!if_orphaned || it->use_count() == kUseCountOrphaned) { + vec.erase(it); + break; + } } } } @@ -1195,7 +1204,7 @@ class SharedModuleList { // remove_if moves the elements that match the condition to the end of the // container, and returns an iterator to the first element that was moved. auto *to_remove_start = llvm::remove_if(vec, [](const ModuleSP &module) { - return module.use_count() == kUseCountOrphaned; + return module.use_count() == kUseCountSharedModuleListOrphaned; }); ModuleList to_remove; @@ -1238,7 +1247,7 @@ class SharedModuleList { llvm::DenseMap> m_name_to_modules; /// The use count of a module held only by m_list and m_name_to_modules. - static constexpr long kUseCountOrphaned = 2; + static constexpr long kUseCountSharedModuleListOrphaned = 2; }; struct SharedModuleListInfo { @@ -1540,8 +1549,8 @@ bool ModuleList::RemoveSharedModule(lldb::ModuleSP &module_sp) { return GetSharedModuleList().Remove(module_sp); } -bool ModuleList::RemoveSharedModuleIfOrphaned(const Module *module_ptr) { - return GetSharedModuleList().RemoveIfOrphaned(module_ptr); +bool ModuleList::RemoveSharedModuleIfOrphaned(const ModuleWP module_wp) { + return GetSharedModuleList().RemoveIfOrphaned(module_wp); } bool ModuleList::LoadScriptingResourcesInTarget(Target *target, diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index a3414c89375aa..80ee375e07503 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2596,9 +2596,9 @@ ModuleSP Target::GetOrCreateModule(const ModuleSpec &orig_module_spec, m_images.Append(module_sp, notify); for (ModuleSP &old_module_sp : replaced_modules) { - Module *old_module_ptr = old_module_sp.get(); + auto old_module_wp = old_module_sp->weak_from_this(); old_module_sp.reset(); - ModuleList::RemoveSharedModuleIfOrphaned(old_module_ptr); + ModuleList::RemoveSharedModuleIfOrphaned(old_module_wp); } } else 
module_sp.reset(); From ab65bf8808600b85aaa953108976bbfedce2242f Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Thu, 9 Oct 2025 10:39:45 -0700 Subject: [PATCH 3/3] [lldb] Actually use new SharedModuleList class (#162574) Now that the use after free bug has been fixed (397181d5c), actually use the new SharedModuleList class. (cherry picked from commit 8523c6a448c3f01396b805aca30376072c469845) --- lldb/source/Core/ModuleList.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index e1eaf510bc172..9e58f2f3dff2c 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -1251,7 +1251,7 @@ class SharedModuleList { }; struct SharedModuleListInfo { - ModuleList module_list; + SharedModuleList module_list; ModuleListProperties module_list_properties; }; } @@ -1269,7 +1269,7 @@ static SharedModuleListInfo &GetSharedModuleListInfo() return *g_shared_module_list_info; } -static ModuleList &GetSharedModuleList() { +static SharedModuleList &GetSharedModuleList() { return GetSharedModuleListInfo().module_list; } @@ -1279,8 +1279,8 @@ ModuleListProperties &ModuleList::GetGlobalModuleListProperties() { bool ModuleList::ModuleIsInCache(const Module *module_ptr) { if (module_ptr) { - ModuleList &shared_module_list = GetSharedModuleList(); - return shared_module_list.FindModule(module_ptr).get() != nullptr; + SharedModuleList &shared_module_list = GetSharedModuleList(); + return shared_module_list.FindModule(*module_ptr).get() != nullptr; } return false; } @@ -1303,9 +1303,8 @@ ModuleList::GetSharedModule(const ModuleSpec &module_spec, ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr, llvm::SmallVectorImpl *old_modules, bool *did_create_ptr, bool always_create) { - ModuleList &shared_module_list = GetSharedModuleList(); - std::lock_guard guard( - shared_module_list.m_modules_mutex); + SharedModuleList &shared_module_list = 
GetSharedModuleList(); + std::lock_guard guard(shared_module_list.GetMutex()); char path[PATH_MAX]; Status error;