Skip to content

Commit ec1a491

Browse files
committed
Create synthetic symbol names on demand to improve memory consumption and startup times.
This is a resubmission of https://reviews.llvm.org/D105160 after fixing testing issues. This fix was created after profiling the target creation of a large C/C++/ObjC application that contained almost 4,000,000 redacted symbol names. The symbol table parsing code was creating a name for each of these synthetic symbols and adding it to the name indexes. The code was also appending the object file basename to the end of the symbol name, which prevented symbols from different shared libraries from sharing names in the constant string pool. Prior to this fix, this was creating 180MB of "___lldb_unnamed_symbol" symbol names, and it took a long time to generate each name, add it to the string pool, and then add it to the name index. This patch fixes the issue in three ways: (1) it does not add a name to synthetic symbols at creation time, and instead allows the name to be generated dynamically when it is accessed; (2) it does not add synthetic symbol names to the name indexes, but catches this special case at name lookup time. Users won't typically set breakpoints on or look up these synthetic names, but support was added to do the lookup in case it does happen; (3) it removes the object file basename from the generated names to allow the names to be shared in the constant string pool. Prior to this fix, the startup times for a large application were: 12.5 seconds (cold file caches), 8.5 seconds (warm file caches). After this fix: 9.7 seconds (cold file caches), 5.7 seconds (warm file caches). The names of the symbols are auto-generated by appending the symbol's UserID to the end of the "___lldb_unnamed_symbol" string; this is only done when the name is requested from a synthetic symbol that has no name. Differential Revision: https://reviews.llvm.org/D106837
1 parent 64d5b6e commit ec1a491

File tree

10 files changed

+162
-71
lines changed

10 files changed

+162
-71
lines changed

lldb/include/lldb/Symbol/ObjectFile.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -722,8 +722,6 @@ class ObjectFile : public std::enable_shared_from_this<ObjectFile>,
722722
/// false otherwise.
723723
bool SetModulesArchitecture(const ArchSpec &new_arch);
724724

725-
ConstString GetNextSyntheticSymbolName();
726-
727725
static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size,
728726
uint64_t Offset);
729727

lldb/include/lldb/Symbol/Symbol.h

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,20 @@ class Symbol : public SymbolContextScope {
113113
lldb::LanguageType GetLanguage() const {
114114
// TODO: See if there is a way to determine the language for a symbol
115115
// somehow, for now just return our best guess
116-
return m_mangled.GuessLanguage();
116+
return GetMangled().GuessLanguage();
117117
}
118118

119119
void SetID(uint32_t uid) { m_uid = uid; }
120120

121-
Mangled &GetMangled() { return m_mangled; }
121+
Mangled &GetMangled() {
122+
SynthesizeNameIfNeeded();
123+
return m_mangled;
124+
}
122125

123-
const Mangled &GetMangled() const { return m_mangled; }
126+
const Mangled &GetMangled() const {
127+
SynthesizeNameIfNeeded();
128+
return m_mangled;
129+
}
124130

125131
ConstString GetReExportedSymbolName() const;
126132

@@ -149,6 +155,8 @@ class Symbol : public SymbolContextScope {
149155

150156
bool IsSynthetic() const { return m_is_synthetic; }
151157

158+
bool IsSyntheticWithAutoGeneratedName() const;
159+
152160
void SetIsSynthetic(bool b) { m_is_synthetic = b; }
153161

154162
bool GetSizeIsSynthesized() const { return m_size_is_synthesized; }
@@ -166,9 +174,9 @@ class Symbol : public SymbolContextScope {
166174
bool IsTrampoline() const;
167175

168176
bool IsIndirect() const;
169-
177+
170178
bool IsWeak() const { return m_is_weak; }
171-
179+
172180
void SetIsWeak (bool b) { m_is_weak = b; }
173181

174182
bool GetByteSizeIsValid() const { return m_size_is_valid; }
@@ -223,6 +231,10 @@ class Symbol : public SymbolContextScope {
223231

224232
bool ContainsFileAddress(lldb::addr_t file_addr) const;
225233

234+
static llvm::StringRef GetSyntheticSymbolPrefix() {
235+
return "___lldb_unnamed_symbol";
236+
}
237+
226238
protected:
227239
// This is the internal guts of ResolveReExportedSymbol, it assumes
228240
// reexport_name is not null, and that module_spec is valid. We track the
@@ -233,6 +245,8 @@ class Symbol : public SymbolContextScope {
233245
lldb_private::ModuleSpec &module_spec,
234246
lldb_private::ModuleList &seen_modules) const;
235247

248+
void SynthesizeNameIfNeeded() const;
249+
236250
uint32_t m_uid =
237251
UINT32_MAX; // User ID (usually the original symbol table index)
238252
uint16_t m_type_data = 0; // data specific to m_type
@@ -258,7 +272,7 @@ class Symbol : public SymbolContextScope {
258272
// doing name lookups
259273
m_is_weak : 1,
260274
m_type : 6; // Values from the lldb::SymbolType enum.
261-
Mangled m_mangled; // uniqued symbol name/mangled name pair
275+
mutable Mangled m_mangled; // uniqued symbol name/mangled name pair
262276
AddressRange m_addr_range; // Contains the value, or the section offset
263277
// address when the value is an address in a
264278
// section, and the size (if any)

lldb/include/lldb/Symbol/Symtab.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,26 @@ class Symtab {
219219
return false;
220220
}
221221

222+
/// A helper function that looks up full function names.
223+
///
224+
/// We generate unique names for synthetic symbols so that users can look
225+
/// them up by name when needed. But because doing so is uncommon in normal
226+
/// debugger use, we trade off some performance at lookup time for faster
227+
/// symbol table building by detecting these symbols and generating their
228+
/// names lazily, rather than adding them to the normal symbol indexes. This
229+
/// function does the job of first consulting the name indexes, and if that
230+
/// fails it extracts the information it needs from the synthetic name and
231+
/// locates the symbol.
232+
///
233+
/// @param[in] symbol_name The symbol name to search for.
234+
///
235+
/// @param[out] indexes The vector of symbol indexes to update with results.
236+
///
237+
/// @returns The number of indexes added to the index vector. Zero if no
238+
/// matches were found.
239+
uint32_t GetNameIndexes(ConstString symbol_name,
240+
std::vector<uint32_t> &indexes);
241+
222242
void SymbolIndicesToSymbolContextList(std::vector<uint32_t> &symbol_indexes,
223243
SymbolContextList &sc_list);
224244

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) {
18801880
unified_section_list.AddSection(symtab_section_sp);
18811881
}
18821882
}
1883-
}
1883+
}
18841884
}
18851885

18861886
std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() {
@@ -2813,31 +2813,37 @@ Symtab *ObjectFileELF::GetSymtab() {
28132813
if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress(
28142814
entry_point_file_addr)) {
28152815
uint64_t symbol_id = m_symtab_up->GetNumSymbols();
2816-
Symbol symbol(symbol_id,
2817-
GetNextSyntheticSymbolName().GetCString(), // Symbol name.
2818-
eSymbolTypeCode, // Type of this symbol.
2819-
true, // Is this globally visible?
2820-
false, // Is this symbol debug info?
2821-
false, // Is this symbol a trampoline?
2822-
true, // Is this symbol artificial?
2823-
entry_point_addr.GetSection(), // Section where this
2824-
// symbol is defined.
2825-
0, // Offset in section or symbol value.
2826-
0, // Size.
2827-
false, // Size is valid.
2828-
false, // Contains linker annotations?
2829-
0); // Symbol flags.
2830-
m_symtab_up->AddSymbol(symbol);
2816+
// Don't set the name for any synthetic symbols, the Symbol
2817+
// object will generate one if needed when the name is accessed
2818+
// via accessors.
2819+
SectionSP section_sp = entry_point_addr.GetSection();
2820+
Symbol symbol(
2821+
/*symID=*/symbol_id,
2822+
/*name=*/llvm::StringRef(), // Name will be auto generated.
2823+
/*type=*/eSymbolTypeCode,
2824+
/*external=*/true,
2825+
/*is_debug=*/false,
2826+
/*is_trampoline=*/false,
2827+
/*is_artificial=*/true,
2828+
/*section_sp=*/section_sp,
2829+
/*offset=*/0,
2830+
/*size=*/0, // FDE can span multiple symbols so don't use its size.
2831+
/*size_is_valid=*/false,
2832+
/*contains_linker_annotations=*/false,
2833+
/*flags=*/0);
28312834
// When the entry point is arm thumb we need to explicitly set its
28322835
// class address to reflect that. This is important because expression
28332836
// evaluation relies on correctly setting a breakpoint at this
28342837
// address.
28352838
if (arch.GetMachine() == llvm::Triple::arm &&
2836-
(entry_point_file_addr & 1))
2839+
(entry_point_file_addr & 1)) {
2840+
symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1);
28372841
m_address_class_map[entry_point_file_addr ^ 1] =
28382842
AddressClass::eCodeAlternateISA;
2839-
else
2843+
} else {
28402844
m_address_class_map[entry_point_file_addr] = AddressClass::eCode;
2845+
}
2846+
m_symtab_up->AddSymbol(symbol);
28412847
}
28422848
}
28432849

@@ -2917,22 +2923,24 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table,
29172923
section_list->FindSectionContainingFileAddress(file_addr);
29182924
if (section_sp) {
29192925
addr_t offset = file_addr - section_sp->GetFileAddress();
2920-
const char *symbol_name = GetNextSyntheticSymbolName().GetCString();
29212926
uint64_t symbol_id = ++last_symbol_id;
2927+
// Don't set the name for any synthetic symbols, the Symbol
2928+
// object will generate one if needed when the name is accessed
2929+
// via accessors.
29222930
Symbol eh_symbol(
2923-
symbol_id, // Symbol table index.
2924-
symbol_name, // Symbol name.
2925-
eSymbolTypeCode, // Type of this symbol.
2926-
true, // Is this globally visible?
2927-
false, // Is this symbol debug info?
2928-
false, // Is this symbol a trampoline?
2929-
true, // Is this symbol artificial?
2930-
section_sp, // Section in which this symbol is defined or null.
2931-
offset, // Offset in section or symbol value.
2932-
0, // Size: Don't specify the size as an FDE can
2933-
false, // Size is valid: cover multiple symbols.
2934-
false, // Contains linker annotations?
2935-
0); // Symbol flags.
2931+
/*symID=*/symbol_id,
2932+
/*name=*/llvm::StringRef(), // Name will be auto generated.
2933+
/*type=*/eSymbolTypeCode,
2934+
/*external=*/true,
2935+
/*is_debug=*/false,
2936+
/*is_trampoline=*/false,
2937+
/*is_artificial=*/true,
2938+
/*section_sp=*/section_sp,
2939+
/*offset=*/offset,
2940+
/*size=*/0, // FDE can span multiple symbols so don't use its size.
2941+
/*size_is_valid=*/false,
2942+
/*contains_linker_annotations=*/false,
2943+
/*flags=*/0);
29362944
new_symbols.push_back(eh_symbol);
29372945
}
29382946
}

lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,8 +4697,10 @@ size_t ObjectFileMachO::ParseSymtab() {
46974697
symbol_byte_size = section_end_file_addr - symbol_file_addr;
46984698
}
46994699
sym[sym_idx].SetID(synthetic_sym_id++);
4700-
sym[sym_idx].GetMangled().SetDemangledName(
4701-
GetNextSyntheticSymbolName());
4700+
// Don't set the name for any synthetic symbols, the Symbol
4701+
// object will generate one if needed when the name is accessed
4702+
// via accessors.
4703+
sym[sym_idx].GetMangled().SetDemangledName(ConstString());
47024704
sym[sym_idx].SetType(eSymbolTypeCode);
47034705
sym[sym_idx].SetIsSynthetic(true);
47044706
sym[sym_idx].GetAddressRef() = symbol_addr;

lldb/source/Symbol/ObjectFile.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -616,16 +616,6 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name,
616616
return symbol_type_hint;
617617
}
618618

619-
ConstString ObjectFile::GetNextSyntheticSymbolName() {
620-
llvm::SmallString<256> name;
621-
llvm::raw_svector_ostream os(name);
622-
ConstString file_name = GetModule()->GetFileSpec().GetFilename();
623-
++m_synthetic_symbol_idx;
624-
os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
625-
<< file_name.GetStringRef();
626-
return ConstString(os.str());
627-
}
628-
629619
std::vector<ObjectFile::LoadableData>
630620
ObjectFile::GetLoadableData(Target &target) {
631621
std::vector<LoadableData> loadables;

lldb/source/Symbol/Symbol.cpp

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type,
5656
m_size_is_synthesized(false),
5757
m_size_is_valid(size_is_valid || range.GetByteSize() > 0),
5858
m_demangled_is_synthesized(false),
59-
m_contains_linker_annotations(contains_linker_annotations),
60-
m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range),
59+
m_contains_linker_annotations(contains_linker_annotations),
60+
m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range),
6161
m_flags(flags) {}
6262

6363
Symbol::Symbol(const Symbol &rhs)
@@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const {
119119
}
120120

121121
ConstString Symbol::GetDisplayName() const {
122-
return m_mangled.GetDisplayDemangledName();
122+
return GetMangled().GetDisplayDemangledName();
123123
}
124124

125125
ConstString Symbol::GetReExportedSymbolName() const {
@@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level,
202202
s->Printf(", value = 0x%16.16" PRIx64,
203203
m_addr_range.GetBaseAddress().GetOffset());
204204
}
205-
ConstString demangled = m_mangled.GetDemangledName();
205+
ConstString demangled = GetMangled().GetDemangledName();
206206
if (demangled)
207207
s->Printf(", name=\"%s\"", demangled.AsCString());
208208
if (m_mangled.GetMangledName())
@@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index,
218218
// Make sure the size of the symbol is up to date before dumping
219219
GetByteSize();
220220

221-
ConstString name = m_mangled.GetName(name_preference);
221+
ConstString name = GetMangled().GetName(name_preference);
222222
if (ValueIsAddress()) {
223223
if (!m_addr_range.GetBaseAddress().Dump(s, nullptr,
224224
Address::DumpStyleFileAddress))
@@ -330,9 +330,11 @@ uint32_t Symbol::GetPrologueByteSize() {
330330
}
331331

332332
bool Symbol::Compare(ConstString name, SymbolType type) const {
333-
if (type == eSymbolTypeAny || m_type == type)
334-
return m_mangled.GetMangledName() == name ||
335-
m_mangled.GetDemangledName() == name;
333+
if (type == eSymbolTypeAny || m_type == type) {
334+
const Mangled &mangled = GetMangled();
335+
return mangled.GetMangledName() == name ||
336+
mangled.GetDemangledName() == name;
337+
}
336338
return false;
337339
}
338340

@@ -495,10 +497,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const {
495497
return LLDB_INVALID_ADDRESS;
496498
}
497499

498-
ConstString Symbol::GetName() const { return m_mangled.GetName(); }
500+
ConstString Symbol::GetName() const { return GetMangled().GetName(); }
499501

500502
ConstString Symbol::GetNameNoArguments() const {
501-
return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments);
503+
return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments);
502504
}
503505

504506
lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const {
@@ -565,3 +567,30 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor,
565567
bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const {
566568
return m_addr_range.ContainsFileAddress(file_addr);
567569
}
570+
571+
bool Symbol::IsSyntheticWithAutoGeneratedName() const {
572+
if (!IsSynthetic())
573+
return false;
574+
if (!m_mangled)
575+
return true;
576+
ConstString demangled = m_mangled.GetDemangledName();
577+
return demangled.GetStringRef().startswith(GetSyntheticSymbolPrefix());
578+
}
579+
580+
void Symbol::SynthesizeNameIfNeeded() const {
581+
if (m_is_synthetic && !m_mangled) {
582+
// Synthetic symbol names don't mean anything, but they do uniquely
583+
// identify individual symbols so we give them a unique name. The name
584+
// starts with the synthetic symbol prefix, followed by a unique number.
585+
// Typically the UserID of a real symbol is the symbol table index of the
586+
// symbol in the object file's symbol table(s), so it will be the same
587+
// every time you read in the object file. We want the same persistence for
588+
// synthetic symbols so that users can identify them across multiple debug
589+
// sessions, to understand crashes in those symbols and to reliably set
590+
// breakpoints on them.
591+
llvm::SmallString<256> name;
592+
llvm::raw_svector_ostream os(name);
593+
os << GetSyntheticSymbolPrefix() << GetID();
594+
m_mangled.SetDemangledName(ConstString(os.str()));
595+
}
596+
}

0 commit comments

Comments
 (0)