Skip to content

Commit

Permalink
MachObjectWriter: optimize the string table for common suffixes
Browse files Browse the repository at this point in the history
This is a follow-up to r207670 (ELF) and r218636 (COFF).

Differential Revision: http://reviews.llvm.org/D5622

llvm-svn: 219126
  • Loading branch information
zmodem committed Oct 6, 2014
1 parent 1160333 commit 1b1a399
Show file tree
Hide file tree
Showing 28 changed files with 298 additions and 302 deletions.
6 changes: 3 additions & 3 deletions llvm/include/llvm/MC/MCMachObjectWriter.h
Expand Up @@ -14,6 +14,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MachO.h"
#include <vector>
Expand Down Expand Up @@ -104,7 +105,7 @@ class MachObjectWriter : public MCObjectWriter {
/// @name Symbol Table Data
/// @{

SmallString<256> StringTable;
StringTableBuilder StringTable;
std::vector<MachSymbolData> LocalSymbolData;
std::vector<MachSymbolData> ExternalSymbolData;
std::vector<MachSymbolData> UndefinedSymbolData;
Expand Down Expand Up @@ -239,8 +240,7 @@ class MachObjectWriter : public MCObjectWriter {

/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
void ComputeSymbolTable(MCAssembler &Asm,
std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData);
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/MC/StringTableBuilder.h
Expand Up @@ -31,7 +31,8 @@ class StringTableBuilder {

enum Kind {
ELF,
WinCOFF
WinCOFF,
MachO
};

/// \brief Analyze the strings and build the final table. No more strings can
Expand Down
67 changes: 24 additions & 43 deletions llvm/lib/MC/MachObjectWriter.cpp
Expand Up @@ -525,15 +525,10 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
}

/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
/// \param StringIndexMap [out] - Map from symbol names to offsets in the
/// string table.
void MachObjectWriter::
ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData) {
void MachObjectWriter::ComputeSymbolTable(
MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData) {
// Build section lookup table.
DenseMap<const MCSection*, uint8_t> SectionIndexMap;
unsigned Index = 1;
Expand All @@ -542,37 +537,34 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
SectionIndexMap[&it->getSection()] = Index;
assert(Index <= 256 && "Too many sections!");

// Index 0 is always the empty string.
StringMap<uint64_t> StringIndexMap;
StringTable += '\x00';
// Build the string table.
for (MCSymbolData &SD : Asm.symbols()) {
const MCSymbol &Symbol = SD.getSymbol();
if (!Asm.isSymbolLinkerVisible(Symbol))
continue;

StringTable.add(Symbol.getName());
}
StringTable.finalize(StringTableBuilder::MachO);

// Build the symbol arrays and the string table, but only for non-local
// symbols.
// Build the symbol arrays but only for non-local symbols.
//
// The particular order that we collect the symbols and create the string
// table, then sort the symbols is chosen to match 'as'. Even though it
// doesn't matter for correctness, this is important for letting us diff .o
// files.
// The particular order that we collect and then sort the symbols is chosen to
// match 'as'. Even though it doesn't matter for correctness, this is
// important for letting us diff .o files.
for (MCSymbolData &SD : Asm.symbols()) {
const MCSymbol &Symbol = SD.getSymbol();

// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
if (!Asm.isSymbolLinkerVisible(Symbol))
continue;

if (!SD.isExternal() && !Symbol.isUndefined())
continue;

uint64_t &Entry = StringIndexMap[Symbol.getName()];
if (!Entry) {
Entry = StringTable.size();
StringTable += Symbol.getName();
StringTable += '\x00';
}

MachSymbolData MSD;
MSD.SymbolData = &SD;
MSD.StringIndex = Entry;
MSD.StringIndex = StringTable.getOffset(Symbol.getName());

if (Symbol.isUndefined()) {
MSD.SectionIndex = 0;
Expand All @@ -592,22 +584,15 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
const MCSymbol &Symbol = SD.getSymbol();

// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
if (!Asm.isSymbolLinkerVisible(Symbol))
continue;

if (SD.isExternal() || Symbol.isUndefined())
continue;

uint64_t &Entry = StringIndexMap[Symbol.getName()];
if (!Entry) {
Entry = StringTable.size();
StringTable += Symbol.getName();
StringTable += '\x00';
}

MachSymbolData MSD;
MSD.SymbolData = &SD;
MSD.StringIndex = Entry;
MSD.StringIndex = StringTable.getOffset(Symbol.getName());

if (Symbol.isAbsolute()) {
MSD.SectionIndex = 0;
Expand All @@ -631,10 +616,6 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);

// The string table is padded to a multiple of 4.
while (StringTable.size() % 4)
StringTable += '\x00';
}

void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
Expand Down Expand Up @@ -683,7 +664,7 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
markAbsoluteVariableSymbols(Asm, Layout);

// Compute symbol table information and bind symbol indices.
ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
}

Expand Down Expand Up @@ -922,7 +903,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist));
WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
StringTableOffset, StringTable.size());
StringTableOffset, StringTable.data().size());

WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
FirstExternalSymbol, NumExternalSymbols,
Expand Down Expand Up @@ -1028,7 +1009,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
WriteNlist(UndefinedSymbolData[i], Layout);

// Write the string table.
OS << StringTable.str();
OS << StringTable.data();
}
}

Expand Down
20 changes: 17 additions & 3 deletions llvm/lib/MC/StringTableBuilder.cpp
Expand Up @@ -36,12 +36,16 @@ void StringTableBuilder::finalize(Kind kind) {

std::sort(Strings.begin(), Strings.end(), compareBySuffix);

if (kind == ELF) {
switch (kind) {
case ELF:
case MachO:
// Start the table with a NUL byte.
StringTable += '\x00';
} else if (kind == WinCOFF) {
break;
case WinCOFF:
// Make room to write the table size later.
StringTable.append(4, '\x00');
break;
}

StringRef Previous;
Expand All @@ -60,11 +64,21 @@ void StringTableBuilder::finalize(Kind kind) {
Previous = s;
}

if (kind == WinCOFF) {
switch (kind) {
case ELF:
break;
case MachO:
// Pad to multiple of 4.
while (StringTable.size() % 4)
StringTable += '\x00';
break;
case WinCOFF:
// Write the table size in the first word.
assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
uint32_t size = static_cast<uint32_t>(StringTable.size());
support::endian::write<uint32_t, support::little, support::unaligned>(
StringTable.data(), size);
break;
}
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/MachO/ARM/aliased-symbols.s
Expand Up @@ -70,7 +70,7 @@ Ltmp0:
// alias_to_local is an alias, but what it points to has no
// MachO representation. We must resolve it.
// CHECK: Symbol {
// CHECK-NEXT: Name: alias_to_local (37)
// CHECK-NEXT: Name: alias_to_local (42)
// CHECK-NEXT: Type: Section (0xE)
// CHECK-NEXT: Section: (0x0)
// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
Expand All @@ -93,7 +93,7 @@ Ltmp0:

// var1 was another alias to an unknown variable. Not extern this time.
// CHECK: Symbol {
// CHECK-NEXT: Name: var1 (1)
// CHECK-NEXT: Name: var1 (89)
// CHECK-NEXT: Type: Indirect (0xA)
// CHECK-NEXT: Section: (0x0)
// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/MC/MachO/ARM/darwin-ARM-reloc.s
Expand Up @@ -110,18 +110,18 @@ Lsc0_0:
@ CHECK: ('nsyms', 4)
@ CHECK: ('stroff', 488)
@ CHECK: ('strsize', 24)
@ CHECK: ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
@ CHECK: ('_string_data', '\x00_printf\x00_f1\x00_f0\x00_d0\x00\x00\x00\x00')
@ CHECK: ('_symbols', [
@ CHECK: # Symbol 0
@ CHECK: (('n_strx', 9)
@ CHECK: (('n_strx', 13)
@ CHECK: ('n_type', 0xe)
@ CHECK: ('n_sect', 1)
@ CHECK: ('n_desc', 0)
@ CHECK: ('n_value', 0)
@ CHECK: ('_string', '_f0')
@ CHECK: ),
@ CHECK: # Symbol 1
@ CHECK: (('n_strx', 13)
@ CHECK: (('n_strx', 9)
@ CHECK: ('n_type', 0xe)
@ CHECK: ('n_sect', 1)
@ CHECK: ('n_desc', 0)
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/MC/MachO/absolute.s
Expand Up @@ -63,66 +63,66 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
// CHECK: ('nsyms', 8)
// CHECK: ('stroff', 420)
// CHECK: ('strsize', 84)
// CHECK: ('_string_data', '\x00foo_set1_global\x00foo_set2_global\x00_bar\x00_foo\x00foo_set1\x00foo_set2\x00foo_equals\x00foo_equals2\x00')
// CHECK: ('_string_data', '\x00foo_equals\x00_bar\x00_foo\x00foo_set2_global\x00foo_set1_global\x00foo_set2\x00foo_equals2\x00foo_set1\x00')
// CHECK: ('_symbols', [
// CHECK: # Symbol 0
// CHECK: (('n_strx', 33)
// CHECK: (('n_strx', 12)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', '_bar')
// CHECK: ),
// CHECK: # Symbol 1
// CHECK: (('n_strx', 38)
// CHECK: (('n_strx', 17)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 1)
// CHECK: ('_string', '_foo')
// CHECK: ),
// CHECK: # Symbol 2
// CHECK: (('n_strx', 43)
// CHECK: (('n_strx', 75)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 32)
// CHECK: ('n_value', 4294901761)
// CHECK: ('_string', 'foo_set1')
// CHECK: ),
// CHECK: # Symbol 3
// CHECK: (('n_strx', 52)
// CHECK: (('n_strx', 54)
// CHECK: ('n_type', 0x2)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 32)
// CHECK: ('n_value', 4294901761)
// CHECK: ('_string', 'foo_set2')
// CHECK: ),
// CHECK: # Symbol 4
// CHECK: (('n_strx', 61)
// CHECK: (('n_strx', 1)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 4294901761)
// CHECK: ('_string', 'foo_equals')
// CHECK: ),
// CHECK: # Symbol 5
// CHECK: (('n_strx', 72)
// CHECK: (('n_strx', 63)
// CHECK: ('n_type', 0x2)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 4294901761)
// CHECK: ('_string', 'foo_equals2')
// CHECK: ),
// CHECK: # Symbol 6
// CHECK: (('n_strx', 1)
// CHECK: (('n_strx', 38)
// CHECK: ('n_type', 0xf)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 32)
// CHECK: ('n_value', 4294901761)
// CHECK: ('_string', 'foo_set1_global')
// CHECK: ),
// CHECK: # Symbol 7
// CHECK: (('n_strx', 17)
// CHECK: (('n_strx', 22)
// CHECK: ('n_type', 0x3)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 32)
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/MC/MachO/absolutize.s
Expand Up @@ -150,34 +150,34 @@ Ldata_expr_2 = Ldata_d - Ldata_c
// CHECK: ('nsyms', 4)
// CHECK: ('stroff', 572)
// CHECK: ('strsize', 36)
// CHECK: ('_string_data', '\x00_text_a\x00_text_b\x00_data_a\x00_data_b\x00\x00\x00\x00')
// CHECK: ('_string_data', '\x00_text_b\x00_data_b\x00_text_a\x00_data_a\x00\x00\x00\x00')
// CHECK: ('_symbols', [
// CHECK: # Symbol 0
// CHECK: (('n_strx', 1)
// CHECK: (('n_strx', 17)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', '_text_a')
// CHECK: ),
// CHECK: # Symbol 1
// CHECK: (('n_strx', 9)
// CHECK: (('n_strx', 1)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 2)
// CHECK: ('_string', '_text_b')
// CHECK: ),
// CHECK: # Symbol 2
// CHECK: (('n_strx', 17)
// CHECK: (('n_strx', 25)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 2)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 43)
// CHECK: ('_string', '_data_a')
// CHECK: ),
// CHECK: # Symbol 3
// CHECK: (('n_strx', 25)
// CHECK: (('n_strx', 9)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 2)
// CHECK: ('n_desc', 0)
Expand Down

0 comments on commit 1b1a399

Please sign in to comment.