Skip to content

Commit

Permalink
[llvm-ar][Object] Fix detection of need for 64-bit archive symbol tables
Browse files Browse the repository at this point in the history
The code to detect the requirement for 64-bit offsets in the archive
symbol table was not correctly accounting for the archive file signature
and the size of all the contents of the symbol table itself, e.g. the
symbol table's header and string table. Also was not considering the
variation in symbol table formats. This could result in the creation of
large archives with a corrupt symbol table.

Change the testing environment variable SYM64_THRESHOLD to be an
absolute value rather than a power of 2 in order to enable precise
testing of this detection code.

Differential Revision: https://reviews.llvm.org/D89891
  • Loading branch information
nga888 committed Oct 26, 2020
1 parent e499186 commit 2add7c5
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 26 deletions.
70 changes: 47 additions & 23 deletions llvm/lib/Object/ArchiveWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,22 +286,12 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
print<uint32_t>(Out, Kind, Val);
}

static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
StringRef StringTable) {
// We don't write a symbol table on an archive with no members -- except on
// Darwin, where the linker will abort unless the archive has a symbol table.
if (StringTable.empty() && !isDarwin(Kind))
return;

unsigned NumSyms = 0;
for (const MemberData &M : Members)
NumSyms += M.Symbols.size();

unsigned Size = 0;
unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);

Size += OffsetSize; // Number of entries
static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
uint64_t NumSyms, uint64_t OffsetSize,
StringRef StringTable,
uint32_t *Padding = nullptr) {
assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize");
uint64_t Size = OffsetSize; // Number of entries
if (isBSDLike(Kind))
Size += NumSyms * OffsetSize * 2; // Table
else
Expand All @@ -313,10 +303,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly.
// We do this for all bsd formats because it simplifies aligning members.
const Align Alignment(isBSDLike(Kind) ? 8 : 2);
unsigned Pad = offsetToAlignment(Size, Alignment);
uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
Size += Pad;
if (Padding)
*Padding = Pad;
return Size;
}

static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, uint64_t Size) {
if (isBSDLike(Kind)) {
const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
Expand All @@ -325,6 +320,24 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
}
}

static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
StringRef StringTable) {
// We don't write a symbol table on an archive with no members -- except on
// Darwin, where the linker will abort unless the archive has a symbol table.
if (StringTable.empty() && !isDarwin(Kind))
return;

unsigned NumSyms = 0;
for (const MemberData &M : Members)
NumSyms += M.Symbols.size();

uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
uint32_t Pad;
uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
writeSymbolTableHeader(Out, Kind, Deterministic, Size);

uint64_t Pos = Out.tell() + Size;

Expand Down Expand Up @@ -579,33 +592,44 @@ static Error writeArchiveToStream(raw_ostream &Out,

// We would like to detect if we need to switch to a 64-bit symbol table.
if (WriteSymtab) {
uint64_t MaxOffset = 0;
uint64_t MaxOffset = 8; // For the file signature.
uint64_t LastOffset = MaxOffset;
uint64_t NumSyms = 0;
for (const auto &M : Data) {
// Record the start of the member's offset
LastOffset = MaxOffset;
// Account for the size of each part associated with the member.
MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size();
// We assume 32-bit symbols to see if 32-bit symbols are possible or not.
MaxOffset += M.Symbols.size() * 4;
NumSyms += M.Symbols.size();
}

// We assume 32-bit offsets to see if 32-bit symbols are possible or not.
uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
auto computeSymbolTableHeaderSize =
[=] {
SmallString<0> TmpBuf;
raw_svector_ostream Tmp(TmpBuf);
writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
return TmpBuf.size();
};
LastOffset += computeSymbolTableHeaderSize() + SymtabSize;

// The SYM64 format is used when an archive's member offsets are larger than
// 32-bits can hold. The need for this shift in format is detected by
// writeArchive. To test this we need to generate a file with a member that
// has an offset larger than 32-bits but this demands a very slow test. To
// speed the test up we use this environment variable to pretend like the
// cutoff happens before 32-bits and instead happens at some much smaller
// value.
uint64_t Sym64Threshold = 1ULL << 32;
const char *Sym64Env = std::getenv("SYM64_THRESHOLD");
int Sym64Threshold = 32;
if (Sym64Env)
StringRef(Sym64Env).getAsInteger(10, Sym64Threshold);

// If LastOffset isn't going to fit in a 32-bit varible we need to switch
// to 64-bit. Note that the file can be larger than 4GB as long as the last
// member starts before the 4GB offset.
if (LastOffset >= (1ULL << Sym64Threshold)) {
if (LastOffset >= Sym64Threshold) {
if (Kind == object::Archive::K_DARWIN)
Kind = object::Archive::K_DARWIN64;
else
Expand Down
16 changes: 13 additions & 3 deletions llvm/test/Object/archive-symtab.test
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,14 @@ Symbols:
# RUN: llvm-nm -M %t.a | FileCheck %s

# RUN: rm -f %t.a
# RUN: env SYM64_THRESHOLD=1 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
# RUN: env SYM64_THRESHOLD=836 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
# RUN: llvm-nm -M %t.a | FileCheck %s
# RUXX: grep SYM64 %t.a
# RUN: grep '/SYM64/' %t.a

# RUN: rm -f %t.a
# RUN: env SYM64_THRESHOLD=837 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
# RUN: llvm-nm -M %t.a | FileCheck %s
# RUN: not grep '/SYM64/' %t.a

# CHECK: Archive map
# CHECK-NEXT: main in {{.*}}.elf-x86-64
Expand Down Expand Up @@ -136,10 +141,15 @@ Symbols:
# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s

# RUN: rm -f %t.a
# RUN: env SYM64_THRESHOLD=1 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
# RUN: env SYM64_THRESHOLD=784 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s
# RUN: grep '__\.SYMDEF_64' %t.a

# RUN: rm -f %t.a
# RUN: env SYM64_THRESHOLD=785 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s
# RUN: not grep '__\.SYMDEF_64' %t.a

# MACHO: Archive map
# MACHO-NEXT: _main in trivial-object-test.macho-x86-64
# MACHO-NEXT: _foo in trivial-object-test2.macho-x86-64
Expand Down

0 comments on commit 2add7c5

Please sign in to comment.