Skip to content

Commit

Permalink
[COFF] Move section name encoding into BinaryFormat
Browse files Browse the repository at this point in the history
Large COFF section names are moved into the string table, and the
section header field then holds the offset into the string table, encoded
as ASCII decimal for offsets of up to 7 digits and in base64 for larger
offsets.

Encoding string table offsets this way is done in a few places in the
codebase, so it is helpful to move this operation into `BinaryFormat`,
where it can be shared by every place that needs it.

So this patch takes the implementation of this operation from
`llvm/lib/MC/WinCOFFObjectWriter.cpp` and moves it into `BinaryFormat`.

Reviewed By: jhenderson, rnk

Differential Revision: https://reviews.llvm.org/D118793

(cherry picked from commit 85f4023)
  • Loading branch information
npmiller authored and tstellar committed Feb 22, 2022
1 parent 9672d11 commit 3367c24
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 39 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/BinaryFormat/COFF.h
Expand Up @@ -731,6 +731,10 @@ inline bool isReservedSectionNumber(int32_t SectionNumber) {
return SectionNumber <= 0;
}

/// Encode section name based on string table offset.
///
/// Offsets up to 9999999 are written as '/' followed by the decimal digits of
/// the offset; larger offsets up to 0xFFFFFFFFF are written as '//' followed
/// by six base64 characters.
///
/// The size of Out must be at least COFF::NameSize.
/// \returns true if Offset was encoded into Out, or false if Offset is too
/// large to be encoded (in which case Out is not written).
bool encodeSectionName(char *Out, uint64_t Offset);

} // End namespace COFF.
} // End namespace llvm.

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/BinaryFormat/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_component_library(LLVMBinaryFormat
AMDGPUMetadataVerifier.cpp
COFF.cpp
Dwarf.cpp
ELF.cpp
MachO.cpp
Expand Down
57 changes: 57 additions & 0 deletions llvm/lib/BinaryFormat/COFF.cpp
@@ -0,0 +1,57 @@
//===- llvm/BinaryFormat/COFF.cpp - The COFF format -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/BinaryFormat/COFF.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"

// Largest string table offsets representable by each section name encoding.
enum : unsigned { Max7DecimalOffset = 9999999U }; // Largest 7-digit decimal.
enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6 - 1 (six digits).

// Write Value into Buffer as "//" followed by exactly six base64 digits, most
// significant digit first and left-padded with 'A' (the zero digit):
// '//AAAAAA', '//AAAAAB', ...
// Buffer must hold at least 8 bytes. No terminating null is written.
static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
  assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
         "Illegal section name encoding for value");

  static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                 "abcdefghijklmnopqrstuvwxyz"
                                 "0123456789+/";

  Buffer[0] = '/';
  Buffer[1] = '/';

  // Fill slots 7 down to 2, peeling off the least significant digit each time.
  for (unsigned Pos = 7; Pos >= 2; --Pos) {
    Buffer[Pos] = Alphabet[Value % 64];
    Value /= 64;
  }
}

bool llvm::COFF::encodeSectionName(char *Out, uint64_t Offset) {
if (Offset <= Max7DecimalOffset) {
// Offsets of 7 digits or less are encoded in ASCII.
SmallVector<char, COFF::NameSize> Buffer;
Twine('/').concat(Twine(Offset)).toVector(Buffer);
assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
std::memcpy(Out, Buffer.data(), Buffer.size());
return true;
}

if (Offset <= MaxBase64Offset) {
// Starting with 10,000,000, offsets are encoded as base64.
encodeBase64StringEntry(Out, Offset);
return true;
}

// The offset is too large to be encoded.
return false;
}
41 changes: 2 additions & 39 deletions llvm/lib/MC/WinCOFFObjectWriter.cpp
Expand Up @@ -452,52 +452,15 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
Sym->MC = &MCSym;
}

// Largest string table offsets representable by each section name encoding.
enum : unsigned { Max7DecimalOffset = 9999999U }; // Largest 7-digit decimal.
enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6 - 1 (six digits).

// Write Value into Buffer as "//" followed by exactly six base64 digits, most
// significant digit first and left-padded with 'A' (the zero digit):
// '//AAAAAA', '//AAAAAB', ...
// Buffer must hold at least 8 bytes. No terminating null is written.
static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
  assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
         "Illegal section name encoding for value");

  static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                 "abcdefghijklmnopqrstuvwxyz"
                                 "0123456789+/";

  Buffer[0] = '/';
  Buffer[1] = '/';

  // Fill slots 7 down to 2, peeling off the least significant digit each time.
  for (unsigned Pos = 7; Pos >= 2; --Pos) {
    Buffer[Pos] = Alphabet[Value % 64];
    Value /= 64;
  }
}

// Fill in S.Header.Name for section S. A name of at most COFF::NameSize
// characters is copied into the header field directly; a longer name is
// represented by its string table offset, encoded into the header field.
// NOTE(review): this span looks like a merged diff view -- the inline
// encoding up to the first report_fatal_error appears to be the previous
// implementation, and the trailing COFF::encodeSectionName call its
// replacement. Confirm against the actual file before relying on it.
void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
// Short names fit in the header field as-is; only the name's own characters
// are copied.
if (S.Name.size() <= COFF::NameSize) {
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
return;
}

// Longer names are looked up in the string table (Strings) and referenced by
// offset.
uint64_t StringTableEntry = Strings.getOffset(S.Name);
// Offsets up to Max7DecimalOffset are written as '/' followed by the decimal
// digits of the offset.
if (StringTableEntry <= Max7DecimalOffset) {
SmallVector<char, COFF::NameSize> Buffer;
Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer);
assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
std::memcpy(S.Header.Name, Buffer.data(), Buffer.size());
return;
}
if (StringTableEntry <= MaxBase64Offset) {
// Starting with 10,000,000, offsets are encoded as base64.
encodeBase64StringEntry(S.Header.Name, StringTableEntry);
return;
}
// The offset exceeds what either encoding can represent.
report_fatal_error("COFF string table is greater than 64 GB.");
// Shared helper form of the same encoding: a false return means the offset
// could not be encoded.
if (!COFF::encodeSectionName(S.Header.Name, StringTableEntry))
report_fatal_error("COFF string table is greater than 64 GB.");
}

void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
Expand Down

0 comments on commit 3367c24

Please sign in to comment.