Skip to content

Commit

Permalink
Regenerate LC_CODE_SIGNATURE during llvm-objcopy operations
Browse files Browse the repository at this point in the history
**Context:**

This is a second attempt at introducing signature regeneration to llvm-objcopy. In this diff: https://reviews.llvm.org/D109840, a script was introduced to test
the validity of a code signature. In this diff: https://reviews.llvm.org/D109803 (now reverted), an effort was made to extract the signature generation behavior out of LLD into a common location for use in llvm-objcopy. In this diff: https://reviews.llvm.org/D109972 it was decided that there was no appropriate common location and that a small amount of duplication to bring signature generation to llvm-objcopy would be better. This diff introduces this duplication.

**Summary**

Prior to this change, if a LC_CODE_SIGNATURE load command
was included in the binary passed to llvm-objcopy, the command and
associated section were simply copied and included verbatim in the
new binary. If the rest of the binary was modified at all, this resulted
in an invalid Mach-O file. This change regenerates the signature
rather than copying it.

The code_signature_lc.test test was modified to include the yaml
representation of a small signed MachO executable in order to
effectively test the signature generation.

Reviewed By: alexander-shaposhnikov, #lld-macho

Differential Revision: https://reviews.llvm.org/D111164
  • Loading branch information
Nuri Amari authored and drodriguez committed Oct 26, 2021
1 parent 566bfbb commit a299b24
Show file tree
Hide file tree
Showing 13 changed files with 1,033 additions and 36 deletions.
7 changes: 7 additions & 0 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,9 @@ CodeSignatureSection::CodeSignatureSection()
size_t slashIndex = fileName.rfind("/");
if (slashIndex != std::string::npos)
fileName = fileName.drop_front(slashIndex + 1);

// NOTE: Any changes to these calculations should be repeated
// in llvm-objcopy's MachOLayoutBuilder::layoutTail.
allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1);
fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
}
Expand All @@ -1182,6 +1185,8 @@ uint64_t CodeSignatureSection::getRawSize() const {
}

void CodeSignatureSection::writeHashes(uint8_t *buf) const {
// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
// MachOWriter::writeSignatureData.
uint8_t *code = buf;
uint8_t *codeEnd = buf + fileOff;
uint8_t *hashes = codeEnd + allHeadersSize;
Expand Down Expand Up @@ -1212,6 +1217,8 @@ void CodeSignatureSection::writeHashes(uint8_t *buf) const {
}

void CodeSignatureSection::writeTo(uint8_t *buf) const {
// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
// MachOWriter::writeSignatureData.
uint32_t signatureSize = static_cast<uint32_t>(getSize());
auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf);
write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE);
Expand Down
2 changes: 2 additions & 0 deletions lld/MachO/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,8 @@ class IndirectSymtabSection final : public LinkEditSection {
// The code signature comes at the very end of the linked output file.
class CodeSignatureSection final : public LinkEditSection {
public:
// NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file,
// and any changes here should be repeated there.
static constexpr uint8_t blockSizeShift = 12;
static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
static constexpr size_t hashSize = 256 / 8;
Expand Down
257 changes: 257 additions & 0 deletions llvm/test/tools/llvm-objcopy/MachO/Inputs/code-signature-check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
"""Checks the validity of MachO binary signatures
MachO binaries sometimes include a LC_CODE_SIGNATURE load command
and corresponding section in the __LINKEDIT segment that together
work to "sign" the binary. This script is used to check the validity
of this signature.
Usage:
./code-signature-check.py my_binary 800 300 0 800
Arguments:
binary - The MachO binary to be tested
offset - The offset from the start of the binary to where the code signature section begins
size - The size of the code signature section in the binary
code_offset - The point in the binary to begin hashing
code_size - The length starting from code_offset to hash
"""

import argparse
import collections
import hashlib
import itertools
import struct
import sys
import typing

class CodeDirectoryVersion:
    """Version numbers of the CodeDirectory header revisions known to this script.

    CodeDirectory.make compares the version word of a blob against these
    values (exact match) to choose the header layout to decode.
    """

    # Selects the layout that adds scatterOffset.
    SUPPORTSSCATTER = 0x020100
    # Selects the layout that adds teamOffset.
    SUPPORTSTEAMID = 0x020200
    # Selects the layout that adds spare3 and codeLimit64.
    SUPPORTSCODELIMIT64 = 0x020300
    # Selects the layout that adds execSegBase, execSegLimit and execSegFlags.
    SUPPORTSEXECSEG = 0x020400

class CodeDirectory:
    """Factory that decodes a CodeDirectory blob into the matching tuple type."""

    @staticmethod
    def make(buf: memoryview) -> typing.Union['CodeDirectoryBase', 'CodeDirectoryV20100', 'CodeDirectoryV20200', 'CodeDirectoryV20300', 'CodeDirectoryV20400']:
        """Decode the CodeDirectory header found at the start of ``buf``.

        The third big-endian 32-bit word of the header is read as the version.
        A version equal to one of the CodeDirectoryVersion constants selects
        the corresponding extended layout; any other version is decoded with
        the CodeDirectoryBase layout.

        Raises struct.error when ``buf`` is shorter than the selected layout.
        """
        # Only the version (third word) is needed to choose the layout.
        version = struct.unpack_from(">III", buf, 0)[2]

        layouts = {
            CodeDirectoryVersion.SUPPORTSSCATTER: CodeDirectoryV20100,
            CodeDirectoryVersion.SUPPORTSTEAMID: CodeDirectoryV20200,
            CodeDirectoryVersion.SUPPORTSCODELIMIT64: CodeDirectoryV20300,
            CodeDirectoryVersion.SUPPORTSEXECSEG: CodeDirectoryV20400,
        }
        if version in layouts:
            layout = layouts[version]
        else:
            # Unknown versions fall back to the common header fields.
            layout = CodeDirectoryBase

        values = struct.unpack_from(layout._format(), buf, 0)
        return layout._make(values)

class CodeDirectoryBase(typing.NamedTuple):
    """Header fields shared by every CodeDirectory layout decoded by this script.

    Field order matches the struct format returned by ``_format``.
    """

    magic: int
    length: int
    # Third header word; CodeDirectory.make uses it to pick the layout.
    version: int
    flags: int
    # Offset, from the start of the code directory, of the first code slot hash.
    hashOffset: int
    # Offset, from the start of the code directory, of the NUL-terminated identifier.
    identOffset: int
    nSpecialSlots: int
    # Number of page hashes that main() verifies.
    nCodeSlots: int
    codeLimit: int
    # Size in bytes of each stored hash.
    hashSize: int
    hashType: int
    platform: int
    # log2 of the page size; main() hashes pages of (1 << pageSize) bytes.
    pageSize: int
    spare2: int

    @staticmethod
    def _format() -> str:
        """Return the big-endian struct format: nine 32-bit words, four bytes, one 32-bit word."""
        return ">" + "I" * 9 + "B" * 4 + "I"

class CodeDirectoryV20100(typing.NamedTuple):
    """CodeDirectory layout selected for CodeDirectoryVersion.SUPPORTSSCATTER.

    Carries the same leading fields as CodeDirectoryBase followed by
    ``scatterOffset``.
    """

    # Fields shared with CodeDirectoryBase, in the same order.
    magic: int
    length: int
    version: int
    flags: int
    hashOffset: int
    identOffset: int
    nSpecialSlots: int
    nCodeSlots: int
    codeLimit: int
    hashSize: int
    hashType: int
    platform: int
    pageSize: int
    spare2: int

    # Field added by this layout (one 32-bit word).
    scatterOffset: int

    @staticmethod
    def _format() -> str:
        """Return the base struct format extended by one 32-bit word."""
        inherited = CodeDirectoryBase._format()
        return inherited + "I"

class CodeDirectoryV20200(typing.NamedTuple):
    """CodeDirectory layout selected for CodeDirectoryVersion.SUPPORTSTEAMID.

    Carries the CodeDirectoryV20100 fields followed by ``teamOffset``.
    """

    # Fields shared with CodeDirectoryBase, in the same order.
    magic: int
    length: int
    version: int
    flags: int
    hashOffset: int
    identOffset: int
    nSpecialSlots: int
    nCodeSlots: int
    codeLimit: int
    hashSize: int
    hashType: int
    platform: int
    pageSize: int
    spare2: int

    # Field also present in CodeDirectoryV20100.
    scatterOffset: int

    # Field added by this layout (one 32-bit word).
    teamOffset: int

    @staticmethod
    def _format() -> str:
        """Return the V20100 struct format extended by one 32-bit word."""
        inherited = CodeDirectoryV20100._format()
        return inherited + "I"

class CodeDirectoryV20300(typing.NamedTuple):
    """CodeDirectory layout selected for CodeDirectoryVersion.SUPPORTSCODELIMIT64.

    Carries the CodeDirectoryV20200 fields followed by ``spare3`` and
    ``codeLimit64``.
    """

    # Fields shared with CodeDirectoryBase, in the same order.
    magic: int
    length: int
    version: int
    flags: int
    hashOffset: int
    identOffset: int
    nSpecialSlots: int
    nCodeSlots: int
    codeLimit: int
    hashSize: int
    hashType: int
    platform: int
    pageSize: int
    spare2: int

    # Field also present in CodeDirectoryV20100.
    scatterOffset: int

    # Field also present in CodeDirectoryV20200.
    teamOffset: int

    # Fields added by this layout: a 32-bit word, then a 64-bit word.
    spare3: int
    codeLimit64: int

    @staticmethod
    def _format() -> str:
        """Return the V20200 struct format extended by a 32-bit and a 64-bit word."""
        inherited = CodeDirectoryV20200._format()
        return inherited + "IQ"

class CodeDirectoryV20400(typing.NamedTuple):
    """CodeDirectory layout selected for CodeDirectoryVersion.SUPPORTSEXECSEG.

    Carries the CodeDirectoryV20300 fields followed by ``execSegBase``,
    ``execSegLimit`` and ``execSegFlags``.
    """

    # Fields shared with CodeDirectoryBase, in the same order.
    magic: int
    length: int
    version: int
    flags: int
    hashOffset: int
    identOffset: int
    nSpecialSlots: int
    nCodeSlots: int
    codeLimit: int
    hashSize: int
    hashType: int
    platform: int
    pageSize: int
    spare2: int

    # Field also present in CodeDirectoryV20100.
    scatterOffset: int

    # Field also present in CodeDirectoryV20200.
    teamOffset: int

    # Fields also present in CodeDirectoryV20300.
    spare3: int
    codeLimit64: int

    # Fields added by this layout (three 64-bit words).
    execSegBase: int
    execSegLimit: int
    execSegFlags: int

    @staticmethod
    def _format() -> str:
        """Return the V20300 struct format extended by three 64-bit words."""
        inherited = CodeDirectoryV20300._format()
        return inherited + "Q" * 3

class CodeDirectoryBlobIndex(typing.NamedTuple):
    """One index entry of a super blob: two big-endian 32-bit words."""

    # First word of the entry; this script does not consult it.
    type_: int
    # Offset of the referenced blob, measured from the start of the super blob.
    offset: int

    @staticmethod
    def make(buf: memoryview) -> 'CodeDirectoryBlobIndex':
        """Decode the index entry located at the start of ``buf``."""
        layout = CodeDirectoryBlobIndex.__format()
        type_, offset = struct.unpack_from(layout, buf, 0)
        return CodeDirectoryBlobIndex(type_=type_, offset=offset)

    @staticmethod
    def bytesize() -> int:
        """Return the encoded size of one index entry in bytes."""
        layout = CodeDirectoryBlobIndex.__format()
        return struct.calcsize(layout)

    @staticmethod
    def __format() -> str:
        """Return the struct format of an index entry."""
        return ">" + "I" * 2

class CodeDirectorySuperBlob(typing.NamedTuple):
    """Decoded super blob header together with its index entries."""

    magic: int
    length: int
    # Number of index entries that follow the three-word header.
    count: int
    # Decoded index entries, in the order they appear in the buffer.
    blob_indices: typing.List[CodeDirectoryBlobIndex]

    @staticmethod
    def make(buf: memoryview) -> 'CodeDirectorySuperBlob':
        """Decode the super blob found at the start of ``buf``.

        Reads three big-endian 32-bit words (magic, length, count) and then
        ``count`` consecutive CodeDirectoryBlobIndex entries.
        """
        header_layout = ">III"
        magic, length, count = struct.unpack_from(header_layout, buf, 0)

        # Index entries start immediately after the fixed-size header.
        cursor = struct.calcsize(header_layout)
        entries = []
        while len(entries) < count:
            entries.append(CodeDirectoryBlobIndex.make(buf[cursor:]))
            cursor += CodeDirectoryBlobIndex.bytesize()

        return CodeDirectorySuperBlob(magic, length, count, entries)

def unpack_null_terminated_string(buf: memoryview) -> str:
    """Decode the bytes of ``buf`` that precede the first NUL byte.

    When ``buf`` contains no NUL byte the whole buffer is decoded. Decoding
    uses the default codec of ``bytes.decode``.
    """
    raw = bytes(buf)
    terminator = raw.find(b"\0")
    if terminator != -1:
        raw = raw[:terminator]
    return raw.decode()

def main():
    """Check the page hashes recorded in a binary's code signature.

    Parses the command line (binary, offset, size, code_offset, code_size),
    decodes the super blob stored at ``offset``, and for every indexed blob
    prints the decoded code directory and its identifier, then compares each
    recorded code slot hash with the SHA-256 of the corresponding page read
    from the binary. Exits with status -1 on the first mismatch.
    """
    parser = argparse.ArgumentParser()
    positional_arguments = (
        ('binary', argparse.FileType('rb'), 'The file to analyze'),
        ('offset', int, 'Offset to start of Code Directory data'),
        ('size', int, 'Size of Code Directory data'),
        ('code_offset', int, 'Offset to start of code pages to hash'),
        ('code_size', int, 'Size of the code pages to hash'),
    )
    for arg_name, arg_type, arg_help in positional_arguments:
        parser.add_argument(arg_name, type=arg_type, help=arg_help)

    args = parser.parse_args()
    binary = args.binary

    # Load the signature bytes once; every blob is addressed relative to them.
    binary.seek(args.offset)
    signature = memoryview(binary.read(args.size))

    super_blob = CodeDirectorySuperBlob.make(signature)
    print(super_blob)

    for entry in super_blob.blob_indices:
        directory_start = entry.offset
        directory = CodeDirectory.make(signature[directory_start:])
        print(directory)

        ident_start = directory_start + directory.identOffset
        print("Code Directory ID: " + unpack_null_terminated_string(signature[ident_start:]))

        # pageSize is stored as a power-of-two exponent.
        page_size = 1 << directory.pageSize
        code_end = args.code_offset + args.code_size
        position = args.code_offset
        binary.seek(position)

        first_hash = directory_start + directory.hashOffset
        for slot in range(directory.nCodeSlots):
            slot_start = first_hash + slot * directory.hashSize
            recorded = bytes(signature[slot_start:slot_start + directory.hashSize])

            # The final page may be shorter than a full page.
            chunk_size = min(page_size, code_end - position)
            digest = hashlib.sha256()
            digest.update(binary.read(chunk_size))
            computed = digest.digest()
            position += chunk_size

            print("%s <> %s" % (recorded.hex(), computed.hex()))

            if recorded != computed:
                sys.exit(-1)


# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading

0 comments on commit a299b24

Please sign in to comment.