-
Notifications
You must be signed in to change notification settings - Fork 250
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor ELF parsing logic to standalone class
This makes it easier to reuse the same code for manylinux, musllinux, and test code.
- Loading branch information
Showing
6 changed files
with
276 additions
and
319 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
"""ELF file parser. | ||
This provides a class ``ElfFile`` that parses an ELF executable in a similar | ||
interface to ``ZipFile``. Only the read interface is implemented. | ||
Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca | ||
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html | ||
""" | ||
|
||
import enum | ||
import os | ||
import struct | ||
from typing import IO, Any, ContextManager, Optional, Tuple | ||
|
||
|
||
class ElfInvalid(ValueError):
    """Raised when the data being read cannot be parsed as an ELF file."""
|
||
|
||
class EIClass(enum.IntEnum):
    """Values of the ``EI_CLASS`` identification byte (file class / bitness)."""

    C32 = 1  # ELFCLASS32: 32-bit objects.
    C64 = 2  # ELFCLASS64: 64-bit objects.
|
||
|
||
class EIData(enum.IntEnum):
    """Values of the ``EI_DATA`` identification byte (data encoding / endianness)."""

    Lsb = 1  # ELFDATA2LSB: little-endian.
    Msb = 2  # ELFDATA2MSB: big-endian.
|
||
|
||
class EMachine(enum.IntEnum):
    """Selected ``e_machine`` (target architecture) values from the ELF header."""

    I386 = 3  # EM_386
    S390 = 22  # EM_S390
    Arm = 40  # EM_ARM
    X8664 = 62  # EM_X86_64
    AArc64 = 183  # EM_AARCH64 (member name spelling kept for existing callers).
|
||
|
||
class ElfFile(ContextManager["ElfFile"]):
    """Representation of an ELF executable.

    The ELF identification bytes and the leading fields of the ELF header are
    parsed eagerly from ``f`` on construction. The instance can be used as a
    context manager; leaving the ``with`` block closes the underlying stream.

    Public attributes set by ``__init__``:

    * ``capacity`` -- the ``EI_CLASS`` byte (1 for 32-bit, 2 for 64-bit).
    * ``encoding`` -- the ``EI_DATA`` byte (1 for LSB, 2 for MSB).
    * ``machine`` -- the ``e_machine`` architecture value.
    * ``flags`` -- the processor-specific ``e_flags`` value.
    """

    def __init__(self, f: IO[bytes]) -> None:
        """Parse the ELF header from the binary stream ``f``.

        :param f: A readable, seekable binary stream positioned at the start
            of the ELF data. The stream is kept and closed by ``close()``.
        :raises ElfInvalid: If the identification bytes are truncated, the
            magic number is wrong, the class/encoding pair is unknown, or the
            remaining header fields cannot be read.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ElfInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":  # Invalid magic, not ELF.
            raise ElfInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # Format for program header (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for the ELF header fields following e_ident.
            # p_fmt: Format for a program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ElfInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of program header table.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ElfInvalid("unable to parse machine and program header info") from e

    def __enter__(self) -> "ElfFile":
        """Return this instance for use in a ``with`` statement."""
        return self

    def __exit__(self, typ: Any, val: Any, tb: Any) -> None:
        """Close the underlying stream when leaving the ``with`` block."""
        self.close()

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """Read and unpack ``struct`` format ``fmt`` from the current position.

        :raises struct.error: If fewer bytes are available than ``fmt`` needs.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    def close(self) -> None:
        """Close the underlying stream."""
        self._f.close()

    @property
    def interpreter(self) -> Optional[str]:
        """Path recorded in the ``PT_INTERP`` program header.

        Walks the program header table and returns the decoded interpreter
        path from the first ``PT_INTERP`` entry, with NUL bytes stripped from
        both ends. Returns ``None`` when no such entry is found.
        """
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                # Truncated entry: skip it rather than fail the whole lookup.
                continue
            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
                continue
            self._f.seek(data[self._p_idx[1]])
            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.