Declarative binary file format parser and emitter for Python 3.
from restruct import parse, emit, UInt, Arr
# Parse a simple little-endian integer
>>> parse(UInt(32), b'\x39\x05\x00\x00')
1337
# Parse an array that continues 'til end-of-stream
>>> parse(Arr(UInt(8)), b'\x45\x45\x45\x45')
[69, 69, 69, 69]
# Parse any bytes-like structures and files!
>>> parse(Arr(UInt(8)), open('foo.bin', 'rb'))
[13, 37]
# Emit data out again!
>>> emit(UInt(32), 420, open('bar.bin', 'wb'))
# Or emit to a BytesIO object if none given
>>> emit(Arr(UInt(8)), [13, 37, 69, 69])
<_io.BytesIO object at 0x106cc0810>
>>> _.getvalue().hex()
'0d254545'
from restruct import parse, Struct
class Test(Struct):
# restruct standard types are injected by default, so no need to import them
foo: UInt(32)
bar: Str(type='c')
>>> parse(Test, b'\x39\x05\x00\x00Hello world!\x00Garbage')
Test {
foo: 1337,
bar: 'Hello world!'
}
>>> _.foo
1337
from restruct import parse, Struct
class Stats(Struct):
engine_level: UInt(32)
rpm: UInt(16)
class Message(Struct):
message: Str(length=64, exact=True)
priority: UInt(8)
def on_emit_priority(self, spec, context):
# called before `priority` is emitted
self.priority = len(self.message)
class Test(Struct):
type: UInt(32)
contents: Switch(options={
1: Stats,
2: Message,
})
def on_parse_type(self, spec, context):
# called when `type` field is set, spec contains the field types
spec.contents.selector = self.type
>>> parse(Test, b"\x02\x00\x00\x00Did you expect a cute foo? Too bad, it's just me, bar!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x63")
Test {
type: 2,
contents: Message {
message: Did you expect a cute foo? Too bad, it's just me, bar!,
priority: 99
}
}
from restruct import parse, Struct
class Base(Struct):
a: UInt(8)
b: UInt(8)
class Derived(Base):
c: UInt(8)
d: UInt(8)
# just works!
>>> parse(Derived, b'\x01\x02\x03\x04')
Derived {
a: 1,
b: 2,
c: 3,
d: 4
}
TODO
from restruct import parse, Struct
class ReferencedStruct(Struct):
magic: Fixed(b'ENTRY')
foo: UInt(32)
bar: Str(type='c')
# declare variable name, one for each reference
class HasReference(Struct, partials={'P'}):
magic: Fixed(b'TARR')
offset: UInt(32) @ P.point
data: Ref(Arr(ReferencedStruct)) @ P
>>> parse(HasReference, b'TARR\x10\x00\x00\x00\xDE\xAD\xBE\xEF\xDE\xAD\xC0\xDEENTRY\x2A\x00\x00\x00First entry\x00ENTRY\x45\x00\x00\x00Second entry\x00')
HasReference {
magic: [54 41 52 52],
offset: 16,
data: [ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 42,
bar: 'First entry'
}, ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 69,
bar: 'Second entry'
}]
}
# compressed and with correct offsets!
>>> emit(HasReference, _).getvalue()
b'TARR\x08\x00\x00\x00ENTRY*\x00\x00\x00First entry\x00ENTRYE\x00\x00\x00Second entry\x00'
# use two partials: one for the reference, one for the array count
class TrickierReference(Struct, partials={'R', 'A'}):
magic: Fixed(b'WARR')
offset: UInt(32) @ R.point
count: UInt(32) @ A.count
data: Ref(Arr(ReferencedStruct) @ A) @ R
>>> parse(TrickierReference, b'WARR\x10\x00\x00\x00\x01\x00\x00\x00\xDE\xAD\xC0\xDEENTRY\x2A\x00\x00\x00There can only be one\x00ENTRY\x45\x00\x00\x00Second nonsense entry\x00')
TrickierReference {
magic: [57 41 52 52],
offset: 16,
count: 1,
data: [ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 42,
bar: 'There can only be one'
}]
}
>>> emit(TrickierReference, _).getvalue()
b'WARR\x0c\x00\x00\x00\x01\x00\x00\x00ENTRY*\x00\x00\x00There can only be one\x00'
TODO: discuss streams
from restruct import parse, Struct, UInt
class GenericTest(Struct, generics=['T']):
# now you can use the variable T to stand in for any type and most values!
foo: UInt(32)
bar: Arr(T)
# use [] syntax on the type to resolve the generic
>>> parse(GenericTest[UInt(16)], b'\x39\x05\x00\x00\x45\x00\xa4\x01\x11\x22')
GenericTest[UInt(16, le)] {
foo: 1337,
bar: [69, 420, 8721]
}
# failing to resolve all generics before parsing predictably fails
>>> parse(GenericTest, b'\x39\x05\x00\x00huh?')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 619, in parse
raise Error(context, 'unresolved generic')
restruct.Error: [bar] ValueError: unresolved generic
# also works with inheritance!
from restruct import parse, Struct
class Inner(Struct):
foo: Str(length=32, type='c')
class Nested(Struct):
level: UInt(8)
inner: Arr(Inner, count=4)
class Base(Struct):
version: UInt(8)
nested: Nested
# errors contain the full path through the structures to the error'd value
>>> parse(Base, b'\x01\x45All\x00Good\x00So\x00\x81hmm\x00\x00')
Traceback (most recent call last):
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 907, in parse
elem = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 1133, in parse
return raw.decode(encoding)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x81 in position 0: invalid start byte
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "restruct.py", line 1230, in parse
raise Error(context, e)
restruct.Error: [nested.inner[3].foo] UnicodeDecodeError: 'utf-8' codec can't decode byte 0x81 in position 0: invalid start byte
# access the path programmatically
>>> e.path
[('nested', <struct: Nested>), ('inner', <Arr(<struct: Inner>[4])>), (3, <restruct.StructType object at 0x1079b2810>), ('foo', <CStr(32)>)]
# access the original exception
>>> e.exception
UnicodeDecodeError('utf-8', b'\x81', 0, 1, 'invalid start byte')
- `Int(bits, order='le', signed=True)`: two's-complement integer
- `UInt(bits, order='le')`: two's-complement unsigned integer
- `Float(bits)`: IEEE754 binary float
- `Str(length?=None, type='c', encoding='utf-8', terminator?=None, exact=False, length_unit=1, length_type=UInt(8))`: string, supported types are `raw`, `c` and `pascal`
- `Bool(type=UInt(8), true_value=1, false_value=0)`: generic boolean
- `Nothing`: parses nothing and emits nothing, returns `None`
- `Implied(value)`: parses nothing and emits nothing, returns `value`
- `Fixed(value)`: reads bytes and emits bytes, making sure they equal `value`
- `Pad(size, value?=b'\x00')`: parses and emits padding bytes, returns `None`
- `Data(size?=None)`: parses and returns raw bytes
- `Enum(enum, type)`: parses and emits `type` and constructs `enum.Enum` subclass `enum` with its result
- `StructType(fields, cls, generics=[], union=False, partial=False, bound=[])`: type class used by `MetaStruct`
- `Struct`: base class for automatic struct type generation through `MetaStruct` meta-class and field annotations
- `Union`: base class for automatic union type generation through `MetaStruct` meta-class and field annotations
- `Arr(type, count=None, size=None, stop_value=None)`: parses and emits array of `type`s, of optionally max `count` elements and `size` bytes total size
- `Switch(default=None, fallback=None, options={})`: parses and emits a choice of types chosen through the `selector` field
- `Ref(type, point, reference=os.SEEK_SET, adjustment=0, stream=None)`: parses and emits a value of `type` elsewhere in the stream at offset `point`
- `Rebased(type, base=None)`: parses and emits `type`, shifting the input position for absolute `Ref` references to `base` (default: file position on parse/emit time)
- `Sized(type, limit=None, exact=False)`: parses and emits `type`, limiting its size in the stream to `limit` bytes
- `AlignTo(type, alignment, value?=b'\x00')`: parses and emits `type`, aligning stream to alignment bytes after
- `AlignedTo(type, alignment, value?=b'\x00')`: parses and emits `type`, aligning stream to alignment bytes before
- `Lazy(type, size)`: parses and emits `type` lazily, returning a callable that will parse and return the type when called
- `Processed(type, parse, emit, with_context=False)`: parses and emits `type`, processing it through `parse` and `emit` callables, respectively, optionally passing `context`
- `Checked(type, check)`: parses and emits `type`, calling `check(value)` and erroring if it returns `False`
- `Mapped(type, mapping, default?=None)`: parses and emits `type`, looking up the result in `mapping`
BSD-2; see `LICENSE` for details.
- Add `Maybe` and `Either` types
- Fix `Arr` EOF-handling masking errors
- Port more features over from `destruct`
- More examples