Declarative binary file format parser and emitter for Python 3.
from restruct import parse, emit, UInt, Arr
# Parse a simple little-endian integer
>>> parse(UInt(32), b'\x39\x05\x00\x00')
1337
# Parse an array that continues 'til end-of-stream
>>> parse(Arr(UInt(8)), b'\x45\x45\x45\x45')
[69, 69, 69, 69]
# Parse any bytes-like structures and files!
>>> parse(Arr(UInt(8)), open('foo.bin', 'rb'))
[13, 37]
# Emit data out again!
>>> emit(UInt(32), 420, open('bar.bin', 'wb'))
# Or emit to a BytesIO object if none given
>>> emit(Arr(UInt(8)), [13, 37, 69, 69])
<_io.BytesIO object at 0x106cc0810>
>>> _.getvalue().hex()
'0d254545'

from restruct import parse, Struct
class Test(Struct):
# restruct standard types are injected by default, so no need to import them
foo: UInt(32)
bar: Str(type='c')
>>> parse(Test, b'\x39\x05\x00\x00Hello world!\x00Garbage')
Test {
foo: 1337,
bar: 'Hello world!'
}
>>> _.foo
1337

from restruct import parse, Struct
class Stats(Struct):
engine_level: UInt(32)
rpm: UInt(16)
class Message(Struct):
message: Str(length=64, exact=True)
priority: UInt(8)
def on_emit_priority(self, spec, context):
# called before `priority` is emitted
self.priority = len(self.message)
class Test(Struct):
type: UInt(32)
contents: Switch(options={
1: Stats,
2: Message,
})
def on_parse_type(self, spec, context):
# called when `type` field is set, spec contains the field types
spec.contents.selector = self.type
>>> parse(Test, b"\x02\x00\x00\x00Did you expect a cute foo? Too bad, it's just me, bar!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x63")
Test {
type: 2,
contents: Message {
message: Did you expect a cute foo? Too bad, it's just me, bar!,
priority: 99
}
}

from restruct import parse, Struct
class Base(Struct):
a: UInt(8)
b: UInt(8)
class Derived(Base):
c: UInt(8)
d: UInt(8)
# just works!
>>> parse(Derived, b'\x01\x02\x03\x04')
Derived {
a: 1,
b: 2,
c: 3,
d: 4
}

TODO
from restruct import parse, Struct
class ReferencedStruct(Struct):
magic: Fixed(b'ENTRY')
foo: UInt(32)
bar: Str(type='c')
# declare variable name, one for each reference
class HasReference(Struct, partials={'P'}):
magic: Fixed(b'TARR')
offset: UInt(32) @ P.point
data: Ref(Arr(ReferencedStruct)) @ P
>>> parse(HasReference, b'TARR\x10\x00\x00\x00\xDE\xAD\xBE\xEF\xDE\xAD\xC0\xDEENTRY\x2A\x00\x00\x00First entry\x00ENTRY\x45\x00\x00\x00Second entry\x00')
HasReference {
magic: [54 41 52 52],
offset: 16,
data: [ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 42,
bar: 'First entry'
}, ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 69,
bar: 'Second entry'
}]
}
# compressed and with correct offsets!
>>> emit(HasReference, _).getvalue()
b'TARR\x08\x00\x00\x00ENTRY*\x00\x00\x00First entry\x00ENTRYE\x00\x00\x00Second entry\x00'
# use two partials: one for the reference, one for the array count
class TrickierReference(Struct, partials={'R', 'A'}):
magic: Fixed(b'WARR')
offset: UInt(32) @ R.point
count: UInt(32) @ A.count
data: Ref(Arr(ReferencedStruct) @ A) @ R
>>> parse(TrickierReference, b'WARR\x10\x00\x00\x00\x01\x00\x00\x00\xDE\xAD\xC0\xDEENTRY\x2A\x00\x00\x00There can only be one\x00ENTRY\x45\x00\x00\x00Second nonsense entry\x00')
TrickierReference {
magic: [57 41 52 52],
offset: 16,
count: 1,
data: [ReferencedStruct {
magic: [45 4e 54 52 59],
foo: 42,
bar: 'There can only be one'
}]
}
>>> emit(TrickierReference, _).getvalue()
b'WARR\x0c\x00\x00\x00\x01\x00\x00\x00ENTRY*\x00\x00\x00There can only be one\x00'

TODO: discuss streams
from restruct import parse, Struct, UInt
class GenericTest(Struct, generics=['T']):
# now you can use the variable T to stand in for any type and most values!
foo: UInt(32)
bar: Arr(T)
# use [] syntax on the type to resolve the generic
>>> parse(GenericTest[UInt(16)], b'\x39\x05\x00\x00\x45\x00\xa4\x01\x11\x22')
GenericTest[UInt(16, le)] {
foo: 1337,
bar: [69, 420, 8721]
}
# failing to resolve all generics before parsing predictably fails
>>> parse(GenericTest, b'\x39\x05\x00\x00huh?')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 619, in parse
raise Error(context, 'unresolved generic')
restruct.Error: [bar] ValueError: unresolved generic
# also works with inheritance!

from restruct import parse, Struct
class Inner(Struct):
foo: Str(length=32, type='c')
class Nested(Struct):
level: UInt(8)
inner: Arr(Inner, count=4)
class Base(Struct):
version: UInt(8)
nested: Nested
# errors contain the full path through the structures to the error'd value
>>> parse(Base, b'\x01\x45All\x00Good\x00So\x00\x81hmm\x00\x00')
Traceback (most recent call last):
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 907, in parse
elem = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 695, in parse
val = parse(type, io, context)
File "restruct.py", line 1225, in parse
return type.parse(io, context)
File "restruct.py", line 1133, in parse
return raw.decode(encoding)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x81 in position 0: invalid start byte
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "restruct.py", line 1230, in parse
raise Error(context, e)
restruct.Error: [nested.inner[3].foo] UnicodeDecodeError: 'utf-8' codec can't decode byte 0x81 in position 0: invalid start byte
# access the path programmatically
>>> e.path
[('nested', <struct: Nested>), ('inner', <Arr(<struct: Inner>[4])>), (3, <restruct.StructType object at 0x1079b2810>), ('foo', <CStr(32)>)]
# access the original exception
>>> e.exception
UnicodeDecodeError('utf-8', b'\x81', 0, 1, 'invalid start byte')

- `Int(bits, order='le', signed=True)`: two's-complement integer
- `UInt(bits, order='le')`: unsigned integer
- `Float(bits)`: IEEE754 binary float
- `Str(length?=None, type='c', encoding='utf-8', terminator?=None, exact=False, length_unit=1, length_type=UInt(8))`: string, supported types are `raw`, `c` and `pascal`
- `Bool(type=UInt(8), true_value=1, false_value=0)`: generic boolean
- `Nothing`: parses nothing and emits nothing, returns `None`
- `Implied(value)`: parses nothing and emits nothing, returns `value`
- `Fixed(value)`: reads bytes and emits bytes, making sure they equal `value`
- `Pad(size, value?=b'\x00')`: parses and emits padding bytes, returns `None`
- `Data(size?=None)`: parses and returns raw bytes
- `Enum(enum, type)`: parses and emits `type` and constructs `enum.Enum` subclass `enum` with its result
- `StructType(fields, cls, generics=[], union=False, partial=False, bound=[])`: type class used by `MetaStruct`
- `Struct`: base class for automatic struct type generation through the `MetaStruct` meta-class and field annotations
- `Union`: base class for automatic union type generation through the `MetaStruct` meta-class and field annotations
- `Arr(type, count=None, size=None, stop_value=None)`: parses and emits an array of `type`s, optionally of at most `count` elements and `size` bytes total size
- `Switch(default=None, fallback=None, options={})`: parses and emits a choice of types chosen through the `selector` field
- `Ref(type, point, reference=os.SEEK_SET, adjustment=0, stream=None)`: parses and emits a value of `type` elsewhere in the stream at offset `point`
- `Rebased(type, base=None)`: parses and emits `type`, shifting the input position for absolute `Ref` references to `base` (default: file position at parse/emit time)
- `Sized(type, limit=None, exact=False)`: parses and emits `type`, limiting its size in the stream to `limit` bytes
- `AlignTo(type, alignment, value?=b'\x00')`: parses and emits `type`, aligning the stream to `alignment` bytes after
- `AlignedTo(type, alignment, value?=b'\x00')`: parses and emits `type`, aligning the stream to `alignment` bytes before
- `Lazy(type, size)`: parses and emits `type` lazily, returning a callable that will parse and return the type when called
- `Processed(type, parse, emit, with_context=False)`: parses and emits `type`, processing it through the `parse` and `emit` callables, respectively, optionally passing `context`
- `Checked(type, check)`: parses and emits `type`, calling `check(value)` and erroring if it returns `False`
- `Mapped(type, mapping, default?=None)`: parses and emits `type`, looking up the result in `mapping`
BSD-2; see LICENSE for details.
- Add `Maybe` and `Either` types
- Fix `Arr` EOF-handling masking errors
- Port more features over from `destruct`
- More examples