-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsyrup.py
278 lines (252 loc) · 8.53 KB
/
syrup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/usr/bin/env python
### BE FOREWARNED:
###
### This is a simple implementation in recursive descent style, mirrored
### off of the Racket/Guile Scheme implementations. However, a recursive
### descent implementation is unlikely to be all too safe in Python-land
### because there's no tail-call elimination.
import io
import struct
class SyrupDecodeError(Exception):
    """Raised when bytes being read cannot be parsed as Syrup data."""
class SyrupEncodeError(Exception):
    """Raised by the encoder for values of an unsupported type.

    The reader in this module also raises it when it meets an unexpected
    leading character.
    """
class SyrupSingleFloatsNotSupported(Exception):
    """Raised when a single-precision float is read and conversion is disabled."""
class Record:
    """A labelled record: a ``label`` value plus a sequence of ``args``.

    No value-based ``__eq__``/``__hash__`` is defined (an earlier draft of
    them was left commented out), so instances compare and hash by identity.
    """

    def __init__(self, label, args):
        self.label, self.args = label, args

    def __repr__(self):
        # ``!s`` / ``!r`` mirror the ``%s`` / ``%r`` conversions exactly.
        return "<Record {!s}: {!r}>".format(self.label, self.args)
class Symbol:
    """A Syrup symbol, identified by its ``name``.

    Two symbols with equal names compare equal and hash alike, so a freshly
    constructed ``Symbol("x")`` can be used to look up a key (or test
    membership) in a dict or set that holds another ``Symbol("x")``.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Wrap in a 1-tuple so a tuple-valued name cannot break %-formatting.
        return "Symbol(%s)" % (self.name,)

    def __eq__(self, other):
        if not isinstance(other, Symbol):
            # Let Python try the reflected comparison / fall back to identity.
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Must agree with __eq__: equal names => equal hashes.  The class is
        # mixed in so a Symbol does not hash the same as its bare name.
        return hash((Symbol, self.name))
def record(label, *args):
    """Build a Record from ``label`` and the positional ``args``.

    The arguments are stored on the record as the tuple collected by
    ``*args``.
    """
    return Record(label=label, args=args)
def netstring_encode(bstr, joiner=b':'):
    """Return ``<len(bstr) in decimal><joiner><bstr>`` as bytes.

    ``joiner`` is the separator placed between the length prefix and the
    payload; it defaults to ``b':'``.
    """
    length_prefix = b"%d" % len(bstr)
    return b"".join((length_prefix, joiner, bstr))
def syrup_encode(obj):
    """Encode a Python value as Syrup bytes.

    Supported inputs and their encodings:

    * ``bytes``  -> ``<len>:<bytes>``
    * ``bool``   -> ``t`` / ``f``
    * ``int``    -> ``<digits>+`` or ``<digits>-``
    * ``list``   -> ``[<item>...]``
    * ``dict``   -> ``{<key><val>...}``, pairs ordered by encoded key
    * ``str``    -> ``<utf8-len>"<utf8-bytes>``
    * ``Symbol`` -> ``<utf8-len>'<utf8-bytes>``
    * ``float``  -> ``D`` followed by a big-endian double
    * ``Record`` -> ``<`` label, args... ``>``
    * ``set``    -> ``#<item>...$``, items ordered by their encoding

    Containers are encoded recursively.

    Raises:
        SyrupEncodeError: if ``obj`` (or a nested value) has any other type.
    """
    # Bytes are like <bytes-len>:<bytes>
    if isinstance(obj, bytes):
        return netstring_encode(obj)

    # True is t, False is f.  These identity checks must come before the
    # int check because bool is a subclass of int.
    if obj is True:
        return b't'
    if obj is False:
        return b'f'

    # Integers are like <integer>+ or <integer>- (zero is written "0+")
    if isinstance(obj, int):
        if obj >= 0:
            return str(obj).encode('latin-1') + b'+'
        return str(-obj).encode('latin-1') + b'-'

    # Lists are like [<item1><item2><item3>]
    if isinstance(obj, list):
        return b'[' + b''.join(syrup_encode(item) for item in obj) + b']'

    # Dictionaries are like {<key1><val1><key2><val2>}
    # We sort by the key being fully encoded.
    if isinstance(obj, dict):
        encoded_keys = sorted(
            [(syrup_encode(key), key) for key in obj],
            key=lambda pair: pair[0])
        encoded_pairs = [
            # combine the encoded key and encode the val immediately
            encoded_key + syrup_encode(obj[key])
            for encoded_key, key in encoded_keys]
        return b'{' + b''.join(encoded_pairs) + b'}'

    # Strings are like <encoded-bytes-len>"<utf8-encoded>
    if isinstance(obj, str):
        return netstring_encode(obj.encode('utf-8'), joiner=b'"')

    # Symbols are like <encoded-bytes-len>'<utf8-encoded>
    if isinstance(obj, Symbol):
        return netstring_encode(obj.name.encode('utf-8'), joiner=b"'")

    # Only double is supported in Python. Single-precision not supported.
    # Double flonum floats are like D<big-endian-encoded-double-float>
    if isinstance(obj, float):
        return b'D' + struct.pack('>d', obj)

    # Records are like <<tag><arg1><arg2>> but with the outer <> for realsies
    if isinstance(obj, Record):
        encoded_args = b''.join(syrup_encode(arg) for arg in obj.args)
        return b'<' + syrup_encode(obj.label) + encoded_args + b'>'

    # Sets are like #<item1><item2><item3>$
    if isinstance(obj, set):
        return b'#' + b''.join(sorted(syrup_encode(x) for x in obj)) + b'$'

    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be
    # unpacked as format arguments and raise TypeError instead.
    raise SyrupEncodeError("Unsupported type: %r" % (obj,))
def peek_byte(f):
    """Return the next byte of ``f`` without advancing its position.

    Reads one byte, then seeks back to the offset reported by ``f.tell()``
    before the read.  The result is whatever ``f.read(1)`` returned.
    """
    start = f.tell()
    upcoming = f.read(1)
    f.seek(start)
    return upcoming
# Single-byte values that the reader skips before a value.
whitespace_chars = {b' ', b'\t', b'\n'}
# The ten ASCII digits, each as a length-1 bytes object.
digit_chars = {bytes((code,)) for code in b'0123456789'}
def syrup_read(f, convert_singles=False):
    """Read one Syrup value from the binary stream ``f`` and return it.

    ``f`` must support ``read``, ``tell`` and ``seek`` (it is peeked via
    ``peek_byte``).  Leading space, tab and newline bytes are skipped.

    Recognised forms:

    * ``<digits>:`` / ``"`` / ``'`` followed by that many bytes ->
      ``bytes`` / ``str`` / ``Symbol`` (text is decoded as UTF-8)
    * ``<digits>+`` / ``<digits>-`` -> non-negative / negated ``int``
    * ``[``, ``(`` or ``l`` ... ``]``, ``)`` or ``e`` -> ``list``
    * ``{`` or ``d`` ... ``}`` or ``e`` -> ``dict``
    * ``<`` label args... ``>`` -> ``Record`` (args as a list)
    * ``F`` + 4 bytes -> float, only when ``convert_singles`` is true
    * ``D`` + 8 bytes -> float
    * ``t`` / ``f`` -> ``True`` / ``False``
    * ``#`` ... ``$`` -> ``set``

    Args:
        f: the stream to read from.
        convert_singles: when true, single-precision floats are unpacked
            into Python floats; when false they are rejected.

    Raises:
        SyrupDecodeError: on end of input where a value or closing
            delimiter is required, on a malformed length/integer prefix, or
            when fewer payload bytes are available than the prefix declares.
        SyrupSingleFloatsNotSupported: on an ``F`` value when
            ``convert_singles`` is false.
        SyrupEncodeError: on an unexpected leading character (see the note
            at the raise site).
    """
    def _syrup_read(f):
        return syrup_read(f, convert_singles=convert_singles)

    def _consume_closer(closers, what):
        # True (and consume it) if the next byte is one of ``closers``.
        # EOF is checked explicitly: peek_byte returns b'' there, and
        # ``b'' in b'...'`` is always True, which would otherwise make a
        # truncated container look properly closed.
        upcoming = peek_byte(f)
        if not upcoming:
            raise SyrupDecodeError(
                "Unexpected end of input inside %s at pos %s" % (
                    what, f.tell()))
        if upcoming in closers:
            f.read(1)
            return True
        return False

    # consume whitespace
    while peek_byte(f) in whitespace_chars:
        f.read(1)

    next_char = peek_byte(f)
    # Reject EOF up front; an empty ``next_char`` would otherwise satisfy
    # the ``in b'[(l'`` test below and decode as an empty list.
    if not next_char:
        raise SyrupDecodeError(
            "Unexpected end of input at pos %s" % f.tell())

    # it's either a bytestring, string, symbol or integer depending on the
    # character that terminates the run of digits
    if next_char in digit_chars:
        digits = b''
        while True:
            this_char = f.read(1)
            if this_char in digit_chars:
                digits += this_char
            elif this_char in (b':', b'"', b"'", b'+', b'-'):
                marker = this_char
                break
            else:
                raise SyrupDecodeError(
                    "Invalid digit at pos %s: %r" % (
                        f.tell() - 1, this_char))
        int_or_bytes_len = int(digits.decode('latin-1'))
        if marker == b'+':
            return int_or_bytes_len
        if marker == b'-':
            return int_or_bytes_len * -1
        # Otherwise the number is the payload length in bytes.
        bstr = f.read(int_or_bytes_len)
        if len(bstr) != int_or_bytes_len:
            raise SyrupDecodeError(
                "Expected %s bytes of data but only %s available "
                "before pos %s" % (int_or_bytes_len, len(bstr), f.tell()))
        if marker == b':':
            return bstr
        text = bstr.decode('utf-8')
        return Symbol(text) if marker == b"'" else text
    # it's a list
    elif next_char in b'[(l':
        f.read(1)
        lst = []
        while not _consume_closer(b'])e', "list"):
            lst.append(_syrup_read(f))
        return lst
    # it's a hashmap/dictionary
    elif next_char in b'{d':
        f.read(1)
        d = dict()
        while not _consume_closer(b'}e', "dictionary"):
            key = _syrup_read(f)
            val = _syrup_read(f)
            d[key] = val
        return d
    # it's a record
    elif next_char == b'<':
        f.read(1)
        label = _syrup_read(f)
        args = []
        while not _consume_closer(b'>', "record"):
            args.append(_syrup_read(f))
        return Record(label, args)
    # single floats not supported in Python
    elif next_char == b'F':
        if convert_singles:
            f.read(1)
            return struct.unpack('>f', f.read(4))[0]
        else:
            raise SyrupSingleFloatsNotSupported(
                "Single floats not supported in Python and coersion disabled")
    # it's a double float
    elif next_char == b'D':
        f.read(1)
        return struct.unpack('>d', f.read(8))[0]
    # it's a boolean
    elif next_char == b'f':
        f.read(1)
        return False
    elif next_char == b't':
        f.read(1)
        return True
    # it's a set
    elif next_char == b'#':
        f.read(1)
        s = set()
        while not _consume_closer(b'$', "set"):
            s.add(_syrup_read(f))
        return s
    else:
        # NOTE(review): a decode failure raising SyrupEncodeError looks like
        # a slip for SyrupDecodeError; the type is kept so existing callers
        # that catch it keep working -- confirm before changing.
        raise SyrupEncodeError(
            "Unexpected character and position %s: %s" %
            (f.tell(), next_char))
def syrup_decode(bstr, convert_singles=False):
    """Decode a Syrup value from the bytes object ``bstr``.

    Wraps ``bstr`` in an in-memory stream and performs a single
    ``syrup_read`` on it, forwarding ``convert_singles`` unchanged.
    """
    stream = io.BytesIO(bstr)
    return syrup_read(stream, convert_singles=convert_singles)
def _test_syrup():
    """Check that a sample zoo record encodes to the bytes in ``zoo.bin``.

    Reads ``../../test-data/zoo.bin`` (relative to the current working
    directory) and compares its contents to the encoding of the sample.

    Raises:
        Exception: if the encoded sample differs from the file contents.
    """
    def animal(species, name, age, weight, alive, eats):
        # One zoo entry, keyed by symbols.
        return {
            Symbol("species"): species,
            Symbol("name"): name,
            Symbol("age"): age,
            Symbol("weight"): weight,
            Symbol("alive?"): alive,
            Symbol("eats"): eats,
        }

    inhabitants = [
        animal(b"cat", "Tabatha", 12, 8.2, True,
               {b"mice", b"fish", b"kibble"}),
        animal(b"monkey", "George", 6, 17.24, False,
               {b"bananas", b"insects"}),
        animal(b"ghost", "Casper", -12, -34.5, False, set()),
    ]
    zoo_structure = record(b"zoo", "The Grand Menagerie", inhabitants)

    with open("../../test-data/zoo.bin", "rb") as fixture:
        expected = fixture.read()
        actual = syrup_encode(zoo_structure)
    if expected != actual:
        raise Exception("Does not match zoo encoding data")
# Public API of this module.  Entries must be *names* (strings): a
# star-import rejects non-string items in ``__all__`` with a TypeError.
__all__ = [
    "SyrupDecodeError", "SyrupEncodeError", "SyrupSingleFloatsNotSupported",
    "Record", "Symbol", "record",
    "syrup_encode", "syrup_read", "syrup_decode",
]