Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 441 lines (392 sloc) 16.033 kB
2dcf7b8 BUG #1280: Added json (simplejson) library for packaging
Patrick Galbraith authored
1 """Implementation of JSONEncoder
2 """
3 import re
4
5 try:
6 from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
7 except ImportError:
8 c_encode_basestring_ascii = None
9 try:
10 from simplejson._speedups import make_encoder as c_make_encoder
11 except ImportError:
12 c_make_encoder = None
13
# Formatter used for ordinary (finite) floats.
FLOAT_REPR = repr

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')

# Matches every character that must be escaped in any JSON string:
# control characters 0x00-0x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote, and anything outside the printable
# ASCII range (space through tilde); used for ASCII-only output.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte in the range 0x80-0xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Escape table: start with the generic \uXXXX form for every control
# character, then overwrite the ones that have a short escape.
ESCAPE_DCT = {}
for i in range(0x20):
    ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)
ESCAPE_DCT.update({
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})
33
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
41
42
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A byte string containing bytes in the 0x80-0xff range is first decoded
    as UTF-8.  Characters matched by ``ESCAPE_ASCII`` are then replaced by
    their ``ESCAPE_DCT`` entry when one exists, or by ``\\uXXXX`` escapes
    otherwise (two of them, as a surrogate pair, for code points at or
    above 0x10000).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(match):
        char = match.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Outside the BMP: split into a UTF-16 surrogate pair.
        codepoint -= 0x10000
        high = 0xd800 | ((codepoint >> 10) & 0x3ff)
        low = 0xdc00 | (codepoint & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
66
67
# Use the C implementation when the optional speedups import succeeded,
# otherwise fall back to the pure-Python one.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
69
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; overridden per instance by ``separators``.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion; a ValueError is raised when one is
        found.  Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Stored on the instance, so it shadows the ``default`` method
            # and is called with the object only (no ``self``).
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # UTF-8 byte strings are handed to the string encoders
                # as-is; only other encodings are decoded here.
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when true the
        C-accelerated encoder may be used if it is available.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None

        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Match encode(): an encoding of None means "do not decode".
        # Without the None check, str.decode(None) would raise TypeError
        # for every byte string.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR,
                _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is
            # processor- and/or platform-specific, so do tests which don't
            # depend on the internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The accelerated encoder is bypassed whenever indentation or key
        # sorting is requested.  ``indent`` is compared against None rather
        # than tested for truth because 0 is a valid indent level (newlines
        # only) and must take the pure-Python path too.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
261
262 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
263 ## HACK: hand-optimized bytecode; turn globals into locals
264 False=False,
265 True=True,
266 ValueError=ValueError,
267 basestring=basestring,
268 dict=dict,
269 float=float,
270 id=id,
271 int=int,
272 isinstance=isinstance,
273 list=list,
274 long=long,
275 str=str,
276 tuple=tuple,
277 ):
278
279 def _iterencode_list(lst, _current_indent_level):
280 if not lst:
281 yield '[]'
282 return
283 if markers is not None:
284 markerid = id(lst)
285 if markerid in markers:
286 raise ValueError("Circular reference detected")
287 markers[markerid] = lst
288 buf = '['
289 if _indent is not None:
290 _current_indent_level += 1
291 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
292 separator = _item_separator + newline_indent
293 buf += newline_indent
294 else:
295 newline_indent = None
296 separator = _item_separator
297 first = True
298 for value in lst:
299 if first:
300 first = False
301 else:
302 buf = separator
303 if isinstance(value, basestring):
304 yield buf + _encoder(value)
305 elif value is None:
306 yield buf + 'null'
307 elif value is True:
308 yield buf + 'true'
309 elif value is False:
310 yield buf + 'false'
311 elif isinstance(value, (int, long)):
312 yield buf + str(value)
313 elif isinstance(value, float):
314 yield buf + _floatstr(value)
315 else:
316 yield buf
317 if isinstance(value, (list, tuple)):
318 chunks = _iterencode_list(value, _current_indent_level)
319 elif isinstance(value, dict):
320 chunks = _iterencode_dict(value, _current_indent_level)
321 else:
322 chunks = _iterencode(value, _current_indent_level)
323 for chunk in chunks:
324 yield chunk
325 if newline_indent is not None:
326 _current_indent_level -= 1
327 yield '\n' + (' ' * (_indent * _current_indent_level))
328 yield ']'
329 if markers is not None:
330 del markers[markerid]
331
332 def _iterencode_dict(dct, _current_indent_level):
333 if not dct:
334 yield '{}'
335 return
336 if markers is not None:
337 markerid = id(dct)
338 if markerid in markers:
339 raise ValueError("Circular reference detected")
340 markers[markerid] = dct
341 yield '{'
342 if _indent is not None:
343 _current_indent_level += 1
344 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
345 item_separator = _item_separator + newline_indent
346 yield newline_indent
347 else:
348 newline_indent = None
349 item_separator = _item_separator
350 first = True
351 if _sort_keys:
352 items = dct.items()
353 items.sort(key=lambda kv: kv[0])
354 else:
355 items = dct.iteritems()
356 for key, value in items:
357 if isinstance(key, basestring):
358 pass
359 # JavaScript is weakly typed for these, so it makes sense to
360 # also allow them. Many encoders seem to do something like this.
361 elif isinstance(key, float):
362 key = _floatstr(key)
363 elif key is True:
364 key = 'true'
365 elif key is False:
366 key = 'false'
367 elif key is None:
368 key = 'null'
369 elif isinstance(key, (int, long)):
370 key = str(key)
371 elif _skipkeys:
372 continue
373 else:
374 raise TypeError("key " + repr(key) + " is not a string")
375 if first:
376 first = False
377 else:
378 yield item_separator
379 yield _encoder(key)
380 yield _key_separator
381 if isinstance(value, basestring):
382 yield _encoder(value)
383 elif value is None:
384 yield 'null'
385 elif value is True:
386 yield 'true'
387 elif value is False:
388 yield 'false'
389 elif isinstance(value, (int, long)):
390 yield str(value)
391 elif isinstance(value, float):
392 yield _floatstr(value)
393 else:
394 if isinstance(value, (list, tuple)):
395 chunks = _iterencode_list(value, _current_indent_level)
396 elif isinstance(value, dict):
397 chunks = _iterencode_dict(value, _current_indent_level)
398 else:
399 chunks = _iterencode(value, _current_indent_level)
400 for chunk in chunks:
401 yield chunk
402 if newline_indent is not None:
403 _current_indent_level -= 1
404 yield '\n' + (' ' * (_indent * _current_indent_level))
405 yield '}'
406 if markers is not None:
407 del markers[markerid]
408
409 def _iterencode(o, _current_indent_level):
410 if isinstance(o, basestring):
411 yield _encoder(o)
412 elif o is None:
413 yield 'null'
414 elif o is True:
415 yield 'true'
416 elif o is False:
417 yield 'false'
418 elif isinstance(o, (int, long)):
419 yield str(o)
420 elif isinstance(o, float):
421 yield _floatstr(o)
422 elif isinstance(o, (list, tuple)):
423 for chunk in _iterencode_list(o, _current_indent_level):
424 yield chunk
425 elif isinstance(o, dict):
426 for chunk in _iterencode_dict(o, _current_indent_level):
427 yield chunk
428 else:
429 if markers is not None:
430 markerid = id(o)
431 if markerid in markers:
432 raise ValueError("Circular reference detected")
433 markers[markerid] = o
434 o = _default(o)
435 for chunk in _iterencode(o, _current_indent_level):
436 yield chunk
437 if markers is not None:
438 del markers[markerid]
439
440 return _iterencode
Something went wrong with that request. Please try again.