Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 736 lines (636 sloc) 26.071 kB
47e5a7c @klacke JSON ajax code from Gaspar Chilingarov, I added docs describing an ex…
authored
1 %(%% Copyright (c) 2005-2006, A2Z Development USA, Inc. All Rights Reserved.
2 %%%
3 %%% The contents of this file are subject to the Erlang Public License,
4 %%% Version 1.1, (the "License"); you may not use this file except in
5 %%% compliance with the License. You should have received a copy of the
6 %%% Erlang Public License along with this software. If not, it can be
7 %%% retrieved via the world wide web at http://www.erlang.org/.
8 %%%
9 %%% Software distributed under the License is distributed on an "AS IS"
10 %%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
11 %%% the License for the specific language governing rights and limitations
12 %%% under the License.
13 %%%
14 %%% The Initial Developer of the Original Code is A2Z Development USA, Inc.
15 %%% All Rights Reserved.
16
17 -module(json).
18 -export([encode/1, decode_string/1, decode/2]).
19 -export([is_obj/1, obj_new/0, obj_fetch/2, obj_find/2, obj_is_key/2]).
20 -export([obj_store/3, obj_from_list/1, obj_fold/3]).
21 -export([test/0]).
22 -author("Jim Larson <jalarson@amazon.com>, Robert Wai-Chi Chu <robchu@amazon.com>").
23 -author("Gaspar Chilingarov <nm@web.am>, Gurgen Tumanyan <barbarian@armkb.com>").
24 -vsn("2").
25
26 %%% JavaScript Object Notation ("JSON", http://www.json.org) is a simple
27 %%% data syntax meant as a lightweight alternative to other representations,
28 %%% such as XML. JSON is natively supported by JavaScript, but many
29 %%% other languages have conversion libraries available.
30 %%%
31 %%% This module translates JSON types into the following Erlang types:
32 %%%
33 %%% JSON Erlang
34 %%% ---- ------
35 %%% number number
36 %%% string string
37 %%% array {array, ElementList}
38 %%% object tagged proplist with string (or atom) keys (i.e. {struct, PropList} )
39 %%% true, false, null atoms 'true', 'false', and 'null'
40 %%%
41 %%% Character Sets: the external representation, and the internal
42 %%% representation of strings, are lists of UTF-16 code units.
43 %%% The encoding of supplementary characters, as well as
44 %%% transcoding to other schemes, such as UTF-8, can be provided
45 %%% by other modules. (See discussion at
46 %%% http://groups.yahoo.com/group/json/message/52)
47 %%%
48 %%%######################################################################
49 %%% UPD by Gaspar: for this moment utf-8 encoding inplemented by default
50 %%% if incoming character list have symbols with codes
51 %%% > 255
52 %%%######################################################################
53 %%%
54 %%% Numbers: Thanks to Erlang's bignums, JSON-encoded integers of any
55 %%% size can be parsed. Conversely, extremely large integers may
56 %%% be JSON-encoded. This may cause problems for interoperability
57 %%% with JSON parsers which can't handle arbitrary-sized integers.
58 %%% Erlang's floats are of fixed precision and limited range, so
59 %%% syntactically valid JSON floating-point numbers could silently
60 %%% lose precision or noisily cause an overflow. However, most
61 %%% other JSON libraries are likely to behave in the same way.
62 %%% The encoding precision defaults to 6 digits.
63 %%%
64 %%% Strings: If we represented JSON string data as Erlang binaries,
65 %%% we would have to choose a particular unicode format. Instead,
66 %%% we use lists of UTF-16 code units, which applications may then
67 %%% change to binaries in their application-preferred manner.
68 %%%
69 %%% Arrays: Because of the string decision above, and Erlang's
70 %%% lack of a distinguished string datatype, JSON arrays map
71 %%% to Erlang tuples. Consider utilities like tuple_fold/3
72 %%% to deal with tuples in their native form.
73 %%%######################################################################
74 %%% UPD by Gaspar: array changed to {array, ArrayElementList}
75 %%% ArrayElementList -> list
76 %%% to provide compatibility to xmlrpc module
77 %%%######################################################################
78 %%%
79 %%% Objects: Though not explicitly stated in the JSON "spec",
80 %%% JSON's JavaScript heritage mandates that member names must
81 %%% be unique within an object. The object/tuple ambiguity is
82 %%% not a problem, since the atom 'struct' is not an
83 %%% allowable value. Object keys may be atoms or strings on
84 %%% encoding but are always decoded as strings.
85 %%%
86 %%%######################################################################
87 %%% UPD by Gaspar: struct changed to {array, PropList}
88 %%% object keys always decoded to atoms to
89 %%% provide full compatility with xmlrpc module
90 %%%######################################################################
91 %%%
92
93 %%% ENCODING
94
95 %% Encode an erlang number, string, tuple, or object to JSON syntax, as a
96 %% possibly deep list of UTF-16 code units, throwing a runtime error in the
97 %% case of un-convertible input.
98 %% Note: object keys may be either strings or atoms.
99
100 encode(true) -> "true";
101 encode(false) -> "false";
102 encode(null) -> "null";
5749449 @klacke rpc patch by adam.boz@gmail.com
authored
103 encode(undefined) -> "null";
47e5a7c @klacke JSON ajax code from Gaspar Chilingarov, I added docs describing an ex…
authored
104 encode(I) when is_integer(I) -> integer_to_list(I);
105 encode(F) when is_float(F) -> io_lib:format("~g", [F]);
106 encode(L) when is_list(L) ->
107 case is_string(L) of
108 yes -> encode_string(L);
109 unicode -> encode_string(xmerl_ucs:to_utf8(L));
110 no -> exit({json_encode, {not_string, L}})
111 end;
112 encode({array, Props}) when is_list(Props) -> encode_array(Props);
113 encode({struct, Props} = T) when is_list(Props) -> encode_object(T);
114 encode(Bad) -> exit({json_encode, {bad_term, Bad}}).
115
116 %% Encode an Erlang string to JSON.
117 %% Accumulate strings in reverse.
118
119 encode_string(S) -> encode_string(S, [$"]). % " fix highlight for vim :)
120
121 encode_string([], Acc) -> lists:reverse([$" | Acc]); % " fix highlight for vim :)
122 encode_string([C | Cs], Acc) ->
123 case C of
124 $" -> encode_string(Cs, [$", $\\ | Acc]);
125 % (don't escape solidus on encode)
126 $\\ -> encode_string(Cs, [$\\, $\\ | Acc]);
127 $\b -> encode_string(Cs, [$b, $\\ | Acc]); % note missing \
128 $\f -> encode_string(Cs, [$f, $\\ | Acc]);
129 $\n -> encode_string(Cs, [$n, $\\ | Acc]);
130 $\r -> encode_string(Cs, [$r, $\\ | Acc]);
131 $\t -> encode_string(Cs, [$t, $\\ | Acc]);
132 C when C >= 0, C < $\s ->
133 % Control characters must be unicode-encoded.
134 Hex = lists:flatten(io_lib:format("~4.16.0b", [C])),
135 encode_string(Cs, lists:reverse(Hex) ++ "u\\" ++ Acc); % "
136 C when C =< 16#FFFF -> encode_string(Cs, [C | Acc]);
137 _ -> exit({json_encode, {bad_char, C}})
138 end.
139
140 %% Encode an Erlang object as a JSON object, allowing string or atom keys.
141 %% Note that order is irrelevant in both internal and external object
142 %% representations. Nevertheless, the output will respect the order
143 %% of the input.
144
145 encode_object({struct, _Props} = Obj) ->
146 M = obj_fold(fun({Key, Value}, Acc) ->
147 S = case Key of
148 L when is_list(L) -> encode_string(L);
149 A when is_atom(A) -> encode_string(atom_to_list(A));
150 _ -> exit({json_encode, {bad_key, Key}})
151 end,
152 V = encode(Value),
153 case Acc of
154 [] -> [S, $:, V];
155 _ -> [Acc, $,, S, $:, V]
156 end
157 end, [], Obj),
158 [${, M, $}].
159
160 %% Encode an Erlang tuple as a JSON array.
161 %% Order *is* significant in a JSON array!
162
163 encode_array(T) ->
164 M = lists:foldl(fun(E, Acc) ->
165 V = encode(E),
166 case Acc of
167 [] -> V;
168 _ -> [Acc, $,, V]
169 end
170 end, [], T),
171 [$[, M, $]].
172
173 %%% SCANNING
174 %%%
175 %%% Scanning funs return either:
176 %%% {done, Result, LeftOverChars}
177 %%% if a complete token is recognized, or
178 %%% {more, Continuation}
179 %%% if more input is needed.
180 %%% Result is {ok, Term}, 'eof', or {error, Reason}.
181 %%% Here, the Continuation is a simple Erlang string.
182 %%%
183 %%% Currently, error handling is rather crude - errors are recognized
184 %%% by match failures. EOF is handled only by number scanning, where
185 %%% it can delimit a number, and otherwise causes a match failure.
186 %%%
187 %%% Tokens are one of the following
188 %%% JSON string -> erlang string
189 %%% JSON number -> erlang number
190 %%% true, false, null -> erlang atoms
191 %%% { } [ ] : , -> lcbrace rcbrace lsbrace rsbrace colon comma
192
193 token([]) -> {more, []};
194 token(eof) -> {done, eof, []};
195
196 token("true" ++ Rest) -> {done, {ok, true}, Rest};
197 token("tru") -> {more, "tru"};
198 token("tr") -> {more, "tr"};
199 token("t") -> {more, "t"};
200
201 token("false" ++ Rest) -> {done, {ok, false}, Rest};
202 token("fals") -> {more, "fals"};
203 token("fal") -> {more, "fal"};
204 token("fa") -> {more, "fa"};
205 token("f") -> {more, "f"};
206
207 token("null" ++ Rest) -> {done, {ok, null}, Rest};
208 token("nul") -> {more, "nul"};
209 token("nu") -> {more, "nu"};
210 token("n") -> {more, "n"};
211
212 token([C | Cs] = Input) ->
213 case C of
214 $\s -> token(Cs); % eat whitespace
215 $\t -> token(Cs); % eat whitespace
216 $\n -> token(Cs); % eat whitespace
217 $\r -> token(Cs); % eat whitespace
218 $" -> scan_string(Input);
219 $- -> scan_number(Input);
220 D when D >= $0, D =< $9-> scan_number(Input);
221 ${ -> {done, {ok, lcbrace}, Cs};
222 $} -> {done, {ok, rcbrace}, Cs};
223 $[ -> {done, {ok, lsbrace}, Cs};
224 $] -> {done, {ok, rsbrace}, Cs};
225 $: -> {done, {ok, colon}, Cs};
226 $, -> {done, {ok, comma}, Cs};
227 $/ -> case scan_comment(Cs) of
228 {more, X} -> {more, X};
229 {done, _, Chars} -> token(Chars)
230 end;
231 _ -> {done, {error, {bad_char, C}}, Cs}
232 end.
233
234 scan_string([$" | Cs] = Input) ->
235 scan_string(Cs, [], Input).
236
237 %% Accumulate in reverse order, save original start-of-string for continuation.
238
239 scan_string([], _, X) -> {more, X};
240 scan_string(eof, _, X) -> {done, {error, missing_close_quote}, X};
241 scan_string([$" | Rest], A, _) -> {done, {ok, lists:reverse(A)}, Rest};
242 scan_string([$\\], _, X) -> {more, X};
243 scan_string([$\\, $u, U1, U2, U3, U4 | Rest], A, X) ->
244 scan_string(Rest, [uni_char([U1, U2, U3, U4]) | A], X);
245 scan_string([$\\, $u | _], _, X) -> {more, X};
246 scan_string([$\\, C | Rest], A, X) ->
247 scan_string(Rest, [esc_to_char(C) | A], X);
248 scan_string([C | Rest], A, X) ->
249 scan_string(Rest, [C | A], X).
250
251 %% Given a list of hex characters, convert to the corresponding integer.
252
253 uni_char(HexList) ->
254 erlang:list_to_integer(HexList, 16).
255
256 esc_to_char($") -> $";
257 esc_to_char($/) -> $/;
258 esc_to_char($\\) -> $\\;
259 esc_to_char($b) -> $\b;
260 esc_to_char($f) -> $\f;
261 esc_to_char($n) -> $\n;
262 esc_to_char($r) -> $\r;
263 esc_to_char($t) -> $\t.
264
265 scan_number([]) -> {more, []};
266 scan_number(eof) -> {done, {error, incomplete_number}, []};
a7e93df @klacke patch from Magnus froberg to get better control over the files genera…
authored
267 scan_number([$-, $- | _Ds]) -> {done, {error, invalid_number}, []};
47e5a7c @klacke JSON ajax code from Gaspar Chilingarov, I added docs describing an ex…
authored
268 scan_number([$- | Ds] = Input) ->
269 case scan_number(Ds) of
270 {more, _Cont} -> {more, Input};
271 {done, {ok, N}, CharList} -> {done, {ok, -1 * N}, CharList};
272 {done, Other, Chars} -> {done, Other, Chars}
273 end;
274 scan_number([D | Ds] = Input) when D >= $0, D =< $9 ->
275 scan_number(Ds, D - $0, Input).
276
277 %% Numbers don't have a terminator, so stop at the first non-digit,
278 %% and ask for more if we run out.
279
280 scan_number([], _A, X) -> {more, X};
281 scan_number(eof, A, _X) -> {done, {ok, A}, eof};
282 scan_number([$.], _A, X) -> {more, X};
283 scan_number([$., D | Ds], A, X) when D >= $0, D =< $9 ->
284 scan_fraction([D | Ds], A, X);
285 scan_number([D | Ds], A, X) when A > 0, D >= $0, D =< $9 ->
286 % Note that nonzero numbers can't start with "0".
287 scan_number(Ds, 10 * A + (D - $0), X);
288 scan_number([D | Ds], A, X) when D == $E; D == $e ->
289 scan_exponent_begin(Ds, float(A), X);
290 scan_number([D | _] = Ds, A, _X) when D < $0; D > $9 ->
291 {done, {ok, A}, Ds}.
292
293 scan_fraction(Ds, I, X) -> scan_fraction(Ds, [], I, X).
294
295 scan_fraction([], _Fs, _I, X) -> {more, X};
296 scan_fraction(eof, Fs, I, _X) ->
297 R = I + list_to_float("0." ++ lists:reverse(Fs)),
298 {done, {ok, R}, eof};
299 scan_fraction([D | Ds], Fs, I, X) when D >= $0, D =< $9 ->
300 scan_fraction(Ds, [D | Fs], I, X);
301 scan_fraction([D | Ds], Fs, I, X) when D == $E; D == $e ->
302 R = I + list_to_float("0." ++ lists:reverse(Fs)),
303 scan_exponent_begin(Ds, R, X);
304 scan_fraction(Rest, Fs, I, _X) ->
305 R = I + list_to_float("0." ++ lists:reverse(Fs)),
306 {done, {ok, R}, Rest}.
307
308 scan_exponent_begin(Ds, R, X) ->
309 scan_exponent_begin(Ds, [], R, X).
310
311 scan_exponent_begin([], _Es, _R, X) -> {more, X};
312 scan_exponent_begin(eof, _Es, _R, X) -> {done, {error, missing_exponent}, X};
313 scan_exponent_begin([D | Ds], Es, R, X) when D == $-;
314 D == $+;
315 D >= $0, D =< $9 ->
316 scan_exponent(Ds, [D | Es], R, X).
317
318 scan_exponent([], _Es, _R, X) -> {more, X};
319 scan_exponent(eof, Es, R, _X) ->
320 X = R * math:pow(10, list_to_integer(lists:reverse(Es))),
321 {done, {ok, X}, eof};
322 scan_exponent([D | Ds], Es, R, X) when D >= $0, D =< $9 ->
323 scan_exponent(Ds, [D | Es], R, X);
324 scan_exponent(Rest, Es, R, _X) ->
325 X = R * math:pow(10, list_to_integer(lists:reverse(Es))),
326 {done, {ok, X}, Rest}.
327
328 scan_comment([]) -> {more, "/"};
329 scan_comment(eof) -> {done, eof, []};
330 scan_comment([$/ | Rest]) -> scan_cpp_comment(Rest);
331 scan_comment([$* | Rest]) -> scan_c_comment(Rest).
332
333 %% Ignore up to next CR or LF. If the line ends in CRLF,
334 %% the LF will be treated as separate whitespace, which is
335 %% okay since it will also be ignored.
336
337 scan_cpp_comment([]) -> {more, "//"};
338 scan_cpp_comment(eof) -> {done, eof, []};
339 scan_cpp_comment([$\r | Rest]) -> {done, [], Rest};
340 scan_cpp_comment([$\n | Rest]) -> {done, [], Rest};
341 scan_cpp_comment([_ | Rest]) -> scan_cpp_comment(Rest).
342
343 scan_c_comment([]) -> {more, "/*"};
344 scan_c_comment(eof) -> {done, eof, []};
345 scan_c_comment([$*]) -> {more, "/**"};
346 scan_c_comment([$*, $/ | Rest]) -> {done, [], Rest};
347 scan_c_comment([_ | Rest]) -> scan_c_comment(Rest).
348
349 %%% PARSING
350 %%%
351 %%% The decode function takes a char list as input, but
352 %%% interprets the end of the list as only an end to the available
353 %%% input, and returns a "continuation" requesting more input.
354 %%% When additional characters are available, they, and the
355 %%% continuation, are fed into decode/2. You can use the atom 'eof'
356 %%% as a character to signal a true end to the input stream, and
357 %%% possibly flush out an unfinished number. The decode_string/1
358 %%% function appends 'eof' to its input and calls decode/1.
359 %%%
360 %%% Parsing and scanning errors are handled only by match failures.
361 %%% The external caller must take care to wrap the call in a "catch"
362 %%% or "try" if better error-handling is desired. Eventually parse
363 %%% or scan errors will be returned explicitly with a description,
364 %%% and someday with line numbers too.
365 %%%
366 %%% The parsing code uses a continuation-passing style to allow
367 %%% for the parsing to suspend at any point and be resumed when
368 %%% more input is available.
369 %%% See http://en.wikipedia.org/wiki/Continuation_passing_style
370
371 %% Return the first JSON value decoded from the input string.
372 %% The string must contain at least one complete JSON value.
373
374 decode_string(CharList) ->
375 {done, V, _} = decode([], CharList ++ eof),
376 V.
377
378 %% Attempt to decode a JSON value from the input string
379 %% and continuation, using empty list for the initial continuation.
380 %% Return {done, Result, LeftoverChars} if a value is recognized,
381 %% or {more, Continuation} if more input characters are needed.
382 %% The Result can be {ok, Value}, eof, or {error, Reason}.
383 %% The Continuation is then fed as an argument to decode/2 when
384 %% more input is available.
385 %% Use the atom 'eof' instead of a char list to signal
386 %% a true end to the input, and may flush a final number.
387
388 decode([], CharList) ->
389 decode(first_continuation(), CharList);
390
391 decode(Continuation, CharList) ->
392 {OldChars, Kt} = Continuation,
393 get_token(OldChars ++ CharList, Kt).
394
395 first_continuation() ->
396 {[], fun
397 (eof, Cs) ->
398 {done, eof, Cs};
399 (T, Cs) ->
400 parse_value(T, Cs, fun(V, C2) ->
401 {done, {ok, V}, C2}
402 end)
403 end}.
404
405 %% Continuation Kt must accept (TokenOrEof, Chars)
406
407 get_token(Chars, Kt) ->
408 case token(Chars) of
409 {done, {ok, T}, Rest} -> Kt(T, Rest);
410 {done, eof, Rest} -> Kt(eof, Rest);
411 {done, {error, Reason}, Rest} -> {done, {error, Reason}, Rest};
412 {more, X} -> {more, {X, Kt}}
413 end.
414
415 %% Continuation Kv must accept (Value, Chars)
416
417 parse_value(eof, C, _Kv) -> {done, {error, premature_eof}, C};
418 parse_value(true, C, Kv) -> Kv(true, C);
419 parse_value(false, C, Kv) -> Kv(false, C);
420 parse_value(null, C, Kv) -> Kv(null, C);
421 parse_value(S, C, Kv) when is_list(S) -> Kv(S, C);
422 parse_value(N, C, Kv) when is_number(N) -> Kv(N, C);
423 parse_value(lcbrace, C, Kv) -> parse_object(C, Kv);
424 parse_value(lsbrace, C, Kv) -> parse_array(C, Kv);
425 parse_value(_, C, _Kv) -> {done, {error, syntax_error}, C}.
426
427 %% Continuation Kv must accept (Value, Chars)
428
429 parse_object(Chars, Kv) ->
430 get_token(Chars, fun(T, C2) ->
431 Obj = obj_new(),
432 case T of
433 rcbrace -> Kv(Obj, C2); % empty object
434 _ -> parse_object(Obj, T, C2, Kv) % token must be string
435 end
436 end).
437
438 parse_object(_Obj, eof, C, _Kv) ->
439 {done, {error, premature_eof}, C};
440
441 parse_object(Obj, S, C, Kv) when is_list(S) -> % S is member name
442 get_token(C, fun
443 (colon, C2) ->
444 parse_object2(Obj, S, C2, Kv);
445 (T, C2) ->
446 {done, {error, {expecting_colon, T}}, C2}
447 end);
448
449 parse_object(_Obj, M, C, _Kv) ->
450 {done, {error, {member_name_not_string, M}}, C}.
451
452 parse_object2(Obj, S, C, Kv) ->
453 get_token(C, fun
454 (eof, C2) ->
455 {done, {error, premature_eof}, C2};
456 (T, C2) ->
457 parse_value(T, C2, fun(V, C3) -> % V is member value
458 Obj2 = obj_store(S, V, Obj),
459 get_token(C3, fun
460 (rcbrace, C4) -> % "}" end of object
461 {struct, PropList1} = Obj2,
462 Kv({struct, lists:reverse(PropList1)}, C4);
463 (comma, C4) -> % "," another member follows
464 get_token(C4, fun(T3, C5) ->
465 parse_object(Obj2, T3, C5, Kv)
466 end);
467 (eof, C4) ->
468 {done, {error, premature_eof}, C4};
469 (T2, C4) ->
470 {done, {error, {expecting_comma_or_curly, T2}}, C4}
471 end)
472 end)
473 end).
474
475 %% Continuation Kv must accept (Value, Chars)
476
477 parse_array(C, Kv) ->
478 get_token(C, fun
479 (eof, C2) -> {done, {error, premature_eof}, C2};
f0ec0e1 @klacke empty array bug in json parse found by juhani and fixed by gaspar
authored
480 (rsbrace, C2) -> Kv({array, []}, C2); % empty array
47e5a7c @klacke JSON ajax code from Gaspar Chilingarov, I added docs describing an ex…
authored
481 (T, C2) -> parse_array([], T, C2, Kv)
482 end).
483
484 parse_array(E, T, C, Kv) ->
485 parse_value(T, C, fun(V, C2) ->
486 E2 = [V | E],
487 get_token(C2, fun
488 (rsbrace, C3) -> % "]" end of array
489 Kv({array, lists:reverse(E2)}, C3);
490
491 (comma, C3) -> % "," another value follows
492 get_token(C3, fun(T3, C4) ->
493 parse_array(E2, T3, C4, Kv)
494 end);
495 (eof, C3) ->
496 {done, {error, premature_eof}, C3};
497 (T2, C3) ->
498 {done, {error, {expecting_comma_or_close_array, T2}}, C3}
499 end)
500 end).
501
502 %%% OBJECTS
503 %%%
504 %%% We'll use tagged property lists as the internal representation
505 %%% of JSON objects. Unordered lists perform worse than trees for
506 %%% lookup and modification of members, but we expect objects to be
507 %%% have only a few members. Lists also print better.
508
509 %% Is this a proper JSON object representation?
510
511 is_obj({struct, Props}) when is_list(Props) ->
512 lists:all(fun
513 ({Member, _Value}) when is_atom(Member); is_list(Member) -> true;
514 (_) -> false
515 end, Props);
516
517 is_obj(_) ->
518 false.
519
520 %% Create a new, empty object.
521
522 obj_new() ->
523 {struct, []}.
524
525 %% Fetch an object member's value, expecting it to be in the object.
526 %% Return value, runtime error if no member found with that name.
527
528 obj_fetch(Key, {struct, Props}) when is_list(Props) ->
529 case proplists:get_value(Key, Props) of
530 undefined ->
531 exit({struct_no_key, Key});
532 Value ->
533 Value
534 end.
535
536 %% Fetch an object member's value, or indicate that there is no such member.
537 %% Return {ok, Value} or 'error'.
538
539 obj_find(Key, {struct, Props}) when is_list(Props) ->
540 case proplists:get_value(Key, Props) of
541 undefined ->
542 error;
543 Value ->
544 {ok, Value}
545 end.
546
547 obj_is_key(Key, {struct, Props}) ->
548 proplists:is_defined(Key, Props).
549
550 %% Store a new member in an object. Returns a new object.
551
552 obj_store(KeyList, Value, {struct, Props}) when is_list(Props) ->
553 Key = list_to_atom(KeyList),
554 {struct, [{Key, Value} | proplists:delete(Key, Props)]}.
555
556 %% Create an object from a list of Key/Value pairs.
557
558 obj_from_list(Props) ->
559 Obj = {struct, Props},
560 case is_obj(Obj) of
561 true -> Obj;
562 false -> exit(json_bad_object)
563 end.
564
565 %% Fold Fun across object, with initial accumulator Acc.
566 %% Fun should take (Value, Acc) as arguments and return Acc.
567
568 obj_fold(Fun, Acc, {struct, Props}) ->
569 lists:foldl(Fun, Acc, Props).
570
571 is_string([]) -> yes;
572 is_string(List) -> is_string(List, non_unicode).
573
574 is_string([C|Rest], non_unicode) when C >= 0, C =< 255 -> is_string(Rest, non_unicode);
575 is_string([C|Rest], _) when C =< 65000 -> is_string(Rest, unicode);
576 is_string([], non_unicode) -> yes;
577 is_string([], unicode) -> unicode;
578 is_string(_, _) -> no.
579
580
581 %%% TESTING
582 %%%
583 %%% We can't expect to round-trip from JSON -> Erlang -> JSON,
584 %%% due to the degrees of freedom in the JSON syntax: whitespace,
585 %%% and ordering of object members. We can, however, expect to
586 %%% round-trip from Erlang -> JSON -> Erlang, so the JSON parsing
587 %%% tests will in fact test the Erlang equivalence of the
588 %%% JSON -> Erlang -> JSON -> Erlang coding chain.
589
590 %% Test driver. Return 'ok' or {failed, Failures}.
591
592 test() ->
593 E2Js = e2j_test_vec(),
594 Failures = lists:foldl(fun({E, J}, Fs) ->
595 case (catch test_e2j(E, J)) of
596 ok ->
597 case (catch round_trip(E)) of
598 ok ->
599 case (catch round_trip_one_char(E)) of
600 ok -> Fs;
601 Reason -> [{round_trip_one_char, E, Reason} | Fs]
602 end;
603 Reason ->
604 [{round_trip, E, Reason} | Fs]
605 end;
606 Reason ->
607 [{erlang_to_json, E, J, Reason} | Fs]
608 end;
609 (end_of_tests, Fs) -> Fs end, [], E2Js),
610 case Failures of
611 [] -> ok;
612 _ -> {failed, Failures}
613 end.
614
615 %% Test for conversion from Erlang to JSON. Note that unequal strings
616 %% may represent equal JSON data, due to discretionary whitespace,
617 %% object member order, trailing zeroes in floating point, etc.
618 %% Legitimate changes to the encoding routines may require tweaks to
619 %% the reference JSON strings in e2j_test_vec().
620
621 test_e2j(E, J) ->
622 J2 = lists:flatten(encode(E)),
623 J = J2, % raises error if unequal
624 ok.
625
626 %% Test that Erlang -> JSON -> Erlang round-trip yields equivalent term.
627
628 round_trip(E) ->
629 J2 = lists:flatten(encode(E)),
630 {ok, E2} = decode_string(J2),
631 true = equiv(E, E2), % raises error if false
632 ok.
633
634 %% Round-trip with one character at a time to test all continuations.
635
636 round_trip_one_char(E) ->
637 J = lists:flatten(encode(E)),
638 {done, {ok, E2}, _} = lists:foldl(fun(C, Ret) ->
639 case Ret of
640 {done, _, _} -> Ret;
641 {more, Cont} -> decode(Cont, [C])
642 end
643 end, {more, first_continuation()}, J ++ [eof]),
644 true = equiv(E, E2), % raises error if false
645 ok.
646
647 %% Test for equivalence of Erlang terms.
648 %% Due to arbitrary order of construction, equivalent objects might
649 %% compare unequal as erlang terms, so we need to carefully recurse
650 %% through aggregates (tuples and objects).
651
652 equiv({struct, Props1}, {struct, Props2}) ->
653 equiv_object(Props1, Props2);
654 equiv(T1, T2) when is_tuple(T1), is_tuple(T2) ->
655 equiv_tuple(T1, T2);
656 equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
657 equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
658 equiv(true, true) -> true;
659 equiv(false, false) -> true;
660 equiv(null, null) -> true.
661
662 %% Object representation and traversal order is unknown.
663 %% Use the sledgehammer and sort property lists.
664
665 equiv_object(Props1, Props2) ->
666 L1 = lists:keysort(1, Props1),
667 L2 = lists:keysort(1, Props2),
668 Pairs = lists:zip(L1, L2),
669 true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
670 equiv(K1, K2) and equiv(V1, V2)
671 end, Pairs).
672
673 %% Recursively compare tuple elements for equivalence.
674
675 equiv_tuple({}, {}) ->
676 true;
677 equiv_tuple(T1, T2) when size(T1) == size(T2) ->
678 S = size(T1),
679 lists:all(fun(I) ->
680 equiv(element(I, T1), element(I, T2))
681 end, lists:seq(1, S)).
682
683 e2j_test_vec() -> [
684 {1, "1"},
685 {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
686 {-1, "-1"},
687 {-3.1416, "-3.14160"},
688 {12.0e10, "1.20000e+11"},
689 {1.234E+10, "1.23400e+10"},
690 {-1.234E-10, "-1.23400e-10"},
691 {"foo", "\"foo\""},
692 {"foo" ++ [500] ++ "bar", [$", $f, $o, $o, 500, $b, $a, $r, $"]},
693 {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
694 {"", "\"\""},
695 {[], "\"\""},
696 {"\n\n\n", "\"\\n\\n\\n\""},
697 {obj_new(), "{}"},
698 {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
699 {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
700 "{\"foo\":\"bar\",\"baz\":123}"},
701 {{}, "[]"},
702 {{{}}, "[[]]"},
703 {{1, "foo"}, "[1,\"foo\"]"},
704
705 % json array in a json object
706 {obj_from_list([{"foo", {123}}]),
707 "{\"foo\":[123]}"},
708
709 % json object in a json object
710 {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
711 "{\"foo\":{\"bar\":true}}"},
712
713 % fold evaluation order
714 {obj_from_list([{"foo", {}},
715 {"bar", obj_from_list([{"baz", true}])},
716 {"alice", "bob"}]),
717 "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
718
719 % json object in a json array
720 {{-123, "foo", obj_from_list([{"bar", {}}]), null},
721 "[-123,\"foo\",{\"bar\":[]},null]"},
722
723 end_of_tests
724 ].
725
726 %%% TODO:
727 %%%
728 %%% Measure the overhead of the CPS-based parser by writing a conventional
729 %%% scanner-parser that expects all input to be available.
730 %%%
731 %%% JSON has dropped comments - disable their parsing.
732 %%%
733 %%% Allow a compile-time option to decode object member names as atoms,
734 %%% to reduce the internal representation overheads when communicating
735 %%% with trusted peers.
Something went wrong with that request. Please try again.