Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

515 lines (459 sloc) 16.848 kb
%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2007 Mochi Media, Inc.
%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
%% with binaries as strings, arrays as lists (without an {array, _})
%% wrapper and it only knows how to decode UTF-8 (and ASCII).
-module(mochijson2).
-author('bob@mochimedia.com').
-export([encoder/1, encode/1]).
-export([decoder/1, decode/1]).
-export([test/0]).
% This is a macro to placate syntax highlighters..
-define(Q, $\").
-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
column=N+S#decoder.column}).
-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
column=1+S#decoder.column}).
-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
column=1,
line=1+S#decoder.line}).
-define(INC_CHAR(S, C),
case C of
$\n ->
S#decoder{column=1,
line=1+S#decoder.line,
offset=1+S#decoder.offset};
_ ->
S#decoder{column=1+S#decoder.column,
offset=1+S#decoder.offset}
end).
-define(IS_WHITESPACE(C),
(C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
%% @type iolist() = [char() | binary() | iolist()]
%% @type iodata() = iolist() | binary()
%% @type json_string() = atom | binary()
%% @type json_number() = integer() | float()
%% @type json_array() = [json_term()]
%% @type json_object() = {struct, [{json_string(), json_term()}]}
%% @type json_term() = json_string() | json_number() | json_array() |
%% json_object()
-record(encoder, {handler=null}).
-record(decoder, {object_hook=null,
offset=0,
line=1,
column=1,
state=null}).
%% @spec encoder([encoder_option()]) -> function()
%% @doc Create an encoder/1 with the given options.
encoder(Options) ->
State = parse_encoder_options(Options, #encoder{}),
fun (O) -> json_encode(O, State) end.
%% @spec encode(json_term()) -> iolist()
%% @doc Encode the given as JSON to an iolist.
encode(Any) ->
json_encode(Any, #encoder{}).
%% @spec decoder([decoder_option()]) -> function()
%% @doc Create a decoder/1 with the given options.
decoder(Options) ->
State = parse_decoder_options(Options, #decoder{}),
fun (O) -> json_decode(O, State) end.
%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist to Erlang terms.
decode(S) ->
json_decode(S, #decoder{}).
test() ->
test_all().
%% Internal API
parse_encoder_options([], State) ->
State;
parse_encoder_options([{handler, Handler} | Rest], State) ->
parse_encoder_options(Rest, State#encoder{handler=Handler}).
parse_decoder_options([], State) ->
State;
parse_decoder_options([{object_hook, Hook} | Rest], State) ->
parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
format_float(F) ->
format_float1(lists:reverse(float_to_list(F)), []).
format_float1([$0, $0, _, $e | Rest], []) ->
strip_zeros(Rest, []);
format_float1([Sign, $e | Rest], Acc) ->
strip_zeros(Rest, [$e, Sign | Acc]);
format_float1([C | Rest], Acc) ->
format_float1(Rest, [C | Acc]).
strip_zeros(L=[$0, $. | _], Acc) ->
lists:reverse(L, Acc);
strip_zeros([$0 | Rest], Acc) ->
strip_zeros(Rest, Acc);
strip_zeros(L, Acc) ->
lists:reverse(L, Acc).
json_encode(true, _State) ->
<<"true">>;
json_encode(false, _State) ->
<<"false">>;
json_encode(null, _State) ->
<<"null">>;
json_encode(I, _State) when is_integer(I) ->
integer_to_list(I);
json_encode(F, _State) when is_float(F) ->
format_float(F);
json_encode(S, State) when is_binary(S); is_atom(S) ->
json_encode_string(S, State);
json_encode(Array, State) when is_list(Array) ->
json_encode_array(Array, State);
json_encode({struct, Props}, State) when is_list(Props) ->
json_encode_proplist(Props, State);
json_encode(Bad, #encoder{handler=null}) ->
exit({json_encode, {bad_term, Bad}});
json_encode(Bad, State=#encoder{handler=Handler}) ->
json_encode(Handler(Bad), State).
json_encode_array([], _State) ->
<<"[]">>;
json_encode_array(L, State) ->
F = fun (O, Acc) ->
[$,, json_encode(O, State) | Acc]
end,
[$, | Acc1] = lists:foldl(F, "[", L),
lists:reverse([$\] | Acc1]).
json_encode_proplist([], _State) ->
<<"{}">>;
json_encode_proplist(Props, State) ->
F = fun ({K, V}, Acc) ->
KS = json_encode_string(K, State),
VS = json_encode(V, State),
[$,, VS, $:, KS | Acc]
end,
[$, | Acc1] = lists:foldl(F, "{", Props),
lists:reverse([$\} | Acc1]).
json_encode_string(A, _State) when is_atom(A) ->
json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A)), [?Q]);
json_encode_string(B, _State) when is_binary(B) ->
json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q]);
json_encode_string(I, _State) when is_integer(I) ->
json_encode_string_unicode(integer_to_list(I), [?Q]);
json_encode_string(L, _State) when is_list(L) ->
json_encode_string_unicode(L, [?Q]).
json_encode_string_unicode([], Acc) ->
lists:reverse([$\" | Acc]);
json_encode_string_unicode([C | Cs], Acc) ->
Acc1 = case C of
?Q ->
[?Q, $\\ | Acc];
$/ ->
[$/, $\\ | Acc];
$\\ ->
[$\\, $\\ | Acc];
$\b ->
[$b, $\\ | Acc];
$\f ->
[$f, $\\ | Acc];
$\n ->
[$n, $\\ | Acc];
$\r ->
[$r, $\\ | Acc];
$\t ->
[$t, $\\ | Acc];
C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
[unihex(C) | Acc];
C when C < 16#7f ->
[C | Acc];
_ ->
exit({json_encode, {bad_char, C}})
end,
json_encode_string_unicode(Cs, Acc1).
hexdigit(C) when C >= 0, C =< 9 ->
C + $0;
hexdigit(C) when C =< 15 ->
C + $a - 10.
unihex(C) when C < 16#10000 ->
<<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
[$\\, $u | Digits];
unihex(C) when C =< 16#10FFFF ->
N = C - 16#10000,
S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
S2 = 16#dc00 bor (N band 16#3ff),
[unihex(S1), unihex(S2)].
json_decode(L, S) when is_list(L) ->
json_decode(iolist_to_binary(L), S);
json_decode(B, S) ->
{Res, S1} = decode1(B, S),
{eof, _} = tokenize(B, S1#decoder{state=trim}),
Res.
decode1(B, S=#decoder{state=null}) ->
case tokenize(B, S#decoder{state=any}) of
{{const, C}, S1} ->
{C, S1};
{start_array, S1} ->
decode_array(B, S1);
{start_object, S1} ->
decode_object(B, S1)
end.
make_object(V, #decoder{object_hook=null}) ->
V;
make_object(V, #decoder{object_hook=Hook}) ->
Hook(V).
decode_object(B, S) ->
decode_object(B, S#decoder{state=key}, []).
decode_object(B, S=#decoder{state=key}, Acc) ->
case tokenize(B, S) of
{end_object, S1} ->
V = make_object({struct, lists:reverse(Acc)}, S1),
{V, S1#decoder{state=null}};
{{const, K}, S1} ->
{colon, S2} = tokenize(B, S1),
{V, S3} = decode1(B, S2#decoder{state=null}),
decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
end;
decode_object(B, S=#decoder{state=comma}, Acc) ->
case tokenize(B, S) of
{end_object, S1} ->
V = make_object({struct, lists:reverse(Acc)}, S1),
{V, S1#decoder{state=null}};
{comma, S1} ->
decode_object(B, S1#decoder{state=key}, Acc)
end.
decode_array(B, S) ->
decode_array(B, S#decoder{state=any}, []).
decode_array(B, S=#decoder{state=any}, Acc) ->
case tokenize(B, S) of
{end_array, S1} ->
{lists:reverse(Acc), S1#decoder{state=null}};
{start_array, S1} ->
{Array, S2} = decode_array(B, S1),
decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
{start_object, S1} ->
{Array, S2} = decode_object(B, S1),
decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
{{const, Const}, S1} ->
decode_array(B, S1#decoder{state=comma}, [Const | Acc])
end;
decode_array(B, S=#decoder{state=comma}, Acc) ->
case tokenize(B, S) of
{end_array, S1} ->
{lists:reverse(Acc), S1#decoder{state=null}};
{comma, S1} ->
decode_array(B, S1#decoder{state=any}, Acc)
end.
tokenize_string(B, S) ->
tokenize_string(B, S, []).
tokenize_string(B, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, ?Q, _/binary>> ->
{{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
<<_:O/binary, "\\\"", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
<<_:O/binary, "\\\\", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
<<_:O/binary, "\\/", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
<<_:O/binary, "\\b", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
<<_:O/binary, "\\f", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
<<_:O/binary, "\\n", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
<<_:O/binary, "\\r", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
<<_:O/binary, "\\t", _/binary>> ->
tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
<<_:O/binary, "\\u", C3, C2, C1, C0, _/binary>> ->
%% coalesce UTF-16 surrogate pair?
C = erlang:list_to_integer([C3, C2, C1, C0], 16),
Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
tokenize_string(B, ?ADV_COL(S, 6), Acc1);
<<_:O/binary, C, _/binary>> ->
tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
end.
tokenize_number(B, S) ->
case tokenize_number(B, sign, S, []) of
{{int, Int}, S1} ->
{{const, list_to_integer(Int)}, S1};
{{float, Float}, S1} ->
{{const, list_to_float(Float)}, S1}
end.
tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
case B of
<<_:O/binary, $-, _/binary>> ->
tokenize_number(B, int, ?INC_COL(S), [$-]);
_ ->
tokenize_number(B, int, S, [])
end;
tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, $0, _/binary>> ->
tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
<<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
end;
tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
_ ->
tokenize_number(B, frac, S, Acc)
end;
tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
<<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
_ ->
{{int, lists:reverse(Acc)}, S}
end;
tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, C, _/binary>> when C >= 0 andalso C =< $9 ->
tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
<<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
_ ->
{{float, lists:reverse(Acc)}, S}
end;
tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
_ ->
tokenize_number(B, eint, S, Acc)
end;
tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
end;
tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
_ ->
{{float, lists:reverse(Acc)}, S}
end.
tokenize(B, S=#decoder{offset=O}) ->
case B of
<<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
tokenize(B, ?INC_CHAR(S, C));
<<_:O/binary, "{", _/binary>> ->
{start_object, ?INC_COL(S)};
<<_:O/binary, "}", _/binary>> ->
{end_object, ?INC_COL(S)};
<<_:O/binary, "[", _/binary>> ->
{start_array, ?INC_COL(S)};
<<_:O/binary, "]", _/binary>> ->
{end_array, ?INC_COL(S)};
<<_:O/binary, ",", _/binary>> ->
{comma, ?INC_COL(S)};
<<_:O/binary, ":", _/binary>> ->
{colon, ?INC_COL(S)};
<<_:O/binary, "null", _/binary>> ->
{{const, null}, ?ADV_COL(S, 4)};
<<_:O/binary, "true", _/binary>> ->
{{const, true}, ?ADV_COL(S, 4)};
<<_:O/binary, "false", _/binary>> ->
{{const, false}, ?ADV_COL(S, 5)};
<<_:O/binary, "\"", _/binary>> ->
tokenize_string(B, ?INC_COL(S));
<<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
orelse C =:= $- ->
tokenize_number(B, S);
<<_:O/binary>> ->
trim = S#decoder.state,
{eof, S}
end.
%% testing constructs borrowed from the Yaws JSON implementation.
%% Create an object from a list of Key/Value pairs.
obj_new() ->
{struct, []}.
is_obj({struct, Props}) ->
F = fun ({K, _}) when is_binary(K) ->
true;
(_) ->
false
end,
lists:all(F, Props).
obj_from_list(Props) ->
Obj = {struct, Props},
case is_obj(Obj) of
true -> Obj;
false -> exit({json_bad_object, Obj})
end.
%% Test for equivalence of Erlang terms.
%% Due to arbitrary order of construction, equivalent objects might
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).
equiv({struct, Props1}, {struct, Props2}) ->
equiv_object(Props1, Props2);
equiv(L1, L2) when is_list(L1), is_list(L2) ->
equiv_list(L1, L2);
equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
equiv(true, true) -> true;
equiv(false, false) -> true;
equiv(null, null) -> true.
%% Object representation and traversal order is unknown.
%% Use the sledgehammer and sort property lists.
equiv_object(Props1, Props2) ->
L1 = lists:keysort(1, Props1),
L2 = lists:keysort(1, Props2),
Pairs = lists:zip(L1, L2),
true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
equiv(K1, K2) and equiv(V1, V2)
end, Pairs).
%% Recursively compare tuple elements for equivalence.
equiv_list([], []) ->
true;
equiv_list([V1 | L1], [V2 | L2]) ->
case equiv(V1, V2) of
true ->
equiv_list(L1, L2);
false ->
false
end.
test_all() ->
test_one(e2j_test_vec(utf8), 1).
test_one([], _N) ->
%% io:format("~p tests passed~n", [N-1]),
ok;
test_one([{E, J} | Rest], N) ->
%% io:format("[~p] ~p ~p~n", [N, E, J]),
true = equiv(E, decode(J)),
true = equiv(E, decode(encode(E))),
test_one(Rest, 1+N).
e2j_test_vec(utf8) ->
[
{1, "1"},
{3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
{-1, "-1"},
{-3.1416, "-3.14160"},
{12.0e10, "1.20000e+11"},
{1.234E+10, "1.23400e+10"},
{-1.234E-10, "-1.23400e-10"},
{10.0, "1.0e+01"},
{123.456, "1.23456E+2"},
{10.0, "1e1"},
{<<"foo">>, "\"foo\""},
{<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
{<<"">>, "\"\""},
{<<"\n\n\n">>, "\"\\n\\n\\n\""},
{obj_new(), "{}"},
{obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
{obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
"{\"foo\":\"bar\",\"baz\":123}"},
{[], "[]"},
{[[]], "[[]]"},
{[1, <<"foo">>], "[1,\"foo\"]"},
%% json array in a json object
{obj_from_list([{<<"foo">>, [123]}]),
"{\"foo\":[123]}"},
%% json object in a json object
{obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
"{\"foo\":{\"bar\":true}}"},
%% fold evaluation order
{obj_from_list([{<<"foo">>, []},
{<<"bar">>, obj_from_list([{<<"baz">>, true}])},
{<<"alice">>, <<"bob">>}]),
"{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
%% json object in a json array
{[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
"[-123,\"foo\",{\"bar\":[]},null]"}
].
Jump to Line
Something went wrong with that request. Please try again.