Skip to content

Commit

Permalink
http://code.google.com/p/mochiweb/issues/detail?id=47
Browse files Browse the repository at this point in the history
  • Loading branch information
etrepum committed Nov 15, 2009
1 parent 6d300db commit de900ba
Showing 1 changed file with 42 additions and 2 deletions.
44 changes: 42 additions & 2 deletions src/mochijson2.erl
Expand Up @@ -354,10 +354,24 @@ tokenize_string_fast(B, O) ->
case B of
<<_:O/binary, ?Q, _/binary>> ->
O;
<<_:O/binary, C, _/binary>> when C =/= $\\ ->
<<_:O/binary, $\\, _/binary>> ->
{escape, O};
<<_:O/binary, C1, _/binary>> when C1 < 128 ->
tokenize_string_fast(B, 1 + O);
<<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
C2 >= 128, C2 =< 191 ->
tokenize_string_fast(B, 2 + O);
<<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
C2 >= 128, C2 =< 191,
C3 >= 128, C3 =< 191 ->
tokenize_string_fast(B, 3 + O);
<<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
C2 >= 128, C2 =< 191,
C3 >= 128, C3 =< 191,
C4 >= 128, C4 =< 191 ->
tokenize_string_fast(B, 4 + O);
_ ->
{escape, O}
throw(invalid_utf8)
end.

tokenize_string(B, S=#decoder{offset=O}, Acc) ->
Expand Down Expand Up @@ -556,6 +570,7 @@ test_all() ->
[1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
<<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]),
test_encoder_utf8(),
test_input_validation(),
test_one(e2j_test_vec(utf8), 1).

test_one([], _N) ->
Expand Down Expand Up @@ -621,3 +636,28 @@ test_encoder_utf8() ->
Enc = mochijson2:encoder([{utf8, true}]),
[34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
Enc(<<1,"\321\202\320\265\321\201\321\202">>).

%% Exercises UTF-8 handling of raw (unescaped) bytes inside JSON strings:
%% well-formed multi-byte sequences must decode to the expected binary and
%% malformed ones must make decode/1 throw invalid_utf8.
%% Any failed expectation surfaces as a badmatch; returns ok otherwise.
test_input_validation() ->
    %% A quoted JSON string must decode to the UTF-8 encoding of CodePoint.
    DecodesTo = fun({CodePoint, Json}) ->
                        Wanted = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
                        Wanted = decode(Json)
                end,
    %% decode/1 has to throw invalid_utf8; a normal return value would not
    %% match ok and therefore fails the test.
    IsRejected = fun(Json) ->
                         ok = try
                                  decode(Json)
                              catch
                                  throw:invalid_utf8 -> ok
                              end
                 end,
    WellFormed = [
                  %% two-byte sequence: pound sign
                  {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>},
                  %% three-byte sequence: euro sign
                  {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>},
                  %% four-byte sequence: denarius sign
                  {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>}
                 ],
    Malformed = [
                 %% a continuation byte with no leading byte before it
                 <<?Q, 16#80, ?Q>>,
                 %% the final byte of each sequence is 7F, outside the
                 %% 80-BF range required of a continuation byte
                 <<?Q, 16#C2, 16#7F, ?Q>>,
                 <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
                 <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
                 %% leading byte F5 would encode a code point above 10FFFF,
                 %% which RFC 3629 does not allow
                 <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>
                ],
    lists:foreach(DecodesTo, WellFormed),
    lists:foreach(IsRejected, Malformed).

0 comments on commit de900ba

Please sign in to comment.