Permalink
Browse files

WIP conversion to binaries

  • Loading branch information...
1 parent ff8276d commit 1c0f9f8418d24d18cdb96ec4941e16560030f3f0 @seancribbs committed Mar 7, 2010
Showing with 938 additions and 700 deletions.
  1. +1 −1 ebin/neotoma.app
  2. +162 −152 extra/csv.erl
  3. +3 −3 extra/csv.peg
  4. +174 −164 extra/json.erl
  5. +5 −5 extra/json.peg
  6. +57 −37 priv/neotoma_parse.peg
  7. +25 −18 priv/peg_includes.erl
  8. +4 −3 src/neotoma.erl
  9. +446 −267 src/neotoma_parse.erl
  10. +25 −18 src/neotoma_peg.erl
  11. +33 −29 test/test_combinators.erl
  12. +3 −3 test/test_memoization.erl
View
@@ -1,7 +1,7 @@
{application, neotoma,
[
{description, "PEG/Packrat toolkit and parser-generator."},
- {vsn, "1.4"},
+ {vsn, "1.5"},
{modules, [neotoma, neotoma_parse, neotoma_peg]},
{applications, [kernel, stdlib]}
]
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -21,11 +21,11 @@ case Node of
[Head|Tail]
end
`;
-field <- quoted_field / (!(field_sep / crlf) .)* `lists:flatten(Node)`;
+field <- quoted_field / (!field_sep !crlf .)* `iolist_to_binary(Node)`;
quoted_field <- '"' string:('""' / (!'"' .))* '"'
`
String = proplists:get_value(string, Node),
- re:replace(String, "[\"]{2}", "\"",[global, {return, list}])
+ re:replace(String, "[\"]{2}", "\"",[global, {return, binary}])
`;
field_sep <- ',' ~;
-crlf <- "\r\n" / "\n" ~;
+crlf <- [\r]? [\n] ~;
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -20,14 +20,14 @@ case length(Node) of
[Head|Rest]
end
`;
-string <- '"' chars:(!'"' ("\\\\" / '\\"' / .))* '"' `lists:flatten(proplists:get_value(chars, Node))`;
+string <- '"' chars:(!'"' ("\\\\" / '\\"' / .))* '"' `iolist_to_binary(proplists:get_value(chars, Node))`;
number <- int frac? exp?
`
case Node of
- [Int, [], []] -> list_to_integer(lists:flatten([Int]));
- [Int, Frac, []] -> list_to_float(lists:flatten([Int, Frac]));
- [Int, [], Exp] -> list_to_float(lists:flatten([Int, ".0", Exp]));
- _ -> list_to_float(lists:flatten(Node))
+ [Int, [], []] -> list_to_integer(binary_to_list(iolist_to_binary(Int)));
+ [Int, Frac, []] -> list_to_float(binary_to_list(iolist_to_binary([Int, Frac])));
+ [Int, [], Exp] -> list_to_float(binary_to_list(iolist_to_binary([Int, ".0", Exp])));
+ _ -> list_to_float(binary_to_list(iolist_to_binary(Node)))
end
`;
int <- '-'? (non_zero_digit digit+) / digit ~;
View
@@ -3,18 +3,21 @@
rules <- space? declaration_sequence space? code_block? space?
`
RootRule = verify_rules(),
- Rules = string:join(lists:nth(2, Node), "\n\n"),
+ Rules = iolist_to_binary(lists:map(fun(R) -> [R, "\n\n"] end, lists:nth(2, Node))),
Code = case lists:nth(4, Node) of
{code, Block} -> Block;
_ -> []
end,
- [{rules, Rules}, {code, Code}, {root, RootRule}, {transform, ets:lookup(memo_table_name(),gen_transform)}]
+ [{rules, Rules},
+ {code, Code},
+ {root, RootRule},
+ {transform, ets:lookup(memo_table_name(),gen_transform)}]
`;
declaration_sequence <- head:declaration tail:(space declaration)*
`
FirstRule = proplists:get_value(head, Node),
- OtherRules = [lists:last(I) || I <- proplists:get_value(tail, Node, [])],
+ OtherRules = [I || [_,I] <- proplists:get_value(tail, Node, [])],
[FirstRule|OtherRules]
`;
@@ -26,64 +29,65 @@ declaration <- nonterminal space '<-' space parsing_expression space? code_block
{code, CodeBlock} -> CodeBlock;
_ ->
ets:insert_new(memo_table_name(),{gen_transform, true}),
- "transform('"++Symbol++"', Node, Idx)"
+ ["transform('",Symbol,"', Node, Idx)"]
end,
- "'"++Symbol++"'"++"(Input, Index) ->\n " ++
- "p(Input, Index, '"++Symbol++"', fun(I,D) -> ("++
- lists:nth(4, Tail) ++
- ")(I,D) end, fun(Node, Idx) -> "++Transform++" end)."
+ ["'",Symbol,"'","(Input, Index) ->\n ",
+ "p(Input, Index, '",Symbol,"', fun(I,D) -> (",
+ lists:nth(4, Tail),
+ ")(I,D) end, fun(Node, Idx) -> ",Transform," end)."]
`;
parsing_expression <- choice / sequence / primary ~;
choice <- head:alternative tail:(space '/' space alternative)+
`
Tail = [lists:last(S) || S <- proplists:get_value(tail, Node)],
- Statements = [proplists:get_value(head, Node)|Tail],
- "p_choose([" ++ string:join(Statements, ", ") ++ "])"
+ Head = proplists:get_value(head, Node),
+ Statements = [[", ", TS] || TS <- Tail],
+ ["p_choose([", Head, Statements, "])"]
`;
alternative <- sequence / labeled_primary ~;
primary <- prefix atomic / atomic suffix / atomic
`
case Node of
- [Atomic, one_or_more] -> "p_one_or_more("++Atomic++")";
- [Atomic, zero_or_more] -> "p_zero_or_more("++Atomic++")";
- [Atomic, optional] -> "p_optional("++Atomic++")";
- [assert, Atomic] -> "p_assert("++Atomic++")";
- [not_, Atomic] -> "p_not("++Atomic++")";
+ [Atomic, one_or_more] -> ["p_one_or_more(", Atomic, ")"];
+ [Atomic, zero_or_more] -> ["p_zero_or_more(", Atomic, ")"];
+ [Atomic, optional] -> ["p_optional(", Atomic, ")"];
+ [assert, Atomic] -> ["p_assert(", Atomic, ")"];
+ [not_, Atomic] -> ["p_not(", Atomic, ")"];
_ -> Node
end
`;
sequence <- head:labeled_primary tail:(space labeled_primary)+
`
Tail = [lists:nth(2, S) || S <- proplists:get_value(tail, Node)],
- Statements = [proplists:get_value(head, Node)|Tail],
- "p_seq(["++ string:join(Statements, ", ") ++ "])"
+ Head = proplists:get_value(head, Node),
+ Statements = [[", ", TS] || TS <- Tail],
+ ["p_seq([", Head, Statements, "])"]
`;
labeled_primary <- label? primary
`
case hd(Node) of
[] -> lists:nth(2, Node);
- Label -> "p_label('" ++ Label ++ "', "++lists:nth(2, Node)++")"
+ Label -> ["p_label('", Label, "', ", lists:nth(2, Node), ")"]
end
`;
label <- alpha_char alphanumeric_char* ':'
`
- String = lists:flatten(Node),
- lists:sublist(String, length(String)-1)
+ lists:sublist(Node, length(Node)-1)
`;
suffix <- repetition_suffix / optional_suffix
`
case Node of
- "*" -> zero_or_more;
- "+" -> one_or_more;
- "?" -> optional
+ <<"*">> -> zero_or_more;
+ <<"+">> -> one_or_more;
+ <<"?">> -> optional
end
`;
@@ -94,60 +98,76 @@ repetition_suffix <- '+' / '*' ~;
prefix <- '&' / '!'
`
case Node of
- "&" -> assert;
- "!" -> not_
+ <<"&">> -> assert;
+ <<"!">> -> not_
end
`;
atomic <- terminal / nonterminal / parenthesized_expression
`
case Node of
{nonterminal, Symbol} ->
- add_nt(Symbol, Index),
- "fun '" ++ Symbol ++ "'/2";
+ [<<"fun '">>, Symbol, <<"'/2">>];
_ -> Node
end
`;
parenthesized_expression <- '(' space? parsing_expression space? ')' `lists:nth(3, Node)`;
-nonterminal <- alpha_char alphanumeric_char* `{nonterminal, lists:flatten(Node)}`;
+nonterminal <- alpha_char alphanumeric_char*
+`
+ Symbol = iolist_to_binary(Node),
+ add_nt(Symbol, Idx),
+ {nonterminal, Symbol}
+`;
terminal <- quoted_string / character_class / anything_symbol ~;
-quoted_string <- single_quoted_string / double_quoted_string `"p_string(\""++escape_quotes(lists:flatten(proplists:get_value(string, Node)))++"\")"`;
+quoted_string <- single_quoted_string / double_quoted_string
+`
+ ["p_string(",
+ io_lib:format("~p",[iolist_to_binary(proplists:get_value(string, Node))]),
+ ")"]
+`;
double_quoted_string <- '"' string:(!'"' ("\\\\" / '\\"' / .))* '"' ~;
single_quoted_string <- "'" string:(!"'" ("\\\\" / "\\'" / .))* "'" ~;
-character_class <- '[' characters:(!']' ('\\\\' . / !'\\\\' .))+ ']' `"p_charclass(\"[" ++ escape_quotes(lists:flatten(proplists:get_value(characters, Node))) ++ "]\")"`;
+character_class <- '[' characters:(!']' ('\\\\' . / !'\\\\' .))+ ']'
+`
+["p_charclass(<<\"[",
+ escape_quotes(io_lib:format("~s", [iolist_to_binary(proplists:get_value(characters, Node))])),
+ "]\">>)"]
+`;
-anything_symbol <- '.' ` "p_anything()" `;
+anything_symbol <- '.' ` <<"p_anything()">> `;
alpha_char <- [A-Za-z_] ~;
alphanumeric_char <- alpha_char / [0-9] ~;
space <- (white / comment_to_eol)+ ~;
-comment_to_eol <- '%' (!"\n" .)* ~;
+comment_to_eol <- !'%{' '%' (!"\n" .)* ~;
white <- [ \t\n\r] ~;
-code_block <- ('`' code:('\\`' / '$`' / !'`' .)+ '`') / '~'
+code_block <- ( '%{' code:('\\%' / '$%' / !'%}' .)+ '%}' ) /
+ ('`' code:('\\`' / '$`' / !'`' .)+ '`') /
+ '~'
`
case Node of
- "~" -> {code, "Node"};
- _ -> {code, lists:flatten(proplists:get_value('code', Node))}
+ <<"~">> -> {code, <<"Node">>};
+ _ -> {code, proplists:get_value('code', Node)}
end
`;
%% Extra functions
`
escape_quotes(String) ->
{ok, RE} = re:compile("\""),
- re:replace(String, RE, "\\\\\"", [global, {return, list}]).
+ re:replace(String, RE, "\\\\\"", [global, {return, binary}]).
add_lhs(Symbol, Index) ->
case ets:lookup(memo_table_name(), lhs) of
@@ -188,7 +208,7 @@ verify_rules() ->
ok;
_ ->
io:format("neotoma error: nonterminal '~s' has no reduction. (found at ~p) No parser will be generated!~n", [S,I]),
- exit({neotoma, {no_reduction, list_to_atom(S)}})
+ exit({neotoma, {no_reduction, list_to_atom(binary_to_list(S))}})
end
end, NTs),
Root.
View
@@ -47,7 +47,7 @@ memo_table_name() ->
get(parse_memo_table).
p_eof() ->
- fun([], Index) -> {eof, [], Index};
+ fun(<<>>, Index) -> {eof, [], Index};
(_, Index) -> {fail, {expected, eof, Index}} end.
p_optional(P) ->
@@ -140,37 +140,44 @@ p_scan(P, Inp, Index, Accum) ->
{Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum])
end.
+p_string(S) when is_list(S) -> p_string(list_to_binary(S));
p_string(S) ->
- fun(Input, Index) ->
- case lists:prefix(S, Input) of
- true -> {S, lists:sublist(Input, length(S)+1, length(Input)), p_advance_index(S,Index)};
- _ -> {fail, {expected, {string, S}, Index}}
+ Length = erlang:byte_size(S),
+ fun(Input, Index) ->
+ try
+ <<S:Length/binary, Rest/binary>> = Input,
+ {S, Rest, p_advance_index(S, Index)}
+ catch
+ error:{badmatch,_} -> {fail, {expected, {string, S}, Index}}
end
- end.
+ end.
p_anything() ->
- fun([], Index) -> {fail, {expected, any_character, Index}};
- ([H|T], Index) -> {H, T, p_advance_index(H, Index)}
+ fun(<<>>, Index) -> {fail, {expected, any_character, Index}};
+ (Input, Index) when is_binary(Input) ->
+ <<C/utf8, Rest/binary>> = Input,
+ {<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
end.
p_charclass(Class) ->
- fun(Inp, Index) ->
- {ok, RE} = re:compile("^"++Class),
- case re:run(Inp, RE) of
- {match, _} ->
- {hd(Inp), tl(Inp), p_advance_index(hd(Inp), Index)};
- _ -> {fail,{expected, {character_class, Class}, Index}}
- end
- end.
+ {ok, RE} = re:compile(Class, [unicode, dotall]),
+ fun(Inp, Index) ->
+ case re:run(Inp, RE, [anchored]) of
+ {match, [{0, Length}|_]} ->
+ {Head, Tail} = erlang:split_binary(Inp, Length),
+ {Head, Tail, p_advance_index(Head, Index)};
+ _ -> {fail, {expected, {character_class, Class}, Index}}
+ end
+ end.
line({{line,L},_}) -> L;
line(_) -> undefined.
column({_,{column,C}}) -> C;
column(_) -> undefined.
-p_advance_index(MatchedInput, Index) when is_list(MatchedInput) -> % strings
- lists:foldl(fun p_advance_index/2, Index, MatchedInput);
+p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings
+ lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput));
p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters
{{line, Line}, {column, Col}} = Index,
case MatchedInput of
View
@@ -63,11 +63,12 @@ generate_module_attrs(ModName) ->
generate_entry_functions(Root) ->
{RootRule,_} = Root,
- ["file(Filename) -> {ok, Bin} = file:read_file(Filename), parse(binary_to_list(Bin)).\n\n",
- "parse(Input) ->\n",
+ ["file(Filename) -> {ok, Bin} = file:read_file(Filename), parse(Bin).\n\n",
+ "parse(List) when is_list(List) -> parse(list_to_binary(List));\n",
+ "parse(Input) when is_binary(Input) ->\n",
" setup_memo(),\n",
" Result = case '",RootRule,"'(Input,{{line,1},{column,1}}) of\n",
- " {AST, [], _Index} -> AST;\n",
+ " {AST, <<>>, _Index} -> AST;\n",
" Any -> Any\n"
" end,\n",
" release_memo(), Result.\n"].
Oops, something went wrong.

0 comments on commit 1c0f9f8

Please sign in to comment.