Permalink
Browse files

Avoid emitting unused combinators via macros.

This is a stop-gap to reduce dialyzer errors in generated parsers,
i.e. until a more thorough approach can be taken. Essentially
neotoma's parser tracks which combinators are used by the grammar, and
then the generator emits macros at the top of the Erlang output to
ensure that those used combinators are present, but no others.
peg_includes.hrl wraps each optional combinator in appropriate -ifdef
macros. There's still a possibility that a successful sub-tree of a
failed branch will cause an unused function to be included, but most
grammars should not encounter this problem.

Globally-unused combinators were also removed from peg_includes.hrl,
namely p_and/1 and p/4. p_eof/0 remains for future use but is guarded
by a macro.

The line/1 and column/1 functions must be manually turned on by
defining the 'line' and/or 'column' macros in the global code block.

Fixed the remaining dialyzer bugs in neotoma.erl, removing an
unnecessary clause of validate_params/4 and fixing the option() type
to include an option used by the escript.

Provided additional specs for the remaining functions in
peg_includes.hrl.
  • Loading branch information...
1 parent b3f3b3f commit bb8628c7a1efb5270b4dfafab8e1b1226547fb78 @seancribbs committed Jan 2, 2014
Showing with 215 additions and 69 deletions.
  1. +41 −11 priv/neotoma_parse.peg
  2. +36 −9 priv/peg_includes.hrl
  3. +17 −8 src/neotoma.erl
  4. +88 −21 src/neotoma_parse.erl
  5. +0 −18 src/neotoma_peg.erl
  6. +30 −0 test/neotoma_peg.erl
  7. +3 −2 test/test_memoization.erl
View
@@ -11,7 +11,9 @@ rules <- space? declaration_sequence space? code_block? space?
[{rules, Rules},
{code, Code},
{root, RootRule},
- {transform, ets:lookup(memo_table_name(),gen_transform)}]
+ {transform, ets:lookup(memo_table_name(),gen_transform)},
+ {combinators, ets:lookup_element(memo_table_name(), combinators, 2)}]
+
`;
declaration_sequence <- head:declaration tail:(space declaration)*
@@ -51,6 +53,7 @@ choice <- head:alternative tail:(space '/' space alternative)+
Tail = [lists:last(S) || S <- proplists:get_value(tail, Node)],
Head = proplists:get_value(head, Node),
Statements = [[", ", TS] || TS <- Tail],
+ used_combinator(p_choose),
["p_choose([", Head, Statements, "])"]
`;
@@ -59,11 +62,23 @@ alternative <- sequence / labeled_primary ~;
primary <- prefix atomic / atomic suffix / atomic
`
case Node of
- [Atomic, one_or_more] -> ["p_one_or_more(", Atomic, ")"];
- [Atomic, zero_or_more] -> ["p_zero_or_more(", Atomic, ")"];
- [Atomic, optional] -> ["p_optional(", Atomic, ")"];
- [assert, Atomic] -> ["p_assert(", Atomic, ")"];
- [not_, Atomic] -> ["p_not(", Atomic, ")"];
+ [Atomic, one_or_more] ->
+ used_combinator(p_one_or_more),
+ used_combinator(p_scan),
+ ["p_one_or_more(", Atomic, ")"];
+ [Atomic, zero_or_more] ->
+ used_combinator(p_zero_or_more),
+ used_combinator(p_scan),
+ ["p_zero_or_more(", Atomic, ")"];
+ [Atomic, optional] ->
+ used_combinator(p_optional),
+ ["p_optional(", Atomic, ")"];
+ [assert, Atomic] ->
+ used_combinator(p_assert),
+ ["p_assert(", Atomic, ")"];
+ [not_, Atomic] ->
+ used_combinator(p_not),
+ ["p_not(", Atomic, ")"];
_ -> Node
end
`;
@@ -73,14 +88,17 @@ sequence <- head:labeled_primary tail:(space labeled_primary)+
Tail = [lists:nth(2, S) || S <- proplists:get_value(tail, Node)],
Head = proplists:get_value(head, Node),
Statements = [[", ", TS] || TS <- Tail],
+ used_combinator(p_seq),
["p_seq([", Head, Statements, "])"]
`;
labeled_primary <- label? primary
`
case hd(Node) of
[] -> lists:nth(2, Node);
- Label -> ["p_label('", Label, "', ", lists:nth(2, Node), ")"]
+ Label ->
+ used_combinator(p_label),
+ ["p_label('", Label, "', ", lists:nth(2, Node), ")"]
end
`;
@@ -132,7 +150,8 @@ terminal <- regexp_string / quoted_string / character_class / anything_symbol ~;
regexp_string <- '#' string:(!'#' ('\\#' / .))+ '#'
`
-["p_regexp(<<\"",
+ used_combinator(p_regexp),
+ ["p_regexp(<<\"",
% Escape \ and " as they are used in Erlang strings. Other symbols stay as is.
% \ -> \\
% " -> \"
@@ -142,6 +161,7 @@ regexp_string <- '#' string:(!'#' ('\\#' / .))+ '#'
quoted_string <- single_quoted_string / double_quoted_string
`
+ used_combinator(p_string),
lists:flatten(["p_string(<<\"",
escape_string(binary_to_list(iolist_to_binary(proplists:get_value(string, Node)))),
"\">>)"])
@@ -153,12 +173,13 @@ single_quoted_string <- "'" string:(!"'" ("\\\\" / "\\'" / .))* "'" ~;
character_class <- '[' characters:(!']' ('\\\\' . / !'\\\\' .))+ ']'
`
-["p_charclass(<<\"[",
+ used_combinator(p_charclass),
+ ["p_charclass(<<\"[",
escape_string(binary_to_list(iolist_to_binary(proplists:get_value(characters, Node)))),
- "]\">>)"]
+ "]\">>)"]
`;
-anything_symbol <- '.' ` <<"p_anything()">> `;
+anything_symbol <- '.' ` used_combinator(p_anything), <<"p_anything()">> `;
alpha_char <- [A-Za-z_] ~;
@@ -254,6 +275,15 @@ verify_rules() ->
end, NTs),
Root.
+-spec used_combinator(atom()) -> true.
+used_combinator(C) ->
+ case ets:lookup(memo_table_name(), combinators) of
+ [] ->
+ ets:insert(memo_table_name(), {combinators, ordsets:from_list([C])});
+ [{combinators, Cs}] ->
+ ets:insert(memo_table_name(), {combinators, ordsets:add_element(C, Cs)})
+ end.
+
-spec used_transform_variables(binary()) -> [ 'Node' | 'Idx' ].
used_transform_variables(Transform) ->
Code = unicode:characters_to_list(Transform),
View
@@ -1,4 +1,4 @@
--file("neotoma/priv/peg_includes.hrl", 1).
+-file("peg_includes.hrl", 1).
-type index() :: {{line, pos_integer()}, {column, pos_integer()}}.
-type input() :: binary().
-type parse_failure() :: {fail, term()}.
@@ -7,10 +7,6 @@
-type parse_fun() :: fun((input(), index()) -> parse_result()).
-type xform_fun() :: fun((input(), index()) -> term()).
--spec p(input(), index(), atom(), parse_fun()) -> parse_result().
-p(Inp, Index, Name, ParseFun) ->
- p(Inp, Index, Name, ParseFun, fun(N, _Idx) -> N end).
-
-spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result().
p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
case get_memo(StartIndex, Name) of % See if the current reduction is memoized
@@ -36,13 +32,15 @@ setup_memo() ->
release_memo() ->
ets:delete(memo_table_name()).
+-spec memoize(index(), atom(), term()) -> true.
memoize(Index, Name, Result) ->
Memo = case ets:lookup(memo_table_name(), Index) of
[] -> [];
[{Index, Plist}] -> Plist
end,
ets:insert(memo_table_name(), {Index, [{Name, Result}|Memo]}).
+-spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}.
get_memo(Index, Name) ->
case ets:lookup(memo_table_name(), Index) of
[] -> {error, not_found};
@@ -53,14 +51,18 @@ get_memo(Index, Name) ->
end
end.
+-spec memo_table_name() -> ets:tid().
memo_table_name() ->
get({parse_memo_table, ?MODULE}).
+-ifdef(p_eof).
-spec p_eof() -> parse_fun().
p_eof() ->
fun(<<>>, Index) -> {eof, [], Index};
(_, Index) -> {fail, {expected, eof, Index}} end.
+-endif.
+-ifdef(p_optional).
-spec p_optional(parse_fun()) -> parse_fun().
p_optional(P) ->
fun(Input, Index) ->
@@ -69,7 +71,9 @@ p_optional(P) ->
{_, _, _} = Success -> Success
end
end.
+-endif.
+-ifdef(p_not).
-spec p_not(parse_fun()) -> parse_fun().
p_not(P) ->
fun(Input, Index)->
@@ -79,7 +83,9 @@ p_not(P) ->
{Result, _, _} -> {fail, {expected, {no_match, Result},Index}}
end
end.
+-endif.
+-ifdef(p_assert).
-spec p_assert(parse_fun()) -> parse_fun().
p_assert(P) ->
fun(Input,Index) ->
@@ -88,11 +94,9 @@ p_assert(P) ->
_ -> {[], Input, Index}
end
end.
+-endif.
--spec p_and([parse_fun()]) -> parse_fun().
-p_and(P) ->
- p_seq(P).
-
+-ifdef(p_seq).
-spec p_seq([parse_fun()]) -> parse_fun().
p_seq(P) ->
fun(Input, Index) ->
@@ -106,7 +110,9 @@ p_all([P|Parsers], Inp, Index, Accum) ->
{fail, _} = Failure -> Failure;
{Result, InpRem, NewIndex} -> p_all(Parsers, InpRem, NewIndex, [Result|Accum])
end.
+-endif.
+-ifdef(p_choose).
-spec p_choose([parse_fun()]) -> parse_fun().
p_choose(Parsers) ->
fun(Input, Index) ->
@@ -124,13 +130,17 @@ p_attempt([P|Parsers], Input, Index, FirstFailure)->
end;
Result -> Result
end.
+-endif.
+-ifdef(p_zero_or_more).
-spec p_zero_or_more(parse_fun()) -> parse_fun().
p_zero_or_more(P) ->
fun(Input, Index) ->
p_scan(P, Input, Index, [])
end.
+-endif.
+-ifdef(p_one_or_more).
-spec p_one_or_more(parse_fun()) -> parse_fun().
p_one_or_more(P) ->
fun(Input, Index)->
@@ -143,7 +153,9 @@ p_one_or_more(P) ->
{fail, {expected, {at_least_one, Failure}, Index}}
end
end.
+-endif.
+-ifdef(p_label).
-spec p_label(atom(), parse_fun()) -> parse_fun().
p_label(Tag, P) ->
fun(Input, Index) ->
@@ -154,15 +166,19 @@ p_label(Tag, P) ->
{{Tag, Result}, InpRem, NewIndex}
end
end.
+-endif.
+-ifdef(p_scan).
-spec p_scan(parse_fun(), input(), index(), [term()]) -> parse_result().
p_scan(_, [], Index, Accum) -> {lists:reverse( Accum ), [], Index};
p_scan(P, Inp, Index, Accum) ->
case P(Inp, Index) of
{fail,_} -> {lists:reverse(Accum), Inp, Index};
{Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum])
end.
+-endif.
+-ifdef(p_string).
-spec p_string(binary()) -> parse_fun().
p_string(S) ->
Length = erlang:byte_size(S),
@@ -174,15 +190,19 @@ p_string(S) ->
error:{badmatch,_} -> {fail, {expected, {string, S}, Index}}
end
end.
+-endif.
+-ifdef(p_anything).
-spec p_anything() -> parse_fun().
p_anything() ->
fun(<<>>, Index) -> {fail, {expected, any_character, Index}};
(Input, Index) when is_binary(Input) ->
<<C/utf8, Rest/binary>> = Input,
{<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
end.
+-endif.
+-ifdef(p_charclass).
-spec p_charclass(string() | binary()) -> parse_fun().
p_charclass(Class) ->
{ok, RE} = re:compile(Class, [unicode, dotall]),
@@ -194,7 +214,9 @@ p_charclass(Class) ->
_ -> {fail, {expected, {character_class, binary_to_list(Class)}, Index}}
end
end.
+-endif.
+-ifdef(p_regexp).
-spec p_regexp(binary()) -> parse_fun().
p_regexp(Regexp) ->
{ok, RE} = re:compile(Regexp, [unicode, dotall, anchored]),
@@ -206,14 +228,19 @@ p_regexp(Regexp) ->
_ -> {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}}
end
end.
+-endif.
+-ifdef(line).
-spec line(index() | term()) -> pos_integer() | undefined.
line({{line,L},_}) -> L;
line(_) -> undefined.
+-endif.
+-ifdef(column).
-spec column(index() | term()) -> pos_integer() | undefined.
column({_,{column,C}}) -> C;
column(_) -> undefined.
+-endif.
-spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index().
p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings
View
@@ -3,7 +3,12 @@
-export([file/1, file/2, bootstrap/0]).
-export([main/1]).
--type option() :: {module, atom()} | {output, file:filename()} | {transform_module, atom()}.
+-define(ALL_COMBINATORS, [p_eof, p_optional, p_not, p_assert, p_seq,
+ p_choose, p_zero_or_more, p_one_or_more, p_label, p_scan,
+ p_string, p_anything, p_charclass, p_regexp, line, column]).
+
+-type option() :: {module, atom()} | {output, file:filename()} | {transform_module, atom()} |
+ {neotoma_priv_dir, file:filename()}.
%% @doc Handler function for escript.
-spec main(list()) -> ok | no_return().
@@ -39,7 +44,8 @@ file(InputGrammar, Options) ->
Root = proplists:get_value(root, Parsed),
Code = proplists:get_value(code, Parsed),
GenTransform = proplists:get_value(transform, Parsed),
- ModuleAttrs = generate_module_attrs(ModuleName),
+ Combinators = proplists:get_value(combinators, Parsed, ?ALL_COMBINATORS),
+ ModuleAttrs = generate_module_attrs(ModuleName, Combinators),
EntryFuns = generate_entry_functions(Root),
TransformFun = create_transform(TransformModule, OutputDir, GenTransform),
PrivDir = proplists:get_value(neotoma_priv_dir, Options, code:priv_dir(neotoma)),
@@ -49,8 +55,6 @@ file(InputGrammar, Options) ->
-spec validate_params(file:filename(),atom(),atom(),file:filename()) -> 'ok'.
validate_params(InputGrammar, _, _, OutputFile) when InputGrammar =:= OutputFile ->
throw({badarg, "Input and output file are the same!"});
-validate_params(_,ModName,_,_) when not is_atom(ModName) ->
- throw({badarg, "Output module name is not an atom!"});
validate_params(_,_, false, _) -> ok;
validate_params(_,_, TransformModule, _) when not is_atom(TransformModule) ->
throw({badarg, "transform_module option must be an atom"});
@@ -63,11 +67,16 @@ validate_params(_,_, TransformModule, OutputFile) ->
_ -> ok
end.
--spec generate_module_attrs(atom()) -> iolist().
-generate_module_attrs(ModName) ->
- ["-module(",atom_to_list(ModName),").\n",
+-spec generate_module_attrs(atom(), [atom()]) -> iolist().
+generate_module_attrs(ModName, Combinators) ->
+ ["-module(", atom_to_list(ModName) ,").\n",
"-export([parse/1,file/1]).\n",
- "-compile({nowarn_unused_function,[p/4, p/5, p_eof/0, p_optional/1, p_not/1, p_assert/1, p_seq/1, p_and/1, p_choose/1, p_zero_or_more/1, p_one_or_more/1, p_label/2, p_string/1, p_anything/0, p_charclass/1, p_regexp/1, p_attempt/4, line/1, column/1]}).\n\n"].
+ [ generate_combinator_macro(C) || Combinators /= undefined, C <- Combinators ],
+ "\n"
+ ].
+
+generate_combinator_macro(C) ->
+ ["-define(", atom_to_list(C), ",true).\n"].
-spec generate_entry_functions({iodata(),_}) -> iolist().
generate_entry_functions(Root) ->
Oops, something went wrong.

0 comments on commit bb8628c

Please sign in to comment.