Skip to content

Commit

Permalink
Implement brackets and escape expressions
Browse files Browse the repository at this point in the history
This introduces two new AST nodes 'code' and 'escape'.

The first one represents an abstract syntax tree with possible "escaped" holes
in it. Its lexical form is of <| Expr |>, where Expr is any Erlang expression
or the new 'escape' AST node `Expr.

An escape expression is only valid in the context of a code expression, if
one is found in the "top level" code (not nested in brackets), erl_lint fails
with the error 'escaped_out_of_scope'.

Each new bracket expression level has its own fresh symbol table, unbound
variables errors are ignored in staged code.

	% Will compile, even if X is not bound.
	1> <| X |>.
	{var, 1, 'X'}
	2> <| begin X = 1, `(<| X + 2 |>) end |>.
	{block,1,
	       [{match,1,{var,1,'X'},{integer,1,1}},
	        {op,1,'+',{var,1,'X'},{integer,1,2}}]}

You can also pattern match over code expressions:

	3> <| `X + `Y |> = <| 1 + 2 + 3 |>.
	{op,1,'+',
	    {op,1,'+',{integer,1,1},{integer,1,2}},
	    {integer,1,3}}
	4> X.
	{op,1,'+',{integer,1,1},{integer,1,2}}
	5> Y.
	{integer,1,3}
  • Loading branch information
nox committed Oct 27, 2011
1 parent 1fd0ef5 commit b734fc4
Show file tree
Hide file tree
Showing 10 changed files with 358 additions and 5 deletions.
7 changes: 5 additions & 2 deletions lib/compiler/src/sys_pre_expand.erl
Expand Up @@ -65,7 +65,10 @@ module(Fs0, Opts0) ->
%% Expand records. Normalise guard tests.
Fs = erl_expand_records:module(Fs0, Opts0),

Opts = compiler_options(Fs) ++ Opts0,
%% Stage brackets.
Fs1 = erl_stage_brackets:module(Fs),

Opts = compiler_options(Fs1) ++ Opts0,

%% Set pre-defined exported functions.
PreExp = [{module_info,0},{module_info,1}],
Expand All @@ -82,7 +85,7 @@ module(Fs0, Opts0) ->
bittypes = erl_bits:system_bittypes()
},
%% Expand the functions.
{Tfs,St1} = forms(Fs, define_functions(Fs, St0)),
{Tfs,St1} = forms(Fs1, define_functions(Fs1, St0)),
{Efs,St2} = expand_pmod(Tfs, St1),
%% Get the correct list of exported functions.
Exports = case member(export_all, St2#expand.compile) of
Expand Down
1 change: 1 addition & 0 deletions lib/stdlib/src/Makefile
Expand Up @@ -68,6 +68,7 @@ MODULES= \
erl_posix_msg \
erl_pp \
erl_scan \
erl_stage_brackets \
erl_tar \
error_logger_file_h \
error_logger_tty_h \
Expand Down
6 changes: 6 additions & 0 deletions lib/stdlib/src/erl_eval.erl
Expand Up @@ -403,6 +403,10 @@ expr({bin,_,Fs}, Bs0, Lf, Ef, RBs) ->
ret_expr(V, Bs, RBs);
expr({remote,_,_,_}, _Bs, _Lf, _Ef, _RBs) ->
erlang:raise(error, {badexpr,':'}, stacktrace());
expr({code,_,_}=E, Bs0, Lf, Ef, RBs) ->
SE = erl_stage_brackets:expr(E),
{value,V,Bs} = expr(SE, Bs0, Lf, Ef, RBs),
ret_expr(V, Bs, RBs);
expr({value,_,Val}, Bs, _Lf, _Ef, RBs) -> % Special case straight values.
ret_expr(Val, Bs, RBs).

Expand Down Expand Up @@ -1050,6 +1054,8 @@ match1({op,Line,Op,L,R}, Term, Bs, BBs) ->
X ->
match1(X, Term, Bs, BBs)
end;
match1({code,_,_}=P, Term, Bs0, BBs) ->
match1(erl_stage_brackets:pattern(P), Term, Bs0, BBs);
match1(_, _, _Bs, _BBs) ->
throw(invalid).

Expand Down
5 changes: 4 additions & 1 deletion lib/stdlib/src/erl_expand_records.erl
Expand Up @@ -419,7 +419,10 @@ expr({op,Line,Op,L0,R0}, St0) when Op =:= 'andalso';
expr({op,Line,Op,L0,R0}, St0) ->
{L,St1} = expr(L0, St0),
{R,St2} = expr(R0, St1),
{{op,Line,Op,L,R},St2}.
{{op,Line,Op,L,R},St2};
expr({code,Line,E0}, St0) ->
{E,St1} = expr(E0, St0),
{{code,Line,E},St1}.

expr_list([E0 | Es0], St0) ->
{E,St1} = expr(E0, St0),
Expand Down
64 changes: 64 additions & 0 deletions lib/stdlib/src/erl_lint.erl
Expand Up @@ -93,6 +93,7 @@ value_option(Flag, Default, On, OnVal, Off, OffVal, Opts) ->
%% 'called' and 'exports' contain {Line, {Function, Arity}},
%% the other function collections contain {Function, Arity}.
-record(lint, {state=start :: 'start' | 'attribute' | 'function',
ctxs={[],[]}, %Contexts zipper (for code staging)
module=[], %Module
package="", %Module package
extends=[], %Extends
Expand Down Expand Up @@ -363,6 +364,9 @@ format_error(callback_wrong_arity) ->
format_error({imported_predefined_type, Name}) ->
io_lib:format("referring to built-in type ~w as a remote type; "
"please take out the module name", [Name]);
%% --- escapes ---
format_error(escaped_out_of_scope) ->
"escaped out of scope";
%% --- obsolete? unused? ---
format_error({format_error, {Fmt, Args}}) ->
io_lib:format(Fmt, Args);
Expand Down Expand Up @@ -1483,6 +1487,15 @@ pattern({match,_Line,Pat1,Pat2}, Vt, Old, Bvt, St0) ->
{Rvt,Bvt2,St2} = pattern(Pat2, Vt, Old, Bvt, St1),
St3 = reject_bin_alias(Pat1, Pat2, St2),
{vtmerge_pat(Lvt, Rvt),vtmerge_pat(Bvt1,Bvt2),St3};
pattern({escape,Line,E}, Vt, Old, Bvt, St) ->
case handle_escape(E, {pattern,Vt,Old,Bvt}, St) of
{{pattern,Vt1,_,Bvt1},St1} ->
{vtmerge_pat(Vt, Vt1),vtmerge_pat(Bvt, Bvt1),St1};
error -> {[],[],add_error(Line, escaped_out_of_scope, St)}
end;
pattern({code,_,E}, Vt, Old, Bvt, St) ->
{{pattern,Vt1,_,Bvt1},St1} = handle_code(E, {pattern,Vt,Old,Bvt}, St),
{vtmerge_pat(Vt, Vt1),vtmerge_pat(Bvt, Bvt1),St1};
%% Catch legal constant expressions, including unary +,-.
pattern(Pat, _Vt, _Old, _Bvt, St) ->
case is_pattern_expr(Pat) of
Expand Down Expand Up @@ -1914,7 +1927,15 @@ gexpr({op,Line,Op,L,R}, Vt, St0) ->
case is_gexpr_op(Op, 2) of
true -> {Avt,St1};
false -> {Avt,add_error(Line, illegal_guard_expr, St1)}
end;
gexpr({escape,Line,E}, Vt, St) ->
case handle_escape(E, {gexpr,Vt}, St) of
{{gexpr,Vt1},St1} -> {vtupdate(Vt1, Vt),St1};
error -> {[],add_error(Line, escaped_out_of_scope, St)}
end;
gexpr({code,_,E}, Vt, St) ->
{{gexpr,Vt1},St1} = handle_code(E, {gexpr,Vt}, St),
{vtupdate(Vt1, Vt),St1};
%% Everything else is illegal! You could put explicit tests here to
%% better error diagnostics.
gexpr(E, _Vt, St) ->
Expand Down Expand Up @@ -2039,6 +2060,8 @@ exprs([], _Vt, St) -> {[],St}.
%% mark illegally exported variables, e.g. from catch, as unsafe to better
%% show why unbound.

expr({var,_Line,_V}, Vt, St=#lint{ctxs={[_PCtx|_NPCtx],_NCtxs}}) ->
{Vt, St};
expr({var,Line,V}, Vt, St) ->
expr_var(V, Line, Vt, St);
expr({char,_Line,_C}, _Vt, St) -> {[],St};
Expand Down Expand Up @@ -2254,6 +2277,14 @@ expr({op,Line,Op,L,R}, Vt, St0) when Op =:= 'orelse'; Op =:= 'andalso' ->
{vtmerge(Evt1, Vt3),St3};
expr({op,_Line,_Op,L,R}, Vt, St) ->
expr_list([L,R], Vt, St); %They see the same variables
expr({escape,Line,E}, Vt, St) ->
case handle_escape(E, {expr,Vt}, St) of
{{expr,Vt1},St1} -> {vtupdate(Vt1, Vt),St1};
error -> {[],add_error(Line, escaped_out_of_scope, St)}
end;
expr({code,_,E}, Vt, St) ->
{{expr,Vt1},St1} = handle_code(E, {expr,Vt}, St),
{vtupdate(Vt1, Vt),St1};
%% The following are not allowed to occur anywhere!
expr({remote,Line,_M,_F}, _Vt, St) ->
{[],add_error(Line, illegal_expr, St)};
Expand Down Expand Up @@ -3030,6 +3061,39 @@ fun_clause({clause,_Line,H,G,B}, Vt0, St0) ->
Vt4 = vtmerge(Svt, vtsubtract(Cvt, Svt)),
{vtold(Vt4, Vt0),St6}.

handle_escape(_, _, #lint{ctxs={[],_}}) ->
error;
handle_escape(E, NCtx, St=#lint{ctxs={[PCtx|PCtxs1],NCtxs}}) ->
St1 = St#lint{ctxs={PCtxs1,[NCtx|NCtxs]}},
{PCtx1,St3} = case PCtx of
{expr,Vt} ->
{Evt,St2} = expr(E, Vt, St1),
{{expr,vtupdate(Evt, Vt)},St2};
{pattern,Vt,Old,Bvt} ->
{Vt1,Bvt1,St2} = pattern(E, Vt, Old, Bvt, St1),
Vt2 = vtmerge_pat(Vt, Vt1),
Bvt2 = vtmerge_pat(Bvt, Bvt1),
{{pattern,Vt2,Old,Bvt2},St2};
{gexpr,Vt} ->
{Vt1,St2} = gexpr(E, Vt, St1),
{{gexpr,vtupdate(Vt1, Vt)},St2}
end,
#lint{ctxs={PCtxs2,[NCtx1|NCtxs2]}} = St3,
{NCtx1,St3#lint{ctxs={[PCtx1|PCtxs2],NCtxs2}}}.

handle_code(E, PCtx, St=#lint{ctxs={PCtxs,NCtxs}}) ->
{New,Vt,NCtxs1} = case NCtxs of
[{expr,Vt1}|Rest] -> {false,Vt1,Rest};
_ -> {true,[],NCtxs}
end,
St1 = St#lint{ctxs={[PCtx|PCtxs],NCtxs1}},
{Evt,St2=#lint{ctxs={[PCtx1|PCtxs1],NCtxs2}}} = expr(E, Vt, St1),
NCtxs3 = case New of
false -> [{expr,vtupdate(Evt, Vt)}|NCtxs2];
true -> NCtxs2
end,
{PCtx1,St2#lint{ctxs={PCtxs1,NCtxs3}}}.

%% In the variable table we store information about variables. The
%% information is a tuple {State,Usage,Lines}, the variables state and
%% usage. A variable can be in the following states:
Expand Down
7 changes: 7 additions & 0 deletions lib/stdlib/src/erl_parse.yrl
Expand Up @@ -37,6 +37,7 @@ record_expr record_tuple record_field record_fields
if_expr if_clause if_clauses case_expr cr_clause cr_clauses receive_expr
fun_expr fun_clause fun_clauses
try_expr try_catch try_clause try_clauses query_expr
code_expr
function_call argument_list
exprs guard
atomic strings
Expand All @@ -52,6 +53,7 @@ bin_base_type bin_unit_type type_200 type_300 type_400 type_500.
Terminals
char integer float atom string var

'<|' '|>' '`'
'(' ')' ',' '->' ':-' '{' '}' '[' ']' '|' '||' '<-' ';' ':' '#' '.'
'after' 'begin' 'case' 'try' 'catch' 'end' 'fun' 'if' 'of' 'receive' 'when'
'andalso' 'orelse' 'query'
Expand Down Expand Up @@ -245,6 +247,7 @@ expr_500 -> expr_500 mult_op expr_600 :
?mkop2('$1', '$2', '$3').
expr_500 -> expr_600 : '$1'.

expr_600 -> '`' expr_700 : {escape,?line('$1'),'$2'}.
expr_600 -> prefix_op expr_700 :
?mkop1('$1', '$2').
expr_600 -> expr_700 : '$1'.
Expand Down Expand Up @@ -279,6 +282,7 @@ expr_max -> receive_expr : '$1'.
expr_max -> fun_expr : '$1'.
expr_max -> try_expr : '$1'.
expr_max -> query_expr : '$1'.
expr_max -> code_expr : '$1'.


list -> '[' ']' : {nil,?line('$1')}.
Expand Down Expand Up @@ -435,6 +439,9 @@ try_clause -> var ':' expr clause_guard clause_body :
query_expr -> 'query' list_comprehension 'end' :
{'query',?line('$1'),'$2'}.

code_expr -> '<|' expr '|>' :
{code,?line('$1'),'$2'}.


argument_list -> '(' ')' : {[],?line('$1')}.
argument_list -> '(' exprs ')' : {'$2',?line('$1')}.
Expand Down
8 changes: 6 additions & 2 deletions lib/stdlib/src/erl_pp.erl
Expand Up @@ -527,6 +527,8 @@ lexpr({op,_,Op,Larg,Rarg}, Prec, Hook) ->
Lr = lexpr(Rarg, R, Hook),
El = {list,[Ll,Ol,Lr]},
maybe_paren(P, Prec, El);
lexpr({code,_,E}, _, Hook) ->
{list,[{step,'<|',lexpr(E, Hook)},'|>']};
%% Special expressions which are not really legal everywhere.
lexpr({remote,_,M,F}, Prec, Hook) ->
{L,P,R} = inop_prec(':'),
Expand Down Expand Up @@ -1031,7 +1033,7 @@ wordtable() ->
L = [begin {leaf,Sz,S} = leaf(W), {S,Sz} end ||
W <- [" ->"," =","<<",">>","[]","after","begin","case","catch",
"end","fun","if","of","receive","try","when"," ::","..",
" |"]],
" |", "<|", "|>"]],
list_to_tuple(L).

word(' ->', WT) -> element(1, WT);
Expand All @@ -1052,4 +1054,6 @@ word('try', WT) -> element(15, WT);
word('when', WT) -> element(16, WT);
word(' ::', WT) -> element(17, WT);
word('..', WT) -> element(18, WT);
word(' |', WT) -> element(19, WT).
word(' |', WT) -> element(19, WT);
word('<|', WT) -> element(20, WT);
word('|>', WT) -> element(21, WT).
6 changes: 6 additions & 0 deletions lib/stdlib/src/erl_scan.erl
Expand Up @@ -529,6 +529,12 @@ scan1([C|Cs], St, Line, Col, Toks) when ?WHITE_SPACE(C) ->
skip_white_space(Cs, St, Line, Col, Toks, 1)
end;
%% Punctuation characters and operators, first recognise multiples.
%% <|
scan1("<|"++Cs, St, Line, Col, Toks) ->
tok2(Cs, St, Line, Col, Toks, "<|", '<|', 2);
%% |>
scan1("|>"++Cs, St, Line, Col, Toks) ->
tok2(Cs, St, Line, Col, Toks, "|>", '|>', 2);
%% << <- <=
scan1("<<"++Cs, St, Line, Col, Toks) ->
tok2(Cs, St, Line, Col, Toks, "<<", '<<', 2);
Expand Down

4 comments on commit b734fc4

@richcarl
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most serious problem with this approach, in my opinion, is that it tightly couples the definition of the programming language Erlang with the de facto (and very ad hoc) syntax tree representation of its library. Meaning that anyone wanting to make a conforming Erlang implementation would have to implement these bracket expressions as you have defined them, mapping them to the "abstract format", and anyone wanting to document Erlang the language will have to include the abstract format in that spec.

There are also some pretty big limitations with the implementation as it is. You can only use expressions within the brackets. If you want to work with code snippets such as function definitions, export declarations, macro definitions, or standalone clauses with no surrounding "case", there is no way to do that.

I suspect that the pattern matching is also not flexible enough. I haven't studied your code that deeply, but it looks like you're not considering things like line numbers (you probably don't want to fail the match just because one bracket expression was on a different line than the pattern it got matched against) or alternative forms of what you want to think of as "the same" AST.

Metavariables are another limitation. You will only be able to have metavariables where you could have an arbitrary expression, and not e.g. <| fun foo/X |>. You'll also not be able to use metavariables for sequences of things, like a collection of clauses as in <| case foo() of X end |>. See my presentation on Merl (or the documentation) for a summary of the different kinds of metavariables needed. (Simon Thompson did similar things with their DSL for writing Wrangler refactorings.)

As you see, there are a number of things that should be addressed if one wants to have a feature like this as part of the language itself. That's really why I avoided that route, and made the Merl library instead. But if all problems can be solved, I wouldn't mind having something like MetaML (or better) in the Erlang language.

@nox
Copy link
Owner Author

@nox nox commented on b734fc4 Feb 11, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the first part: making code brackets for both construction and pattern-matching means we never need again to see bare syntax trees, and thus that reimplementation of Erlang can change it, as long as people recompile their code. I wouldn't dare not recompiling either if I was using macros and fancy parse transforms.

I explained in some mail on erlang-questions that yes, you would be able to use metavariables (<| case Arg of Clauses end |>`), collections of clauses (`<|clauses| `Cl1; Cs |>), line numbers (<|(Line)| 1 + 1 |>`), etc. Note that this is a first try at it and thus isn't finished.

I read the presentation on Merl and the very fact that you use macros and strings is just sad to me, we would have ended up with something way much better through finishing this.

@richcarl
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll try to get back to your other remarks later, but please note that although you may think it "sad", I wrote Merl to solve some real problems for a real world product without making major changes to the OTP platform itself. If macros, strings and parse transforms are all you can work with, that's what you get.

@nox
Copy link
Owner Author

@nox nox commented on b734fc4 Feb 11, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course! The only person it is making sad is that nondescript contributor who can't refrain from mentioning cheese names in his test cases.

Please sign in to comment.