Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey Massung committed Aug 31, 2011
0 parents commit 79598a4
Show file tree
Hide file tree
Showing 9 changed files with 540 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ebin/
37 changes: 37 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# makefile
#
# builds the parsec library and the sample languages into $(BIN)

# source and target folders
SRC=src
INC=include
BIN=ebin

# compile options
OPTS=-I "$(INC)" -o "$(BIN)"
CC=erlc
MKDIR=mkdir
RM=rm

# list of header files
HEADERS=$(INC)/parsec.hrl $(INC)/lexer.hrl

# these targets are commands, not files; never treat them as up to date
.PHONY: all clean parsec samples lisp

# compile parsec and all sample languages
all: parsec samples

# wipe all previously compiled files
clean:
	$(RM) -rf $(BIN)/

# build the parsec and lexeme libraries
parsec: $(BIN)/parsec.beam $(BIN)/lexer.beam

# build sample languages
samples: lisp

# sample language definitions
lisp: $(BIN)/lisp_parser.beam

# compile a source
#
# the target is named inside $(BIN) because that is where erlc writes the
# .beam file; this lets make skip sources that are already up to date
$(BIN)/%.beam: $(SRC)/%.erl $(HEADERS)
	$(MKDIR) -p $(BIN)/
	$(CC) $(OPTS) $<
Empty file added README.md
Empty file.
25 changes: 25 additions & 0 deletions include/lexer.hrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
%% parsec lexer record definition
%%
%% copyright 2011 by jeffrey massung
%% all rights reserved
%%
%% lexer.hrl
%%

-include("../include/parsec.hrl").

%% the definition of a parser combinator function
%%
%% a combinator is a fun of one argument whose result is a parse_result(T),
%% i.e. either a parse_state(T) tuple or the atom pzero; T is the type of
%% the value the combinator produces
-type parse_combinator(T) :: fun ((any()) -> parse_result(T)).

%% language definition
%%
%% describes the lexical rules of one language; the fields are read by the
%% functions in lexer.erl
%%
%% NOTE(review): the sample lisp lexer builds all three comment fields with
%% parsec:string/1, yet two are typed binary() and one string() - confirm
%% which result type is intended
-record(lexer, {
%% combinators matching the opening and closing markers of a block comment
comment_start :: parse_combinator(binary()),
comment_end :: parse_combinator(binary()),
%% combinator matching the marker that starts a single-line comment
comment_line :: parse_combinator(string()),
%% first character of an identifier, then each following character
ident_start :: parse_combinator(integer()),
ident_letter :: parse_combinator(integer()),
%% first character of an operator, then each following character
op_start :: parse_combinator(integer()),
op_letter :: parse_combinator(integer()),
%% words that lexer:identifier/1 refuses to return as identifiers
reserved_names :: list(string()),
%% operators that lexer:operator/1 refuses to return as operators
reserved_ops :: list(string())
}).
32 changes: 32 additions & 0 deletions include/parsec.hrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
%% parsec record definitions
%%
%% copyright 2011 by jeffrey massung
%% all rights reserved
%%
%% parsec.hrl
%%

%% parse state is a value and an input stream
%%
%% the first element is the value carried by the state and the second is
%% the input string
-type parse_state(T) :: {T,string()}.

%% return type of a parse combinator
%%
%% either a new parse state or the atom pzero
%% (presumably pzero signals that the combinator did not match - confirm
%% against parsec.erl)
-type parse_result(T) :: parse_state(T) | pzero.

%% common combinators
%%
%% every macro expands to calls into the parsec module, so a combinator is
%% built at each point of use

%% letters
-define(UPPER_LETTER,parsec:one_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ")).
-define(LOWER_LETTER,parsec:one_of("abcdefghijklmnopqrstuvwxyz")).
-define(LETTER,parsec:choice([?UPPER_LETTER,?LOWER_LETTER])).
-define(LETTERS,parsec:many1(?LETTER)).

%% digits in base 10, 16 and 8
-define(DIGIT,parsec:one_of("0123456789")).
-define(DIGITS,parsec:many1(?DIGIT)).
-define(HEX_DIGIT,parsec:one_of("0123456789abcdefABCDEF")).
-define(HEX_DIGITS,parsec:many1(?HEX_DIGIT)).
-define(OCT_DIGIT,parsec:one_of("01234567")).
-define(OCT_DIGITS,parsec:many1(?OCT_DIGIT)).

%% letters, digits and punctuation
-define(ALPHANUM,parsec:choice([?LETTER,?DIGIT])).
-define(PUNCTUATION,parsec:one_of("!@#$%^&*()-=+[]{}\\|;:'\",./<>?~`")).

%% horizontal whitespace (space and tab only)
-define(SPACE,parsec:one_of(" \t")).
-define(SPACES,parsec:many1(?SPACE)).

%% line breaks; NEWLINES repeats NEWLINE (it must not refer to itself, a
%% circular macro cannot be expanded)
-define(NEWLINE,parsec:one_of("\r\n")).
-define(NEWLINES,parsec:many1(?NEWLINE)).

%% end of line: either the end of the input or a line break
-define(EOL,parsec:choice([parsec:eof(),parsec:do([?NEWLINE])])).
Binary file added src/.DS_Store
Binary file not shown.
180 changes: 180 additions & 0 deletions src/lexer.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
%% parsec-style token lexer
%%
%% copyright (c) 2011 by jeffrey massung
%% all rights reserved
%%
%% lexer.erl
%%

-module(lexer).

%% grab type definitions
-include("../include/lexer.hrl").

%% exposed functionality
-export([whitespace/1,
lexeme/2,
identifier/1,
reserved/2,
operator/1,
reserved_op/2,
char_lit/1,
string_lit/1,
natural/2,
decimal/1,
hexadecimal/1,
octal/1,
binary/1,
real/1,
real_or_natural/1,
parens/2,
brackets/2,
braces/2,
angles/2
]).

%% skip a single-line comment: the language's line-comment marker followed
%% by any characters until ?EOL matches
single_line_comment (#lexer{comment_line=Marker}) ->
    Rest=parsec:many_till(parsec:any_char(),?EOL),
    parsec:do([Marker,Rest]).

%% skip a multi-line comment: the opening marker followed by any characters
%% until the closing marker matches
block_comment (#lexer{comment_start=Open,comment_end=Close}) ->
    Body=parsec:many_till(parsec:any_char(),Close),
    parsec:do([Open,Body]).

%% skip any comment: a single-line comment is tried first, then a block
%% comment
comment (Lexer=#lexer{}) ->
    LineComment=single_line_comment(Lexer),
    BlockComment=block_comment(Lexer),
    parsec:choice([LineComment,BlockComment]).

%% skip all whitespace and comments
%%
%% line breaks are skipped along with spaces and tabs (?SPACE on its own
%% only covers " \t"), so the tokens of a form may be spread over several
%% lines
whitespace (Lexer=#lexer{}) ->
    parsec:skip(parsec:choice([comment(Lexer),?SPACE,?NEWLINE])).

%% parse a lexeme: run P, then skip the whitespace and comments that follow
%% it, and yield the value P produced
lexeme (Lexer=#lexer{},P) ->
    SkipTrailing=fun (Value) ->
        parsec:bind_(whitespace(Lexer),parsec:return(Value))
    end,
    parsec:bind(P,SkipTrailing).

%% parse an identifier: a start character followed by any number of
%% identifier letters; words listed in reserved_names are rejected
identifier (Lexer=#lexer{ident_start=Start,ident_letter=Letter,reserved_names=Reserved}) ->
    Word=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    RejectReserved=fun (Name) ->
        case lists:member(Name,Reserved) of
            true -> parsec:pzero();
            false -> parsec:return(Name)
        end
    end,
    lexeme(Lexer,parsec:bind(Word,RejectReserved)).

%% parse an operator: a start character followed by any number of operator
%% letters; operators listed in reserved_ops are rejected
operator (Lexer=#lexer{op_start=Start,op_letter=Letter,reserved_ops=Reserved}) ->
    Symbol=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    RejectReserved=fun (Op) ->
        IsReserved=lists:member(Op,Reserved),
        if
            IsReserved -> parsec:pzero();
            true -> parsec:return(Op)
        end
    end,
    lexeme(Lexer,parsec:bind(Symbol,RejectReserved)).

%% parse a reserved word: read a whole identifier-shaped word and accept it
%% only when it equals Name
reserved (Lexer=#lexer{ident_start=Start,ident_letter=Letter},Name) ->
    Word=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    OnlyName=fun (Id) when Id==Name -> parsec:return(Name);
                 (_) -> parsec:pzero()
             end,
    lexeme(Lexer,parsec:bind(Word,OnlyName)).

%% parse a reserved operator: read a whole operator-shaped symbol and accept
%% it only when it equals Name
reserved_op (Lexer=#lexer{op_start=Start,op_letter=Letter},Name) ->
    Symbol=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    OnlyName=fun (Op) when Op==Name -> parsec:return(Op);
                 (_) -> parsec:pzero()
             end,
    lexeme(Lexer,parsec:bind(Symbol,OnlyName)).

%% parse an escaped character
%%
%% returns a combinator that takes one character from the input of a parse
%% state, translating a backslash escape into the character it stands for;
%% anything that is not a state with non-empty input yields pzero
escaped_char () ->
    fun ({_,Input}) -> unescape(Input);
        (_) -> pzero
    end.

%% take one (possibly escaped) character off the front of the input
unescape ([$\\,C|Rest]) -> {escape_code(C),Rest};
unescape ([C|Rest]) -> {C,Rest};
unescape (_) -> pzero.

%% character denoted by the letter after a backslash; unknown escapes stand
%% for themselves
escape_code ($t) -> $\t;
escape_code ($r) -> $\r;
escape_code ($n) -> $\n;
escape_code (C) -> C.

%% parse a character literal
%%
%% reads the (possibly escaped) characters between two single quotes and
%% packs them into one integer, eight bits per character with the first
%% character in the most significant position; an empty literal ('') fails
char_lit (Lexer=#lexer{}) ->
    %% the continuation handed to parsec:bind must yield a parser, so the
    %% failure case uses parsec:pzero() like the other lexer functions
    %% rather than the bare atom pzero
    Char=fun ([]) -> parsec:pzero();
             (CS) ->
                 X=lists:foldl(fun (C,Acc) -> (Acc bsl 8) bor C end,0,CS),
                 parsec:return(X)
         end,
    Quoted=parsec:do([parsec:char($'),
                      parsec:many_till(escaped_char(),parsec:char($'))
                     ]),
    lexeme(Lexer,parsec:bind(Quoted,Char)).

%% parse a string literal: the (possibly escaped) characters between two
%% double quotes, yielded as a string
string_lit (Lexer=#lexer{}) ->
    Opening=parsec:char($"),
    Contents=parsec:many_till(escaped_char(),parsec:char($")),
    Quoted=parsec:do([Opening,Contents]),
    lexeme(Lexer,parsec:bind(Quoted,fun parsec:return/1)).

%% parse an unsigned, natural number using a given base (2-36)
%%
%% digits above 9 are accepted in either case, which is what
%% list_to_integer/2 understands (so both "FF" and "ff" parse in base 16);
%% a base outside 2-36 fails with function_clause instead of building a
%% digit set that list_to_integer/2 would reject later
natural (Lexer=#lexer{},Base) when is_integer(Base), Base >= 2, Base =< 36 ->
    %% number of letter digits needed beyond 0-9 (none for bases up to 10)
    Letters=max(Base-10,0),
    Digits=lists:sublist("0123456789",Base)
        ++lists:sublist("ABCDEFGHIJKLMNOPQRSTUVWXYZ",Letters)
        ++lists:sublist("abcdefghijklmnopqrstuvwxyz",Letters),
    ToInteger=fun (N) -> parsec:return(list_to_integer(N,Base)) end,
    lexeme(Lexer,parsec:bind(parsec:many1(parsec:one_of(Digits)),ToInteger)).

%% fixed-base shorthands for natural/2: each parses an unsigned, natural
%% number written in the named base

%% base 10
decimal (Lexer=#lexer{}) ->
    natural(Lexer,10).

%% base 16
hexadecimal (Lexer=#lexer{}) ->
    natural(Lexer,16).

%% base 8
octal (Lexer=#lexer{}) ->
    natural(Lexer,8).

%% base 2
binary (Lexer=#lexer{}) ->
    natural(Lexer,2).

%% parse a real number
%%
%% the text must be in the format \d+\.\d+([eE][-+]?\d+)? and is converted
%% with list_to_float/1
real (Lexer=#lexer{}) ->
    Fraction=parsec:do([parsec:char($.),?DIGITS]),

    %% 'maybe' is quoted because maybe is a reserved word on recent OTP
    %% releases; the quoted form names the same function on every release
    Exp=parsec:do([parsec:one_of("eE"),
                   parsec:'maybe'(parsec:one_of("-+")),
                   ?DIGITS
                  ]),

    %% capture the matched text so it can be converted in one go
    Number=parsec:capture(parsec:do([?DIGITS,
                                     Fraction,
                                     parsec:'maybe'(Exp)
                                    ])),
    lexeme(Lexer,parsec:bind(Number,
                             fun (N) ->
                                 parsec:return(list_to_float(N))
                             end)).

%% parse a float or natural number according to many languages
%%
%% alternatives are tried in order: a real, then a natural introduced by
%% "0x" (hexadecimal), "0o" (octal) or "0b" (binary), then a plain decimal
real_or_natural (Lexer=#lexer{}) ->
    Prefixed=fun (Prefix,Number) ->
        parsec:bind_(parsec:string(Prefix),Number)
    end,
    Alternatives=[real(Lexer),
                  Prefixed("0x",hexadecimal(Lexer)),
                  Prefixed("0o",octal(Lexer)),
                  Prefixed("0b",binary(Lexer)),
                  decimal(Lexer)],
    parsec:choice(Alternatives).

%% parse a combinator between start and end combinators
%%
%% S, P and E are each run as a lexeme; the value yielded is the one P
%% produced
between (Lexer=#lexer{},S,E,P) ->
    Opened=parsec:bind_(lexeme(Lexer,S),lexeme(Lexer,P)),
    ThenClose=fun (Value) ->
        parsec:bind_(lexeme(Lexer,E),parsec:return(Value))
    end,
    parsec:bind(Opened,ThenClose).

%% parse a combinator between a pair of delimiters; each function below
%% wraps between/4 with one fixed pair

%% ( ... )
parens (Lexer=#lexer{},P) ->
    Open=parsec:char($(),
    Close=parsec:char($)),
    between(Lexer,Open,Close,P).

%% [ ... ]
brackets (Lexer=#lexer{},P) ->
    Open=parsec:char($[),
    Close=parsec:char($]),
    between(Lexer,Open,Close,P).

%% { ... }
braces (Lexer=#lexer{},P) ->
    Open=parsec:char(${),
    Close=parsec:char($}),
    between(Lexer,Open,Close,P).

%% < ... >
angles (Lexer=#lexer{},P) ->
    Open=parsec:char($<),
    Close=parsec:char($>),
    between(Lexer,Open,Close,P).
53 changes: 53 additions & 0 deletions src/lisp_parser.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
%% example lisp parser
%%
%% lisp_parser.erl
%%

-module(lisp_parser).
-export([parse/1]).

%% get the lexer definition
-include("../include/lexer.hrl").

%% the non-letter characters allowed in a lisp identifier
special_symbol () ->
    Symbols="~!@$%&*-+=<>/",
    parsec:one_of(Symbols).

%% define the lisp lexer
%%
%% block comments are #| ... |#, line comments start with ; and identifiers
%% are built from letters and the special symbols; "lambda" and "let" are
%% reserved names
make_lexer () ->
    #lexer{
        comment_start=parsec:string("#|"),
        comment_end=parsec:string("|#"),
        comment_line=parsec:string(";"),
        ident_start=parsec:choice([?LETTER,special_symbol()]),
        ident_letter=parsec:choice([?LETTER,special_symbol()]),
        %% lisp has no operators: use the parser that never matches; the
        %% bare atom pzero is not a combinator and could not be run by
        %% lexer:operator/1 or lexer:reserved_op/2
        op_start=parsec:pzero(),
        op_letter=parsec:pzero(),
        reserved_names=["lambda","let"],
        reserved_ops=[]
    }.

%% bind continuations that tag a parsed value with the kind of form it is

%% a parenthesized list of forms
as_list (Forms) ->
    parsec:return({list,Forms}).

%% a real or natural number
as_num (Number) ->
    parsec:return({num,Number}).

%% a string literal
as_string (String) ->
    parsec:return({str,String}).

%% a character literal
as_char (Char) ->
    parsec:return({char,Char}).

%% an identifier
as_ident (Name) ->
    parsec:return({id,Name}).

%% a reserved word
as_op (Name) ->
    parsec:return({op,Name}).

%% parse a form
%%
%% the parser is wrapped in a fun so the recursive call to form/1 for
%% nested lists is only made when the parser is actually run; alternatives
%% are tried in the order listed
form (Lexer) ->
    fun (State) ->
        Nested=lexer:parens(Lexer,parsec:many(form(Lexer))),
        Alternatives=[{Nested,fun as_list/1},
                      {lexer:real_or_natural(Lexer),fun as_num/1},
                      {lexer:identifier(Lexer),fun as_ident/1},
                      {lexer:string_lit(Lexer),fun as_string/1},
                      {lexer:char_lit(Lexer),fun as_char/1},
                      {lexer:reserved(Lexer,"lambda"),fun as_op/1}],
        Choice=parsec:choice([parsec:bind(P,Tag) || {P,Tag} <- Alternatives]),
        Choice(State)
    end.

%% parse a form from a string: skip leading whitespace and comments, then
%% parse a single form
parse (S) ->
    Lexer=make_lexer(),
    Leading=lexer:whitespace(Lexer),
    Program=parsec:do([Leading,form(Lexer)]),
    parsec:parse(S,Program).
Loading

0 comments on commit 79598a4

Please sign in to comment.