-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jeffrey Massung
committed
Aug 31, 2011
0 parents
commit 79598a4
Showing
9 changed files
with
540 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ebin/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# makefile
#
# builds the parsec library and the sample languages into $(BIN)

# source and target folders
SRC=src
INC=include
BIN=ebin

# compile options
OPTS=-I "$(INC)" -o "$(BIN)"
CC=erlc
MKDIR=mkdir
RM=rm

# list of header files
HEADERS=$(INC)/parsec.hrl $(INC)/lexer.hrl

# these targets name actions, not files on disk
.PHONY: all clean parsec samples lisp

# compile parsec and all sample languages
all: parsec samples

# wipe all previously compiled files
clean:
	$(RM) -rf $(BIN)/

# build the parsec and lexer libraries
parsec: $(BIN)/parsec.beam $(BIN)/lexer.beam

# build sample languages
samples: lisp

# sample language definitions
lisp: $(BIN)/lisp_parser.beam

# compile a source; the target is spelled with the $(BIN) prefix because
# that is where "erlc -o" writes the .beam file, which lets make compare
# timestamps and skip modules that are already up to date
$(BIN)/%.beam: $(SRC)/%.erl $(HEADERS)
	$(MKDIR) -p $(BIN)/
	$(CC) $(OPTS) $<
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
%% parsec lexer record definition | ||
%% | ||
%% copyright 2011 by jeffrey massung | ||
%% all rights reserved | ||
%% | ||
%% lexer.hrl | ||
%% | ||
|
||
-include("../include/parsec.hrl"). | ||
|
||
%% a parser combinator is a one-argument fun that maps its argument to a
%% parse result carrying a value of type Value
-type parse_combinator(Value) :: fun ((any()) -> parse_result(Value)).
|
||
%% language definition
%%
%% one record describes the lexical structure of a language: how comments
%% are delimited, which characters form identifiers and operators, and
%% which names and operators are reserved
-record(lexer, {
    %% block comment delimiters and the line comment opener
    comment_start  :: parse_combinator(binary()),
    comment_end    :: parse_combinator(binary()),
    comment_line   :: parse_combinator(string()),

    %% first and subsequent characters of an identifier
    ident_start    :: parse_combinator(integer()),
    ident_letter   :: parse_combinator(integer()),

    %% first and subsequent characters of an operator
    op_start       :: parse_combinator(integer()),
    op_letter      :: parse_combinator(integer()),

    %% words and operators that may not be used as ordinary names
    reserved_names :: list(string()),
    reserved_ops   :: list(string())
}).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
%% parsec record definitions | ||
%% | ||
%% copyright 2011 by jeffrey massung | ||
%% all rights reserved | ||
%% | ||
%% parsec.hrl | ||
%% | ||
|
||
%% a parse state pairs a value of type Value with an input stream (a string)
-type parse_state(Value) :: {Value,string()}.
|
||
%% what a parse combinator returns: either a parse state or the atom pzero
-type parse_result(Value) :: parse_state(Value) | pzero.
|
||
%% common combinators
%%
%% each macro expands to a call into the parsec module that builds a
%% combinator; the plural forms wrap the matching singular macro in
%% parsec:many1/1
-define(UPPER_LETTER,parsec:one_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ")).
-define(LOWER_LETTER,parsec:one_of("abcdefghijklmnopqrstuvwxyz")).
-define(LETTER,parsec:choice([?UPPER_LETTER,?LOWER_LETTER])).
-define(LETTERS,parsec:many1(?LETTER)).
-define(DIGIT,parsec:one_of("0123456789")).
-define(DIGITS,parsec:many1(?DIGIT)).
-define(HEX_DIGIT,parsec:one_of("0123456789abcdefABCDEF")).
-define(HEX_DIGITS,parsec:many1(?HEX_DIGIT)).
-define(OCT_DIGIT,parsec:one_of("01234567")).
-define(OCT_DIGITS,parsec:many1(?OCT_DIGIT)).
-define(ALPHANUM,parsec:choice([?LETTER,?DIGIT])).
-define(PUNCTUATION,parsec:one_of("!@#$%^&*()-=+[]{}\\|;:'\",./<>?~`")).
-define(SPACE,parsec:one_of(" \t")).
-define(SPACES,parsec:many1(?SPACE)).
-define(NEWLINE,parsec:one_of("\r\n")).
%% must wrap the singular ?NEWLINE: a macro whose body mentions itself is
%% rejected by the preprocessor as circular the first time it is used
-define(NEWLINES,parsec:many1(?NEWLINE)).
%% end of line is either the end of the input or a single newline character
-define(EOL,parsec:choice([parsec:eof(),parsec:do([?NEWLINE])])).
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
%% parsec-style token lexer | ||
%% | ||
%% copyright (c) 2011 by jeffrey massung | ||
%% all rights reserved | ||
%% | ||
%% lexer.erl | ||
%% | ||
|
||
-module(lexer). | ||
|
||
%% grab type definitions | ||
-include("../include/lexer.hrl"). | ||
|
||
%% exposed functionality | ||
-export([whitespace/1, | ||
lexeme/2, | ||
identifier/1, | ||
reserved/2, | ||
operator/1, | ||
reserved_op/2, | ||
char_lit/1, | ||
string_lit/1, | ||
natural/2, | ||
decimal/1, | ||
hexadecimal/1, | ||
octal/1, | ||
binary/1, | ||
real/1, | ||
real_or_natural/1, | ||
parens/2, | ||
brackets/2, | ||
braces/2, | ||
angles/2 | ||
]). | ||
|
||
%% skip a single-line comment: the lexer's line-comment opener followed by
%% any characters up to and including ?EOL
single_line_comment (#lexer{comment_line=Opener}) ->
    Rest=parsec:many_till(parsec:any_char(),?EOL),
    parsec:do([Opener,Rest]).
|
||
%% skip a multi-line comment: the lexer's comment opener followed by any
%% characters until the comment closer matches
block_comment (#lexer{comment_start=Opener,comment_end=Closer}) ->
    Rest=parsec:many_till(parsec:any_char(),Closer),
    parsec:do([Opener,Rest]).
|
||
%% skip any comment; the single-line form is tried before the block form
comment (#lexer{}=Lexer) ->
    Line=single_line_comment(Lexer),
    Block=block_comment(Lexer),
    parsec:choice([Line,Block]).
|
||
%% skip all whitespace and comments
%%
%% Lexer supplies the comment combinators. besides comments this skips
%% spaces and tabs (?SPACE) and also line breaks (?NEWLINE), so input that
%% spans several lines can be tokenised and the "\n" left behind after a
%% line comment that ended in "\r\n" does not stop the lexer.
whitespace (Lexer=#lexer{}) ->
    parsec:skip(parsec:choice([comment(Lexer),?SPACE,?NEWLINE])).
|
||
%% parse a lexeme: run P, then skip trailing whitespace and comments while
%% keeping the value P produced
lexeme (#lexer{}=Lexer,P) ->
    KeepValue=fun (Value) ->
        Trailing=whitespace(Lexer),
        parsec:bind_(Trailing,parsec:return(Value))
    end,
    parsec:bind(P,KeepValue).
|
||
%% parse an identifier: a start character followed by any number of
%% identifier letters; a word listed in reserved_names is rejected
identifier (#lexer{ident_start=Start,ident_letter=Letter,reserved_names=Reserved}=Lexer) ->
    Word=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    RejectReserved=fun (Name) ->
        case lists:member(Name,Reserved) of
            true  -> parsec:pzero();
            false -> parsec:return(Name)
        end
    end,
    lexeme(Lexer,parsec:bind(Word,RejectReserved)).
|
||
%% parse an operator: a start character followed by any number of operator
%% letters; a symbol listed in reserved_ops is rejected
operator (#lexer{op_start=Start,op_letter=Letter,reserved_ops=Reserved}=Lexer) ->
    Symbol=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    RejectReserved=fun (Op) ->
        case lists:member(Op,Reserved) of
            true  -> parsec:pzero();
            false -> parsec:return(Op)
        end
    end,
    lexeme(Lexer,parsec:bind(Symbol,RejectReserved)).
|
||
%% parse a reserved word: read a whole identifier-shaped word and succeed
%% only when it equals Name, yielding Name
reserved (#lexer{ident_start=Start,ident_letter=Letter}=Lexer,Name) ->
    Word=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    MatchName=fun (Id) when Id==Name -> parsec:return(Name);
                  (_) -> parsec:pzero()
              end,
    lexeme(Lexer,parsec:bind(Word,MatchName)).
|
||
%% parse a reserved operator: read a whole operator-shaped symbol and
%% succeed only when it equals Name, yielding the symbol that was read
reserved_op (#lexer{op_start=Start,op_letter=Letter}=Lexer,Name) ->
    Symbol=parsec:bind(Start,parsec:cons(parsec:many(Letter))),
    MatchName=fun (Op) when Op==Name -> parsec:return(Op);
                  (_) -> parsec:pzero()
              end,
    lexeme(Lexer,parsec:bind(Symbol,MatchName)).
|
||
%% parse an escaped character
%%
%% returns a fun that takes a {Value,Input} state and consumes one
%% character from Input, or two when the first is a backslash; the result
%% is {Char,RemainingInput}, or pzero when there is nothing to consume
escaped_char () ->
    fun ({_,[$\\,Escaped|Rest]}) -> {unescape(Escaped),Rest};
        ({_,[Plain|Rest]}) -> {Plain,Rest};
        (_) -> pzero
    end.

%% translate the character that followed a backslash; anything other than
%% t, r and n stands for itself (which covers an escaped backslash)
unescape ($t) -> $\t;
unescape ($r) -> $\r;
unescape ($n) -> $\n;
unescape (Other) -> Other.
|
||
%% parse a character literal
%%
%% Lexer is the language definition used to skip trailing whitespace. the
%% literal is one or more (possibly escaped) characters between single
%% quotes; the characters are packed into one integer, 8 bits each, with
%% the first character in the most significant position. an empty literal
%% ('') is rejected.
char_lit (Lexer=#lexer{}) ->
    %% the continuation handed to parsec:bind/2 has to produce a
    %% combinator, so failure is parsec:pzero() (as in identifier/1 and
    %% reserved/2) rather than the bare atom pzero
    Char=fun ([]) -> parsec:pzero();
             (CS) ->
                 X=lists:foldl(fun (C,Acc) -> (Acc bsl 8) bor C end,0,CS),
                 parsec:return(X)
         end,
    Quoted=parsec:do([parsec:char($'),
                      parsec:many_till(escaped_char(),parsec:char($'))
                     ]),
    lexeme(Lexer,parsec:bind(Quoted,Char)).
|
||
%% parse a string literal: (possibly escaped) characters between double
%% quotes, yielded as the list of characters read
string_lit (#lexer{}=Lexer) ->
    Opening=parsec:char($"),
    Body=parsec:many_till(escaped_char(),parsec:char($")),
    Quoted=parsec:do([Opening,Body]),
    lexeme(Lexer,parsec:bind(Quoted,fun parsec:return/1)).
|
||
%% parse an unsigned, natural number using a given base (2-36)
%%
%% Lexer is the language definition used to skip trailing whitespace and
%% Base selects how many of the digits 0-9, A-Z are legal. letters are
%% accepted in either case, which is what list_to_integer/2 understands,
%% so input such as "ff" parses in base 16. the combinator yields the
%% integer value of the digits read.
natural (Lexer=#lexer{},Base) ->
    %% lists:sublist/2 never pads, unlike the obsolete string:left/2 which
    %% fills with spaces when asked for more characters than exist
    Upper=lists:sublist("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",Base),
    Lower=[C-$A+$a || C <- Upper,C>=$A,C=<$Z],
    Digit=parsec:one_of(Upper++Lower),
    lexeme(Lexer,parsec:bind(parsec:many1(Digit),
                             fun (N) ->
                                 parsec:return(list_to_integer(N,Base))
                             end)).
|
||
%% unsigned natural numbers in the common bases; each one is natural/2
%% with the base fixed

%% base 10
decimal (#lexer{}=Lexer) ->
    natural(Lexer,10).

%% base 16
hexadecimal (#lexer{}=Lexer) ->
    natural(Lexer,16).

%% base 8
octal (#lexer{}=Lexer) ->
    natural(Lexer,8).

%% base 2
binary (#lexer{}=Lexer) ->
    natural(Lexer,2).
|
||
%% parse a real number
%%
%% Lexer is the language definition used to skip trailing whitespace. the
%% number must be in the format \d+\.\d+(eE[-+]?\d+)? and the captured
%% text is converted with list_to_float/1, so the combinator yields a
%% float.
real (Lexer=#lexer{}) ->
    %% the function name is written as the quoted atom 'maybe' because
    %% maybe is a reserved word once the maybe_expr feature is enabled
    %% (the default from OTP 27); quoting calls the very same function
    Fraction=parsec:do([parsec:char($.),?DIGITS]),
    Exp=parsec:do([parsec:one_of("eE"),
                   parsec:'maybe'(parsec:one_of("-+")),
                   ?DIGITS
                  ]),
    Number=parsec:capture(parsec:do([?DIGITS,
                                     Fraction,
                                     parsec:'maybe'(Exp)
                                    ])),
    lexeme(Lexer,parsec:bind(Number,
                             fun (N) ->
                                 parsec:return(list_to_float(N))
                             end)).
|
||
%% parse a float or natural number the way many languages write them
%%
%% alternatives are tried in this order: a real number, then a "0x", "0o"
%% or "0b" prefix followed by a hexadecimal, octal or binary natural, and
%% finally a plain decimal natural
real_or_natural (#lexer{}=Lexer) ->
    Prefixed=fun (Prefix,Number) ->
        parsec:bind_(parsec:string(Prefix),Number)
    end,
    Alternatives=[real(Lexer),
                  Prefixed("0x",hexadecimal(Lexer)),
                  Prefixed("0o",octal(Lexer)),
                  Prefixed("0b",binary(Lexer)),
                  decimal(Lexer)],
    parsec:choice(Alternatives).
|
||
%% parse P between the start combinator S and the end combinator E; all
%% three are treated as lexemes and only the value of P is kept
between (#lexer{}=Lexer,S,E,P) ->
    Open=lexeme(Lexer,S),
    Inner=lexeme(Lexer,P),
    CloseAndKeep=fun (Value) ->
        parsec:bind_(lexeme(Lexer,E),parsec:return(Value))
    end,
    parsec:bind(parsec:bind_(Open,Inner),CloseAndKeep).
|
||
%% parse a combinator between a pair of delimiters; each function is
%% between/4 with the opening and closing characters fixed

%% ( P )
parens (#lexer{}=Lexer,P) ->
    between(Lexer,parsec:char($(),parsec:char($)),P).

%% [ P ]
brackets (#lexer{}=Lexer,P) ->
    between(Lexer,parsec:char($[),parsec:char($]),P).

%% { P }
braces (#lexer{}=Lexer,P) ->
    between(Lexer,parsec:char(${),parsec:char($}),P).

%% < P >
angles (#lexer{}=Lexer,P) ->
    between(Lexer,parsec:char($<),parsec:char($>),P).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
%% example lisp parser | ||
%% | ||
%% lisp_parser.erl | ||
%% | ||
|
||
-module(lisp_parser). | ||
-export([parse/1]). | ||
|
||
%% get the lexer definition | ||
-include("../include/lexer.hrl"). | ||
|
||
%% the non-letter characters that may appear in a lisp identifier
special_symbol () ->
    Symbols="~!@$%&*-+=<>/",
    parsec:one_of(Symbols).
|
||
%% define the lisp lexer
%%
%% returns a #lexer{} record: block comments are written #| ... |#, line
%% comments start with a semicolon, identifiers are made of letters and
%% the special symbols, and "lambda" and "let" are reserved names.
make_lexer () ->
    #lexer{
        comment_start=parsec:string("#|"),
        comment_end=parsec:string("|#"),
        comment_line=parsec:string(";"),
        ident_start=parsec:choice([?LETTER,special_symbol()]),
        ident_letter=parsec:choice([?LETTER,special_symbol()]),
        %% no operator syntax is defined for this language; the fields are
        %% typed as combinators, so they hold parsec:pzero() (the form
        %% lexer.erl uses to reject input) instead of the bare atom pzero
        op_start=parsec:pzero(),
        op_letter=parsec:pzero(),
        reserved_names=["lambda","let"],
        reserved_ops=[]
    }.
|
||
%% continuations that wrap a parsed value in a tagged tuple and hand it
%% back through parsec:return/1

%% {list,Forms}
as_list (Forms) ->
    parsec:return({list,Forms}).

%% {num,Number}
as_num (Number) ->
    parsec:return({num,Number}).

%% {str,Text}
as_string (Text) ->
    parsec:return({str,Text}).

%% {char,Code}
as_char (Code) ->
    parsec:return({char,Code}).

%% {id,Name}
as_ident (Name) ->
    parsec:return({id,Name}).

%% {op,Name}
as_op (Name) ->
    parsec:return({op,Name}).
|
||
%% parse a form
%%
%% returns a fun over the parse state; the alternatives are only built
%% when that fun is applied, which keeps the recursive form(Lexer) inside
%% the list alternative from being expanded eagerly. alternatives are
%% tried in order: parenthesised list, number, identifier, string
%% literal, character literal, and the reserved word "lambda"
form (Lexer) ->
    fun (State) ->
        List=parsec:bind(lexer:parens(Lexer,parsec:many(form(Lexer))),
                         fun as_list/1),
        Number=parsec:bind(lexer:real_or_natural(Lexer),fun as_num/1),
        Ident=parsec:bind(lexer:identifier(Lexer),fun as_ident/1),
        String=parsec:bind(lexer:string_lit(Lexer),fun as_string/1),
        Char=parsec:bind(lexer:char_lit(Lexer),fun as_char/1),
        Lambda=parsec:bind(lexer:reserved(Lexer,"lambda"),fun as_op/1),
        Alternatives=parsec:choice([List,Number,Ident,String,Char,Lambda]),
        Alternatives(State)
    end.
|
||
%% parse a form from a string: skip leading whitespace and comments, then
%% read a single form; the result is whatever parsec:parse/2 returns
parse (S) ->
    Lexer=make_lexer(),
    Leading=lexer:whitespace(Lexer),
    Program=parsec:do([Leading,form(Lexer)]),
    parsec:parse(S,Program).
Oops, something went wrong.