Skip to content

Commit

Permalink
enhance error handling, adjust seq[n] behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
luikore committed Feb 16, 2011
1 parent 5c04821 commit b8ed5df
Show file tree
Hide file tree
Showing 11 changed files with 142 additions and 108 deletions.
2 changes: 1 addition & 1 deletion examples/bnf.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
def bnf def bnf
nbsp = /[\ \t]*/.r nbsp = /[\ \t]*/.r
spacee = /\s*/.r # include \n spacee = /\s*/.r # include \n
literal = /".*?"|'.*?'/ literal = /".*?"|'.*?'/.r
rule_name = /\<.*?\>/ rule_name = /\<.*?\>/
term = literal | rule_name term = literal | rule_name
list = term.join nbsp.skip list = term.join nbsp.skip
Expand Down
52 changes: 29 additions & 23 deletions examples/c_minus.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -4,14 +4,23 @@ class CMinus
include Rsec::Helpers include Rsec::Helpers


# "terminal" rules # "terminal" rules
ID = /[a-zA-Z]\w*/.r ID = /[a-zA-Z]\w*/.r :id
NUM = /\d+/.r NUM = /\d+/.r :num
INT = /[+-]?\d+/.r INT = /[+-]?\d+/.r :int
NBSP = /[\ \t]*/.r.skip NBSP = /[\ \t]*/.r.skip
SPACE = /\s*/.r.skip SPACE = /\s*/.r.skip
TYPE = /int|void/.r TYPE = /int|void/.r :type
EOSTMT = /;/.r.skip # end of statement EOSTMT = /;/.r(';').skip # end of statement

# keyword / operator tokens
# The argument handed to .r (symbol or string) is presumably the name listed
# in the "expect token [...]" part of a syntax error -- confirm against
# the Regexp-to-parser helper, which is not visible here.
ELSE = /else\s/.r :keyword_else
IF = 'if'.r :keyword_if
WHILE = 'while'.r :keyword_while
RETURN = /return\s/.r :keyword_return
MUL_OP = /\s*[\*\/%]\s*/.r '*/%', &:strip
ADD_OP = /\s*[\+\-]\s*/.r '+-', &:strip
# Two-character operators must be listed before their one-character prefixes:
# regexp alternation takes the first alternative that matches, so with
# `\>` ahead of `\>=` the input ">=" was consumed as ">" and the "=" was left over.
COMP_OP = /\s*(\<=|\>=|\<|\>|==|!=)\s*/.r 'compare operator', &:strip
COMMA = /\s*,\s*/.r(:comma).skip
EMPTY_BRA = /\[\s*\]/.r('empty square bracket')

# ------------------- helpers # ------------------- helpers


# call(function apply) expression # call(function apply) expression
Expand All @@ -20,16 +29,13 @@ def call expr
seq_(ID, SPACE, args._?.wrap_('()')) seq_(ID, SPACE, args._?.wrap_('()'))
end end


# binary arithmetic
#
# Chains +factor+ through three join levels, tightest binding first:
# multiplicative (* / %), additive (+ -), then comparison operators.
# Each operator regexp swallows surrounding whitespace and the matched
# text is passed through String#strip.
def binary_arithmetic factor
  factor.join(/\s*[\*\/%]\s*/.r(&:strip)).flatten
    .join(/\s*[\+\-]\s*/.r(&:strip)).flatten
    # ">=" and "<=" must precede ">" and "<": alternation picks the first
    # alternative that matches, so `\>` ahead of `\>=` split ">=" into ">" + "=".
    .join(/\s*(\<=|\>=|\<|\>|==|!=)\s*/.r(&:strip)).flatten
end

# (binary) expression # (binary) expression
def expression def expression
expr = lazy{assign} | binary_arithmetic(lazy{factor}) binary_arithmetic = lazy{factor}
.join(MUL_OP).flatten
.join(ADD_OP).flatten
.join(COMP_OP).flatten
expr = lazy{assign} | binary_arithmetic
# abc # abc
# abc[12] # abc[12]
var = seq_(ID, expr.wrap_('[]')._?).flatten var = seq_(ID, expr.wrap_('[]')._?).flatten
Expand All @@ -47,10 +53,10 @@ def statement var_decl
_stmt = lazy{stmt} # to reduce the use of lazy{} _stmt = lazy{stmt} # to reduce the use of lazy{}


expr_stmt = seq_(expr, EOSTMT).flatten | EOSTMT expr_stmt = seq_(expr, EOSTMT).flatten | EOSTMT
else_stmt = seq_(/else\s/, _stmt) else_stmt = seq_(ELSE, _stmt)
if_stmt = seq_('if', brace, _stmt, else_stmt._?) if_stmt = seq_(IF, brace, _stmt, else_stmt._?)
while_stmt = seq_('while', brace, _stmt) while_stmt = seq_(WHILE, brace, _stmt)
return_stmt = seq_(/return\s/, expr._?, EOSTMT) return_stmt = seq_(RETURN, expr._?, EOSTMT)
# { var_decls statements } # { var_decls statements }
block = seq_(SPACE.join(var_decl), SPACE.join(_stmt)).wrap_ '{}' block = seq_(SPACE.join(var_decl), SPACE.join(_stmt)).wrap_ '{}'
stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt
Expand All @@ -71,8 +77,8 @@ def initialize
# p stmt.parse! 'gcd(v,u-u/v*v);' # p stmt.parse! 'gcd(v,u-u/v*v);'
# p stmt.parse! 'if(3==2) {return 4;}' # p stmt.parse! 'if(3==2) {return 4;}'


param = seq_(type_id, /\[\s*\]/.r._?) param = seq_(type_id, EMPTY_BRA._?)
params = param.join(/\s*,\s*/.r.skip) | 'void' params = param.join(COMMA) | 'void'
brace = params.wrap_ '()' brace = params.wrap_ '()'
fun_decl = seq_(type_id, brace, block) fun_decl = seq_(type_id, brace, block)
# p fun_decl.parse! 'int gcd(int u, int v){return 2;}' # p fun_decl.parse! 'int gcd(int u, int v){return 2;}'
Expand All @@ -88,7 +94,7 @@ def initialize
pp c_minus.program.parse! %Q[ pp c_minus.program.parse! %Q[
int gcd(int u, int v) int gcd(int u, int v)
{ {
if (v == 0) return u ; if (v == 0) return u x;
else return gcd(v,u-u/v*v); else return gcd(v,u-u/v*v);
} }
Expand Down
3 changes: 2 additions & 1 deletion ext/rsec/ext.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -69,13 +69,14 @@ class SpacedWrapByte < SpacedWrap


class String class String
# overwrite string-to-parser transformer # overwrite string-to-parser transformer
define_method ::Rsec::TO_PARSER_METHOD, ->(&p){ define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){
parser = \ parser = \
if self.bytesize == 1 if self.bytesize == 1
::Rsec::Byte[self] ::Rsec::Byte[self]
else else
::Rsec::FixString[self] ::Rsec::FixString[self]
end end
parser = parser.fail(*expects)
p ? parser.map(&p) : parser p ? parser.map(&p) : parser
} }
end end
Expand Down
13 changes: 11 additions & 2 deletions ext/rsec/predef.c
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ static VALUE parse_spaced_one_of_byte(VALUE self, VALUE ctx) {
// faster join parser // faster join parser




VALUE parse_join(VALUE self, VALUE ctx) { static VALUE parse_join(VALUE self, VALUE ctx) {
VALUE token = rb_iv_get(self, "@token"); VALUE token = rb_iv_get(self, "@token");
VALUE inter = rb_iv_get(self, "@inter"); VALUE inter = rb_iv_get(self, "@inter");
struct strscanner* ss; struct strscanner* ss;
Expand Down Expand Up @@ -412,14 +412,23 @@ VALUE parse_join(VALUE self, VALUE ctx) {
// faster map parser // faster map parser




VALUE parse_map(VALUE self, VALUE ctx) { static VALUE parse_map(VALUE self, VALUE ctx) {
VALUE* data = RSTRUCT_PTR(self); VALUE* data = RSTRUCT_PTR(self);
VALUE res = call_parse(data[0], ctx); VALUE res = call_parse(data[0], ctx);
if (res == invalid) return res; if (res == invalid) return res;
return rb_proc_call(data[1], rb_ary_new3(1, res)); return rb_proc_call(data[1], rb_ary_new3(1, res));
} }




// -----------------------------------------------------------------------------
// faster ParseContext.on_fail


// Placeholder for a C-side ParseContext.on_fail; not implemented yet.
// Both `self` and `tokens` are currently ignored.
// NOTE(review): returns the literal 0 rather than a named Ruby constant
// such as Qnil -- confirm the intended return value when completing this.
static VALUE parse_context_on_fail(VALUE self, VALUE tokens) {
return 0; // TODO complete c side parse context
}


// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// init // init


Expand Down
126 changes: 74 additions & 52 deletions lib/rsec/base.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -11,91 +11,134 @@ def parse str, source_name='source'
end end


# almost the same as parse<br/> # almost the same as parse<br/>
# but raises ParseError # but raises SyntaxError
def parse! str, source_name='source' def parse! str, source_name='source'
ctx = ParseContext.new str, source_name ctx = ParseContext.new str, source_name
ret = _parse ctx ret = _parse ctx
if INVALID[ret] if INVALID[ret]
raise ParseError[ctx.err || 'syntax error', ctx] raise ctx.generate_error source_name
end end
ret ret
end end


# error class for rescue
#
# Holds the message, the text of the offending line and its line/column,
# and renders them with a caret marking the column.
class SyntaxError < StandardError
  attr_reader :msg, :line_text, :line, :col

  # constructor
  # msg::       headline of the error
  # line_text:: text of the source line shown under the headline
  # line::      line number
  # col::       column number (ParseContext#col yields 1 for the first character)
  def initialize msg, line_text, line, col
    @msg, @line_text, @line, @col = msg, line_text, line, col
  end

  # info with source position
  def to_s
    # col is 1-based, so the caret needs col - 1 leading spaces; padding
    # with col spaces put it one character to the right of the error.
    # Clamp at zero so an out-of-range column cannot make String#* raise.
    # NOTE(review): when line_text was truncated on the left the caret can
    # still be misaligned; that has to be solved where line_text is cut.
    indent = ' ' * [@col.to_i - 1, 0].max
    %Q<#@msg\n#@line_text\n#{indent}^>
  end
end

# parse context inherits from StringScanner<br/> # parse context inherits from StringScanner<br/>
# <br/> # <br/>
# attributes:<br/> # attributes:<br/>
# <pre> # <pre>
# [R] string: string to parse # [R] string: string to parse
# [RW] pos: current position # [RW] pos: current position
# [R] source: source file name # [R] source: source file name
# [RW] err: parsing error
# [R] column: current position in line
# [R] line: current line number
# [R] current_line_text: current line text # [R] current_line_text: current line text
# [R] cache: for memoization # [R] cache: for memoization
# </pre> # </pre>
class ParseContext < StringScanner class ParseContext < StringScanner
attr_reader :source, :cache attr_reader :source, :cache, :last_fail_pos
attr_accessor :err attr_accessor :attr_names

def initialize str, source def initialize str, source
super(str) super(str)
@source = source @source = source
@cache = {} @cache = {}
@last_fail_pos = 0
@last_fail_tokens = []
end end


# clear packrat parser cache
def clear_cache def clear_cache
@cache.clear @cache.clear
end end


def line # add fail message
# Record the tokens expected at a failure.
# Only the farthest failure position is kept: a failure further right
# replaces the stored tokens, a failure at the same position appends the
# tokens not stored yet, and a failure further left is ignored.
def on_fail tokens
  here = pos
  if here == @last_fail_pos
    unseen = tokens - @last_fail_tokens
    @last_fail_tokens = @last_fail_tokens + unseen
  elsif here > @last_fail_pos
    @last_fail_pos = here
    @last_fail_tokens = tokens
  end
end

# generate parse error
#
# When the scanner has not moved past the farthest recorded failure, the
# error points at that failure and lists the tokens expected there;
# otherwise it points at the current position with no expectation list.
# Returns a SyntaxError (it is not raised here).
def generate_error source
  farthest = @last_fail_pos
  if self.pos > farthest
    at = pos
    expects = nil
  else
    at = farthest
    expects = ", expect token [ #{@last_fail_tokens.join ' | '} ]"
  end
  line_no = line(at)
  column = col(at)
  snippet = line_text(at)
  SyntaxError.new "\nin #{source}:#{line_no} at #{column}#{expects}", snippet, line_no, column
end

# get line number
def line pos
string[0...pos].count("\n") + 1 string[0...pos].count("\n") + 1
end end


def column # get column number: position in line
def col pos
return 1 if pos == 0 return 1 if pos == 0
newline_pos = string.rindex "\n", pos - 1 newline_pos = string.rindex "\n", pos - 1
if newline_pos if newline_pos
pos - newline_pos pos - newline_pos
else else
pos + 1 pos + 1
end end
end end
alias col column


def current_line_text # get line text containing pos
from = string.rindex "\n", pos - 1 # the text is 80 at most
to = string.index "\n", pos def line_text pos
string[(from || 0)..(to || -1)] from = string.rindex "\n", pos
from = from ? from + 1 : 0
from = pos - 40 if (from < pos - 40)

to = string.index("\n", pos)
to = to ? to - 1 : string.size
to = pos + 40 if (to > pos + 40)

string[from..to]
end end
end end


# the skip token # the skip token
SKIP = Object.new SKIP = Object.new
class << SKIP class << SKIP
# check if x is skip token
def [] x
self == x
end
def to_str def to_str
'SKIP_TOKEN' 'SKIP_TOKEN'
end end
def inspect alias :[] :==
'SKIP_TOKEN' alias inspect to_str
end
end end


# the invalid token # the invalid token
INVALID = Object.new INVALID = Object.new
class << INVALID class << INVALID
def [] x
self == x
end
def to_str def to_str
'INVALID_TOKEN' 'INVALID_TOKEN'
end end
def inspect alias :[] :==
'INVALID_TOKEN' alias inspect to_str
end
end end


attr_accessor :name attr_accessor :name
Expand All @@ -107,37 +150,16 @@ def inspect
"<#{name}>" "<#{name}>"
when Binary when Binary
"<#{name} #{left.inspect} #{right.inspect}>" "<#{name} #{left.inspect} #{right.inspect}>"
when Unary when Seq, Seq_, Branch
"<#{name} #{some.inspect}>"
when Array
# don't use redefined map! # don't use redefined map!
res = [] res = []
each{|e| res << e.inspect} each{|e| res << e.inspect}
"<#{name} #{res.join ' '}>" "<#{name} #{res.join ' '}>"
when Unary
"<#{name} #{some.inspect}>"
else else
"<#{name}>" "<#{name}>"
end end
end end


# error class for rescue
# Keeps the message together with the parse context it occurred in.
class ParseError < StandardError
  attr_reader :ctx, :msg

  # constructor
  def initialize(msg, ctx)
    @msg = msg
    @ctx = ctx
  end

  # beautiful constructor: ParseError[msg, ctx]
  def self.[](msg, ctx)
    new(msg, ctx)
  end

  # info with source position
  def to_s
    # TODO show last parser
    location = %Q<"#{@ctx.source}": (#{@ctx.line}, #{@ctx.col})>
    snippet = @ctx.current_line_text[0..79] # show at most 80 characters of the line
    pointer = ' ' * @ctx.col
    "[#{@msg}] in #{location}\n#{snippet}\n#{pointer}^"
  end
end
end end
8 changes: 3 additions & 5 deletions lib/rsec/binary.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@ def _parse ctx
end end
end end


# set the parsing error in ctx<br/> # set expect tokens for parsing error in ctx<br/>
# if left failed, the error would show up<br/> # if left failed, the error would be registered
# if not, the error disappears
class Fail < Binary class Fail < Binary
def _parse ctx def _parse ctx
ctx.err = right()
res = left()._parse ctx res = left()._parse ctx
ctx.err = nil unless INVALID[res] ctx.on_fail right if INVALID[res]
res res
end end
end end
Expand Down
Loading

0 comments on commit b8ed5df

Please sign in to comment.