Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Refactored several smallish areas.

Rolled temp refactorings back in.
Refactored unary +/- together.
Refactored ')', ']', '}' lexers together.
Rolled gvar sub scanners into main scanner.
Sorted ruby_parser_extras.
Finally fixed an annoyance by nilling yacc_value at start of yylex.
Started profiling the code...
Added some cheap method call reductions (src = self.src) in yylex.
Switched back to getch since it is a tad faster.
Uses pure ruby racc if ENV['PURE_RUBY'] is set, otherwise uses the C version.

[git-p4: depot-paths = "//src/ruby_parser/dev/": change = 3847]
  • Loading branch information...
commit af96092237af25fe57d10c9dd02e43d7875b811f 1 parent a793adf
@zenspider zenspider authored
View
1  .autotest
@@ -30,4 +30,3 @@ end
require 'autotest/rcov'
Autotest::RCov.command = 'rcov_info'
Autotest::RCov.pattern = 'test/test_ruby_lexer.rb'
-
View
2  README.txt
@@ -12,6 +12,7 @@ base types.
== FEATURES/PROBLEMS:
* Pure ruby, no compiles.
+* Includes preceding comment data for defn/defs/class/module nodes!
* Incredibly simple interface.
* Output is 100% equivalent to ParseTree.
* Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
@@ -20,7 +21,6 @@ base types.
* Known Issue: I don't currently support newline nodes.
* Known Issue: Totally awesome.
* Known Issue: dasgn_curr decls can be out of order from ParseTree's.
-* TODO: Add comment nodes.
== SYNOPSIS:
View
318 lib/ruby_lexer.rb
@@ -30,16 +30,14 @@ class RubyLexer
# What handles warnings
attr_accessor :warnings
- # Give a name to a value. Enebo: This should be used more.
- # HACK OMG HORRIBLE KILL ME NOW. Enebo, no. this shouldn't be used more
- EOF = nil # was 0... ugh
+ EOF = :eof_haha!
# ruby constants for strings (should this be moved somewhere else?)
STR_FUNC_PLAIN = 0x00
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
STR_FUNC_EXPAND = 0x02
STR_FUNC_REGEXP = 0x04
- STR_FUNC_AWORDS = 0x08
+ STR_FUNC_AWORDS = 0x08
STR_FUNC_SYMBOL = 0x10
STR_FUNC_INDENT = 0x20 # <<-HEREDOC
@@ -54,9 +52,25 @@ class RubyLexer
#
# @return true if not at end of file (EOF).
+ if ENV['SPY'] then
+ @@stats = Hash.new 0
+
+ def self.stats
+ @@stats
+ end
+
+ at_exit {
+ require 'pp'
+ pp RubyLexer.stats.sort_by {|k,v| -v}.first(20)
+ }
+ end
+
def advance
r = yylex
self.token = r
+
+ @@stats[r] += 1 if ENV['SPY']
+
return r != RubyLexer::EOF
end
@@ -70,6 +84,13 @@ def comments
c
end
+ def expr_beg_push val
+ cond.push false
+ cmdarg.push false
+ self.lex_state = :expr_beg
+ self.yacc_value = t(val)
+ end
+
def heredoc here # Region has 63 lines, 1595 characters
_, eos, func, last_line = here
@@ -196,6 +217,14 @@ def initialize
reset
end
+ def fix_arg_lex_state
+ self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
+ :expr_arg
+ else
+ :expr_beg
+ end
+ end
+
def int_with_base base
if src.matched =~ /__/ then
rb_compile_error "Invalid numeric format"
@@ -255,12 +284,12 @@ def parse_quote
if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
rb_compile_error "unknown type of %string" if
src.matched_size == 2
- c, beg, short_hand = src.matched, src.scan(/./m), false
+ c, beg, short_hand = src.matched, src.getch, false
else # Short-hand (e.g. %{, %., %!, etc)
- c, beg, short_hand = 'Q', src.scan(/./m), true
+ c, beg, short_hand = 'Q', src.getch, true
end
- if c == RubyLexer::EOF or beg == RubyLexer::EOF then
+ if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
rb_compile_error "unterminated quoted string meets end of file"
end
@@ -407,7 +436,7 @@ def read_escape
when src.scan(/[McCx0-9]/) || src.eos? then
rb_compile_error("Invalid escape character syntax")
else
- src.scan(/./m)
+ src.getch
end
end
@@ -463,7 +492,7 @@ def tokadd_escape term
unless src.check(/\\|#{Regexp.escape term}/) then
self.token_buffer << "\\"
end
- self.token_buffer << src.scan(/./m)
+ self.token_buffer << src.getch
end
end
@@ -523,7 +552,7 @@ def tokadd_string(func, term, paren, buffer)
# }
# }
else
- c = src.scan(/./m) # FIX: I don't like this style
+ c = src.getch # FIX: I don't like this style
if symbol && src.scan(/\0/) then
rb_compile_error "symbol cannot contain '\\0'"
end
@@ -532,6 +561,7 @@ def tokadd_string(func, term, paren, buffer)
c = src.matched unless c
buffer << c
end # until
+
c = src.matched unless c
c = RubyLexer::EOF if src.eos?
@@ -542,35 +572,6 @@ def warning s
# do nothing for now
end
- def temp_handle_strterm
- token = nil
-
- if lex_strterm[0] == :heredoc then
- token = self.heredoc(lex_strterm)
- if token == :tSTRING_END then
- self.lex_strterm = nil
- self.lex_state = :expr_end
- end
- else
- token = self.parse_string(lex_strterm)
-
- if token == :tSTRING_END || token == :tREGEXP_END then
- self.lex_strterm = nil
- self.lex_state = :expr_end
- end
- end
-
- return token
- end
-
- def fix_arg_lex_state
- self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
- :expr_arg
- else
- :expr_beg
- end
- end
-
##
# Returns the next token. Also sets yy_val if needed.
#
@@ -583,9 +584,30 @@ def yylex
space_seen = false
command_state = false
token_buffer.clear
+ src = self.src
+
+ self.token = nil
+ self.yacc_value = nil
if lex_strterm then
- return temp_handle_strterm
+ token = nil
+
+ if lex_strterm[0] == :heredoc then
+ token = self.heredoc(lex_strterm)
+ if token == :tSTRING_END then
+ self.lex_strterm = nil
+ self.lex_state = :expr_end
+ end
+ else
+ token = self.parse_string(lex_strterm)
+
+ if token == :tSTRING_END || token == :tREGEXP_END then
+ self.lex_strterm = nil
+ self.lex_state = :expr_end
+ end
+ end
+
+ return token
end
command_state = self.command_start
@@ -619,10 +641,8 @@ def yylex
src.scan(/\n+/)
- if (lex_state == :expr_beg ||
- lex_state == :expr_fname ||
- lex_state == :expr_dot ||
- lex_state == :expr_class) then
+ if [:expr_beg, :expr_fname,
+ :expr_dot, :expr_class].include? lex_state then
next
end
@@ -825,7 +845,7 @@ def yylex
c = if src.scan(/\\/) then
self.read_escape
else
- src.scan(/./m)
+ src.getch
end
c[0] &= 0xff
self.lex_state = :expr_end
@@ -872,21 +892,28 @@ def yylex
self.fix_arg_lex_state
self.yacc_value = t("|")
return :tPIPE
- when src.scan(/\+/) then
+ when src.scan(/[+-]/) then
+ sign = src.matched
+ utype, type = if sign == "+" then
+ [:tUPLUS, :tPLUS]
+ else
+ [:tUMINUS, :tMINUS]
+ end
+
if lex_state == :expr_fname || lex_state == :expr_dot then
self.lex_state = :expr_arg
if src.scan(/@/) then
- self.yacc_value = t("+@")
- return :tUPLUS
+ self.yacc_value = t("#{sign}@")
+ return utype
else
- self.yacc_value = t("+")
- return :tPLUS
+ self.yacc_value = t(sign)
+ return type
end
end
if src.scan(/\=/) then
self.lex_state = :expr_beg
- self.yacc_value = t("+")
+ self.yacc_value = t(sign)
return :tOP_ASGN
end
@@ -895,56 +922,24 @@ def yylex
if lex_state.is_argument then
arg_ambiguous
end
- self.lex_state = :expr_beg
-
- if src.check(/\d/) then
- return self.parse_number
- end
-
- self.yacc_value = t("+")
- return :tUPLUS
- end
-
- self.lex_state = :expr_beg
- self.yacc_value = t("+")
- return :tPLUS
- when src.scan(/-/) then
- if lex_state == :expr_fname || lex_state == :expr_dot then
- self.lex_state = :expr_arg
- if src.scan(/@/) then
- self.yacc_value = t("-@")
- return :tUMINUS
- end
-
- self.yacc_value = t("-")
- return :tMINUS
- end
- if src.scan(/\=/) then
- self.lex_state = :expr_beg
- self.yacc_value = t("-")
- return :tOP_ASGN
- end
-
- if (lex_state == :expr_beg ||
- lex_state == :expr_mid ||
- (lex_state.is_argument && space_seen && !src.check(/\s/))) then
- if lex_state.is_argument then
- arg_ambiguous
- end
self.lex_state = :expr_beg
- self.yacc_value = t("-")
+ self.yacc_value = t(sign)
if src.check(/\d/) then
- return :tUMINUS_NUM
+ if utype == :tUPLUS then
+ return self.parse_number
+ else
+ return :tUMINUS_NUM
+ end
end
- return :tUMINUS
+
+ return utype
end
self.lex_state = :expr_beg
- self.yacc_value = t("-")
-
- return :tMINUS
+ self.yacc_value = t(sign)
+ return type
when src.scan(/\.\.\./) then
self.lex_state = :expr_beg
self.yacc_value = t("...")
@@ -961,24 +956,17 @@ def yylex
return :tDOT
when src.check(/[0-9]/) then
return parse_number
- when src.scan(/\)/) then # REFACTOR: omg this is lame... next 3 are all the same
- cond.lexpop
- cmdarg.lexpop
- self.lex_state = :expr_end
- self.yacc_value = t(")")
- return :tRPAREN
- when src.scan(/\]/) then
- cond.lexpop
- cmdarg.lexpop
- self.lex_state = :expr_end
- self.yacc_value = t("]")
- return :tRBRACK
- when src.scan(/\}/) then
+ when src.scan(/[\)\]\}]/) then
cond.lexpop
cmdarg.lexpop
self.lex_state = :expr_end
- self.yacc_value = t("end") # except this... *sigh*
- return :tRCURLY
+ self.yacc_value = t(src.matched)
+ result = {
+ ")" => :tRPAREN,
+ "]" => :tRBRACK,
+ "}" => :tRCURLY
+ }[src.matched]
+ return result
when src.scan(/::/) then
if (lex_state == :expr_beg ||
lex_state == :expr_mid ||
@@ -990,11 +978,10 @@ def yylex
end
self.lex_state = :expr_dot
- self.yacc_value = t(":")
+ self.yacc_value = t("::")
return :tCOLON2
when src.scan(/\:/) then
- if (lex_state == :expr_end ||
- lex_state == :expr_endarg ||
+ if (lex_state == :expr_end || lex_state == :expr_endarg ||
src.check(/\s/)) then
self.lex_state = :expr_beg
self.yacc_value = t(":")
@@ -1077,10 +1064,7 @@ def yylex
end
end
- cond.push false
- cmdarg.push false
- self.lex_state = :expr_beg
- self.yacc_value = t("(")
+ self.expr_beg_push "("
return result
when src.scan(/\[/) then
@@ -1103,10 +1087,7 @@ def yylex
result = :tLBRACK
end
- cond.push false
- cmdarg.push false
- self.lex_state = :expr_beg
- self.yacc_value = t("[")
+ self.expr_beg_push("[")
return result
when src.scan(/\{/) then
@@ -1118,11 +1099,7 @@ def yylex
:tLBRACE # hash
end
- # REFACTOR: 3 occurances of these 4 lines
- cond.push false
- cmdarg.push false
- self.lex_state = :expr_beg
- self.yacc_value = t("{")
+ self.expr_beg_push("{")
return result
when src.scan(/\\/) then
@@ -1150,64 +1127,55 @@ def yylex
self.yacc_value = t("%")
return :tPERCENT
- when src.scan(/\$/) then
- last_state = lex_state
+ when src.scan(/(\$_)(\w)/) then
self.lex_state = :expr_end
-
- case
- when src.scan(/_(\w)/) then
- token_buffer << '$'
- token_buffer << '_'
- # HACK? c = src[1]
- # pass through
- when src.scan(/_/) then
- token_buffer << '$'
- self.yacc_value = t('$_')
- return :tGVAR
- when src.scan(/[~*$?!@\/\\;,.=:<>\"]/) then
- token_buffer << '$'
+ token_buffer << src[1]
+ # HACK? c = src[2]
+ # pass through
+ when src.scan(/\$_/) then
+ self.lex_state = :expr_end
+ token_buffer << src.matched
+ self.yacc_value = t(src.matched)
+ return :tGVAR
+ when src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
+ self.lex_state = :expr_end
+ token_buffer << src.matched
+ self.yacc_value = t(token_buffer.join)
+ return :tGVAR
+ when src.scan(/\$([\&\`\'\+])/) then
+ self.lex_state = :expr_end
+ # Explicit reference to these vars as symbols...
+ if last_state == :expr_fname then
token_buffer << src.matched
self.yacc_value = t(token_buffer.join)
return :tGVAR
- when src.scan(/-\w?/) then
- token_buffer << '$'
- token_buffer << src.matched
+ else
+ self.yacc_value = s(:back_ref, src[1].to_sym)
+ return :tBACK_REF
+ end
+ when src.scan(/\$[1-9]\d*/) then
+ self.lex_state = :expr_end
+ token_buffer.push(*src.matched.split(//))
+ if last_state == :expr_fname then
self.yacc_value = t(token_buffer.join)
- # xxx shouldn't check if valid option variable
return :tGVAR
- when src.scan(/[\&\`\'\+]/) then
- token_buffer << '$'
- # Explicit reference to these vars as symbols...
- if last_state == :expr_fname then
- token_buffer << src.matched
- self.yacc_value = t(token_buffer.join)
- return :tGVAR
- else
- self.yacc_value = s(:back_ref, src.matched.to_sym)
- return :tBACK_REF
- end
- when src.scan(/[1-9]\d*/) then
- token_buffer << '$'
- token_buffer.push(*src.matched.split(//))
- if last_state == :expr_fname then
- self.yacc_value = t(token_buffer.join)
- return :tGVAR
- else
- self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
- return :tNTH_REF
- end
- when src.scan(/0/) then
- token_buffer << '$'
- # pass through
- when src.check(/\W|\z/) then
- token_buffer << '$'
- self.yacc_value = t("$")
- return '$'
else
- token_buffer << '$'
- src.scan(/./m)
- # pass through
+ self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
+ return :tNTH_REF
end
+ when src.scan(/\$0/) then
+ self.lex_state = :expr_end
+ token_buffer << '$' # why just this?!?
+ # pass through
+ when src.scan(/\$\W|\$\z/) then # TODO: remove?
+ self.lex_state = :expr_end
+ self.yacc_value = t("$")
+ return '$'
+ when src.scan(/\$/)
+ self.lex_state = :expr_end
+ token_buffer << src.matched
+ src.getch
+ # pass through
when src.scan(/\@/) then
token_buffer << '@'
@@ -1251,7 +1219,7 @@ def yylex
last_state = lex_state
case token_buffer[0]
- when '$' then
+ when /^\$/ then
self.lex_state = :expr_end
result = :tGVAR
when '@' then
View
519 lib/ruby_parser_extras.rb
@@ -4,10 +4,6 @@
require 'strscan'
class StringScanner
- def lineno
- string[0..pos].split(/\n/).size
- end
-
def current_line # HAHA fuck you (HACK)
string[0..pos][/\A.*__LINE__/m].split(/\n/).size
end
@@ -20,6 +16,10 @@ def current_line # HAHA fuck you (HACK)
# end
# end
+ def lineno
+ string[0..pos].split(/\n/).size
+ end
+
def unread c
return if c.nil? # UGH
warn({:unread => caller[0]}.inspect) if ENV['TALLY']
@@ -42,38 +42,37 @@ class RubyParser < Racc::Parser
attr_accessor :lexer, :in_def, :in_single, :file
attr_reader :env, :comments
- def initialize
- super
- self.lexer = RubyLexer.new
- self.in_def = false
- self.in_single = 0
- @env = Environment.new
- @comments = []
- end
-
- def parse(str, file = "(string)")
- raise "bad val: #{str.inspect}" unless String === str
-
- self.file = file
- self.lexer.src = str
-
- @yydebug = ENV.has_key? 'DEBUG'
+ def append_to_block head, tail # FIX: wtf is this?!? switch to block_append
+ return head if tail.nil?
+ return tail if head.nil?
- do_parse
+ head = s(:block, head) unless head.first == :block
+ head << tail
end
- def do_parse
- _racc_do_parse_rb(_racc_setup, false)
+ def arg_add(node1, node2)
+ return s(:array, node2) unless node1
+ return node1 << node2 if node1[0] == :array
+ return s(:argspush, node1, node2)
end
- def next_token
- if self.lexer.advance then
- [self.lexer.token, self.lexer.yacc_value]
+ def arg_blk_pass node1, node2
+ if node2 then
+ node2.insert 1, node1
+ return node2
else
- return [false, '$end']
+ node1
end
end
+ def arg_concat node1, node2
+ return node2.nil? ? node1 : s(:argscat, node1, node2)
+ end
+
+ def aryset receiver, index
+ s(:attrasgn, receiver, :"[]=", index)
+ end
+
def assignable(lhs, value = nil)
id = lhs.to_sym
id = id.to_sym if Token === id
@@ -118,27 +117,97 @@ def assignable(lhs, value = nil)
return result
end
- def arg_add(node1, node2)
- return s(:array, node2) unless node1
- return node1 << node2 if node1[0] == :array
- return s(:argspush, node1, node2)
+ def block_append(head, tail, strip_tail_block=false)
+ return head unless tail
+ return tail unless head
+
+ case head[0]
+ when :lit, :str then
+ return tail
+ end
+
+ head = remove_begin(head)
+ head = s(:block, head) unless head[0] == :block
+
+ if strip_tail_block and Sexp === tail and tail[0] == :block then
+ head.push(*tail.values)
+ else
+ head << tail
+ end
end
- def node_assign(lhs, rhs)
- return nil unless lhs
+ def cond node
+ return nil if node.nil?
+ node = value_expr node
- rhs = value_expr rhs
+ case node.first
+ when :dregex then
+ return s(:match2, node, s(:gvar, "$_".to_sym))
+ when :regex then
+ return s(:match, node)
+ when :lit then
+ if Regexp === node.last then
+ return s(:match, node)
+ else
+ return node
+ end
+ when :and then
+ return s(:and, cond(node[1]), cond(node[2]))
+ when :or then
+ return s(:or, cond(node[1]), cond(node[2]))
+ when :dot2 then
+ label = "flip#{node.hash}"
+ env[label] = self.env.dynamic? ? :dvar : :lvar
+ return s(:flip2, node[1], node[2])
+ when :dot3 then
+ label = "flip#{node.hash}"
+ env[label] = self.env.dynamic? ? :dvar : :lvar
+ return s(:flip3, node[1], node[2])
+ else
+ return node
+ end
+ end
- case lhs[0]
- when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
- :masgn, :cdecl, :cvdecl, :cvasgn then
- lhs << rhs
- when :attrasgn, :call then
- args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
- lhs << arg_add(args, rhs)
+ ##
+ # for pure ruby systems only
+
+ def do_parse
+ _racc_do_parse_rb(_racc_setup, false)
+ end if ENV['PURE_RUBY']
+
+ def dyna_init body, known_vars = [] # HACK HACK HACK - this is the worst
+ var = nil
+ vars = self.env.dynamic.keys - known_vars
+
+ vars.each do |id|
+ if self.env.used? id then
+ var = s(:dasgn_curr, id, var).compact
+ end
end
- lhs
+ self.block_append(var, body, body && body[0] == :block)
+ end
+
+ def get_match_node lhs, rhs
+ if lhs then
+ case lhs[0]
+ when :dregx, :dregx_once then
+ return s(:match2, lhs, rhs)
+ when :lit then
+ return s(:match2, lhs, rhs) if Regexp === lhs.last
+ end
+ end
+
+ if rhs then
+ case rhs[0]
+ when :dregx, :dregx_once then
+ return s(:match3, rhs, lhs)
+ when :lit then
+ return s(:match3, rhs, lhs) if Regexp === rhs.last
+ end
+ end
+
+ return s(:call, lhs, :"=~", s(:array, rhs))
end
def gettable(id)
@@ -178,37 +247,62 @@ def gettable(id)
raise "identifier #{id.inspect} is not valid"
end
- def block_append(head, tail, strip_tail_block=false)
- return head unless tail
- return tail unless head
+ def initialize
+ super
+ self.lexer = RubyLexer.new
+ self.in_def = false
+ self.in_single = 0
+ @env = Environment.new
+ @comments = []
+ end
- case head[0]
- when :lit, :str then
- return tail
- end
+ def list_append list, item # TODO: nuke me *sigh*
+ return s(:array, item) unless list
+ list << item
+ end
- head = remove_begin(head)
- head = s(:block, head) unless head[0] == :block
+ def literal_concat head, tail
+ return tail unless head
+ return head unless tail
- if strip_tail_block and Sexp === tail and tail[0] == :block then
- head.push(*tail.values)
- else
- head << tail
- end
- end
+ htype, ttype = head[0], tail[0]
- def new_yield(node)
- if node then
- raise SyntaxError, "Block argument should not be given." if
- node.node_type == :block_pass
+ head = s(:dstr, '', head) if htype == :evstr
- node = node.last if node.node_type == :array and node.size == 2
+ case ttype
+ when :str then
+ if htype == :str
+ head[-1] << tail[-1]
+ elsif htype == :dstr and head.size == 2 then
+ head[-1] << tail[-1]
+ else
+ head << tail
+ end
+ when :dstr then
+ if htype == :str then
+ tail[1] = head[-1] + tail[1]
+ head = tail
+ else
+ tail[0] = :array
+ tail[1] = s(:str, tail[1])
+ tail.delete_at 1 if tail[1] == s(:str, '')
+
+ head.push(*tail[1..-1])
+ end
+ when :evstr then
+ head[0] = :dstr if htype == :str
+ if head.size == 2 and tail[1][0] == :str then
+ head[-1] << tail[1][-1]
+ head[0] = :str if head.size == 2 # HACK ?
+ else
+ head.push(tail)
+ end
end
- return s(:yield, node)
+ return head
end
- def logop(type, left, right)
+ def logop(type, left, right) # TODO: rename logical_op
left = value_expr left
if left and left[0] == type and not left.paren then
@@ -254,140 +348,62 @@ def new_fcall meth, args
r
end
- def arg_blk_pass node1, node2
- if node2 then
- node2.insert 1, node1
- return node2
+ def new_super args
+ if args && args.first == :block_pass then
+ t, body, bp = args
+ result = s(t, bp, s(:super, body))
else
- node1
+ result = s(:super)
+ result << args if args and args != s(:array)
end
+ result
end
- def get_match_node lhs, rhs
- if lhs then
- case lhs[0]
- when :dregx, :dregx_once then
- return s(:match2, lhs, rhs)
- when :lit then
- return s(:match2, lhs, rhs) if Regexp === lhs.last
- end
- end
+ def new_yield(node)
+ if node then
+ raise SyntaxError, "Block argument should not be given." if
+ node.node_type == :block_pass
- if rhs then
- case rhs[0]
- when :dregx, :dregx_once then
- return s(:match3, rhs, lhs)
- when :lit then
- return s(:match3, rhs, lhs) if Regexp === rhs.last
- end
+ node = node.last if node.node_type == :array and node.size == 2
end
- return s(:call, lhs, :"=~", s(:array, rhs))
+ return s(:yield, node)
end
- def cond node
- return nil if node.nil?
- node = value_expr node
-
- case node.first
- when :dregex then
- return s(:match2, node, s(:gvar, "$_".to_sym))
- when :regex then
- return s(:match, node)
- when :lit then
- if Regexp === node.last then
- return s(:match, node)
- else
- return node
- end
- when :and then
- return s(:and, cond(node[1]), cond(node[2]))
- when :or then
- return s(:or, cond(node[1]), cond(node[2]))
- when :dot2 then
- label = "flip#{node.hash}"
- env[label] = self.env.dynamic? ? :dvar : :lvar
- return s(:flip2, node[1], node[2])
- when :dot3 then
- label = "flip#{node.hash}"
- env[label] = self.env.dynamic? ? :dvar : :lvar
- return s(:flip3, node[1], node[2])
+ def next_token
+ if self.lexer.advance then
+ [self.lexer.token, self.lexer.yacc_value]
else
- return node
+ return [false, '$end']
end
end
- def append_to_block head, tail # FIX: wtf is this?!? switch to block_append
- return head if tail.nil?
- return tail if head.nil?
+ def node_assign(lhs, rhs)
+ return nil unless lhs
- head = s(:block, head) unless head.first == :block
- head << tail
- end
+ rhs = value_expr rhs
- def new_super args
- if args && args.first == :block_pass then
- t, body, bp = args
- result = s(t, bp, s(:super, body))
- else
- result = s(:super)
- result << args if args and args != s(:array)
+ case lhs[0]
+ when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
+ :masgn, :cdecl, :cvdecl, :cvasgn then
+ lhs << rhs
+ when :attrasgn, :call then
+ args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
+ lhs << arg_add(args, rhs)
end
- result
- end
-
- def aryset receiver, index
- s(:attrasgn, receiver, :"[]=", index)
- end
-
- def arg_concat node1, node2
- return node2.nil? ? node1 : s(:argscat, node1, node2)
- end
- def list_append list, item # TODO: nuke me *sigh*
- return s(:array, item) unless list
- list << item
+ lhs
end
- def literal_concat head, tail
- return tail unless head
- return head unless tail
-
- htype, ttype = head[0], tail[0]
-
- head = s(:dstr, '', head) if htype == :evstr
+ def parse(str, file = "(string)")
+ raise "bad val: #{str.inspect}" unless String === str
- case ttype
- when :str then
- if htype == :str
- head[-1] << tail[-1]
- elsif htype == :dstr and head.size == 2 then
- head[-1] << tail[-1]
- else
- head << tail
- end
- when :dstr then
- if htype == :str then
- tail[1] = head[-1] + tail[1]
- head = tail
- else
- tail[0] = :array
- tail[1] = s(:str, tail[1])
- tail.delete_at 1 if tail[1] == s(:str, '')
+ self.file = file
+ self.lexer.src = str
- head.push(*tail[1..-1])
- end
- when :evstr then
- head[0] = :dstr if htype == :str
- if head.size == 2 and tail[1][0] == :str then
- head[-1] << tail[1][-1]
- head[0] = :str if head.size == 2 # HACK ?
- else
- head.push(tail)
- end
- end
+ @yydebug = ENV.has_key? 'DEBUG'
- return head
+ do_parse
end
def remove_begin node
@@ -422,28 +438,10 @@ def void_stmts node
node
end
- ############################################################
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
-
- def dyna_init body, known_vars = []
- var = nil
- vars = self.env.dynamic.keys - known_vars
-
- vars.each do |id|
- if self.env.used? id then
- var = s(:dasgn_curr, id, var).compact
- end
- end
-
- self.block_append(var, body, body && body[0] == :block)
- end
-
def warning s
+ raise "no"
# do nothing for now
end
-
- # END HACK
- ############################################################$
end
class Keyword
@@ -500,21 +498,6 @@ def id1
56, 56, 56, 56, 56, 56
]
- def self.hash_keyword(str, len)
- hval = len
-
- case hval
- when 2, 1 then
- hval += ASSO_VALUES[str[0]]
- else
- hval += ASSO_VALUES[str[2]]
- hval += ASSO_VALUES[str[0]]
- end
-
- hval += ASSO_VALUES[str[len - 1]]
- return hval
- end
-
##
# :expr_beg = ignore newline, +/- is a sign.
# :expr_end = newline significant, +/- is an operator.
@@ -572,6 +555,21 @@ def self.hash_keyword(str, len)
["alias", [:kALIAS, :kALIAS ], :expr_fname ],
].map { |args| KWtable.new(*args) }
+ def self.hash_keyword(str, len)
+ hval = len
+
+ case hval
+ when 2, 1 then
+ hval += ASSO_VALUES[str[0]]
+ else
+ hval += ASSO_VALUES[str[2]]
+ hval += ASSO_VALUES[str[0]]
+ end
+
+ hval += ASSO_VALUES[str[len - 1]]
+ return hval
+ end
+
def self.keyword(str, len = str.size)
if len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH then
key = hash_keyword(str, len)
@@ -589,28 +587,6 @@ class Environment
attr_reader :env, :dyn
attr_accessor :init
- def initialize dyn = false
- @dyn = []
- @env = []
- @use = []
- @init = false
- self.extend
- end
-
- def use id
- @env.each_with_index do |env, i|
- if env[id] then
- @use[i][id] = true
- end
- end
- end
-
- def used? id
- idx = @dyn.index false # REFACTOR
- u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
- u[id]
- end
-
def [] k
self.all[k]
end
@@ -625,15 +601,15 @@ def all
@env[0..idx].reverse.inject { |env, scope| env.merge scope }
end
+ def current
+ @env.first
+ end
+
def dynamic
idx = @dyn.index false
@env[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
end
- def current
- @env.first
- end
-
def dynamic?
@dyn[0] != false
end
@@ -644,31 +620,50 @@ def extend dyn = false
@use.unshift({})
end
+ def initialize dyn = false
+ @dyn = []
+ @env = []
+ @use = []
+ @init = false
+ self.extend
+ end
+
def unextend
@dyn.shift
@env.shift
@use.shift
raise "You went too far unextending env" if @env.empty?
end
+
+ def use id
+ @env.each_with_index do |env, i|
+ if env[id] then
+ @use[i][id] = true
+ end
+ end
+ end
+
+ def used? id
+ idx = @dyn.index false # REFACTOR
+ u = @use[0...idx].reverse.inject { |env, scope| env.merge scope } || {}
+ u[id]
+ end
end
class StackState
attr_reader :stack
- def inspect
- "StackState(#{@name}, #{@stack.inspect})"
- end
-
def initialize(name)
@name = name
@stack = [false]
end
- def pop
- # raise "#{@name} empty" if @stack.size <= 1
- r = @stack.pop
- @stack.push false if @stack.size == 0
- r
+ def inspect
+ "StackState(#{@name}, #{@stack.inspect})"
+ end
+
+ def is_in_state
+ @stack.last
end
def lexpop
@@ -678,14 +673,17 @@ def lexpop
@stack.push(a || b)
end
+ def pop
+ # raise "#{@name} empty" if @stack.size <= 1
+ r = @stack.pop
+ @stack.push false if @stack.size == 0
+ r
+ end
+
def push val
raise if val != true and val != false
@stack.push val
end
-
- def is_in_state
- @stack.last
- end
end
def t str
@@ -694,16 +692,13 @@ def t str
class Token # TODO: nuke this and use sexps
attr_accessor :args
- def initialize(token)
- @args = Array(token)
- end
- def value # TODO: eventually phase this out (or make it official)
+ def first # HACK
self.args.first
end
- def first # HACK
- self.args.first
+ def initialize(token)
+ @args = Array(token)
end
def inspect
@@ -714,6 +709,10 @@ def to_sym
self.value.to_sym
end
+ def value # TODO: eventually phase this out (or make it official)
+ self.args.first
+ end
+
def == o
Token === o and self.args == o.args
end
@@ -732,6 +731,10 @@ class Sexp
attr_writer :paren
attr_accessor :comments
+ def node_type
+ first
+ end
+
def paren
@paren ||= false
end
@@ -744,13 +747,7 @@ def value
def values
self[1..-1]
end
-
- def node_type
- first
- end
end
# END HACK
############################################################
-
-
View
80 test/test_ruby_lexer.rb
@@ -11,7 +11,7 @@ def deny cond, msg = nil
def setup
@lex = RubyLexer.new
@lex.src = "blah blah"
- @lex.lex_state = :expr_beg # HACK ? I have no idea actually
+ @lex.lex_state = :expr_beg
end
def test_advance
@@ -208,7 +208,7 @@ def test_yylex_carat_equals
def test_yylex_colon2
util_lex_token("A::B",
:tCONSTANT, t("A"),
- :tCOLON2, t(":"), # FIX?
+ :tCOLON2, t("::"),
:tCONSTANT, t("B"))
end
@@ -225,7 +225,7 @@ def test_yylex_comma
def test_yylex_comment
util_lex_token("1 # one\n# two\n2",
:tINTEGER, 1,
- "\n", 1,
+ "\n", nil,
:tINTEGER, 2)
assert_equal "# one\n# two\n", @lex.comments
end
@@ -244,12 +244,12 @@ def test_yylex_comment_begin_bad
def test_yylex_comment_begin_not_comment
util_lex_token("beginfoo = 5\np x \\\n=beginfoo",
:tIDENTIFIER, t("beginfoo"),
- '=', t('='),
- :tINTEGER, 5,
- "\n", 5,
+ '=', t('='),
+ :tINTEGER, 5,
+ "\n", nil,
:tIDENTIFIER, t("p"),
:tIDENTIFIER, t("x"),
- '=', t('='),
+ '=', t('='),
:tIDENTIFIER, t("beginfoo"))
end
@@ -533,7 +533,7 @@ def test_yylex_heredoc_backtick
:tXSTRING_BEG, t("`"),
:tSTRING_CONTENT, s(:str, " blah blah\n"),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_double
@@ -543,7 +543,7 @@ def test_yylex_heredoc_double
:tSTRING_BEG, t("\""),
:tSTRING_CONTENT, s(:str, " blah blah\n"),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_double_dash
@@ -553,7 +553,7 @@ def test_yylex_heredoc_double_dash
:tSTRING_BEG, t("\""),
:tSTRING_CONTENT, s(:str, " blah blah\n"),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_double_eos
@@ -583,7 +583,7 @@ def test_yylex_heredoc_double_interp
:tSTRING_DBEG, t("\#{"),
:tSTRING_CONTENT, s(:str, "3} \n"), # HUH?
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_none
@@ -594,7 +594,7 @@ def test_yylex_heredoc_none
:tSTRING_CONTENT, s(:str, "blah\nblah\n"),
:tSTRING_CONTENT, s(:str, ""),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_none_bad_eos
@@ -612,7 +612,7 @@ def test_yylex_heredoc_none_dash
:tSTRING_CONTENT, s(:str, "blah\nblah\n"),
:tSTRING_CONTENT, s(:str, ""),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_single
@@ -622,7 +622,7 @@ def test_yylex_heredoc_single
:tSTRING_BEG, t("\""),
:tSTRING_CONTENT, s(:str, " blah blah\n"),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_heredoc_single_bad_eos_body
@@ -660,7 +660,7 @@ def test_yylex_heredoc_single_dash
:tSTRING_BEG, t("\""),
:tSTRING_CONTENT, s(:str, " blah blah\n"),
:tSTRING_END, t("EOF"),
- "\n", t("EOF"))
+ "\n", nil)
end
def test_yylex_identifier
@@ -963,7 +963,7 @@ def test_yylex_open_curly_bracket_arg
:tIDENTIFIER, t("m"),
:tLCURLY, t("{"),
:tINTEGER, 3,
- :tRCURLY, t("end")) # FIX?
+ :tRCURLY, t("}"))
end
def test_yylex_open_curly_bracket_block
@@ -971,7 +971,7 @@ def test_yylex_open_curly_bracket_block
util_lex_token("{ 4 }",
:tLBRACE_ARG, t("{"),
:tINTEGER, 4,
- :tRCURLY, t("end")) # FIX?
+ :tRCURLY, t("}"))
end
def test_yylex_open_square_bracket_arg
@@ -1033,7 +1033,7 @@ def test_yylex_percent_equals
end
def test_yylex_plus
- util_lex_token("1 + 1", # FIX lex_state?
+ util_lex_token("1 + 1", # TODO lex_state?
:tINTEGER, 1,
:tPLUS, t("+"),
:tINTEGER, 1)
@@ -1095,7 +1095,7 @@ def test_yylex_rbracket
end
def test_yylex_rcurly
- util_lex_token "}", :tRCURLY, t("end") # FIX?
+ util_lex_token "}", :tRCURLY, t("}")
end
def test_yylex_regexp
@@ -1365,7 +1365,7 @@ def test_yylex_string_double
:tSTRING_END, t('"'))
end
- def test_yylex_string_double
+ def test_yylex_string_double_nested_curlies
util_lex_token('%{nest{one{two}one}nest}',
:tSTRING_BEG, t('%}'),
:tSTRING_CONTENT, s(:str, "nest{one{two}one}nest"),
@@ -1404,11 +1404,11 @@ def test_yylex_string_double_interp
util_lex_token("\"blah #x a \#@a b \#$b c \#{3} # \"",
:tSTRING_BEG, t("\""),
:tSTRING_CONTENT, s(:str, "blah #x a "),
- :tSTRING_DVAR, s(:str, "blah #x a "),
+ :tSTRING_DVAR, nil,
:tSTRING_CONTENT, s(:str, "@a b "),
- :tSTRING_DVAR, s(:str, "@a b "),
+ :tSTRING_DVAR, nil,
:tSTRING_CONTENT, s(:str, "$b c "),
- :tSTRING_DBEG, s(:str, "$b c "),
+ :tSTRING_DBEG, nil,
:tSTRING_CONTENT, s(:str, "3} # "),
:tSTRING_END, t("\""))
end
@@ -1436,22 +1436,22 @@ def test_yylex_string_pct_W
util_lex_token("%W[s1 s2\ns3]", # TODO: add interpolation to these
:tWORDS_BEG, t("%W["),
:tSTRING_CONTENT, s(:str, "s1"),
- " ", s(:str, "s1"), # FIX
+ " ", nil,
:tSTRING_CONTENT, s(:str, "s2"),
- " ", s(:str, "s2"), # FIX
+ " ", nil,
:tSTRING_CONTENT, s(:str, "s3"),
- " ", s(:str, "s3"), # FIX
- :tSTRING_END, s(:str, "s3")) # FIX
+ " ", nil,
+ :tSTRING_END, nil)
end
def test_yylex_string_pct_W_bs_nl
util_lex_token("%W[s1 \\\ns2]", # TODO: add interpolation to these
:tWORDS_BEG, t("%W["),
:tSTRING_CONTENT, s(:str, "s1"),
- " ", s(:str, "s1"), # FIX
+ " ", nil,
:tSTRING_CONTENT, s(:str, "\ns2"),
- " ", s(:str, "\ns2"), # FIX
- :tSTRING_END, s(:str, "\ns2")) # FIX
+ " ", nil,
+ :tSTRING_END, nil)
end
def test_yylex_string_pct_angle
@@ -1472,19 +1472,19 @@ def test_yylex_string_pct_w
util_bad_token("%w[s1 s2 ",
:tAWORDS_BEG, t("%w["),
:tSTRING_CONTENT, s(:str, "s1"),
- " ", s(:str, "s1"), # FIX
+ " ", nil,
:tSTRING_CONTENT, s(:str, "s2"),
- " ", s(:str, "s2")) # FIX
+ " ", nil)
end
def test_yylex_string_pct_w_bs_nl
util_lex_token("%w[s1 \\\ns2]",
:tAWORDS_BEG, t("%w["),
:tSTRING_CONTENT, s(:str, "s1"),
- " ", s(:str, "s1"), # FIX
+ " ", nil,
:tSTRING_CONTENT, s(:str, "\ns2"),
- " ", s(:str, "\ns2"), # FIX
- :tSTRING_END, s(:str, "\ns2")) # FIX
+ " ", nil,
+ :tSTRING_END, nil)
end
def test_yylex_string_single
@@ -1536,21 +1536,21 @@ def test_yylex_symbol_single
def test_yylex_ternary
util_lex_token("a ? b : c",
:tIDENTIFIER, t("a"),
- "?", t("?"), # FIX
+ "?", t("?"), # FIX
:tIDENTIFIER, t("b"),
- ":", t(":"), # FIX
+ ":", t(":"), # FIX
:tIDENTIFIER, t("c"))
util_lex_token("a ?bb : c", # GAH! MATZ!!!
:tIDENTIFIER, t("a"),
- "?", t("?"), # FIX
+ "?", t("?"), # FIX
:tIDENTIFIER, t("bb"),
- ":", t(":"), # FIX
+ ":", t(":"), # FIX
:tIDENTIFIER, t("c"))
util_lex_token("42 ?", # 42 forces expr_end
:tINTEGER, 42,
- "?", t("?"))
+ "?", t("?"))
end
def test_yylex_tilde
Please sign in to comment.
Something went wrong with that request. Please try again.