Permalink
Browse files

add some notes to RubyBuilder, move notes files to a notes/ directory…

…, extract LogBuilder
  • Loading branch information...
1 parent 2f469e1 commit e9e9689d403368a45726925d0f6aaa9d57437b9c Sven Fuchs committed Jun 28, 2009
View
@@ -0,0 +1,45 @@
+require 'ripper'
+require 'highlighters/ansi'
+
class Ripper
  # Debugging aid: a SexpBuilder that records every scanner (lexer) and parser
  # event in the order Ripper fires it, and can print that record.
  #
  #   Ripper::EventLog.out("foo :bar")
  #
  # Every entry in #log is a pair [type, sexp] where type is :scanner or
  # :parser and sexp is whatever SexpBuilder's own handler returned for the
  # event.
  class EventLog < Ripper::SexpBuilder
    class << self
      # Parses src and prints the recorded events to stdout.
      # Returns the event log.
      def out(src)
        parser = new(src)
        parser.parse
        parser.out
      end
    end

    attr_reader :log

    # src - the Ruby source to parse (handed on to Ripper unchanged).
    def initialize(src)
      @log = []
      super
    end

    # Prints one line per recorded event: the event name padded to 20 columns,
    # followed for scanner events by the token text (at most 31 characters).
    # Parser events print only their highlighted name.
    # Returns the event log.
    def out
      log.each do |type, sexp|
        name = sexp[0].to_s.ljust(20)
        if type == :scanner
          puts name + printable(sexp[1])[0..30]
        else
          puts highlight(name)
        end
      end
    end

    # Wraps str in bold green ANSI highlighting.
    def highlight(str)
      Highlighters::Ansi.new(:bold, :green).highlight(str)
    end

    { :scanner => SCANNER_EVENTS, :parser => PARSER_EVENTS }.each do |type, events|
      events.each do |event|
        define_method :"on_#{event}" do |*args|
          sexp = super(*args)
          log << [type, sexp]
          # Ripper passes a handler's return value on to the enclosing parser
          # events, so the sexp (not the log) has to be returned here.
          sexp
        end
      end
    end

    private

    # Returns token as a string, inspected when it contains whitespace so that
    # blanks and newlines stay visible in the printed log. Only scanner tokens
    # are passed in; the children of parser events are never pattern matched.
    def printable(token)
      text = token.to_s
      text =~ /\s/ ? text.inspect : text
    end
  end
end
View
@@ -9,6 +9,17 @@
Dir[File.dirname(__FILE__) + '/ruby_builder/events/*.rb'].each { |file| require file }
+# Ripper::RubyBuilder extends Ripper's SexpBuilder and builds a rich, object
+# oriented representation of Ruby code.
+#
+# code = Ripper::RubyBuilder.build("foo(1, :bar, %w(baz))", filename)
+# code.to_ruby # => "foo(1, :bar, %w(baz))"
+#
+# RubyBuilder uses SexpBuilder's lexing and parsing event callbacks (see
+# ruby_builder/events) and builds up Ruby::Nodes which can then be used.
+# See RubyBuilder::Stack, RubyBuilder::Queue, RubyBuilder::Buffer and
+# RubyBuilder::Token for more details about the parsing process.
+
class Ripper
class RubyBuilder < Ripper::SexpBuilder
class ParseError < RuntimeError
@@ -33,7 +44,7 @@ def build(src, filename = nil)
SEPARATORS = [:@semicolon, :@comma]
- UNARY_OPERATORS = [:'@+', :'@-', :'@!', :'@~', :@not]
+ UNARY_OPERATORS = [:'@+', :'@-', :'@!', :'@~', :@not, :'@+@', :'@-@']
BINARY_OPERATORS = [:'@**', :'@*', :'@/', :'@%', :'@+', :'@-', :'@<<', :'@>>', :'@&', :'@|', :'@^',
:'@>', :'@>=', :'@<', :'@<=', :'@<=>', :'@==', :'@===', :'@!=', :'@=~', :'@!~',
:'@&&', :'@||', :@and, :@or]
@@ -1,3 +1,14 @@
+# When tokens are pushed to the stack they may first be buffered when they
+# belong to the Prolog part of a Ruby::Node. Buffered tokens will then be
+# aggregated to the Prolog of the next token that is not buffered. Tokens
+# belonging to the Prolog part of a Ruby::Node are whitespace, separator and
+# heredoc tokens.
+#
+# E.g. when a whitespace char (" ") is pushed to the stack it will be buffered.
+# Then when an :@ident token is pushed to the stack the contents of the buffer
+# will be assigned to the Prolog of the :@ident token. Thus the whitespace char
+# ends up in the Prolog of the :@ident token.
+
class Ripper
class RubyBuilder < Ripper::SexpBuilder
class Buffer < Array
@@ -1,3 +1,20 @@
+# When tokens are pushed to the stack they will be pushed to a queue. Tokens
+# that open new constructs in Ruby (parentheses, semicolons, keywords like
+# class, do, if, etc.) will be held in the queue until the next token is
+# pushed. The queue will then empty itself and return the previously queued
+# token together with the currently pushed token.
+#
+# The reason for this is the way Ripper parses Ruby code. The lexer will fire
+# events every time a known token is found. The parser will fire events when
+# known Ruby constructs are completed. Thus often times when a parser event
+# fires the lexer has already pushed the next (opening) token to the stack.
+#
+# On the other hand, event handlers responding to a parser event will want to
+# check for expected tokens (e.g. an opening parenthesis for a method call).
+# Thus, when the opening tokens (added by lexer events) are held in a queue
+# while the parser event is fired, it is easier to pop the right tokens
+# belonging to the parser event from the stack.
+
class Ripper
class RubyBuilder < Ripper::SexpBuilder
class Queue < ::Array
@@ -7,7 +24,7 @@ def <<(token)
elsif token.opener?
push(token)
else
- result << token
+ result << token
end
result.compact
end
@@ -1,6 +1,46 @@
require 'ripper/ruby_builder/queue'
require 'ripper/ruby_builder/buffer'
+# The stack holds the current "state" of the parser and facilitates communication
+# between lexer events (which fire on known Ruby tokens) and parser events (which
+# fire on known Ruby constructs).
+#
+# E.g. when the Ruby code foo(:bar) is parsed the lexer will fire events for
+# the identifiers 'foo' and 'bar', for the opening and closing parentheses and
+# for the colon. It fires the events in the order of the occurrence of the
+# tokens from left to right. RubyBuilder pushes all these tokens to the stack.
+#
+# See RubyBuilder::Queue and RubyBuilder::Buffer for what else happens when a
+# token is pushed to the stack.
+#
+# The parser on the other hand will fire events for known Ruby constructs such
+# as the argument list, arguments being added to the argument list, the method
+# call etc. The parser fires these events in the order of Ruby constructs being
+# recognized - i.e. when they are completed. RubyBuilder responds to these
+# events and will pop tokens off from the stack as required (e.g. for
+# constructing an argument list it will try to pop off the corresponding
+# left and right parentheses.)
+#
+# When RubyBuilder pops tokens off from the stack it wants to be careful not to
+# pop off tokens that belong to higher level constructs that haven't yet fired.
+# E.g. for a nested method call foo(bar(1)) the inner call fires first because
+# it completes first. Thus, when RubyBuilder constructs this call it must not
+# pop off the opening parenthesis belonging to the outer call (which of course
+# is already on the stack).
+#
+# For that reason when popping off tokens the stack by default stops searching
+# for the token when an opening token is found. RubyBuilder can force it to
+# search past opening tokens by setting the :pass option to true. Similarly
+# RubyBuilder can set constraints to what tokens it wants to be popped off:
+#
+# :pass => true # search past opening tokens
+# :max => count # number of tokens
+# :value => 'foo' # value of the token
+# :pos => pos # position of the token
+# :left => token # token must be located right of the given token
+# :right => token # token must be located left of the given token
+# :reverse => true # searches the stack in reverse order (i.e. tokens are shifted)
+
class Ripper
class RubyBuilder < Ripper::SexpBuilder
class Stack < ::Array
@@ -10,7 +50,7 @@ def initialize
@queue = Queue.new
@buffer = Buffer.new
end
-
+
def push(token)
return token if buffer.aggregate(token)
tokens = queue << token
@@ -42,7 +82,7 @@ def pop(*types)
end
protected
-
+
def matches?(conditions)
conditions.inject(true) do |result, (type, value)|
result && case type
@@ -1,5 +1,14 @@
require 'ruby/node/position'
+# Tokens are simple value objects that hold the token type, value and position.
+# There are a bunch of helper methods to check the token type and convert the
+# token to Ruby nodes.
+#
+# We mostly operate with Ripper's token types (such as :@ident etc.). For Ripper's
+# sexp types :@kw (keyword) and :@op we use more specific token types based on
+# the sexp's value. E.g. Ripper's sexp [:@op, '+', [0, 0]] would become a token
+# with the type :@+.
+
class Ripper
class RubyBuilder < Ripper::SexpBuilder
class Token
@@ -12,43 +21,43 @@ def initialize(type = nil, token = nil, position = nil)
@token = token
@position = position if position
end
-
+
def newline?
NEWLINE.include?(type)
end
def whitespace?
WHITESPACE.include?(type)
end
-
+
def opener?
OPENERS.include?(type)
end
-
+
def keyword?
KEYWORDS.include?(type)
end
-
+
def operator?
OPERATORS.include?(type)
end
-
+
def separator?
SEPARATORS.include?(type)
end
-
+
def prolog?
whitespace? or separator? or heredoc?
end
-
+
def known?
keyword? || operator? || opener? || whitespace? || [:@backtick].include?(type)
end
-
+
def comment?
type == :@comment
end
-
+
def heredoc?
type == :@heredoc
end
@@ -60,15 +69,15 @@ def to_sexp
def to_identifier
Ruby::Identifier.new(token, position, prolog)
end
-
+
def <=>(other)
position <=> (other.respond_to?(:position) ? other.position : other)
end
-
+
protected
-
+
def token_type(type, token)
- case type
+ case type
when :@kw
:"@#{token.gsub(/\W/, '')}"
when :@op
View
@@ -1,19 +1,28 @@
-Dir[File.dirname(__FILE__) + '/ruby/*.rb'].each do |file|
+Dir[File.dirname(__FILE__) + '/ruby/*.rb'].each do |file|
require "ruby/#{File.basename(file)}"
end
+# Object oriented representation of Ruby code.
+#
+# The base class is Ruby::Node. It facilitates
+#
+# * a composite pattern (see Ruby::Node::Composite)
+# * means for extracting from the original source (see Ruby::Node::Source)
+#
+# There are two main concrete classes derived from Node: Token and Aggregate.
+#
+# Tokens are "atomic" node types that represent non-composite Ruby constructs
+# such as Keyword, Identifier, StringContent and literal types such as integers,
+# floats, true, false, nil etc. Aggregates are composed node types that hold
+# one or many tokens, such as Class, Module, Block, If, For, Case, While etc.
+#
+# Each node type supports the to_ruby method which will return an exact copy
+# of the original code it was parsed from.
+#
+# There are also a few helper methods for converting a node to another type
+# (see Ruby::Node::Conversions) and very few helper methods for altering
+# existing code structures (see Ruby::Alternation).
+
module Ruby
include Conversions
-
- @@context_width = 2
-
- class << self
- def context_width
- @@context_width
- end
-
- def context_width=(context_width)
- @@context_width = context_width
- end
- end
end
View
@@ -19,7 +19,7 @@ def line
Ruby::Node::Text.new(lines[row]).clip([0, column], length)
end
- def prolog(options = {})
+ def context(options = {})
filter = options.has_key?(:highlight) ? options[:highlight] : false
line = filter ? self.line.head + filter.highlight(to_ruby) + self.line.tail : nil
Ruby::Node::Text::Context.new(lines, row, options[:width] || 2, line).to_s
View
@@ -10,13 +10,25 @@ def split(str)
end
class Context
+ @@context_width = 2
+
+ class << self
+ def context_width
+ @@context_width
+ end
+
+ def context_width=(context_width)
+ @@context_width = context_width
+ end
+ end
+
attr_reader :lines, :line, :row, :width
- def initialize(lines, row, width, line = nil)
# Sets up a context around one line of a list of source lines.
#
# lines - the lines the context is taken from
# row   - index into lines of the line the context is built around
# width - stored as #width; when nil the class-wide Context.context_width
#         is used instead
# line  - optional replacement for lines[row] (stored as #line)
def initialize(lines, row, width = nil, line = nil)
  @lines = lines
  @line  = line || lines[row]
  @row   = row
  # The class-level accessor is named context_width; Context defines no
  # .width method, so that is the one to fall back to.
  @width = width || Context.context_width
end
def to_s(options = {})
View
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
View
@@ -1 +1,13 @@
+ add_tests("call_unary_neg",
+ "Ruby" => "-2**31",
+ "RawParseTree" => [:call,
+ [:call, [:lit, 2], :**, [:array, [:lit, 31]]],
+ :-@],
+ "ParseTree" => s(:call,
+ s(:call,
+ s(:lit, 2),
+ :**,
+ s(:arglist, s(:lit, 31))),
+ :-@, s(:arglist)),
+ "Ruby2Ruby" => "-(2 ** 31)")
Oops, something went wrong.

0 comments on commit e9e9689

Please sign in to comment.