Skip to content
Browse files

Experiment to drive Ripper entirely from Ruby code, using Java integration and RubyYaccLexer.
  • Loading branch information...
1 parent 1d275e5 commit ae739489bc414f59c49544c804a4ff5d87e3a7b1 @headius headius committed Dec 9, 2010
View
57 lib/ruby/1.9/ripper/core.rb
@@ -8,22 +8,77 @@
# For details of Ruby License, see ruby/COPYING.
#
-require 'ripper.so'
+#require 'ripper.so'
+require 'java'
class Ripper
+ JRUBY_LEXER = org.jruby.lexer.yacc.RubyYaccLexer
+
# Parses Ruby program read from _src_.
# _src_ must be a String, an IO, or an object which has a #gets method.
def Ripper.parse(src, filename = '(ripper)', lineno = 1)
  # Build a parser instance over the given source, then hand back
  # whatever its #parse returns.
  parser = new(src, filename, lineno)
  parser.parse
end
+ PARSER_EVENT_TABLE = {
+ :BEGIN=>1, :END=>1, :alias=>2, :alias_error=>1, :aref=>2, :aref_field=>2,
+ :arg_ambiguous=>0, :arg_paren=>1, :args_add=>2, :args_add_block=>2,
+ :args_add_star=>2, :args_new=>0, :array=>1, :assign=>2,
+ :assign_error=>1, :assoc_new=>2, :assoclist_from_args=>1,
+ :bare_assoc_hash=>1, :begin=>1, :binary=>3, :block_var=>2,
+ :block_var_add_block=>2, :block_var_add_star=>2, :blockarg=>1, :bodystmt=>4,
+ :brace_block=>2, :break=>1, :call=>3, :case=>2, :class=>3,
+ :class_name_error=>1, :command=>2, :command_call=>4, :const_path_field=>2,
+ :const_path_ref=>2, :const_ref=>1, :def=>3, :defined=>1, :defs=>5,
+ :do_block=>2, :dot2=>2, :dot3=>2, :dyna_symbol=>1, :else=>1, :elsif=>3,
+ :ensure=>1, :excessed_comma=>1, :fcall=>1, :field=>3, :for=>3, :hash=>1,
+ :if=>3, :if_mod=>2, :ifop=>3, :lambda=>2, :magic_comment=>2, :massign=>2,
+ :method_add_arg=>2, :method_add_block=>2, :mlhs_add=>2, :mlhs_add_star=>2,
+ :mlhs_new=>0, :mlhs_paren=>1, :module=>2, :mrhs_add=>2, :mrhs_add_star=>2,
+ :mrhs_new=>0, :mrhs_new_from_args=>1, :next=>1, :opassign=>3,
+ :operator_ambiguous=>2, :param_error=>1, :params=>5, :paren=>1,
+ :parse_error=>1, :program=>1, :qwords_add=>2, :qwords_new=>0, :redo=>0,
+ :regexp_add=>2, :regexp_literal=>2, :regexp_new=>0, :rescue=>4,
+ :rescue_mod=>2, :rest_param=>1, :retry=>0, :return=>1, :return0=>0,
+ :sclass=>2, :stmts_add=>2, :stmts_new=>0, :string_add=>2, :string_concat=>2,
+ :string_content=>0, :string_dvar=>1, :string_embexpr=>1, :string_literal=>1,
+ :super=>1, :symbol=>1, :symbol_literal=>1, :top_const_field=>1,
+ :top_const_ref=>1, :unary=>2, :undef=>1, :unless=>3, :unless_mod=>2,
+ :until=>2, :until_mod=>2, :var_alias=>2, :var_field=>1, :var_ref=>1,
+ :void_stmt=>0, :when=>3, :while=>2, :while_mod=>2, :word_add=>2,
+ :word_new=>0, :words_add=>2, :words_new=>0, :xstring_add=>2,
+ :xstring_literal=>1, :xstring_new=>0, :yield=>1, :yield0=>0, :zsuper=>0}
+
+
+ SCANNER_EVENT_TABLE = {
+ :CHAR=>1, :__end__=>1, :backref=>1, :backtick=>1, :comma=>1, :comment=>1,
+ :const=>1, :cvar=>1, :embdoc=>1, :embdoc_beg=>1, :embdoc_end=>1,
+ :embexpr_beg=>1, :embexpr_end=>1, :embvar=>1, :float=>1, :gvar=>1,
+ :heredoc_beg=>1, :heredoc_end=>1, :ident=>1, :ignored_nl=>1,
+ :int=>1, :ivar=>1, :kw=>1, :label=>1, :lbrace=>1, :lbracket=>1, :lparen=>1,
+ :nl=>1, :op=>1, :period=>1, :qwords_beg=>1, :rbrace=>1, :rbracket=>1,
+ :regexp_beg=>1, :regexp_end=>1, :rparen=>1, :semicolon=>1, :sp=>1,
+ :symbeg=>1, :tlambda=>1, :tlambeg=>1, :tstring_beg=>1, :tstring_content=>1,
+ :tstring_end=>1, :words_beg=>1, :words_sep=>1}
+
# This array contains the names of parser events.
PARSER_EVENTS = PARSER_EVENT_TABLE.keys
# This array contains the names of scanner events.
SCANNER_EVENTS = SCANNER_EVENT_TABLE.keys
+ TOKENS = {}
+ KEYWORDS = {}
+
# Fill TOKENS and KEYWORDS by reflecting over the fields declared on
# Ruby19Parser. A field whose name starts with "t" goes into TOKENS and one
# whose name starts with "k" goes into KEYWORDS. Each entry is keyed by the
# field's static value (converted to a Ruby object) and maps to the field
# name with its first character dropped and the rest downcased
# (e.g. tSP => "sp").
# NOTE(review): any declared field whose name begins with t/k is picked up,
# not only token constants -- confirm that is intended.
+ org.jruby.parser.Ruby19Parser.java_class.declared_fields.each do |field|
+ if field.name =~ /^t/
+ TOKENS[Java.java_to_ruby(field.static_value)] = field.name[1..-1].downcase
+ elsif field.name =~ /^k/
+ KEYWORDS[Java.java_to_ruby(field.static_value)] = field.name[1..-1].downcase
+ end
+ end
+
# This array contains the names of all ripper events.
EVENTS = PARSER_EVENTS + SCANNER_EVENTS
View
41 lib/ruby/1.9/ripper/lexer.rb
@@ -39,7 +39,48 @@ def Ripper.lex(src, filename = '-', lineno = 1)
Lexer.new(src, filename, lineno).lex
end
# Drives the JRuby lexer over @src and dispatches one handler call per token.
#
# A JRUBY_LEXER is created with (false, true); per the two-argument
# RubyYaccLexer constructor these are isOneEight and spaceTokens, i.e. 1.9
# mode with whitespace tokens enabled. It is then given a UTF-8 encoding, a
# ParserSupport19 instance and a ByteArrayLexerSource built from @filename,
# the bytes of @src, @src split into lines, and @lineno.
#
# For every token the lexer advances to, a token id found in KEYWORDS is
# sent to on_kw; anything else is sent to "on_<name>", where <name> is looked
# up in TOKENS. The handler receives the token value converted with to_s.
# NOTE(review): an id missing from both tables would dispatch to "on_" --
# confirm every id the lexer can return is covered.
+ def parse
+ lexer = JRUBY_LEXER.new(false, true)
+ lexer.encoding = org.jcodings.specific.UTF8Encoding::INSTANCE
+ lexer.parser_support = org.jruby.parser.ParserSupport19.new
+ lexer.source = org.jruby.lexer.yacc.ByteArrayLexerSource.new(
+ @filename, @src.to_java_bytes, @src.split("\n"), @lineno, false)
+
+ while lexer.advance
+ if KEYWORDS[lexer.token]
+ send "on_kw", lexer.value.value.to_s
+ else
+ send "on_#{TOKENS[lexer.token]}", lexer.value.value.to_s
+ end
+ end
+ end
+
# Always returns 0.
# NOTE(review): presumably a stub until real column tracking is wired up
# to the lexer -- confirm.
+ def column
+ 0
+ end
+
+ # normalize some tokens
+ def on_integer(token)
+ on_int(token)
+ end
+
+ def on_assoc(token)
+ on_label(token)
+ end
+
+ def on_rcurly(token)
+ on_rbrace(token)
+ end
+
class Lexer < ::Ripper #:nodoc: internal use only
+ def initialize(src, filename, lineno)
+ @src = src
+ @filename = filename
+ @lineno = lineno
+ end
+
+ attr_accessor :src, :filename, :lineno
+
def tokenize
  # Each entry produced by #lex is destructured as (position, event, token);
  # only the token text is kept.
  lex.map { |_position, _event, text| text }
end
View
16 src/org/jruby/lexer/yacc/RubyYaccLexer.java
@@ -259,6 +259,8 @@ public static Keyword getKeyword(String str) {
private int parenNest = 0;
// 1.9 only
private int leftParenBegin = 0;
+ // Ripper only
+ private boolean spaceTokens = false;
public int incrementParenNest() {
parenNest++;
@@ -282,6 +284,15 @@ public RubyYaccLexer(boolean isOneEight) {
reset();
this.isOneEight = isOneEight;
}
+
+ /**
+ * Constructor for Ripper, which wants space tokens
+ */
+ public RubyYaccLexer(boolean isOneEight, boolean spaceTokens) {
+ reset();
+ this.isOneEight = isOneEight;
+ this.spaceTokens = spaceTokens;
+ }
public final void reset() {
token = 0;
@@ -958,6 +969,11 @@ private int yylex() throws IOException {
case ' ': case '\t': case '\f': case '\r':
case '\13': /* '\v' */
getPosition();
+ if (spaceTokens && !spaceSeen) {
+ // TODO: consume whitespace, return as a single string
+ yaccValue = new Token(" ",getPosition());
+ return Tokens.tSP;
+ }
spaceSeen = true;
continue;
case '#': /* it's a comment */
View
1 src/org/jruby/parser/DefaultRubyParser.java
@@ -278,6 +278,7 @@ public void setWarnings(IRubyWarnings warnings) {
public static final int tFLOAT = 379;
public static final int tREGEXP_END = 380;
public static final int tLOWEST = 381;
+ public static final int tSP = 9999;
public static final int yyErrorCode = 256;
/** number of final state.
View
1 src/org/jruby/parser/Ruby19Parser.java
@@ -265,6 +265,7 @@ public void setWarnings(IRubyWarnings warnings) {
public static final int tFLOAT = 379;
public static final int tREGEXP_END = 380;
public static final int tLOWEST = 381;
+ public static final int tSP = 9999;
public static final int yyErrorCode = 256;
/** number of final state.
View
1 src/org/jruby/parser/Tokens.java
@@ -157,6 +157,7 @@
int tLAMBDA = DefaultRubyParser.tLAMBDA;
int tLAMBEG = DefaultRubyParser.tLAMBEG;
int tLABEL = DefaultRubyParser.tLABEL;
+ int tSP = DefaultRubyParser.tSP;
String[] operators = {"+@", "-@", "**", "<=>", "==", "===", "!=", ">=", "<=", "&&",
"||", "=~", "!~", "..", "...", "[]", "[]=", "<<", ">>", "::"};

0 comments on commit ae73948

Please sign in to comment.
Something went wrong with that request. Please try again.