Permalink
Browse files

Adding current state of JS/CC based parser

  • Loading branch information...
szegedi committed May 6, 2010
1 parent b42d352 commit 913661825e89bbb862be4c0d5d86c5804949daa4
Showing with 5,768 additions and 0 deletions.
  1. +25 −0 jscc/README.txt
  2. +352 −0 jscc/driver_v8.js_
  3. +4,855 −0 jscc/jscc.js
  4. +536 −0 jscc/ruby.par
  5. BIN jscc/v8sh
View
@@ -0,0 +1,25 @@
+This is an attempt at creating a Ruby LALR parser in pure JavaScript that can
+execute on V8.
+
+Ingredients:
+* jscc.js and driver_v8.js_ are the the JS/CC parser generator, as retrieved
+ from <http://jscc.jmksf.com/> and built for V8 target.
+* v8sh is a V8 shell binary, compiled for Intel 32-bit Mac OS X. It is
+ included in source form in the JS/CC distribution and can be rebuilt from it
+ if necessary (i.e. for a different platform).
+* ruby.par is a Ruby BNF grammar, in the format that JS/CC understands.
+
+To build a Ruby parser:
+./v8sh jscc.js ruby.par > rubyparser.js
+
+To run the Ruby parser (i.e. using one of the "target" files):
+./v8sh rubyparser.js ../target/ben.rb
+
+The parser doesn't really do anything right now aside from verifying that the
+passed .rb file is indeed a valid Ruby program - it has no semantic actions.
+The next step is to fill it out with semantic actions that generate a JS
+equivalent of the Ruby code, thus implementing a Ruby->JS translator.
+
+That said, the parser currently fails to parse even a single-token Ruby
+program. I don't understand the reasons for that and will need to do some
+debugging to figure it out. If you want to chime in, have a go at it!
View
@@ -0,0 +1,352 @@
+/*
+ Default driver template for JS/CC generated parsers for V8
+
+ Features:
+ - Parser trace messages
+ - Step-by-step parsing
+ - Integrated panic-mode error recovery
+ - Pseudo-graphical parse tree generation
+
+ Written 2007 by Jan Max Meyer, J.M.K S.F. Software Technologies
+ Modified 2008 from driver.js_ to support V8 by Louis P.Santillan
+ <lpsantil@gmail.com>
+
+ This is in the public domain.
+*/
+##HEADER##
+
+var ##PREFIX##_dbg_withparsetree = false;
+var ##PREFIX##_dbg_withtrace = false;
+var ##PREFIX##_dbg_withstepbystep = false;
+
+function __##PREFIX##dbg_print( text )
+{
+ print( text );
+}
+
+function __##PREFIX##dbg_wait()
+{
+ var v = read_line();
+}
+
+function __##PREFIX##lex( info )
+{
+ var state = 0;
+ var match = -1;
+ var match_pos = 0;
+ var start = 0;
+ var pos = info.offset + 1;
+
+ do
+ {
+ pos--;
+ state = 0;
+ match = -2;
+ start = pos;
+
+ if( info.src.length <= start )
+ return ##EOF##;
+
+ do
+ {
+
+##DFA##
+ pos++;
+
+ }
+ while( state > -1 );
+
+ }
+ while( ##WHITESPACE## > -1 && match == ##WHITESPACE## );
+
+ if( match > -1 )
+ {
+ info.att = info.src.substr( start, match_pos - start );
+ info.offset = match_pos;
+
+##TERMINAL_ACTIONS##
+ }
+ else
+ {
+ info.att = new String();
+ match = -1;
+ }
+
+ return match;
+}
+
+
+function __##PREFIX##parse( src, err_off, err_la )
+{
+ var sstack = new Array();
+ var vstack = new Array();
+ var err_cnt = 0;
+ var act;
+ var go;
+ var la;
+ var rval;
+ var parseinfo = new Function( "", "var offset; var src; var att;" );
+ var info = new parseinfo();
+
+ //Visual parse tree generation
+ var treenode = new Function( "", "var sym; var att; var child;" );
+ var treenodes = new Array();
+ var tree = new Array();
+ var tmptree = null;
+
+##TABLES##
+
+##LABELS##
+
+ info.offset = 0;
+ info.src = src;
+ info.att = new String();
+
+ if( !err_off )
+ err_off = new Array();
+ if( !err_la )
+ err_la = new Array();
+
+ sstack.push( 0 );
+ vstack.push( 0 );
+
+ la = __##PREFIX##lex( info );
+
+ while( true )
+ {
+ act = ##ERROR##;
+ for( var i = 0; i < act_tab[sstack[sstack.length-1]].length; i+=2 )
+ {
+ if( act_tab[sstack[sstack.length-1]][i] == la )
+ {
+ act = act_tab[sstack[sstack.length-1]][i+1];
+ break;
+ }
+ }
+
+ /*
+ _print( "state " + sstack[sstack.length-1] + " la = " + la + " info.att = >" +
+ info.att + "< act = " + act + " src = >" + info.src.substr( info.offset, 30 ) + "..." + "<" +
+ " sstack = " + sstack.join() );
+ */
+
+ if( ##PREFIX##_dbg_withtrace && sstack.length > 0 )
+ {
+ __##PREFIX##dbg_print( "\nState " + sstack[sstack.length-1] + "\n" +
+ "\tLookahead: " + labels[la] + " (\"" + info.att + "\")\n" +
+ "\tAction: " + act + "\n" +
+ "\tSource: \"" + info.src.substr( info.offset, 30 ) + ( ( info.offset + 30 < info.src.length ) ?
+ "..." : "" ) + "\"\n" +
+ "\tStack: " + sstack.join() + "\n" +
+ "\tValue stack: " + vstack.join() + "\n" );
+
+ if( ##PREFIX##_dbg_withstepbystep )
+ __##PREFIX##dbg_wait();
+ }
+
+
+ //Panic-mode: Try recovery when parse-error occurs!
+ if( act == ##ERROR## )
+ {
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "Error detected: There is no reduce or shift on the symbol " + labels[la] );
+
+ err_cnt++;
+ err_off.push( info.offset - info.att.length );
+ err_la.push( new Array() );
+ for( var i = 0; i < act_tab[sstack[sstack.length-1]].length; i+=2 )
+ err_la[err_la.length-1].push( labels[act_tab[sstack[sstack.length-1]][i]] );
+
+ //Remember the original stack!
+ var rsstack = new Array();
+ var rvstack = new Array();
+ for( var i = 0; i < sstack.length; i++ )
+ {
+ rsstack[i] = sstack[i];
+ rvstack[i] = vstack[i];
+ }
+
+ while( act == ##ERROR## && la != ##EOF## )
+ {
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tError recovery\n" +
+ "Current lookahead: " + labels[la] + " (" + info.att + ")\n" +
+ "Action: " + act + "\n\n" );
+ if( la == -1 )
+ info.offset++;
+
+ while( act == ##ERROR## && sstack.length > 0 )
+ {
+ sstack.pop();
+ vstack.pop();
+
+ if( sstack.length == 0 )
+ break;
+
+ act = ##ERROR##;
+ for( var i = 0; i < act_tab[sstack[sstack.length-1]].length; i+=2 )
+ {
+ if( act_tab[sstack[sstack.length-1]][i] == la )
+ {
+ act = act_tab[sstack[sstack.length-1]][i+1];
+ break;
+ }
+ }
+ }
+
+ if( act != ##ERROR## )
+ break;
+
+ for( var i = 0; i < rsstack.length; i++ )
+ {
+ sstack.push( rsstack[i] );
+ vstack.push( rvstack[i] );
+ }
+
+ la = __##PREFIX##lex( info );
+ }
+
+ if( act == ##ERROR## )
+ {
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tError recovery failed, terminating parse process..." );
+ break;
+ }
+
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tError recovery succeeded, continuing" );
+ }
+
+ /*
+ if( act == ##ERROR## )
+ break;
+ */
+
+
+ //Shift
+ if( act > 0 )
+ {
+ //Parse tree generation
+ if( ##PREFIX##_dbg_withparsetree )
+ {
+ var node = new treenode();
+ node.sym = labels[ la ];
+ node.att = info.att;
+ node.child = new Array();
+ tree.push( treenodes.length );
+ treenodes.push( node );
+ }
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "Shifting symbol: " + labels[la] + " (" + info.att + ")" );
+
+ sstack.push( act );
+ vstack.push( info.att );
+
+ la = __##PREFIX##lex( info );
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tNew lookahead symbol: " + labels[la] + " (" + info.att + ")" );
+ }
+ //Reduce
+ else
+ {
+ act *= -1;
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "Reducing by producution: " + act );
+
+ rval = void(0);
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tPerforming semantic action..." );
+
+##ACTIONS##
+
+ if( ##PREFIX##_dbg_withparsetree )
+ tmptree = new Array();
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tPopping " + pop_tab[act][1] + " off the stack..." );
+
+ for( var i = 0; i < pop_tab[act][1]; i++ )
+ {
+ if( ##PREFIX##_dbg_withparsetree )
+ tmptree.push( tree.pop() );
+
+ sstack.pop();
+ vstack.pop();
+ }
+
+ go = -1;
+ for( var i = 0; i < goto_tab[sstack[sstack.length-1]].length; i+=2 )
+ {
+ if( goto_tab[sstack[sstack.length-1]][i] == pop_tab[act][0] )
+ {
+ go = goto_tab[sstack[sstack.length-1]][i+1];
+ break;
+ }
+ }
+
+ if( ##PREFIX##_dbg_withparsetree )
+ {
+ var node = new treenode();
+ node.sym = labels[ pop_tab[act][0] ];
+ node.att = new String();
+ node.child = tmptree.reverse();
+ tree.push( treenodes.length );
+ treenodes.push( node );
+ }
+
+ if( act == 0 )
+ break;
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\tPushing non-terminal " + labels[ pop_tab[act][0] ] );
+
+ sstack.push( go );
+ vstack.push( rval );
+ }
+ }
+
+ if( ##PREFIX##_dbg_withtrace )
+ __##PREFIX##dbg_print( "\nParse complete." );
+
+ if( ##PREFIX##_dbg_withparsetree )
+ {
+ if( err_cnt == 0 )
+ {
+ __##PREFIX##dbg_print( "\n\n--- Parse tree ---" );
+ __##PREFIX##dbg_parsetree( 0, treenodes, tree );
+ }
+ else
+ {
+ __##PREFIX##dbg_print( "\n\nParse tree cannot be viewed. There where parse errors." );
+ }
+ }
+
+ return err_cnt;
+}
+
+
+function __##PREFIX##dbg_parsetree( indent, nodes, tree )
+{
+ var str = new String();
+ for( var i = 0; i < tree.length; i++ )
+ {
+ str = "";
+ for( var j = indent; j > 0; j-- )
+ str += "\t";
+
+ str += nodes[ tree[i] ].sym;
+ if( nodes[ tree[i] ].att != "" )
+ str += " >" + nodes[ tree[i] ].att + "<" ;
+
+ __##PREFIX##dbg_print( str );
+ if( nodes[ tree[i] ].child.length > 0 )
+ __##PREFIX##dbg_parsetree( indent + 1, nodes, nodes[ tree[i] ].child );
+ }
+}
+
+##FOOTER##
Oops, something went wrong.

0 comments on commit 9136618

Please sign in to comment.