Skip to content

Commit

Permalink
Finished first version of the compiler.
Browse files Browse the repository at this point in the history
Added optimizations.
Created cache place holder.
Updated README.markdown
Create script object to make it easier to use.
  • Loading branch information
daKuleMune committed Sep 8, 2011
1 parent 1c69a9e commit 424c72d
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 99 deletions.
19 changes: 7 additions & 12 deletions README.markdown
@@ -1,31 +1,26 @@
NodeBNF
=======

NodeBNF is a language parsing library, and interpreter framework. Programmed in JavaScript and tested with [nodeJS]( https://github.com/joyent/node). It was made to work with another project as a dependency for sub language processing in node.
NodeBNF is a script language parsing library, programmed in JavaScript and tested with [nodeJS]( https://github.com/joyent/node). It was made to work with another project as a dependency for sub-language processing in node.

Description
-----------

NodeBNF is both a framework for an interpreter, and a language parser. It's using at this time a custom JavaScript mark-up which was molded after [BNF]( http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form).
NodeBNF is a framework for an interpreter, a [BNF]( http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form) compiler, and a language parser. It can use BNF or a custom JavaScript mark-up which was modeled after BNF.

Parts
-----
Working Parts
-------------

- Language parser: Using a custom JavaScript mark-up the language parser converts scripts into a pre-interpreted collection of tokens, while at the same time checking for syntax correctness.
- BNF compiler: Optionally taking raw BNF and turning it into a cached JavaScript file used by the language parser.
- Language parser: Using a custom JavaScript mark-up, the language parser converts scripts into a pre-interpreted collection of tokens, while at the same time checking for syntax correctness.
- Interpreter framework: Every interpreter wants to be different, and should be, as no language compiles or understands tokens in the same way. The framework binds actions to the tokens, and then calls the actions, which is a basic parsing style. The framework tries to speed this process up by putting the tokens in a tree-shaped data pool.

Road Map
--------

- At this time the language parser reads a custom JavaScript API, however when the project was envisioned it was to read files of true BNF grammar. It will very soon be able to do that in both real time, and compile BNF grammar files into the optimized JavaScript API.
- I personally like normal BNF, but I know ABNF offers some great advantages to writing a scripting language, so ABNF is going to be supported.
- Parsing text will be able to be done with regular expressions.
- More optimizations are already TODO'd and will be done.

TODO
----

- The OR operation should set the syntax into groups so as only process each syntax layer once.
- The OR operation should set the syntax into groups so as to process each syntax layer only once.

License
-------
Expand Down
28 changes: 17 additions & 11 deletions lib/bnf.bnf.js
Expand Up @@ -26,36 +26,42 @@ i.AddRule( "optWhitespace" );
i.AddRule( "expression" );
i.AddRule( "lineEnd" );
i.AddRule( "list" );
i.AddRule( "orlist" );
i.AddRule( "term" );
i.AddRule( "literal" );
i.AddRule( "ruleName" );
i.AddRule( "text" );
i.AddRule( "char" );
i.AddRule( "varRule" );
i.AddRule( "anyNoQuotes" );
i.AddRule( "textSingleQuotes" );
i.AddRule( "textDoubleQuotes" );
i.AddRule( "anyWithSingleQuotes" );
i.AddRule( "anyWithDoubleQuotes" );
i.AddRule( "anyWithQuotes" );
i.IndexTokenIdList();

/**
* Define grammar of all the rules
*/
i.WriteRule( "syntax", i.Or( r.rule, i.And( r.rule, r.lineEnd, r.syntax ) ) );
i.WriteRule( "lineEnd", i.Or( i.And( "\r", "\n" ), "\n" ) );
i.WriteRule( "rule", i.And( r.optWhitespace, "<", r.ruleName, ">", r.optWhitespace, "::=",
r.optWhitespace, r.expression ) );
i.WriteRule( "rule", i.And( r.optWhitespace, "<", r.ruleName, ">", r.optWhitespace, "::=", r.optWhitespace, r.expression ) );
i.WriteRule( "optWhitespace", i.Or( i.Blank(), i.And( r.whitespace, r.optWhitespace ) ) );
i.WriteRule( "whitespace", i.Or( i.Or( " ", "\t", "\n" ) ) );
i.WriteRule( "whitespace", i.Or( " ", "\t", "\n" ) );
i.WriteRule( "ruleName", r.text );
i.WriteRule( "expression", i.Or( r.list, i.And( r.list, r.optWhitespace, "|", r.optWhitespace, r.expression ) ) );
i.WriteRule( "expression", i.Or( r.list, i.And( r.list, r.optWhitespace, r.orlist, r.optWhitespace, r.expression ) ) );
i.WriteRule( "orlist", "|" );
i.WriteRule( "list", i.Or( r.term, i.And( r.term, r.optWhitespace, r.list ) ) );
i.WriteRule( "term", i.Or( r.literal, r.varRule ) );
i.WriteRule( "varRule", i.And( "<", r.text, ">" ) );
i.WriteRule( "literal", i.Or( i.And( "'", r.text, "'" ), i.And( '"', r.text, '"') ) );
i.WriteRule( "literal", i.Or( i.And( "'", r.textSingleQuotes, "'" ), i.And( '"', r.textDoubleQuotes, '"' ) ) );
i.WriteRule( "text", i.Or( r.char, i.And( r.char, r.text ) ) );
i.WriteRule( "char", i.Or( i.CharGroup( "A", "Z" ), i.CharGroup( "a", "z" ) ) );
/*
i.WriteRule( "list", or( r.term, and( r.term, r.optWhitespace, r.list ) ) );
i.WriteRule( "term", or( r.literal, and( "<", r.ruleName, ">" ), r.regEx ) );
i.WriteRule( "regEx", and( "/", /^([A-Za-z\\ \-\_\!\&\{\}\]\[\(\)\+\#\@\*\%\~\`\,\.\<\>\?\:\;\"\'\|]|\\\/){1,}$/, "/" ) );
*/
i.WriteRule( "textSingleQuotes", i.Or( i.Blank(), r.anyWithSingleQuotes, i.And( r.anyWithSingleQuotes, r.textSingleQuotes ) ) );
i.WriteRule( "textDoubleQuotes", i.Or( i.Blank(), r.anyWithDoubleQuotes, i.And( r.anyWithDoubleQuotes, r.textDoubleQuotes ) ) );
i.WriteRule( "anyWithSingleQuotes", i.Or( r.anyNoQuotes, "\\'", '"' ) );
i.WriteRule( "anyWithDoubleQuotes", i.Or( r.anyNoQuotes, '\\"', "'" ) );
i.WriteRule( "anyNoQuotes", i.Or( i.CharGroup( " ", "!" ), i.CharGroup( "#", "&" ), i.CharGroup( "(", "[" ), i.CharGroup( "]", "~" ) ) );

exports.interpreter = i;
1 change: 1 addition & 0 deletions lib/cache/dummy.txt
@@ -0,0 +1 @@
This is the cache directory for pre-compiled scripts
176 changes: 161 additions & 15 deletions lib/compiler.js
Expand Up @@ -3,25 +3,171 @@
*
* Compiler that can compile BNF syntax into bnf.js scripts to load faster for the first pass or obfuscate the BNF.
*
* @version 0.0.1
* @version 0.0.8
*/

var i = require( "./bnf.bnf.js" ).interpreter;
exports.version = '0.0.8';
var parserObject = require( "./parser.js" ).parser;
var parser = new parserObject( i );
i.AddTokenEventByName( "ruleName", function( token ){
console.log( token.text );
} );
i.AddTokenEventByName( "list", function( token ){
//var left = i.SeekTokenByName( token, "number" ).text;
var term = null;
while( ( term = i.SeekTokenByName( token, "term" ) ) != null ){
console.log( term.text );
var fs = require('fs');
/**
* BNF script compiler for language interpretation.
* @see Compiler.Constructor
*/
exports.Compiler = function( ){
/**
* Constructor of the object
*/
function Constructor( ){
_interpreter = require( "./bnf.bnf.js" ).interpreter;
_parser = new parserObject( _interpreter );
for( var i in _eventObject ){
_interpreter.AddTokenEventByName( i, _eventObject[i] );
}
}
} );

//parser.ParseScript( "test.bnf" );
/////////////////////
//////PRIVATE VARIABLES//
/////////////////////
var _interpreter = null;
var _parser = null;
// Token event handlers, keyed by the token name they react to. Each handler
// reads and writes state on `this` (presumably the script object being
// parsed -- confirm against the parser): `ruleContainer` maps a rule name to
// an array of alternatives, and each alternative is an array of term records.
var _eventObject = {
    // Start of a script: begin with an empty rule table.
    "script":function( token ){
        this.ruleContainer = {};
    },
    // A rule name: create its entry with one empty alternative and make it
    // the rule that later terms are appended to.
    "ruleName":function( token ){
        var alternatives = [ [] ];
        this.ruleContainer[token.text] = alternatives;
        this.currentRuleContainer = alternatives;
    },
    // A term: record it in the last alternative of the current rule. Literals
    // and rule references lose their first and last character (the
    // delimiters); any other kind of term is stored as an empty record.
    "term":function( token ){
        var first = token.tokens[0];
        var record = {};
        var kind = null;
        if( first.name == "literal" ){
            kind = "literal";
        }
        else if( first.name == "varRule" ){
            kind = "rule";
        }
        if( kind !== null ){
            record.type = kind;
            record.text = first.text.substring( 1, first.text.length - 1 );
        }
        var lastIndex = this.currentRuleContainer.length - 1;
        this.currentRuleContainer[lastIndex].push( record );
    },
    // An "or" separator: open a new, empty alternative for the current rule.
    "orlist":function( token ){
        this.currentRuleContainer.push( [] );
    }

};
//////////////////
//////PUBLIC METHODS//
//////////////////
// Placeholder hook: accepts an event object, but the body is empty, so the
// argument is currently ignored and nothing is returned.
this.ImportEvents = function( eventObject ){

};

/**
* Renders one parsed term as the JavaScript source fragment used inside a
* generated WriteRule call.
* @param syntax Term record with a `type` ("literal" or "rule") and a `text`.
* @returns {String} A double-quoted string literal for a non-empty literal,
* "i.Blank()" for an empty literal, "r.<name>" for a rule reference, and
* undefined for any other type.
*/
function _OutputWriteRule( syntax ){
    if( syntax.type == "literal" ){
        if( syntax.text != "" ){
            // The text is emitted inside double quotes, so a bare double quote
            // (legal inside a single-quoted BNF literal) must be escaped or the
            // generated script will not parse. Existing backslash pairs such as
            // \' and \" are kept untouched so they still act as JS escapes.
            var escaped = String( syntax.text ).replace( /\\[\s\S]|"/g, function( match ){
                return match == "\"" ? "\\\"" : match;
            } );
            return "\"" + escaped + "\"";
        }
        else{
            return "i.Blank()";
        }
    }
    else if( syntax.type == "rule" ){
        return "r." + syntax.text;
    }
}

/**
* Renders one alternative of a rule (a sequence of term records).
* @param ruleSyntax Array of term records, each rendered by _OutputWriteRule.
* @returns {String} "i.And( ... )" around the rendered terms when there is
* more than one; otherwise the single rendered term (undefined when empty).
*/
function _GenerateAndTree( ruleSyntax ){
    var rendered = [];
    var index = 0;
    while( index < ruleSyntax.length ){
        rendered.push( _OutputWriteRule.call( this, ruleSyntax[index] ) );
        index++;
    }

    // A lone term needs no And wrapper.
    if( rendered.length <= 1 ){
        return rendered[0];
    }
    return "i.And( " + rendered.join( ", " ) + " )";
}

/**
* Renders the full body of a rule (a list of alternatives).
* @param ruleSyntax Array of alternatives, each rendered by _GenerateAndTree.
* @returns {String} "i.Or( ... )" around the rendered alternatives when there
* is more than one; otherwise the single rendered alternative.
*/
function _GenerateWriteRule( ruleSyntax ){
    var Output = [];
    for( var i = 0; i < ruleSyntax.length; i++ ){
        Output.push( _GenerateAndTree.call( this, ruleSyntax[i] ) );
    }

    // The stray "exports.Compiler = function( ){" line that sat here opened a
    // second, never-closed function inside this one; it has been removed.
    if( Output.length > 1 ){
        return "i.Or( " + Output.join( ", " ) + " )";
    }
    else{
        return Output[0];
    }
}

/**
* Reports whether a compiled script for the given id can be reused.
* TODO check if the file in cache is the same as the file we want to compile.
* Until that check exists the answer is always false.
* @param id Cache identifier (not inspected yet).
* @returns {Boolean} Always false.
*/
function _CheckIdCache( id ){
    var isCached = false;
    return isCached;
}

function _ConnectScript( id ){
return require( __dirname + "/cache/" + id + ".bnf.js" ).interpreter;
}

/**
* Generates the JavaScript form of a parsed BNF script, writes it to
* cache/<id>.bnf.js, and hands the loaded interpreter to the callback. The raw
* BNF source is also stored next to it as cache/<id>.bnf.
* @param script Parsed script; `ruleContainer` (rule name -> alternatives) and
* `rawScript` are read from it.
* @param id Cache identifier used to build both file names.
* @param callback Called with the interpreter once the compiled file is written.
*/
function _CompileObjectScript( script, id, callback ){
    var cacheBase = __dirname + "/cache/" + id;
    var ruleName;

    var cacheScript = 'var languageObject = require( "../parser.js" ).LanguageObject;\n';
    cacheScript += 'var i = new languageObject( "bnf" );\n';
    cacheScript += 'var r = i.syntaxObject;\n';
    //Rule Names: every rule is declared before any rule body refers to it
    for( ruleName in script.ruleContainer ){
        cacheScript += 'i.AddRule( "'+ruleName+'" );\n';
    }
    cacheScript += 'i.IndexTokenIdList();\n';
    //Rule Writes
    for( ruleName in script.ruleContainer ){
        cacheScript += 'i.WriteRule( "'+ruleName+'", '+_GenerateWriteRule.call( this, script.ruleContainer[ruleName] )+' );\n';
    }
    cacheScript += 'exports.interpreter = i;';

    fs.writeFile( cacheBase + ".bnf.js", cacheScript, 'utf8', function( err ){
        // Loading after a failed write would either fail with a misleading
        // "module not found" or silently pick up an older file, so surface
        // the real write error instead.
        if( err ){
            throw err;
        }
        callback( _ConnectScript.call( this, id ) );
    } );

    // Keeping the raw source is secondary, so a failure here is reported but
    // does not stop the compile. The callback itself is required:
    // fs.writeFile throws on current Node versions when it is omitted.
    fs.writeFile( cacheBase + ".bnf", script.rawScript, 'utf8', function( err ){
        if( err ){
            console.error( "Unable to store raw BNF script for \"" + id + "\": " + err.message );
        }
    } );
}

/**
* Compiles a BNF script through the parser's ParseScript and passes the
* resulting interpreter to the callback. When _CheckIdCache reports the id as
* already compiled, the cached script is connected directly instead.
* @param scriptName Script handed to the parser's ParseScript.
* @param id Cache identifier for the compiled output.
* @param callback Called with the interpreter.
*/
this.CompileScript = function( scriptName, id, callback ){
    if( _CheckIdCache.call( this, id ) ){
        callback( _ConnectScript.call( this, id ) );
        return;
    }

    // Keep the compiler instance reachable from inside the parser callback.
    var compiler = this;
    _parser.ParseScript( scriptName, function( parsedScript ){
        _CompileObjectScript.call( compiler, parsedScript, id, function( interpreter ){
            callback( interpreter );
        } );
    } );
};

/**
* Compiles BNF source given as a string and passes the resulting interpreter
* to the callback. When _CheckIdCache reports the id as already compiled, the
* cached script is connected directly instead.
* @param string BNF source text handed to the parser's ParseScriptString.
* @param id Cache identifier for the compiled output.
* @param callback Called with the interpreter.
*/
this.CompileString = function( string, id, callback ){
    if( _CheckIdCache.call( this, id ) ){
        //Run Script
        callback( _ConnectScript.call( this, id ) );
        return;
    }

    var parsedScript = _parser.ParseScriptString( string );
    return _CompileObjectScript.call( this, parsedScript, id, function( interpreter ){
        callback( interpreter );
    } );
};

//CALL TO CONSTRUCTOR//
Constructor.call( this, arguments[0] );
//CALL TO CONSTRUCTOR//
};

0 comments on commit 424c72d

Please sign in to comment.