Skip to content
Browse files

Split the source code into several files, introduce build system

The source code is now in the src directory. The library needs to be
built using "rake", which creates the lib/peg.js file by combining the
source files.
  • Loading branch information...
1 parent 95a7889 commit e59f3ba3384649cef46790f3b186be3d845006d6 @dmajda dmajda committed
View
2 .gitignore
@@ -1 +1 @@
-lib/*.min.js
+lib/*
View
9 README.md
@@ -14,6 +14,15 @@ Features
* Handles wide class of grammars (superset of LL(*k*) and LR(*k*))
* Precise and human-friendly error reporting
+Building
+--------
+
+To build PEG.js, simply run the `rake` command:
+
+ $ rake
+
+Of course, you need to have [Rake](http://rake.rubyforge.org/) installed. The command creates the PEG.js library in `lib/peg.js` by processing the files in the `src` directory.
+
Usage
-----
View
29 Rakefile
@@ -1,4 +1,31 @@
+SRC_DIR = "src"
+LIB_DIR = "lib"
+BIN_DIR = "bin"
+
+def preprocess(input, base_dir)
+ input.split("\n").map do |line|
+ if line =~ /^\s*\/\/\s*@include\s*"([^"]*)"\s*$/
+ included_file = "#{base_dir}/#$1"
+ if !File.exist?(included_file)
+ abort "Included file \"#{included_file}\" does not exist."
+ end
+ preprocess(File.read(included_file), base_dir)
+ else
+ line
+ end
+ end.join("\n")
+end
+
desc "Generate the grammar parser"
task :metaparser do
- system "bin/pegjs PEG.parser lib/metagrammar.pegjs"
+ system "#{BIN_DIR}/pegjs PEG.parser #{SRC_DIR}/parser.pegjs"
+end
+
+desc "Build the peg.js file"
+task :build do
+ File.open("#{LIB_DIR}/peg.js", "w") do |f|
+ f.write(preprocess(File.read("#{SRC_DIR}/peg.js"), SRC_DIR))
+ end
end
+
+task :default => :build
View
3 benchmark/index.html
@@ -60,8 +60,7 @@
</tr>
</table>
- <script src="../lib/compiler.js"></script>
- <script src="../lib/metagrammar.js"></script>
+ <script src="../lib/peg.js"></script>
<script src="../vendor/jquery/jquery.js"></script>
<script src="../vendor/jquery.scrollto/jquery.scrollTo.js"></script>
<script>
View
3 bin/pegjs-main.js
@@ -5,8 +5,7 @@ importPackage(java.lang);
* Rhino does not have __FILE__ or anything similar so we have to pass the
* script path from the outside.
*/
-load(arguments[0] + "/../lib/compiler.js");
-load(arguments[0] + "/../lib/metagrammar.js");
+load(arguments[0] + "/../lib/peg.js");
var FILE_STDIN = "-";
var FILE_STDOUT = "-";
View
127 src/checks.js
@@ -0,0 +1,127 @@
+/*
+ * Checks made on the grammar AST before compilation. Each check is a function
+ * that is passed the AST and does not return anything. If the check passes, the
+ * function does not do anything special, otherwise it throws
+ * |PEG.GrammarError|. The checks are run in sequence in order of their
+ * definition.
+ */
+PEG.compiler.checks = [
+ /* Checks that all referenced rules exist. */
+ function(ast) {
+ function nop() {}
+
+ function checkExpression(node) { check(node.expression); }
+
+ function checkSubnodes(propertyName) {
+ return function(node) {
+ PEG.ArrayUtils.each(node[propertyName], check);
+ };
+ }
+
+ var checkFunctions = {
+ grammar:
+ function(node) {
+ for (var name in node.rules) {
+ check(node.rules[name]);
+ }
+ },
+
+ rule: checkExpression,
+ choice: checkSubnodes("alternatives"),
+ sequence: checkSubnodes("elements"),
+ labeled: checkExpression,
+ simple_and: checkExpression,
+ simple_not: checkExpression,
+ semantic_and: nop,
+ semantic_not: nop,
+ optional: checkExpression,
+ zero_or_more: checkExpression,
+ one_or_more: checkExpression,
+ action: checkExpression,
+
+ rule_ref:
+ function(node) {
+ if (ast.rules[node.name] === undefined) {
+ throw new PEG.GrammarError(
+ "Referenced rule \"" + node.name + "\" does not exist."
+ );
+ }
+ },
+
+ literal: nop,
+ any: nop,
+ "class": nop
+ };
+
+ function check(node) { checkFunctions[node.type](node); }
+
+ check(ast);
+ },
+
+ /* Checks that no left recursion is present. */
+ function(ast) {
+ function nop() {}
+
+ function checkExpression(node, appliedRules) {
+ check(node.expression, appliedRules);
+ }
+
+ var checkFunctions = {
+ grammar:
+ function(node, appliedRules) {
+ for (var name in node.rules) {
+ check(ast.rules[name], appliedRules);
+ }
+ },
+
+ rule:
+ function(node, appliedRules) {
+ check(node.expression, appliedRules.concat(node.name));
+ },
+
+ choice:
+ function(node, appliedRules) {
+ PEG.ArrayUtils.each(node.alternatives, function(alternative) {
+ check(alternative, appliedRules);
+ });
+ },
+
+ sequence:
+ function(node, appliedRules) {
+ if (node.elements.length > 0) {
+ check(node.elements[0], appliedRules);
+ }
+ },
+
+ labeled: checkExpression,
+ simple_and: checkExpression,
+ simple_not: checkExpression,
+ semantic_and: nop,
+ semantic_not: nop,
+ optional: checkExpression,
+ zero_or_more: checkExpression,
+ one_or_more: checkExpression,
+ action: checkExpression,
+
+ rule_ref:
+ function(node, appliedRules) {
+ if (PEG.ArrayUtils.contains(appliedRules, node.name)) {
+ throw new PEG.GrammarError(
+ "Left recursion detected for rule \"" + node.name + "\"."
+ );
+ }
+ check(ast.rules[node.name], appliedRules);
+ },
+
+ literal: nop,
+ any: nop,
+ "class": nop
+ };
+
+ function check(node, appliedRules) {
+ checkFunctions[node.type](node, appliedRules);
+ }
+
+ check(ast, []);
+ }
+];
View
31 src/compiler.js
@@ -0,0 +1,31 @@
+/* ===== PEG.compiler ===== */
+
+PEG.compiler = {
+ /*
+ * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
+ * if the AST contains a semantic error. Note that not all errors are detected
+ * during the generation and some may propagate to the generated parser and
+ * cause it to malfunction.
+ */
+ compile: function(ast) {
+ for (var i = 0; i < this.checks.length; i++) {
+ this.checks[i](ast);
+ }
+
+ for (var i = 0; i < this.passes.length; i++) {
+ ast = this.passes[i](ast);
+ }
+
+ var source = this.emitter(ast);
+ var result = eval(source);
+ result._source = source;
+
+ return result;
+ }
+};
+
+/* ===== Includes ===== */
+
+// @include "checks.js"
+// @include "passes.js"
+// @include "emitter.js"
View
369 lib/compiler.js → src/emitter.js
@@ -1,370 +1,3 @@
-/* PEG.js compiler. */
-
-(function(undefined) {
-
-/* ===== PEG ===== */
-
-/* no var */ PEG = {};
-
-/*
- * Generates a parser from a specified grammar and returns it.
- *
- * The grammar must be a string in the format described by the metagramar in the
- * metagrammar.pegjs file.
- *
- * Throws |PEG.parser.SyntaxError| if the grammar contains a syntax error or
- * |PEG.GrammarError| if it contains a semantic error. Note that not all errors
- * are detected during the generation and some may protrude to the generated
- * parser and cause its malfunction.
- */
-PEG.buildParser = function(grammar) {
- return PEG.compiler.compile(PEG.parser.parse(grammar));
-};
-
-/* ===== PEG.GrammarError ===== */
-
-/* Thrown when the grammar contains an error. */
-
-PEG.GrammarError = function(message) {
- this.name = "PEG.GrammarError";
- this.message = message;
-};
-
-PEG.GrammarError.prototype = Error.prototype;
-
-/* ===== PEG.ArrayUtils ===== */
-
-/* Array manipulation utility functions. */
-
-PEG.ArrayUtils = {
- /*
- * The code needs to be in sync with the code template in the compilation
- * function for "action" nodes.
- */
- contains: function(array, value) {
- /*
- * Stupid IE does not have Array.prototype.indexOf, otherwise this function
- * would be a one-liner.
- */
- var length = array.length;
- for (var i = 0; i < length; i++) {
- if (array[i] === value) {
- return true;
- }
- }
- return false;
- },
-
- each: function(array, callback) {
- var length = array.length;
- for (var i = 0; i < length; i++) {
- callback(array[i]);
- }
- },
-
- map: function(array, callback) {
- var result = [];
- var length = array.length;
- for (var i = 0; i < length; i++) {
- result[i] = callback(array[i]);
- }
- return result;
- }
-};
-
-/* ===== PEG.StringUtils ===== */
-
-/* String manipulation utility functions. */
-
-PEG.StringUtils = {
- /*
- * Surrounds the string with quotes and escapes characters inside so that the
- * result is a valid JavaScript string.
- *
- * The code needs to be in sync with th code template in the compilation
- * function for "action" nodes.
- */
- quote: function(s) {
- /*
- * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
- * literal except for the closing quote character, backslash, carriage
- * return, line separator, paragraph separator, and line feed. Any character
- * may appear in the form of an escape sequence.
- */
- return '"' + s
- .replace(/\\/g, '\\\\') // backslash
- .replace(/"/g, '\\"') // closing quote character
- .replace(/\r/g, '\\r') // carriage return
- .replace(/\u2028/g, '\\u2028') // line separator
- .replace(/\u2029/g, '\\u2029') // paragraph separator
- .replace(/\n/g, '\\n') // line feed
- + '"';
- }
-
-};
-
-/* ===== PEG.RegExpUtils ===== */
-
-/* RegExp manipulation utility functions. */
-
-PEG.RegExpUtils = {
- /*
- * Escapes characters inside the string so that it can be used as a list of
- * characters in a character class of a regular expression.
- */
- quoteForClass: function(s) {
- /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
- return s
- .replace(/\\/g, '\\\\') // backslash
- .replace(/\0/g, '\\0') // null, IE needs this
- .replace(/\//g, '\\/') // closing slash
- .replace(/]/g, '\\]') // closing bracket
- .replace(/-/g, '\\-') // dash
- .replace(/\r/g, '\\r') // carriage return
- .replace(/\u2028/g, '\\u2028') // line separator
- .replace(/\u2029/g, '\\u2029') // paragraph separator
- .replace(/\n/g, '\\n') // line feed
- }
-};
-
-/* ===== PEG.compiler ===== */
-
-PEG.compiler = {
- /*
- * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
- * if the AST contains a semantic error. Note that not all errors are detected
- * during the generation and some may protrude to the generated parser and
- * cause its malfunction.
- */
- compile: function(ast) {
- for (var i = 0; i < this.checks.length; i++) {
- this.checks[i](ast);
- }
-
- for (var i = 0; i < this.passes.length; i++) {
- ast = this.passes[i](ast);
- }
-
- var source = this.emitter(ast);
- var result = eval(source);
- result._source = source;
-
- return result;
- }
-};
-
-/*
- * Checks made on the grammar AST before compilation. Each check is a function
- * that is passed the AST and does not return anything. If the check passes, the
- * function does not do anything special, otherwise it throws
- * |PEG.GrammarError|. The checks are run in sequence in order of their
- * definition.
- */
-PEG.compiler.checks = [
- /* Checks that all referenced rules exist. */
- function(ast) {
- function nop() {}
-
- function checkExpression(node) { check(node.expression); }
-
- function checkSubnodes(propertyName) {
- return function(node) {
- PEG.ArrayUtils.each(node[propertyName], check);
- };
- }
-
- var checkFunctions = {
- grammar:
- function(node) {
- for (var name in node.rules) {
- check(node.rules[name]);
- }
- },
-
- rule: checkExpression,
- choice: checkSubnodes("alternatives"),
- sequence: checkSubnodes("elements"),
- labeled: checkExpression,
- simple_and: checkExpression,
- simple_not: checkExpression,
- semantic_and: nop,
- semantic_not: nop,
- optional: checkExpression,
- zero_or_more: checkExpression,
- one_or_more: checkExpression,
- action: checkExpression,
-
- rule_ref:
- function(node) {
- if (ast.rules[node.name] === undefined) {
- throw new PEG.GrammarError(
- "Referenced rule \"" + node.name + "\" does not exist."
- );
- }
- },
-
- literal: nop,
- any: nop,
- "class": nop
- };
-
- function check(node) { checkFunctions[node.type](node); }
-
- check(ast);
- },
-
- /* Checks that no left recursion is present. */
- function(ast) {
- function nop() {}
-
- function checkExpression(node, appliedRules) {
- check(node.expression, appliedRules);
- }
-
- var checkFunctions = {
- grammar:
- function(node, appliedRules) {
- for (var name in node.rules) {
- check(ast.rules[name], appliedRules);
- }
- },
-
- rule:
- function(node, appliedRules) {
- check(node.expression, appliedRules.concat(node.name));
- },
-
- choice:
- function(node, appliedRules) {
- PEG.ArrayUtils.each(node.alternatives, function(alternative) {
- check(alternative, appliedRules);
- });
- },
-
- sequence:
- function(node, appliedRules) {
- if (node.elements.length > 0) {
- check(node.elements[0], appliedRules);
- }
- },
-
- labeled: checkExpression,
- simple_and: checkExpression,
- simple_not: checkExpression,
- semantic_and: nop,
- semantic_not: nop,
- optional: checkExpression,
- zero_or_more: checkExpression,
- one_or_more: checkExpression,
- action: checkExpression,
-
- rule_ref:
- function(node, appliedRules) {
- if (PEG.ArrayUtils.contains(appliedRules, node.name)) {
- throw new PEG.GrammarError(
- "Left recursion detected for rule \"" + node.name + "\"."
- );
- }
- check(ast.rules[node.name], appliedRules);
- },
-
- literal: nop,
- any: nop,
- "class": nop
- };
-
- function check(node, appliedRules) {
- checkFunctions[node.type](node, appliedRules);
- }
-
- check(ast, []);
- }
-];
-
-/*
- * Optimalization passes made on the grammar AST before compilation. Each pass
- * is a function that is passed the AST and returns a new AST. The AST can be
- * modified in-place by the pass. The passes are run in sequence in order of
- * their definition.
- */
-PEG.compiler.passes = [
- /*
- * Removes proxy rules -- that is, rules that only delegate to other rule.
- */
- function(ast) {
- function isProxyRule(node) {
- return node.type === "rule" && node.expression.type === "rule_ref";
- }
-
- function replaceRuleRefs(ast, from, to) {
- function nop() {}
-
- function replaceInExpression(node, from, to) {
- replace(node.expression, from, to);
- }
-
- function replaceInSubnodes(propertyName) {
- return function(node, from, to) {
- PEG.ArrayUtils.each(node[propertyName], function(node) {
- replace(node, from, to);
- });
- };
- }
-
- var replaceFunctions = {
- grammar:
- function(node, from, to) {
- for (var name in node.rules) {
- replace(ast.rules[name], from, to);
- }
- },
-
- rule: replaceInExpression,
- choice: replaceInSubnodes("alternatives"),
- sequence: replaceInSubnodes("elements"),
- labeled: replaceInExpression,
- simple_and: replaceInExpression,
- simple_not: replaceInExpression,
- semantic_and: nop,
- semantic_not: nop,
- optional: replaceInExpression,
- zero_or_more: replaceInExpression,
- one_or_more: replaceInExpression,
- action: replaceInExpression,
-
- rule_ref:
- function(node, from, to) {
- if (node.name === from) {
- node.name = to;
- }
- },
-
- literal: nop,
- any: nop,
- "class": nop
- };
-
- function replace(node, from, to) {
- replaceFunctions[node.type](node, from, to);
- }
-
- replace(ast, from, to);
- }
-
- for (var name in ast.rules) {
- if (isProxyRule(ast.rules[name])) {
- replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
- if (name === ast.startRule) {
- ast.startRule = ast.rules[name].expression.name;
- }
- delete ast.rules[name];
- }
- }
-
- return ast;
- }
-];
-
/* Emits the generated code for the AST. */
PEG.compiler.emitter = function(ast) {
/*
@@ -1085,5 +718,3 @@ PEG.compiler.emitter = function(ast) {
return emit(ast);
};
-
-})();
View
0 lib/metagrammar.js → src/parser.js
File renamed without changes.
View
0 lib/metagrammar.pegjs → src/parser.pegjs
File renamed without changes.
View
83 src/passes.js
@@ -0,0 +1,83 @@
+/*
+ * Optimization passes made on the grammar AST before compilation. Each pass
+ * is a function that is passed the AST and returns a new AST. The AST can be
+ * modified in-place by the pass. The passes are run in sequence in order of
+ * their definition.
+ */
+PEG.compiler.passes = [
+ /*
+ * Removes proxy rules -- that is, rules that only delegate to another rule.
+ */
+ function(ast) {
+ function isProxyRule(node) {
+ return node.type === "rule" && node.expression.type === "rule_ref";
+ }
+
+ function replaceRuleRefs(ast, from, to) {
+ function nop() {}
+
+ function replaceInExpression(node, from, to) {
+ replace(node.expression, from, to);
+ }
+
+ function replaceInSubnodes(propertyName) {
+ return function(node, from, to) {
+ PEG.ArrayUtils.each(node[propertyName], function(node) {
+ replace(node, from, to);
+ });
+ };
+ }
+
+ var replaceFunctions = {
+ grammar:
+ function(node, from, to) {
+ for (var name in node.rules) {
+ replace(ast.rules[name], from, to);
+ }
+ },
+
+ rule: replaceInExpression,
+ choice: replaceInSubnodes("alternatives"),
+ sequence: replaceInSubnodes("elements"),
+ labeled: replaceInExpression,
+ simple_and: replaceInExpression,
+ simple_not: replaceInExpression,
+ semantic_and: nop,
+ semantic_not: nop,
+ optional: replaceInExpression,
+ zero_or_more: replaceInExpression,
+ one_or_more: replaceInExpression,
+ action: replaceInExpression,
+
+ rule_ref:
+ function(node, from, to) {
+ if (node.name === from) {
+ node.name = to;
+ }
+ },
+
+ literal: nop,
+ any: nop,
+ "class": nop
+ };
+
+ function replace(node, from, to) {
+ replaceFunctions[node.type](node, from, to);
+ }
+
+ replace(ast, from, to);
+ }
+
+ for (var name in ast.rules) {
+ if (isProxyRule(ast.rules[name])) {
+ replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
+ if (name === ast.startRule) {
+ ast.startRule = ast.rules[name].expression.name;
+ }
+ delete ast.rules[name];
+ }
+ }
+
+ return ast;
+ }
+];
View
43 src/peg.js
@@ -0,0 +1,43 @@
+(function(global, undefined) {
+
+/* ===== PEG ===== */
+
+var PEG = {
+ /*
+ * Generates a parser from a specified grammar and returns it.
+ *
+ * The grammar must be a string in the format described by the metagrammar in
+ * the parser.pegjs file.
+ *
+ * Throws |PEG.parser.SyntaxError| if the grammar contains a syntax error or
+ * |PEG.GrammarError| if it contains a semantic error. Note that not all
+ * errors are detected during the generation and some may propagate to the
+ * generated parser and cause it to malfunction.
+ */
+ buildParser: function(grammar) {
+ return PEG.compiler.compile(PEG.parser.parse(grammar));
+ }
+};
+
+/* ===== PEG.GrammarError ===== */
+
+/* Thrown when the grammar contains an error. */
+
+PEG.GrammarError = function(message) {
+ this.name = "PEG.GrammarError";
+ this.message = message;
+};
+
+PEG.GrammarError.prototype = Error.prototype;
+
+/* ===== Includes ===== */
+
+// @include "utils.js"
+// @include "parser.js"
+// @include "compiler.js"
+
+/* ===== Export ===== */
+
+global.PEG = PEG;
+
+})(this);
View
94 src/utils.js
@@ -0,0 +1,94 @@
+/* ===== PEG.ArrayUtils ===== */
+
+/* Array manipulation utility functions. */
+
+PEG.ArrayUtils = {
+ /*
+ * The code needs to be in sync with the code template in the compilation
+ * function for "action" nodes.
+ */
+ contains: function(array, value) {
+ /*
+ * Stupid IE does not have Array.prototype.indexOf, otherwise this function
+ * would be a one-liner.
+ */
+ var length = array.length;
+ for (var i = 0; i < length; i++) {
+ if (array[i] === value) {
+ return true;
+ }
+ }
+ return false;
+ },
+
+ each: function(array, callback) {
+ var length = array.length;
+ for (var i = 0; i < length; i++) {
+ callback(array[i]);
+ }
+ },
+
+ map: function(array, callback) {
+ var result = [];
+ var length = array.length;
+ for (var i = 0; i < length; i++) {
+ result[i] = callback(array[i]);
+ }
+ return result;
+ }
+};
+
+/* ===== PEG.StringUtils ===== */
+
+/* String manipulation utility functions. */
+
+PEG.StringUtils = {
+ /*
+ * Surrounds the string with quotes and escapes characters inside so that the
+ * result is a valid JavaScript string.
+ *
+ * The code needs to be in sync with the code template in the compilation
+ * function for "action" nodes.
+ */
+ quote: function(s) {
+ /*
+ * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
+ * literal except for the closing quote character, backslash, carriage
+ * return, line separator, paragraph separator, and line feed. Any character
+ * may appear in the form of an escape sequence.
+ */
+ return '"' + s
+ .replace(/\\/g, '\\\\') // backslash
+ .replace(/"/g, '\\"') // closing quote character
+ .replace(/\r/g, '\\r') // carriage return
+ .replace(/\u2028/g, '\\u2028') // line separator
+ .replace(/\u2029/g, '\\u2029') // paragraph separator
+ .replace(/\n/g, '\\n') // line feed
+ + '"';
+ }
+
+};
+
+/* ===== PEG.RegExpUtils ===== */
+
+/* RegExp manipulation utility functions. */
+
+PEG.RegExpUtils = {
+ /*
+ * Escapes characters inside the string so that it can be used as a list of
+ * characters in a character class of a regular expression.
+ */
+ quoteForClass: function(s) {
+ /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
+ return s
+ .replace(/\\/g, '\\\\') // backslash
+ .replace(/\0/g, '\\0') // null, IE needs this
+ .replace(/\//g, '\\/') // closing slash
+ .replace(/]/g, '\\]') // closing bracket
+ .replace(/-/g, '\\-') // dash
+ .replace(/\r/g, '\\r') // carriage return
+ .replace(/\u2028/g, '\\u2028') // line separator
+ .replace(/\u2029/g, '\\u2029') // paragraph separator
+ .replace(/\n/g, '\\n') // line feed
+ }
+};
View
5 test/index.html
@@ -5,10 +5,9 @@
<title>PEG.js Test Suite</title>
<link rel="stylesheet" href="../vendor/qunit/qunit.css">
<script src="../vendor/qunit/qunit.js"></script>
- <script src="../lib/compiler.js"></script>
- <script src="../lib/metagrammar.js"></script>
+ <script src="../lib/peg.js"></script>
<script src="compiler-test.js"></script>
- <script src="metagrammar-test.js"></script>
+ <script src="parser-test.js"></script>
</head>
<body>
<h1 id="qunit-header">PEG.js Test Suite</h1>
View
0 test/metagrammar-test.js → test/parser-test.js
File renamed without changes.

0 comments on commit e59f3ba

Please sign in to comment.
Something went wrong with that request. Please try again.