Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Make sure quoting functions output only ASCII characters

This patch prevents portability problems. In particular, it fixes a
problem where "SyntaxError: Invalid range in character class." error
appeared when using the command-line version on Windows (see GH-13).
  • Loading branch information...
commit aeb2cb4f1cf0bb0d47282e0416328a20d0bfe272 1 parent 4d68812
@dmajda dmajda authored
Showing with 123 additions and 31 deletions.
  1. +32 −6 src/emitter.js
  2. +32 −8 src/parser.js
  3. +58 −16 src/utils.js
  4. +1 −1  test/parser-test.js
View
38 src/emitter.js
@@ -119,6 +119,33 @@ PEG.compiler.emitter = function(ast) {
" var rightmostMatchFailuresExpected = [];",
" var cache = {};",
" ",
+ /* This needs to be in sync with |padLeft| in utils.js. */
+ " function padLeft(input, padding, length) {",
+ " var result = input;",
+ " ",
+ " var padLength = length - input.length;",
+ " for (var i = 0; i < padLength; i++) {",
+ " result = padding + result;",
+ " }",
+ " ",
+ " return result;",
+ " }",
+ " ",
+ /* This needs to be in sync with |escape| in utils.js. */
+ " function escape(ch) {",
+ " var charCode = ch.charCodeAt(0);",
+ " ",
+ " if (charCode < 0xFF) {",
+ " var escapeChar = 'x';",
+ " var length = 2;",
+ " } else {",
+ " var escapeChar = 'u';",
+ " var length = 4;",
+ " }",
+ " ",
+ " return '\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);",
+ " }",
+ " ",
/* This needs to be in sync with |quote| in utils.js. */
" function quote(s) {",
" /*",
@@ -128,12 +155,11 @@ PEG.compiler.emitter = function(ast) {
" * Any character may appear in the form of an escape sequence.",
" */",
" return '\"' + s",
- " .replace(/\\\\/g, '\\\\\\\\') // backslash",
- " .replace(/\"/g, '\\\\\"') // closing quote character",
- " .replace(/\\r/g, '\\\\r') // carriage return",
- " .replace(/\\u2028/g, '\\\\u2028') // line separator",
- " .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
- " .replace(/\\n/g, '\\\\n') // line feed",
+ " .replace(/\\\\/g, '\\\\\\\\') // backslash",
+ " .replace(/\"/g, '\\\\\"') // closing quote character",
+ " .replace(/\\r/g, '\\\\r') // carriage return",
+ " .replace(/\\n/g, '\\\\n') // line feed",
+ " .replace(/[\\x80-\\uFFFF]/g, escape) // non-ASCII characters",
" + '\"';",
" }",
" ",
View
40 src/parser.js
@@ -15,6 +15,31 @@ PEG.parser = (function(){
var rightmostMatchFailuresExpected = [];
var cache = {};
+ function padLeft(input, padding, length) { // Returns |input| with |padding| prepended until it is at least |length| characters long.
+ var result = input;
+
+ var padLength = length - input.length; // zero or negative when |input| is already long enough, so the loop below is skipped
+ for (var i = 0; i < padLength; i++) {
+ result = padding + result; // one |padding| per missing character
+ }
+
+ return result;
+ }
+
+ function escape(ch) { // Returns a \xHH or \uHHHH escape sequence for the first character of |ch|.
+ var charCode = ch.charCodeAt(0);
+
+ if (charCode < 0xFF) { // NOTE(review): strict "<", so 0xFF itself takes the \u branch (still a valid escape, just longer) -- confirm whether "<=" was intended
+ var escapeChar = 'x';
+ var length = 2;
+ } else {
+ var escapeChar = 'u';
+ var length = 4;
+ }
+
+ return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length); // upper-case hex digits, zero-padded to |length| digits
+ }
+
function quote(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
@@ -23,12 +48,11 @@ PEG.parser = (function(){
* Any character may appear in the form of an escape sequence.
*/
return '"' + s
- .replace(/\\/g, '\\\\') // backslash
- .replace(/"/g, '\\"') // closing quote character
- .replace(/\r/g, '\\r') // carriage return
- .replace(/\u2028/g, '\\u2028') // line separator
- .replace(/\u2029/g, '\\u2029') // paragraph separator
- .replace(/\n/g, '\\n') // line feed
+ .replace(/\\/g, '\\\\') // backslash
+ .replace(/"/g, '\\"') // closing quote character
+ .replace(/\r/g, '\\r') // carriage return
+ .replace(/\n/g, '\\n') // line feed
+ .replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
+ '"';
}
@@ -3404,13 +3428,13 @@ PEG.parser = (function(){
var savedReportMatchFailures = reportMatchFailures;
reportMatchFailures = false;
- if (input.substr(pos).match(/^[   ᠎ -    ]/) !== null) {
+ if (input.substr(pos).match(/^[ \xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/) !== null) {
var result0 = input.charAt(pos);
pos++;
} else {
var result0 = null;
if (reportMatchFailures) {
- matchFailed("[   ᠎ -    ]");
+ matchFailed("[ \\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]");
}
}
reportMatchFailures = savedReportMatchFailures;
View
74 src/utils.js
@@ -35,6 +35,44 @@ function map(array, callback) {
}
/*
+ * Returns a string padded on the left to a desired length with a character.
+ *
+ * |input| is the string to pad, |padding| is the text prepended once for each
+ * missing character (the call visible in |escape| passes the single character
+ * "0"), and |length| is the desired minimum length. An |input| that is already
+ * |length| characters or longer is returned unchanged.
+ *
+ * The code needs to be in sync with the code template in the compilation
+ * function for "action" nodes.
+ */
+function padLeft(input, padding, length) {
+ var result = input;
+
+ var padLength = length - input.length;
+ for (var i = 0; i < padLength; i++) {
+ result = padding + result;
+ }
+
+ return result;
+}
+
+/*
+ * Returns an escape sequence for a given character. Uses \x for characters <=
+ * 0xFF to save space, \u for the rest.
+ *
+ * NOTE(review): the condition below is |charCode < 0xFF|, so the character
+ * 0xFF itself is emitted as \u00FF rather than \xFF. The result is still a
+ * valid escape sequence; confirm whether "<=" was intended before changing
+ * it, because the copies of this function must stay in sync.
+ *
+ * Only the first character of |ch| is examined. The hexadecimal digits are
+ * upper-cased and zero-padded to 2 (\x) or 4 (\u) digits using |padLeft|.
+ *
+ * The code needs to be in sync with the code template in the compilation
+ * function for "action" nodes.
+ */
+function escape(ch) {
+ var charCode = ch.charCodeAt(0);
+
+ if (charCode < 0xFF) {
+ var escapeChar = 'x';
+ var length = 2;
+ } else {
+ var escapeChar = 'u';
+ var length = 4;
+ }
+
+ return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
+}
+
+/*
* Surrounds the string with quotes and escapes characters inside so that the
* result is a valid JavaScript string.
*
@@ -47,14 +85,15 @@ function quote(s) {
* literal except for the closing quote character, backslash, carriage return,
* line separator, paragraph separator, and line feed. Any character may
* appear in the form of an escape sequence.
+ *
+ * For portability, we also escape all non-ASCII characters.
*/
return '"' + s
- .replace(/\\/g, '\\\\') // backslash
- .replace(/"/g, '\\"') // closing quote character
- .replace(/\r/g, '\\r') // carriage return
- .replace(/\u2028/g, '\\u2028') // line separator
- .replace(/\u2029/g, '\\u2029') // paragraph separator
- .replace(/\n/g, '\\n') // line feed
+ .replace(/\\/g, '\\\\') // backslash
+ .replace(/"/g, '\\"') // closing quote character
+ .replace(/\r/g, '\\r') // carriage return
+ .replace(/\n/g, '\\n') // line feed
+ .replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
+ '"';
};
@@ -63,17 +102,20 @@ function quote(s) {
* characters in a character class of a regular expression.
*/
function quoteForRegexpClass(s) {
- /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
+ /*
+ * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
+ *
+ * For portability, we also escape all non-ASCII characters.
+ */
return s
- .replace(/\\/g, '\\\\') // backslash
- .replace(/\0/g, '\\0') // null, IE needs this
- .replace(/\//g, '\\/') // closing slash
- .replace(/]/g, '\\]') // closing bracket
- .replace(/-/g, '\\-') // dash
- .replace(/\r/g, '\\r') // carriage return
- .replace(/\u2028/g, '\\u2028') // line separator
- .replace(/\u2029/g, '\\u2029') // paragraph separator
- .replace(/\n/g, '\\n') // line feed
+ .replace(/\\/g, '\\\\') // backslash
+ .replace(/\0/g, '\\0') // null, IE needs this
+ .replace(/\//g, '\\/') // closing slash
+ .replace(/]/g, '\\]') // closing bracket
+ .replace(/-/g, '\\-') // dash
+ .replace(/\r/g, '\\r') // carriage return
+ .replace(/\n/g, '\\n') // line feed
+ .replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
}
/*
View
2  test/parser-test.js
@@ -435,7 +435,7 @@ test("parses bracketDelimitedCharacter", function() {
parserParses("start = [\\n]", classGrammar(false, ["\n"], "[\\n]"));
parserParses("start = [\\0]", classGrammar(false, ["\0"], "[\\0]"));
parserParses("start = [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
- parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\u0120]"));
+ parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\\u0120]"));
parserParses("start = [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
});
Please sign in to comment.
Something went wrong with that request. Please try again.