Skip to content

Commit

Permalink
Merge e880548 into 01bf8c6
Browse files Browse the repository at this point in the history
  • Loading branch information
jugglinmike committed Dec 10, 2018
2 parents 01bf8c6 + e880548 commit e0542fb
Show file tree
Hide file tree
Showing 4 changed files with 366 additions and 813 deletions.
281 changes: 242 additions & 39 deletions src/lex.js
Expand Up @@ -1319,11 +1319,58 @@ Lexer.prototype = {
var value = char;
var body = "";
var flags = [];
var groupReferences = [];
var malformed = false;
var isCharSet = false;
var isCharSetRange = false;
var isGroup = false;
var isQuantifiable = false;
var hasInvalidQuantifier = false;
var escapedChars = "";
var escapeSequence;
var groupCount = 0;
var terminated, malformedDesc;

var scanUnexpectedChars = function() {
var scanEscapeSequence = function() {
var next, sequence, reference;
index += 1;
char = this.peek(index);

if (reg.nonzeroDigit.test(char)) {
sequence = char;
next = this.peek(index + 1);
while (reg.nonzeroDigit.test(next) || next === "0") {
index += 1;
char = next;
sequence += char;
body += char;
value += char;
next = this.peek(index + 1);
}
groupReferences.push(Number(sequence));
return sequence;
}

escapedChars += char;

if (char === "u" && this.peek(index + 1) === "{") {
var x = index + 2;
sequence = "u{";
next = this.peek(x);
while (isHex(next)) {
sequence += next;
x += 1;
next = this.peek(x);
}
if (sequence.length > 2 && next === "}") {
sequence += "}";
body += sequence;
value += sequence;
index = x + 1;
return sequence;
}
}

// Unexpected control character
if (char < " ") {
malformed = true;
Expand Down Expand Up @@ -1354,6 +1401,99 @@ Lexer.prototype = {
function() { return true; }
);
}

index += 1;
body += char;
value += char;

return char;
}.bind(this);

var checkQuantifier = function() {
var lookahead = index;
var lowerBound = "";
var upperBound = "";
var next;

next = this.peek(lookahead + 1);

while (reg.decimalDigit.test(next)) {
lookahead += 1;
lowerBound += next;
next = this.peek(lookahead + 1);
}

if (!lowerBound) {
return false;
}

if (next === "}") {
return true;
}

if (next !== ",") {
return false;
}

lookahead += 1;
next = this.peek(lookahead + 1);

while (reg.decimalDigit.test(next)) {
lookahead += 1;
upperBound += next;
next = this.peek(lookahead + 1);
}

if (next !== "}") {
return false;
}

if (upperBound) {
return Number(lowerBound) <= Number(upperBound);
}

return true;
}.bind(this);

var translateUFlag = function(body) {
// The BMP character to use as a replacement for astral symbols when
// translating an ES6 "u"-flagged pattern to an ES5-compatible
// approximation.
// Note: replacing with '\uFFFF' enables false positives in unlikely
// scenarios. For example, `[\u{1044f}-\u{10440}]` is an invalid pattern
// that would not be detected by this substitution.
var astralSubstitute = "\uFFFF";

return body
// Replace every Unicode escape sequence with the equivalent BMP
// character or a constant ASCII code point in the case of astral
// symbols. (See the above note on `astralSubstitute` for more
// information.)
.replace(/\\u\{([0-9a-fA-F]+)\}|\\u([a-fA-F0-9]{4})/g, function($0, $1, $2) {
var codePoint = parseInt($1 || $2, 16);
if (codePoint > 0x10FFFF) {
malformed = true;
this.trigger("error", {
code: "E016",
line: this.line,
character: this.char,
data: [ char ]
});

return;
}
if (codePoint <= 0xFFFF) {
return String.fromCharCode(codePoint);
}
return astralSubstitute;
}.bind(this))
// Replace each paired surrogate with a single ASCII symbol to avoid
// throwing on regular expressions that are only valid in combination
// with the "u" flag.
.replace(
/[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
astralSubstitute
);
}.bind(this);

// Regular expressions must start with '/'
Expand All @@ -1365,11 +1505,15 @@ Lexer.prototype = {
terminated = false;

// Try to get everything in between slashes. A couple of
// cases aside (see scanUnexpectedChars) we don't really
// cases aside (see scanEscapeSequence) we don't really
// care whether the resulting expression is valid or not.
// We will check that later using the RegExp object.

while (index < length) {
// Because an iteration of this loop may terminate in a number of
// distinct locations, `isCharSetRange` is re-set at the onset of
// iteration.
isCharSetRange &= char === "-";
char = this.peek(index);
value += char;
body += char;
Expand All @@ -1379,47 +1523,67 @@ Lexer.prototype = {
if (this.peek(index - 1) !== "\\" || this.peek(index - 2) === "\\") {
isCharSet = false;
}
}

if (char === "\\") {
index += 1;
char = this.peek(index);
body += char;
value += char;

scanUnexpectedChars();
}

index += 1;
continue;
} else if (char === "-") {
isCharSetRange = true;
}
}

if (char === "\\") {
index += 1;
char = this.peek(index);
body += char;
value += char;

scanUnexpectedChars();

if (char === "/") {
index += 1;
continue;
}

if (char === "[") {
index += 1;
continue;
}
escapeSequence = scanEscapeSequence();
if (isCharSet && (this.peek(index) === "-" || isCharSetRange) &&
reg.regexpCharClasses.test(escapeSequence)) {
this.triggerAsync(
"error",
{
code: "E016",
line: this.line,
character: this.char,
data: [ "Character class used in range" ]
},
checks,
function() { return flags.indexOf("u") > -1; }
);
}
continue;
}

if (char === "{" && !hasInvalidQuantifier) {
hasInvalidQuantifier = !checkQuantifier();
}

if (char === "[") {
isCharSet = true;
index += 1;
continue;
}
} else if (char === "(") {
isGroup = true
if (this.peek(index + 1) === "?" &&
(this.peek(index + 2) === "=" || this.peek(index+2) === "!")) {
isQuantifiable = true;
}
} else if (char === ")") {
if (isQuantifiable) {
isQuantifiable = false;

if (reg.regexpQuantifiers.test(this.peek(index + 1))) {
this.triggerAsync(
"error",
{
code: "E016",
line: this.line,
character: this.char,
data: [ "Quantified quantifiable" ]
},
checks,
function() { return flags.indexOf("u") > -1; }
);
}
} else {
groupCount += 1;
}

if (char === "/") {
isGroup = false;
} else if (char === "/") {
body = body.substr(0, body.length - 1);
terminated = true;
index += 1;
Expand Down Expand Up @@ -1449,7 +1613,7 @@ Lexer.prototype = {

while (index < length) {
char = this.peek(index);
if (!/[gimy]/.test(char)) {
if (!/[gimyu]/.test(char)) {
break;
}
if (char === "y") {
Expand All @@ -1466,20 +1630,59 @@ Lexer.prototype = {
function() { return true; }
);
}
if (value.indexOf("y") > -1) {
malformedDesc = "Duplicate RegExp flag";
} else if (char === "u") {
if (!state.inES6(true)) {
this.triggerAsync(
"warning",
{
code: "W119",
line: this.line,
character: this.char,
data: [ "Unicode RegExp flag", "6" ]
},
checks,
function() { return true; }
);
}


var hasInvalidEscape = groupReferences.some(function(groupReference) {
if (groupReference > groupCount) {
return true;
}
});

if (!hasInvalidEscape) {
hasInvalidEscape = !escapedChars.split("").every(function(escapedChar) {
return escapedChar === "u" ||
escapedChar === "/" ||
reg.regexpCharClasses.test(escapedChar) ||
reg.regexpSyntaxChars.test(escapedChar);
});
}
} else {
flags.push(char);

if (hasInvalidEscape) {
malformedDesc = "Invalid escape";
} else if (hasInvalidQuantifier) {
malformedDesc = "Invalid quantifier";
}

body = translateUFlag(body);
}

if (flags.indexOf(char) > -1) {
malformedDesc = "Duplicate RegExp flag";
}
flags.push(char);

value += char;
index += 1;
}

// Check regular expression for correctness.

try {
new RegExp(body, flags.join(""));
new RegExp(body);
} catch (err) {
/**
* Because JSHint relies on the current engine's RegExp parser to
Expand Down
10 changes: 10 additions & 0 deletions src/reg.js
Expand Up @@ -40,3 +40,13 @@ exports.maxlenException = /^(?:(?:\/\/|\/\*|\*) ?)?[^ ]+$/;
// Source:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
exports.whitespace = /[ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/;

exports.nonzeroDigit = /^[1-9]$/;

exports.decimalDigit = /^[0-9]$/;

exports.regexpSyntaxChars = /[\^$\\.*+?()[\]{}|]/;

exports.regexpQuantifiers = /[*+?{]/;

exports.regexpCharClasses = /[dDsSwW]/;

0 comments on commit e0542fb

Please sign in to comment.