Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup (?:) from beginning/end of groups #164

Merged
merged 6 commits into from Apr 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
65 changes: 48 additions & 17 deletions src/xregexp.js
Expand Up @@ -308,12 +308,31 @@ function isType(value, type) {
* @returns {Boolean} Whether the next token is a quantifier.
*/
function isQuantifierNext(pattern, pos, flags) {
var quantifierPattern = '[?*+]|{\\d+(?:,\\d*)?}';
return isPatternNext(pattern, pos, flags, quantifierPattern);
}

/**
* Checks whether the next nonignorable token after the specified position matches the
* `needlePattern`
*
* @private
* @param {String} pattern Pattern to search within.
* @param {Number} pos Index in `pattern` to search at.
* @param {String} flags Flags used by the pattern.
* @param {String} needlePattern Pattern to match the next token against.
* @returns {Boolean} Whether the next token matches `needlePattern`
*/
function isPatternNext(pattern, pos, flags, needlePattern) {
var inlineCommentPattern = '\\(\\?#[^)]*\\)';
var lineCommentPattern = '#[^#\\n]*';
var patternsToIgnore = flags.indexOf('x') > -1 ?
// Ignore any leading whitespace, line comments, and inline comments
['\\s', lineCommentPattern, inlineCommentPattern] :
// Ignore any leading inline comments
[inlineCommentPattern];
return nativ.test.call(
flags.indexOf('x') > -1 ?
// Ignore any leading whitespace, line comments, and inline comments
/^(?:\s|#[^#\n]*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/ :
// Ignore any leading inline comments
/^(?:\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/,
new RegExp('^(?:' + patternsToIgnore.join('|') + ')*(?:' + needlePattern + ')'),
pattern.slice(pos)
);
}
Expand Down Expand Up @@ -1753,12 +1772,7 @@ XRegExp.addToken(
*/
XRegExp.addToken(
/\(\?#[^)]*\)/,
function(match, scope, flags) {
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1(?#)1` to `\11`.
return isQuantifierNext(match.input, match.index + match[0].length, flags) ?
'' : '(?:)';
},
getCommentOrWhitespaceSeparator,
{leadChar: '('}
);

Expand All @@ -1767,15 +1781,32 @@ XRegExp.addToken(
*/
XRegExp.addToken(
/\s+|#[^\n]*\n?/,
function(match, scope, flags) {
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1 1` to `\11`.
return isQuantifierNext(match.input, match.index + match[0].length, flags) ?
'' : '(?:)';
},
getCommentOrWhitespaceSeparator,
{flag: 'x'}
);

/**
* Returns a pattern that can be used in a native RegExp instead of a comment or whitespace.
* Depending on the context of the match, this can be either '' or '(?:)'.
*
* @private
* @param {String} match Match object for inline comments, whitespace, or line comments
* @param {String} scope (unused)
* @param {String} flags Flags used in the match
* @returns {String} Either '' or '(?:)', depending on which is needed in the context of the match.
*/
function getCommentOrWhitespaceSeparator (match, scope, flags) {
return (
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1 1` or `\1(?#)1` to `\11`
isQuantifierNext(match.input, match.index + match[0].length, flags) ||
// If the match is at the beginning or end of a group,
// we don't need to insert an empty non-capturing group.
match.input.charAt(match.index - 1) === '(' ||
isPatternNext(match.input, match.index + match[0].length, flags, '\\)')) ?
'' : '(?:)';
}

/*
* Dot, in dotall mode (aka singleline mode, flag s) only.
*/
Expand Down
13 changes: 13 additions & 0 deletions tests/spec/s-xregexp.js
Expand Up @@ -182,6 +182,19 @@ describe('XRegExp()', function() {
expect(function() {XRegExp('(\\(?:)');}).not.toThrow();
});

it('should not put (?:) at beginning/end of groups', function() {
var regex = XRegExp('( [0-9]{4} ) -? # year \n' +
'( [0-9]{2} ) -? # month \n' +
'( [0-9]{2} ) # day ', 'x');
expect(regex.source).toEqual('([0-9]{4})(?:)-?(?:)([0-9]{2})(?:)-?(?:)([0-9]{2})(?:)');
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is enforcing the inclusion of multiple (?:) empty groups that aren't needed for this regex to operate correctly. The test should be re-framed to not enforce anything that is unnecessary.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, yeah it is a bit brittle. We could change it to reject certain substrings, but then we might end up duplicating some of the logic in the non-test code. What if we changed it to something a little simpler, but still future-proof, like this?

expect(regex.source.length <= 54).toBe(true); // 54 is the length of the current result

});

it('should leave escaped (?:) at end of group', function() {
var regex = XRegExp('(.(\\(?:))');
expect(regex.test('x:')).toBe(true);
expect(regex.test('x(:')).toBe(true);
});

it('should store named capture data on regex instances', function() {
// The `captureNames` property is undocumented, so this is technically just testing
// implementation details. However, any changes to this need to be very intentional
Expand Down
65 changes: 48 additions & 17 deletions xregexp-all.js
Expand Up @@ -2959,12 +2959,31 @@ function isType(value, type) {
* @returns {Boolean} Whether the next token is a quantifier.
*/
function isQuantifierNext(pattern, pos, flags) {
var quantifierPattern = '[?*+]|{\\d+(?:,\\d*)?}';
return isPatternNext(pattern, pos, flags, quantifierPattern);
}

/**
* Checks whether the next nonignorable token after the specified position matches the
* `needlePattern`
*
* @private
* @param {String} pattern Pattern to search within.
* @param {Number} pos Index in `pattern` to search at.
* @param {String} flags Flags used by the pattern.
* @param {String} needlePattern Pattern to match the next token against.
* @returns {Boolean} Whether the next token matches `needlePattern`
*/
function isPatternNext(pattern, pos, flags, needlePattern) {
var inlineCommentPattern = '\\(\\?#[^)]*\\)';
var lineCommentPattern = '#[^#\\n]*';
var patternsToIgnore = flags.indexOf('x') > -1 ?
// Ignore any leading whitespace, line comments, and inline comments
['\\s', lineCommentPattern, inlineCommentPattern] :
// Ignore any leading inline comments
[inlineCommentPattern];
return nativ.test.call(
flags.indexOf('x') > -1 ?
// Ignore any leading whitespace, line comments, and inline comments
/^(?:\s|#[^#\n]*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/ :
// Ignore any leading inline comments
/^(?:\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/,
new RegExp('^(?:' + patternsToIgnore.join('|') + ')*(?:' + needlePattern + ')'),
pattern.slice(pos)
);
}
Expand Down Expand Up @@ -4404,12 +4423,7 @@ XRegExp.addToken(
*/
XRegExp.addToken(
/\(\?#[^)]*\)/,
function(match, scope, flags) {
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1(?#)1` to `\11`.
return isQuantifierNext(match.input, match.index + match[0].length, flags) ?
'' : '(?:)';
},
getCommentOrWhitespaceSeparator,
{leadChar: '('}
);

Expand All @@ -4418,15 +4432,32 @@ XRegExp.addToken(
*/
XRegExp.addToken(
/\s+|#[^\n]*\n?/,
function(match, scope, flags) {
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1 1` to `\11`.
return isQuantifierNext(match.input, match.index + match[0].length, flags) ?
'' : '(?:)';
},
getCommentOrWhitespaceSeparator,
{flag: 'x'}
);

/**
* Returns a pattern that can be used in a native RegExp instead of a comment or whitespace.
* Depending on the context of the match, this can be either '' or '(?:)'.
*
* @private
* @param {String} match Match object for inline comments, whitespace, or line comments
* @param {String} scope (unused)
* @param {String} flags Flags used in the match
* @returns {String} Either '' or '(?:)', depending on which is needed in the context of the match.
*/
function getCommentOrWhitespaceSeparator (match, scope, flags) {
return (
// Keep tokens separated unless the following token is a quantifier. This avoids e.g.
// inadvertedly changing `\1 1` or `\1(?#)1` to `\11`
isQuantifierNext(match.input, match.index + match[0].length, flags) ||
// If the match is at the beginning or end of a group,
// we don't need to insert an empty non-capturing group.
match.input.charAt(match.index - 1) === '(' ||
isPatternNext(match.input, match.index + match[0].length, flags, '\\)')) ?
'' : '(?:)';
}

/*
* Dot, in dotall mode (aka singleline mode, flag s) only.
*/
Expand Down