diff --git a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts index b97bb917dfaa7..24b934fed2318 100644 --- a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts +++ b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts @@ -357,7 +357,11 @@ function getRgArgs(query: vscode.TextSearchQuery, options: vscode.TextSearchOpti const regexpStr = regexp.source.replace(/\\\//g, '/'); // RegExp.source arbitrarily returns escaped slashes. Search and destroy. args.push('--regexp', regexpStr); } else if (query.isRegExp) { - args.push('--regexp', fixRegexEndingPattern(query.pattern)); + let fixedRegexpQuery = fixRegexEndingPattern(query.pattern); + fixedRegexpQuery = fixRegexNewline(fixedRegexpQuery); + fixedRegexpQuery = fixRegexCRMatchingNonWordClass(fixedRegexpQuery, query.isMultiline); + fixedRegexpQuery = fixRegexCRMatchingWhitespaceClass(fixedRegexpQuery, query.isMultiline); + args.push('--regexp', fixedRegexpQuery); } else { searchPatternAfterDoubleDashes = pattern; args.push('--fixed-strings'); @@ -435,22 +439,20 @@ export function fixRegexEndingPattern(pattern: string): string { pattern; } +export function fixRegexNewline(pattern: string): string { + // Replace an unescaped $ at the end of the pattern with \r?$ + // Match $ preceeded by none or even number of literal \ + return pattern.replace(/([^\\]|^)(\\\\)*\\n/g, '$1$2\\r?\\n'); +} + export function fixRegexCRMatchingWhitespaceClass(pattern: string, isMultiline: boolean): string { return isMultiline ? - pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/, (prefix1, prefix2) => { - return prefix1 + prefix2 + '(\\r?\\n|[^\\S\\r])'; - }) : - pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/, (prefix1, prefix2) => { - return prefix1 + prefix2 + '[ \\t\\f]'; - }); + pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/g, '$1$2(\\r?\\n|[^\\S\\r])') : + pattern.replace(/([^\\]|^)((?:\\\\)*)\\s/g, '$1$2[ \\t\\f]'); } export function fixRegexCRMatchingNonWordClass(pattern: string, isMultiline: boolean): string { return isMultiline ? - pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/, (prefix1, prefix2) => { - return prefix1 + prefix2 + '(\\r?\\n|[^\\w\\r])'; - }) : - pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/, (prefix1, prefix2) => { - return prefix1 + prefix2 + '[^\\w\\r]'; - }); + pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/g, '$1$2(\\r?\\n|[^\\w\\r])') : + pattern.replace(/([^\\]|^)((?:\\\\)*)\\W/g, '$1$2[^\\w\\r]'); } diff --git a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts index 313dd31c1e5ca..39f4b9171ee50 100644 --- a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts +++ b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngine.test.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import * as assert from 'assert'; -import { unicodeEscapesToPCRE2, fixRegexEndingPattern } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine'; +import { unicodeEscapesToPCRE2, fixRegexEndingPattern, fixRegexCRMatchingWhitespaceClass, fixRegexCRMatchingNonWordClass, fixRegexNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine'; suite('RipgrepTextSearchEngine', () => { test('unicodeEscapesToPCRE2', async () => { @@ -37,19 +37,72 @@ suite('RipgrepTextSearchEngine', () => { ].forEach(testFixRegexEndingPattern); }); - test('fixRegexEndingPattern', () => { - function testFixRegexEndingPattern([input, expectedResult]: string[]): void { - assert.equal(fixRegexEndingPattern(input), expectedResult); + test('fixRegexCRMatchingWhitespaceClass', () => { + function testFixRegexCRMatchingWhitespaceClass([inputReg, isMultiline, testStr, shouldMatch]): void { + const fixed = fixRegexCRMatchingWhitespaceClass(inputReg, isMultiline); + const reg = new RegExp(fixed); + assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`); } [ - ['foo', 'foo'], - ['', ''], - ['^foo.*bar\\s+', '^foo.*bar\\s+'], - ['foo$', 'foo\\r?$'], - ['$', '\\r?$'], - ['foo\\$', 'foo\\$'], - ['foo\\\\$', 'foo\\\\\\r?$'], - ].forEach(testFixRegexEndingPattern); + ['foo', false, 'foo', true], + + ['foo\\s', false, 'foo\r\n', false], + ['foo\\sabc', true, 'foo\r\nabc', true], + + ['foo\\s', false, 'foo\n', false], + ['foo\\s', true, 'foo\n', true], + + ['foo\\s\\n', true, 'foo\r\n', false], + ['foo\\r\\s', true, 'foo\r\n', true], + + ['foo\\s+abc', true, 'foo \r\nabc', true], + ['foo\\s+abc', false, 'foo \t abc', true], + ].forEach(testFixRegexCRMatchingWhitespaceClass); + }); + + test('fixRegexCRMatchingNonWordClass', () => { + function testRegexCRMatchingNonWordClass([inputReg, isMultiline, testStr, shouldMatch]): void { + const fixed = fixRegexCRMatchingNonWordClass(inputReg, isMultiline); + const reg = new RegExp(fixed); + assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`); + } + + [ + ['foo', false, 'foo', true], + + ['foo\\W', false, 'foo\r\n', false], + ['foo\\Wabc', true, 'foo\r\nabc', true], + + ['foo\\W', false, 'foo\n', true], + ['foo\\W', true, 'foo\n', true], + + ['foo\\W\\n', true, 'foo\r\n', false], + ['foo\\r\\W', true, 'foo\r\n', true], + + ['foo\\W+abc', true, 'foo \r\nabc', true], + ['foo\\W+abc', false, 'foo .-\t abc', true], + ].forEach(testRegexCRMatchingNonWordClass); + }); + + test('fixRegexNewline', () => { + function testFixRegexNewline([inputReg, testStr, shouldMatch]): void { + const fixed = fixRegexNewline(inputReg); + const reg = new RegExp(fixed); + assert.equal(reg.test(testStr), shouldMatch, `${inputReg} => ${reg}, ${testStr}, ${shouldMatch}`); + } + + [ + ['foo', 'foo', true], + + ['foo\\n', 'foo\r\n', true], + ['foo\\n', 'foo\n', true], + ['foo\\nabc', 'foo\r\nabc', true], + ['foo\\nabc', 'foo\nabc', true], + ['foo\\r\\n', 'foo\r\n', true], + + ['foo\\n+abc', 'foo\r\nabc', true], + ['foo\\n+abc', 'foo\n\n\nabc', true], + ].forEach(testFixRegexNewline); }); });