From 40fe8384a5181bf0deaab36c51562bbf76dca26b Mon Sep 17 00:00:00 2001 From: Mike Bland Date: Sat, 30 Dec 2023 01:38:37 -0500 Subject: [PATCH] Replace stripJsonComments() trailing comma regexp I realized after merging #11 that the regular expression at the end of stripJsonComments() would affect quoted strings. The new "stripJsonComments > doesn't modify > strings including trailing commas" test case confirmed this. The new algorithm took a bit of trial and error, and the existing tests really helped distill it to its essence. Quite happy with how it turned out. Also reorganized the test cases with more `describe` blocks. Added the new "maintains correct syntax error position info when..." suite to confirm positional info in JSON.parse errors remains correct. --- lib/index.js | 31 +-- test/stripJsonComments.test.js | 414 ++++++++++++++++++++------------- 2 files changed, 266 insertions(+), 179 deletions(-) diff --git a/lib/index.js b/lib/index.js index 311aba9..812dbc6 100644 --- a/lib/index.js +++ b/lib/index.js @@ -185,14 +185,12 @@ export async function analyzeArgv(argv) { * @returns {string} jsonStr with comments, trailing commas replaced by space */ export function stripJsonComments(jsonStr) { - let inString = false - let escaped = false - let inComment = null - let result = '' + let inString, escaped, inComment, comma, result = [] for (let i = 0; i !== jsonStr.length; ++i) { - const prevChar = i !== 0 ? jsonStr[i-1] : null + const prevChar = jsonStr[i-1] let curChar = jsonStr[i] + const isNotWhitespace = curChar.trimStart() !== '' if (inString) { inString = curChar !== '"' || escaped @@ -202,19 +200,22 @@ export function stripJsonComments(jsonStr) { (inComment === 'block' && prevChar === '*' && curChar === '/')) { inComment = null } - if (curChar.trimStart() !== '') curChar = ' ' - } else if (curChar === '"') { + if (isNotWhitespace) curChar = ' ' + } else if (curChar === '"') { // opening a string inString = true - } else if (prevChar === '/') { - if (curChar === '/') inComment = 'line' - else if (curChar === '*') inComment = 'block' - if (inComment) curChar = ' ' // otherwise prevChar closed a block comment - } else if (curChar === '/') { - curChar = ' ' // opening a line or block comment + comma = null + } else if (curChar === '/' || curChar === '*') { // maybe opening a comment + if (prevChar === '/') { // definitely opening a comment + inComment = (curChar === '/') ? 'line' : 'block' + curChar = result[i-1] = ' ' + } + } else if (isNotWhitespace) { // definitely outside string or comment + if (comma && (curChar === ']' || curChar === '}')) result[comma] = ' ' + comma = (curChar === ',') ? i : null } - result += curChar + result.push(curChar) } - return result.replaceAll(/,(\s*[\]}])/g, ' $1') + return result.join('') } /** diff --git a/test/stripJsonComments.test.js b/test/stripJsonComments.test.js index db0e2db..8e025d3 100644 --- a/test/stripJsonComments.test.js +++ b/test/stripJsonComments.test.js @@ -11,186 +11,272 @@ import { describe, expect, test } from 'vitest' describe('stripJsonComments', () => { const BASIC_OBJECT = {opts: {destination: 'foo'}} - test('handles empty string', () => { - expect(stripJsonComments('')).toBe('') - }) + describe('doesn\'t modify', () => { + test('the empty string', () => { + expect(stripJsonComments('')).toBe('') + }) - test('doesn\'t modify basic object without comments', () => { - const orig = JSON.stringify(BASIC_OBJECT, null, 2) + test('an object with plain strings and no comments', () => { + const orig = JSON.stringify(BASIC_OBJECT, null, 2) - expect(stripJsonComments(orig)).toBe(orig) - }) + expect(stripJsonComments(orig)).toBe(orig) + }) - test('doesn\'t modify properly escaped strings', () => { - const obj = { - opts: { - first: 'ignores escaped \\" before the end of the string', - second: 'ignores escaped \\ before the end of the string \\\\\\\\' + test('properly escaped strings', () => { + const obj = { + opts: { + first: 'ignores escaped \\" before the end of the string', + second: 'ignores escaped \\ before the end of the string \\\\\\\\' + } } - } - const orig = JSON.stringify(obj, null, 2) + const orig = JSON.stringify(obj, null, 2) - expect(stripJsonComments(orig)).toBe(orig) - }) + expect(stripJsonComments(orig)).toBe(orig) + }) - test('doesn\'t modify strings containing comment patterns', () => { - const obj = { - opts: { - line: 'looks like a // line comment, but isn\'t', - block: 'looks like a /* block comment, */ but isn\'t' + test('strings containing comment patterns', () => { + const obj = { + opts: { + line: 'looks like a // line comment, but isn\'t', + block: 'looks like a /* block comment, */ but isn\'t' + } } - } - const orig = JSON.stringify(obj, null, 2) + const orig = JSON.stringify(obj, null, 2) - expect(stripJsonComments(orig)).toBe(orig) - }) + expect(stripJsonComments(orig)).toBe(orig) + }) - test('replaces line comments, preserves existing whitespace', () => { - const orig = [ - '// Frist', - '{//\tSecond', - ' // Third\r', - ' "opts": { // Fourth', - ' // Fifth', - ' "destination": "foo" // Sixth', - ' } // Seventh', - ' // Eighth', - '}// Ninth', - '// Tenth' - ].join('\n') - - const result = stripJsonComments(orig) - - expect(result).toBe([ - ' ', - '{ \t ', - ' \r', - ' "opts": { ', - ' ', - ' "destination": "foo" ', - ' } ', - ' ', - '} ', - ' ' - ].join('\n')) - expect(JSON.parse(result)).toStrictEqual(BASIC_OBJECT) - }) + test('strings including trailing commas', () => { + const obj = { + opts: { + arrayWithTrailingComma: '[ "foo", "bar", "baz", ]', + objectWithTrailingComma: '{ "foo": "bar", }' + } + } + const orig = JSON.stringify(obj, null, 2) - test('replaces block comments, preserves existing whitespace', () => { - const orig = [ - '/** Frist */', - '{/*\tSecond', - ' * Third\r', - '*/"opts": { /* Fourth', - ' Fifth*/', - '/*/ "destination": "foo"/* Sixth */', - ' } /* Seventh', - ' /*Eighth*/', - '}/* Ninth', - ' Tenth*/' - ].join('\n') - - const result = stripJsonComments(orig) - - expect(result).toBe([ - ' ', - '{ \t ', - ' \r', - ' "opts": { ', - ' ', - ' "destination": "foo" ', - ' } ', - ' ', - '} ', - ' ' - ].join('\n')) - expect(JSON.parse(result)).toStrictEqual(BASIC_OBJECT) + expect(stripJsonComments(orig)).toBe(orig) + }) }) - test('replaces mixed comments and trailing commas before ] or }', () => { - const orig = [ - '// Frist', - '{/* Second', - ' * //Third', - '*/"opts": { // Fourth', - ' //*Fifth', - ' "destinations": [', - ' "foo",', - ' "bar",', - ' "baz", /* Sixth, with trailing comma for future expansion */', - ' ],', // Not a JSON comment, but here's another trailing comma. - ' },// Seventh, also with trailing comma for future expansion', - ' /*Eighth*/', - '} /* Ninth', - ' Tenth*/' - ].join('\n') - - const result = stripJsonComments(orig) - - expect(result).toBe([ - ' ', - '{ ', - ' ', - ' "opts": { ', - ' ', - ' "destinations": [', - ' "foo",', - ' "bar",', - ' "baz" ', - ' ] ', - ' } ', - ' ', - '} ', - ' ' - ].join('\n')) - expect(JSON.parse(result)).toStrictEqual({ - opts: { destinations: ['foo', 'bar', 'baz'] } + describe('replaces', () => { + test('line comments, preserving existing whitespace', () => { + const orig = [ + '// Frist', + '{//\tSecond', + ' // Third\r', + ' "opts": { // Fourth', + ' // Fifth', + ' "destination": "foo" // Sixth', + ' } // Seventh', + ' // Eighth', + '}// Ninth', + '// Tenth' + ].join('\n') + + const result = stripJsonComments(orig) + + expect(result).toBe([ + ' ', + '{ \t ', + ' \r', + ' "opts": { ', + ' ', + ' "destination": "foo" ', + ' } ', + ' ', + '} ', + ' ' + ].join('\n')) + expect(JSON.parse(result)).toStrictEqual(BASIC_OBJECT) + }) + + test('block comments, preserving existing whitespace', () => { + const orig = [ + '/** Frist */', + '{/*\tSecond', + ' * Third\r', + '*/"opts": { /* Fourth', + ' Fifth*/', + '/*/ "destination": "foo"/* Sixth */', + ' } /* Seventh', + ' /*Eighth*/', + '}/* Ninth', + ' Tenth*/' + ].join('\n') + + const result = stripJsonComments(orig) + + expect(result).toBe([ + ' ', + '{ \t ', + ' \r', + ' "opts": { ', + ' ', + ' "destination": "foo" ', + ' } ', + ' ', + '} ', + ' ' + ].join('\n')) + expect(JSON.parse(result)).toStrictEqual(BASIC_OBJECT) + }) + + test('mixed comments and trailing commas before ] or }', () => { + const orig = [ + '// Frist', + '{/* Second', + ' * //Third', + '*/"opts": { // Fourth', + ' //*Fifth', + ' "destinations": [', + ' "foo",', + ' "bar",', + ' "baz", /* Sixth, with trailing comma for future expansion */', + ' ],', // Not a JSON comment, but here's another trailing comma. + ' }, // Seventh, also with trailing comma for future expansion', + ' /*Eighth*/', + '} /* Ninth', + ' Tenth*/' + ].join('\n') + + const result = stripJsonComments(orig) + + expect(result).toBe([ + ' ', + '{ ', + ' ', + ' "opts": { ', + ' ', + ' "destinations": [', + ' "foo",', + ' "bar",', + ' "baz" ', + ' ] ', + ' } ', + ' ', + '} ', + ' ' + ].join('\n')) + expect(JSON.parse(result)).toStrictEqual({ + opts: { destinations: ['foo', 'bar', 'baz'] } + }) }) }) - test('reopens block comment if character after "*/" is \'*\'', () => { - const orig = [ - '{/* Frist', - ' */*', - ' "opts": {', - ' "destination": "doesn\'t matter, because commented out"', - ' }*/', - '}' - ].join('\n') - - const result = stripJsonComments(orig) - - expect(result).toBe([ - '{ ', - ' ', - ' ', - ' ', - ' ', - '}' - ].join('\n')) - expect(JSON.parse(result)).toStrictEqual({}) + describe('opens', () => { + test('a block comment if character after "*/" is \'*\'', () => { + const orig = [ + '{/* Frist', + ' */*', + ' "opts": {', + ' "destination": "doesn\'t matter, because commented out"', + ' }*/', + '}' + ].join('\n') + + const result = stripJsonComments(orig) + + expect(result).toBe([ + '{ ', + ' ', + ' ', + ' ', + ' ', + '}' + ].join('\n')) + expect(JSON.parse(result)).toStrictEqual({}) + }) + + test('a line comment if character after "*/" is \'/\'', () => { + const orig = [ + '{/* Frist', + ' "opts": {', + ' "destination": "doesn\'t matter, because commented out"', + ' Still commented out here, but next line will open a line comment.', + ' *//}', + '}' + ].join('\n') + + const result = stripJsonComments(orig) + + expect(result).toBe([ + '{ ', + ' ', + ' ', + ' ', + ' ', + '}' + ].join('\n')) + expect(JSON.parse(result)).toStrictEqual({}) + }) }) - test('opens a line comment if character after "*/" is \'/\'', () => { - const orig = [ - '{/* Frist', - ' "opts": {', - ' "destination": "doesn\'t matter, because commented out"', - ' Still commented out here, but next line will open a line comment.', - ' *//}', - '}' - ].join('\n') - - const result = stripJsonComments(orig) - - expect(result).toBe([ - '{ ', - ' ', - ' ', - ' ', - ' ', - '}' - ].join('\n')) - expect(JSON.parse(result)).toStrictEqual({}) + describe('maintains correct syntax error position info when', () => { + test('* not preceded or followed by /', () => { + const str = [ + '{', + ' // Starting off strong...', + ' "foo": "bar",', + ' * ...but forgot opening slash on this line. */', + ' "baz": "quux",', + '}' + ].join('\n') + const badPos = str.indexOf('* ...but forgot') + + expect(() => JSON.parse(stripJsonComments(str))).toThrowError( + `Expected double-quoted property name in JSON at position ${badPos}` + ) + }) + + test('/ not followed by /', () => { + const str = [ + '{', + ' // Starting off strong...', + ' "foo": "bar",', + ' / ...but forgot opening slash on this line.', + ' "baz": "quux",', + '}' + ].join('\n') + const badPos = str.indexOf('/ ...but forgot') + + expect(() => JSON.parse(stripJsonComments(str))).toThrowError( + `Expected double-quoted property name in JSON at position ${badPos}` + ) + }) + + test('multiple trailing commas are present', () => { + const str = [ + '{', + ' // Starting off strong...', + ' "foo": "bar",', + ' // ...but added too many trailing commas on the next line.', + ' "baz": "quux",,,', + '}' + ].join('\n') + // The last comma will become a space, so JSON.parse() will break on the + // one before that. + const badPos = str.indexOf(',,,') + 1 + + expect(() => JSON.parse(stripJsonComments(str))).toThrowError( + `Expected double-quoted property name in JSON at position ${badPos}` + ) + }) + + test('trailing commas don\'t follow an element or property', () => { + const str = [ + '{', + ' // Starting off strong...', + ' "foo": "bar",', + ' /* ...still looking good... */', + ' "baz": "quux",', + '}, // ...but this last comma is a problem.' + ].join('\n') + const badPos = str.indexOf(', // ...but') + + expect(() => JSON.parse(stripJsonComments(str))).toThrowError( + `Unexpected non-whitespace character after JSON at position ${badPos}` + ) + }) }) })