From 6f696811b77cc6642b724305a4a1a77586eb93e7 Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Fri, 8 Apr 2022 16:59:40 -0400 Subject: [PATCH 1/6] buffer: atob throw error when the input value is invalid The specification of `atob` has various different conditions that we need to abide by. The specific changes that were made: * `atob` now immediately throws when `undefined`, `false`, or a `number` is supplied * `atob` now strips ASCII whitespace before attempting to decode * `atob` now validates that the code point's length divided by 4 leaves a remainder that is not 1 See: https://infra.spec.whatwg.org/#forgiving-base64-decode Fixes: https://github.com/nodejs/node/issues/42646 --- lib/buffer.js | 23 ++++++++++++++++++++++- test/parallel/test-btoa-atob.js | 9 +++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/buffer.js b/lib/buffer.js index 773d56572aa2a4..2b7536643ba95d 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1259,7 +1259,28 @@ function atob(input) { if (arguments.length === 0) { throw new ERR_MISSING_ARGS('input'); } - input = `${input}`; + + if (input === undefined || input === false || typeof input === 'number') { + throw lazyDOMException( + 'The string to be decoded is not correctly encoded.', + 'ValidationError'); + } + + // Remove all ASCII whitespace from data. + // + // See #1 - https://infra.spec.whatwg.org/#forgiving-base64 + input = `${input}`.replace(/\s/g, ''); + + // If data's code point length divides by 4 leaving a remainder of 1, then + // return failure. + // + // See #3 - https://infra.spec.whatwg.org/#forgiving-base64 + if (input.length % 4 === 1) { + throw lazyDOMException( + 'The string to be decoded is not correctly encoded.', + 'ValidationError'); + } + for (let n = 0; n < input.length; n++) { if (!ArrayPrototypeIncludes(kForgivingBase64AllowedChars, StringPrototypeCharCodeAt(input, n))) diff --git a/test/parallel/test-btoa-atob.js b/test/parallel/test-btoa-atob.js index 64f53671030ba0..3a9786f7a62111 100644 --- a/test/parallel/test-btoa-atob.js +++ b/test/parallel/test-btoa-atob.js @@ -15,3 +15,12 @@ throws(() => buffer.btoa(), /TypeError/); strictEqual(atob(' '), ''); strictEqual(atob(' YW\tJ\njZA=\r= '), 'abcd'); + +throws(() => buffer.atob(undefined), /ValidationError/); +throws(() => buffer.atob(false), /ValidationError/); +throws(() => buffer.atob(1), /ValidationError/); +throws(() => buffer.atob(0), /ValidationError/); +throws(() => buffer.atob('a'), /ValidationError/); +throws(() => buffer.atob('a '), /ValidationError/); +throws(() => buffer.atob(' a'), /ValidationError/); +throws(() => buffer.atob('aaaaa'), /ValidationError/); From 08e4c7567eea122f0bff36468869ec6d093eb551 Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Sat, 9 Apr 2022 08:44:20 -0400 Subject: [PATCH 2/6] buffer: simplify atob approach of disregarding whitespace * Use approach of counting non-ASCII whitespace characters instead of removing the characters via string replacement * Additional atob validation tests have been added --- lib/buffer.js | 33 ++++++++++++++++----------------- test/parallel/test-btoa-atob.js | 21 +++++++++++++-------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index 2b7536643ba95d..69f9560c19b9e3 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1232,12 +1232,14 @@ function btoa(input) { return buf.toString('base64'); } -// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode -const kForgivingBase64AllowedChars = [ +const asciiWhitespaceCharacters = [ // ASCII whitespace // Refs: https://infra.spec.whatwg.org/#ascii-whitespace 0x09, 0x0A, 0x0C, 0x0D, 0x20, +]; +// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode +const kForgivingBase64AllowedChars = [ // Uppercase letters ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i), @@ -1260,32 +1262,29 @@ function atob(input) { throw new ERR_MISSING_ARGS('input'); } - if (input === undefined || input === false || typeof input === 'number') { - throw lazyDOMException( - 'The string to be decoded is not correctly encoded.', - 'ValidationError'); - } + input = `${input}`; + let nonAsciiWhitespaceCharCount = 0; - // Remove all ASCII whitespace from data. - // - // See #1 - https://infra.spec.whatwg.org/#forgiving-base64 - input = `${input}`.replace(/\s/g, ''); + for (let n = 0; n < input.length; n++) { + const char = StringPrototypeCharCodeAt(input, n); + + if (ArrayPrototypeIncludes(kForgivingBase64AllowedChars, char)) { + nonAsciiWhitespaceCharCount++; + } else if (!ArrayPrototypeIncludes(asciiWhitespaceCharacters, char)) { + throw lazyDOMException('Invalid character', 'InvalidCharacterError'); + } + } // If data's code point length divides by 4 leaving a remainder of 1, then // return failure. // // See #3 - https://infra.spec.whatwg.org/#forgiving-base64 - if (input.length % 4 === 1) { + if (nonAsciiWhitespaceCharCount % 4 === 1) { throw lazyDOMException( 'The string to be decoded is not correctly encoded.', 'ValidationError'); } - for (let n = 0; n < input.length; n++) { - if (!ArrayPrototypeIncludes(kForgivingBase64AllowedChars, - StringPrototypeCharCodeAt(input, n))) - throw lazyDOMException('Invalid character', 'InvalidCharacterError'); - } return Buffer.from(input, 'base64').toString('latin1'); } diff --git a/test/parallel/test-btoa-atob.js b/test/parallel/test-btoa-atob.js index 3a9786f7a62111..988f8a0f74fc0b 100644 --- a/test/parallel/test-btoa-atob.js +++ b/test/parallel/test-btoa-atob.js @@ -16,11 +16,16 @@ throws(() => buffer.btoa(), /TypeError/); strictEqual(atob(' '), ''); strictEqual(atob(' YW\tJ\njZA=\r= '), 'abcd'); -throws(() => buffer.atob(undefined), /ValidationError/); -throws(() => buffer.atob(false), /ValidationError/); -throws(() => buffer.atob(1), /ValidationError/); -throws(() => buffer.atob(0), /ValidationError/); -throws(() => buffer.atob('a'), /ValidationError/); -throws(() => buffer.atob('a '), /ValidationError/); -throws(() => buffer.atob(' a'), /ValidationError/); -throws(() => buffer.atob('aaaaa'), /ValidationError/); +strictEqual(atob(null), '\x9Eée'); +strictEqual(atob(NaN), '5£'); +strictEqual(atob(Infinity), '"wâ\x9E+r'); +strictEqual(atob(true), '¶»\x9E'); +strictEqual(atob(1234), '×mø'); +strictEqual(atob([]), ''); +strictEqual(atob({ toString: () => '' }), ''); +strictEqual(atob({ [Symbol.toPrimitive]: () => '' }), ''); + +throws(() => atob(), /ERR_MISSING_ARGS/); +throws(() => atob(Symbol()), /TypeError/); +[undefined, false, () => {}, 0, 1, 0n, 1n, -Infinity, [1], {}].forEach((value) => + throws(() => atob(value), { constructor: DOMException })); From a85b856c74fdd9dd7e08fb3d4423ca13d1762af2 Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Sat, 9 Apr 2022 09:09:54 -0400 Subject: [PATCH 3/6] buffer: switch to using InvalidCharacterError on encoding error * Chrome and Firefox both use `InvalidCharacterError` when the string to be decoded is not correctly encoded * Remove unnecessary atob test --- lib/buffer.js | 2 +- test/parallel/test-btoa-atob.js | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index 69f9560c19b9e3..f7233c3aad9750 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1282,7 +1282,7 @@ function atob(input) { if (nonAsciiWhitespaceCharCount % 4 === 1) { throw lazyDOMException( 'The string to be decoded is not correctly encoded.', - 'ValidationError'); + 'InvalidCharacterError'); } return Buffer.from(input, 'base64').toString('latin1'); diff --git a/test/parallel/test-btoa-atob.js b/test/parallel/test-btoa-atob.js index 988f8a0f74fc0b..38617892552691 100644 --- a/test/parallel/test-btoa-atob.js +++ b/test/parallel/test-btoa-atob.js @@ -25,7 +25,6 @@ strictEqual(atob([]), ''); strictEqual(atob({ toString: () => '' }), ''); strictEqual(atob({ [Symbol.toPrimitive]: () => '' }), ''); -throws(() => atob(), /ERR_MISSING_ARGS/); throws(() => atob(Symbol()), /TypeError/); [undefined, false, () => {}, 0, 1, 0n, 1n, -Infinity, [1], {}].forEach((value) => throws(() => atob(value), { constructor: DOMException })); From a5828d83654250d39db90a42bd1594ea743bc1a1 Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Sat, 9 Apr 2022 10:19:20 -0400 Subject: [PATCH 4/6] buffer: update character validation approach in atob * Small optimization in atob validation by assuming positions of whitespace characters in allowed chars array * Improved tests for ASCII whitespace characters * Explicitly validate properties of `DOMException` in tests --- lib/buffer.js | 16 ++++++++-------- test/parallel/test-btoa-atob.js | 15 ++++++++++++--- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index f7233c3aad9750..8b12782c87c4d6 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -26,7 +26,7 @@ const { ArrayFrom, ArrayIsArray, ArrayPrototypeForEach, - ArrayPrototypeIncludes, + ArrayPrototypeIndexOf, MathFloor, MathMin, MathTrunc, @@ -1232,14 +1232,12 @@ function btoa(input) { return buf.toString('base64'); } -const asciiWhitespaceCharacters = [ +// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode +const kForgivingBase64AllowedChars = [ // ASCII whitespace // Refs: https://infra.spec.whatwg.org/#ascii-whitespace 0x09, 0x0A, 0x0C, 0x0D, 0x20, -]; -// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode -const kForgivingBase64AllowedChars = [ // Uppercase letters ...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i), @@ -1266,11 +1264,13 @@ function atob(input) { let nonAsciiWhitespaceCharCount = 0; for (let n = 0; n < input.length; n++) { - const char = StringPrototypeCharCodeAt(input, n); + const index = ArrayPrototypeIndexOf( + kForgivingBase64AllowedChars, + StringPrototypeCharCodeAt(input, n)); - if (ArrayPrototypeIncludes(kForgivingBase64AllowedChars, char)) { + if (index > 4) { // The first five char are ASCII whitespace. nonAsciiWhitespaceCharCount++; - } else if (!ArrayPrototypeIncludes(asciiWhitespaceCharacters, char)) { + } else if (index === -1) { throw lazyDOMException('Invalid character', 'InvalidCharacterError'); } } diff --git a/test/parallel/test-btoa-atob.js b/test/parallel/test-btoa-atob.js index 38617892552691..abf05adeef1042 100644 --- a/test/parallel/test-btoa-atob.js +++ b/test/parallel/test-btoa-atob.js @@ -14,7 +14,7 @@ throws(() => buffer.atob(), /TypeError/); throws(() => buffer.btoa(), /TypeError/); strictEqual(atob(' '), ''); -strictEqual(atob(' YW\tJ\njZA=\r= '), 'abcd'); +strictEqual(atob(' Y\fW\tJ\njZ A=\r= '), 'abcd'); strictEqual(atob(null), '\x9Eée'); strictEqual(atob(NaN), '5£'); @@ -26,5 +26,14 @@ strictEqual(atob({ toString: () => '' }), ''); strictEqual(atob({ [Symbol.toPrimitive]: () => '' }), ''); throws(() => atob(Symbol()), /TypeError/); -[undefined, false, () => {}, 0, 1, 0n, 1n, -Infinity, [1], {}].forEach((value) => - throws(() => atob(value), { constructor: DOMException })); +[ + undefined, false, () => {}, {}, [1], + 0, 1, 0n, 1n, -Infinity, + 'a', 'a\n\n\n', '\ra\r\r', ' a ', '\t\t\ta', 'a\f\f\f', '\ta\r \n\f', +].forEach((value) => + // See #2 - https://html.spec.whatwg.org/multipage/webappapis.html#dom-atob + throws(() => atob(value), { + constructor: DOMException, + name: 'InvalidCharacterError', + code: 5, + })); From 48d6cee24299f00c9cf707a81e27f3f80592af5e Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Sat, 9 Apr 2022 10:35:37 -0400 Subject: [PATCH 5/6] buffer: improve comments in atob function --- lib/buffer.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/buffer.js b/lib/buffer.js index 8b12782c87c4d6..1c3dc117d958bb 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1268,17 +1268,15 @@ function atob(input) { kForgivingBase64AllowedChars, StringPrototypeCharCodeAt(input, n)); - if (index > 4) { // The first five char are ASCII whitespace. + if (index > 4) { + // The first 5 elements of `kForgivingBase64AllowedChars` are + // ASCII whitespace char codes. nonAsciiWhitespaceCharCount++; } else if (index === -1) { throw lazyDOMException('Invalid character', 'InvalidCharacterError'); } } - // If data's code point length divides by 4 leaving a remainder of 1, then - // return failure. - // - // See #3 - https://infra.spec.whatwg.org/#forgiving-base64 if (nonAsciiWhitespaceCharCount % 4 === 1) { throw lazyDOMException( 'The string to be decoded is not correctly encoded.', From 5cee1b4937c4d9562e44a7cab6d263e23150ba38 Mon Sep 17 00:00:00 2001 From: Austin Kelleher Date: Sat, 9 Apr 2022 10:42:26 -0400 Subject: [PATCH 6/6] buffer: revive comment about code point length validation in atob --- lib/buffer.js | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/buffer.js b/lib/buffer.js index 1c3dc117d958bb..bdf01156824180 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -1277,6 +1277,7 @@ function atob(input) { } } + // See #3 - https://infra.spec.whatwg.org/#forgiving-base64 if (nonAsciiWhitespaceCharCount % 4 === 1) { throw lazyDOMException( 'The string to be decoded is not correctly encoded.',