-
Notifications
You must be signed in to change notification settings - Fork 29.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
util: graduate TextEncoder/TextDecoder, tests
Add tests ported from Web Platform Tests. Graduate TextEncoder / TextDecoder from experimental PR-URL: #15743 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Refael Ackermann <refack@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com> Reviewed-By: Timothy Gu <timothygu99@gmail.com>
- Loading branch information
Showing
12 changed files
with
563 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
'use strict'; | ||
|
||
// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html | ||
|
||
const common = require('../common'); | ||
|
||
if (!common.hasIntl) | ||
common.skip('missing Intl'); | ||
|
||
const assert = require('assert'); | ||
const { | ||
TextDecoder | ||
} = require('util'); | ||
|
||
|
||
{ | ||
[ | ||
{ encoding: 'utf-8', sequence: [0xC0] }, | ||
{ encoding: 'utf-16le', sequence: [0x00] }, | ||
{ encoding: 'utf-16be', sequence: [0x00] } | ||
].forEach((testCase) => { | ||
const data = new Uint8Array([testCase.sequence]); | ||
common.expectsError( | ||
() => { | ||
const decoder = new TextDecoder(testCase.encoding, { fatal: true }); | ||
decoder.decode(data); | ||
}, { | ||
code: 'ERR_ENCODING_INVALID_ENCODED_DATA', | ||
type: TypeError, | ||
message: | ||
`The encoded data was not valid for encoding ${testCase.encoding}` | ||
} | ||
); | ||
|
||
assert.strictEqual( | ||
new TextDecoder(testCase.encoding).decode(data), | ||
'\uFFFD' | ||
); | ||
}); | ||
} | ||
|
||
// Streaming checks for a fatal utf-16le decoder. The SAME decoder instance | ||
// is reused for every call below, so the order of the statements matters. | ||
{ | ||
const decoder = new TextDecoder('utf-16le', { fatal: true }); | ||
// `odd` holds one byte (half of a 16-bit code unit), `even` holds two. | ||
const odd = new Uint8Array([0x00]); | ||
const even = new Uint8Array([0x00, 0x00]); | ||
|
||
// A lone byte fed with { stream: true } yields no output yet; the second | ||
// lone byte then produces U+0000. | ||
assert.strictEqual(decoder.decode(odd, { stream: true }), ''); | ||
assert.strictEqual(decoder.decode(odd), '\u0000'); | ||
|
||
// Two streamed bytes followed by a final single byte: three bytes in | ||
// total, so the final non-streaming call is expected to throw. | ||
common.expectsError( | ||
() => { | ||
decoder.decode(even, { stream: true }); | ||
decoder.decode(odd); | ||
}, { | ||
code: 'ERR_ENCODING_INVALID_ENCODED_DATA', | ||
type: TypeError, | ||
message: | ||
'The encoded data was not valid for encoding utf-16le' | ||
} | ||
); | ||
|
||
// One streamed byte followed by a final pair: again three bytes in total, | ||
// and the final call is expected to throw. | ||
// NOTE(review): this presumes the previous failure left no pending bytes | ||
// in the decoder - confirm against the implementation. | ||
common.expectsError( | ||
() => { | ||
decoder.decode(odd, { stream: true }); | ||
decoder.decode(even); | ||
}, { | ||
code: 'ERR_ENCODING_INVALID_ENCODED_DATA', | ||
type: TypeError, | ||
message: | ||
'The encoded data was not valid for encoding utf-16le' | ||
} | ||
); | ||
|
||
// Whole code units decode to U+0000 both in streaming and in final mode, | ||
// even after the failures above. | ||
assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000'); | ||
assert.strictEqual(decoder.decode(even), '\u0000'); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
'use strict'; | ||
|
||
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html | ||
|
||
require('../common'); | ||
|
||
const assert = require('assert'); | ||
const { | ||
TextDecoder, | ||
TextEncoder | ||
} = require('util'); | ||
|
||
// Fixture table. Each entry gives an `input` string, the `expected` UTF-8 | ||
// bytes for it, the `decoded` string expected when those bytes are decoded | ||
// again, and a descriptive `name`. | ||
// Every unpaired surrogate in `input` is expected to come out as the bytes | ||
// EF BF BD, which decode to U+FFFD. | ||
// NOTE(review): '\uD800' is labelled "(low)" and '\uDC00' "(high)"; the | ||
// labels look like they were kept from the upstream test - confirm before | ||
// relying on them. | ||
const badStrings = [ | ||
{ | ||
input: 'abc123', | ||
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33], | ||
decoded: 'abc123', | ||
name: 'Sanity check' | ||
}, | ||
{ | ||
input: '\uD800', | ||
expected: [0xef, 0xbf, 0xbd], | ||
decoded: '\uFFFD', | ||
name: 'Surrogate half (low)' | ||
}, | ||
{ | ||
input: '\uDC00', | ||
expected: [0xef, 0xbf, 0xbd], | ||
decoded: '\uFFFD', | ||
name: 'Surrogate half (high)' | ||
}, | ||
{ | ||
input: 'abc\uD800123', | ||
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], | ||
decoded: 'abc\uFFFD123', | ||
name: 'Surrogate half (low), in a string' | ||
}, | ||
{ | ||
input: 'abc\uDC00123', | ||
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], | ||
decoded: 'abc\uFFFD123', | ||
name: 'Surrogate half (high), in a string' | ||
}, | ||
{ | ||
input: '\uDC00\uD800', | ||
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd], | ||
decoded: '\uFFFD\uFFFD', | ||
name: 'Wrong order' | ||
} | ||
]; | ||
|
||
badStrings.forEach((t) => { | ||
const encoded = new TextEncoder().encode(t.input); | ||
assert.deepStrictEqual([].slice.call(encoded), t.expected); | ||
assert.strictEqual(new TextDecoder('utf-8').decode(encoded), t.decoded); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
'use strict'; | ||
|
||
// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html | ||
|
||
const common = require('../common'); | ||
|
||
if (!common.hasIntl) | ||
common.skip('missing Intl'); | ||
|
||
const assert = require('assert'); | ||
const { | ||
TextDecoder | ||
} = require('util'); | ||
|
||
// Fixture table of byte sequences that a fatal decoder is expected to | ||
// reject. Each entry names the `encoding`, the raw `input` bytes and a | ||
// descriptive `name`. | ||
// The '> 0x10FFFF' entry and the 'overlong U+0000 - 6 bytes' entry carry | ||
// the same input bytes. | ||
const bad = [ | ||
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' }, | ||
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' }, | ||
{ encoding: 'utf-8', input: [0xE0], name: 'ends early 2' }, | ||
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' }, | ||
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], | ||
name: '> 0x10FFFF' }, | ||
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], | ||
name: 'obsolete lead byte' }, | ||
// Overlong encodings | ||
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80], | ||
name: 'overlong U+0000 - 3 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], | ||
name: 'overlong U+0000 - 4 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], | ||
name: 'overlong U+0000 - 5 bytes' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], | ||
name: 'overlong U+0000 - 6 bytes' }, | ||
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], | ||
name: 'overlong U+007F - 3 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], | ||
name: 'overlong U+007F - 4 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], | ||
name: 'overlong U+007F - 5 bytes' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], | ||
name: 'overlong U+007F - 6 bytes' }, | ||
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], | ||
name: 'overlong U+07FF - 3 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], | ||
name: 'overlong U+07FF - 4 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], | ||
name: 'overlong U+07FF - 5 bytes' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], | ||
name: 'overlong U+07FF - 6 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], | ||
name: 'overlong U+FFFF - 4 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], | ||
name: 'overlong U+FFFF - 5 bytes' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], | ||
name: 'overlong U+FFFF - 6 bytes' }, | ||
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], | ||
name: 'overlong U+10FFFF - 5 bytes' }, | ||
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], | ||
name: 'overlong U+10FFFF - 6 bytes' }, | ||
// UTF-16 surrogates encoded as code points in UTF-8 | ||
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' }, | ||
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' }, | ||
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], | ||
name: 'surrogate pair' }, | ||
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' }, | ||
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html | ||
// FIXME: Add legacy encoding cases | ||
]; | ||
|
||
bad.forEach((t) => { | ||
common.expectsError( | ||
() => { | ||
new TextDecoder(t.encoding, { fatal: true }) | ||
.decode(new Uint8Array(t.input)); | ||
}, { | ||
code: 'ERR_ENCODING_INVALID_ENCODED_DATA', | ||
type: TypeError | ||
} | ||
); | ||
}); | ||
|
||
{ | ||
assert('fatal' in new TextDecoder()); | ||
assert.strictEqual(typeof new TextDecoder().fatal, 'boolean'); | ||
assert(!new TextDecoder().fatal); | ||
assert(new TextDecoder('utf-8', { fatal: true }).fatal); | ||
} |
50 changes: 50 additions & 0 deletions
50
test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
'use strict'; | ||
|
||
// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html | ||
|
||
const common = require('../common'); | ||
|
||
const assert = require('assert'); | ||
const { | ||
TextDecoder | ||
} = require('util'); | ||
|
||
// Fixture table: for each `encoding`, `bytes` holds a byte order mark | ||
// followed by the encoded text "abc". `skipNoIntl` marks cases that the | ||
// loop below skips when `common.hasIntl` is falsy. | ||
const cases = [ | ||
{ | ||
encoding: 'utf-8', | ||
bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63], | ||
skipNoIntl: false | ||
}, | ||
{ | ||
encoding: 'utf-16le', | ||
bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00], | ||
skipNoIntl: false | ||
}, | ||
{ | ||
encoding: 'utf-16be', | ||
bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63], | ||
skipNoIntl: true | ||
} | ||
]; | ||
|
||
cases.forEach((testCase) => { | ||
if (testCase.skipNoIntl && !common.hasIntl) { | ||
console.log(`skipping ${testCase.encoding} because missing Intl`); | ||
return; // skipping | ||
} | ||
const BOM = '\uFEFF'; | ||
let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true }); | ||
const bytes = new Uint8Array(testCase.bytes); | ||
assert.strictEqual(decoder.decode(bytes), `${BOM}abc`); | ||
decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false }); | ||
assert.strictEqual(decoder.decode(bytes), 'abc'); | ||
decoder = new TextDecoder(testCase.encoding); | ||
assert.strictEqual(decoder.decode(bytes), 'abc'); | ||
}); | ||
|
||
{ | ||
assert('ignoreBOM' in new TextDecoder()); | ||
assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean'); | ||
assert(!new TextDecoder().ignoreBOM); | ||
assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM); | ||
} |
Oops, something went wrong.