Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util: graduate TextEncoder/TextDecoder, tests #15743

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions doc/api/util.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ see [Custom promisified functions][].
added: v8.3.0
-->

> Stability: 1 - Experimental

An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.

```js
Expand Down Expand Up @@ -690,8 +688,6 @@ mark.
added: v8.3.0
-->

> Stability: 1 - Experimental

An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
instances of `TextEncoder` only support UTF-8 encoding.

Expand Down
20 changes: 0 additions & 20 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');

let warned = false;
const experimental =
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
'should not yet be used in production applications.';

const {
getConstructorOf,
customInspectSymbol: inspect
Expand Down Expand Up @@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {

class TextEncoder {
constructor() {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

this[kEncoder] = true;
}

Expand Down Expand Up @@ -353,11 +343,6 @@ function makeTextDecoderICU() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down Expand Up @@ -430,11 +415,6 @@ function makeTextDecoderJS() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down
76 changes: 76 additions & 0 deletions test/parallel/test-whatwg-encoding-fatal-streaming.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html

const common = require('../common');

if (!common.hasIntl)
common.skip('missing Intl');

const assert = require('assert');
const {
TextDecoder
} = require('util');


// Fatal flag, one-shot decoding: each input below is a truncated or invalid
// sequence for its encoding. A decoder created with `fatal: true` must throw,
// while a default decoder must yield a single U+FFFD replacement character.
{
  [
    { encoding: 'utf-8', sequence: [0xC0] },
    { encoding: 'utf-16le', sequence: [0x00] },
    { encoding: 'utf-16be', sequence: [0x00] }
  ].forEach((testCase) => {
    // Hand the byte list straight to the typed-array constructor. Wrapping it
    // in another array ([[0xC0]]) only produced the right byte by accident,
    // through array-to-number coercion of a one-element list; any multi-byte
    // sequence would have collapsed to a single 0 byte.
    const data = new Uint8Array(testCase.sequence);
    common.expectsError(
      () => {
        const decoder = new TextDecoder(testCase.encoding, { fatal: true });
        decoder.decode(data);
      }, {
        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
        type: TypeError,
        message:
          `The encoded data was not valid for encoding ${testCase.encoding}`
      }
    );

    assert.strictEqual(
      new TextDecoder(testCase.encoding).decode(data),
      '\uFFFD'
    );
  });
}

// Fatal flag, streaming: a single utf-16le decoder is reused for every step,
// so the order of the calls below matters.
{
  const fatalDecoder = new TextDecoder('utf-16le', { fatal: true });
  const oneByte = new Uint8Array([0x00]);
  const twoBytes = new Uint8Array([0x00, 0x00]);

  // Half a code unit is buffered while streaming; the second half completes it.
  assert.strictEqual(fatalDecoder.decode(oneByte, { stream: true }), '');
  assert.strictEqual(fatalDecoder.decode(oneByte), '\u0000');

  // Both orderings leave an odd number of bytes when the stream is flushed,
  // which a fatal decoder has to reject.
  [
    [twoBytes, oneByte],
    [oneByte, twoBytes]
  ].forEach(([streamed, flushed]) => {
    common.expectsError(
      () => {
        fatalDecoder.decode(streamed, { stream: true });
        fatalDecoder.decode(flushed);
      }, {
        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
        type: TypeError,
        message:
          'The encoded data was not valid for encoding utf-16le'
      }
    );
  });

  // The decoder must still be usable for well-formed input afterwards.
  assert.strictEqual(fatalDecoder.decode(twoBytes, { stream: true }), '\u0000');
  assert.strictEqual(fatalDecoder.decode(twoBytes), '\u0000');
}
3 changes: 2 additions & 1 deletion test/parallel/test-whatwg-encoding-internals.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Flags: --expose-internals
'use strict';

require('../common');
const common = require('../common');

const assert = require('assert');
const { getEncodingFromLabel } = require('internal/encoding');

Expand Down
56 changes: 56 additions & 0 deletions test/parallel/test-whatwg-encoding-surrogates-utf8.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html

// Loaded for its side effects only; nothing in this file references it, so no
// binding is kept (an unused `common` variable would trip the linter).
require('../common');

const assert = require('assert');
const {
TextDecoder,
TextEncoder
} = require('util');

// Byte sequences shared by the expectations below.
const replacementBytes = [0xef, 0xbf, 0xbd]; // U+FFFD encoded as UTF-8
const abcBytes = [0x61, 0x62, 0x63];
const digitBytes = [0x31, 0x32, 0x33];

// Each entry lists a string (most of them containing unpaired surrogates), the
// bytes expected from encoding it, and the text expected from decoding those
// bytes again.
const badStrings = [
  {
    name: 'Sanity check',
    input: 'abc123',
    expected: [...abcBytes, ...digitBytes],
    decoded: 'abc123'
  },
  {
    name: 'Surrogate half (low)',
    input: '\uD800',
    expected: [...replacementBytes],
    decoded: '\uFFFD'
  },
  {
    name: 'Surrogate half (high)',
    input: '\uDC00',
    expected: [...replacementBytes],
    decoded: '\uFFFD'
  },
  {
    name: 'Surrogate half (low), in a string',
    input: 'abc\uD800123',
    expected: [...abcBytes, ...replacementBytes, ...digitBytes],
    decoded: 'abc\uFFFD123'
  },
  {
    name: 'Surrogate half (high), in a string',
    input: 'abc\uDC00123',
    expected: [...abcBytes, ...replacementBytes, ...digitBytes],
    decoded: 'abc\uFFFD123'
  },
  {
    name: 'Wrong order',
    input: '\uDC00\uD800',
    expected: [...replacementBytes, ...replacementBytes],
    decoded: '\uFFFD\uFFFD'
  }
];

// Round-trip every entry: encode, compare the raw bytes, then decode them.
for (const { input, expected, decoded } of badStrings) {
  const bytes = new TextEncoder().encode(input);
  assert.deepStrictEqual(Array.from(bytes), expected);
  assert.strictEqual(new TextDecoder('utf-8').decode(bytes), decoded);
}
93 changes: 93 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-fatal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html

const common = require('../common');

if (!common.hasIntl)
common.skip('missing Intl');

const assert = require('assert');
const {
TextDecoder
} = require('util');

// Malformed inputs, written as [name, bytes] pairs and expanded into
// { encoding, input, name } records. All pairs are UTF-8; the single utf-16le
// case is appended at the end.
const bad = [
  ['invalid code', [0xFF]],
  ['ends early', [0xC0]],
  ['ends early 2', [0xE0]],
  ['invalid trail', [0xC0, 0x00]],
  ['invalid trail 2', [0xC0, 0xC0]],
  ['invalid trail 3', [0xE0, 0x00]],
  ['invalid trail 4', [0xE0, 0xC0]],
  ['invalid trail 5', [0xE0, 0x80, 0x00]],
  ['invalid trail 6', [0xE0, 0x80, 0xC0]],
  ['> 0x10FFFF', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
  ['obsolete lead byte', [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80]],
  // Overlong encodings
  ['overlong U+0000 - 2 bytes', [0xC0, 0x80]],
  ['overlong U+0000 - 3 bytes', [0xE0, 0x80, 0x80]],
  ['overlong U+0000 - 4 bytes', [0xF0, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 5 bytes', [0xF8, 0x80, 0x80, 0x80, 0x80]],
  ['overlong U+0000 - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80]],
  ['overlong U+007F - 2 bytes', [0xC1, 0xBF]],
  ['overlong U+007F - 3 bytes', [0xE0, 0x81, 0xBF]],
  ['overlong U+007F - 4 bytes', [0xF0, 0x80, 0x81, 0xBF]],
  ['overlong U+007F - 5 bytes', [0xF8, 0x80, 0x80, 0x81, 0xBF]],
  ['overlong U+007F - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF]],
  ['overlong U+07FF - 3 bytes', [0xE0, 0x9F, 0xBF]],
  ['overlong U+07FF - 4 bytes', [0xF0, 0x80, 0x9F, 0xBF]],
  ['overlong U+07FF - 5 bytes', [0xF8, 0x80, 0x80, 0x9F, 0xBF]],
  ['overlong U+07FF - 6 bytes', [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF]],
  ['overlong U+FFFF - 4 bytes', [0xF0, 0x8F, 0xBF, 0xBF]],
  ['overlong U+FFFF - 5 bytes', [0xF8, 0x80, 0x8F, 0xBF, 0xBF]],
  ['overlong U+FFFF - 6 bytes', [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF]],
  ['overlong U+10FFFF - 5 bytes', [0xF8, 0x84, 0x8F, 0xBF, 0xBF]],
  ['overlong U+10FFFF - 6 bytes', [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF]],
  // UTF-16 surrogates encoded as code points in UTF-8
  ['lead surrogate', [0xED, 0xA0, 0x80]],
  ['trail surrogate', [0xED, 0xB0, 0x80]],
  ['surrogate pair', [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80]]
].map(([name, input]) => ({ encoding: 'utf-8', input, name })).concat([
  { encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' }
  // Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
  // FIXME: Add legacy encoding cases
]);

// Decoding any entry of `bad` with a fatal decoder is expected to throw a
// TypeError carrying the ERR_ENCODING_INVALID_ENCODED_DATA code.
for (const { encoding, input } of bad) {
  const decodeFatally = () => {
    const decoder = new TextDecoder(encoding, { fatal: true });
    decoder.decode(new Uint8Array(input));
  };
  common.expectsError(decodeFatally, {
    code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
    type: TypeError
  });
}

// The `fatal` attribute exists, is a boolean, is false unless requested, and
// reflects the constructor option.
{
  const defaultDecoder = new TextDecoder();
  const fatalDecoder = new TextDecoder('utf-8', { fatal: true });

  assert('fatal' in defaultDecoder);
  assert.strictEqual(typeof defaultDecoder.fatal, 'boolean');
  assert(!defaultDecoder.fatal);
  assert(fatalDecoder.fatal);
}
50 changes: 50 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html

const common = require('../common');

const assert = require('assert');
const {
TextDecoder
} = require('util');

// BOM-prefixed encodings of "abc", written as
// [encoding, bytes, skipNoIntl] tuples and expanded into records.
// `skipNoIntl` marks the cases that are skipped when Intl is unavailable.
const cases = [
  ['utf-8', [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63], false],
  ['utf-16le', [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00], false],
  ['utf-16be', [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63], true]
].map(([encoding, bytes, skipNoIntl]) => ({ encoding, bytes, skipNoIntl }));

// For every case the decoded text keeps the leading U+FEFF only when
// `ignoreBOM` is true; with the option false or omitted the BOM is dropped.
for (const { encoding, bytes: byteValues, skipNoIntl } of cases) {
  if (skipNoIntl && !common.hasIntl) {
    console.log(`skipping ${encoding} because missing Intl`);
    continue; // skipping
  }

  const keepsBOM = new TextDecoder(encoding, { ignoreBOM: true });
  const input = new Uint8Array(byteValues);
  assert.strictEqual(keepsBOM.decode(input), '\uFEFFabc');

  const stripsBOM = new TextDecoder(encoding, { ignoreBOM: false });
  assert.strictEqual(stripsBOM.decode(input), 'abc');

  const defaultDecoder = new TextDecoder(encoding);
  assert.strictEqual(defaultDecoder.decode(input), 'abc');
}

// The `ignoreBOM` attribute exists, is a boolean, is false unless requested,
// and reflects the constructor option.
{
  const defaultDecoder = new TextDecoder();
  const bomKeepingDecoder = new TextDecoder('utf-8', { ignoreBOM: true });

  assert('ignoreBOM' in defaultDecoder);
  assert.strictEqual(typeof defaultDecoder.ignoreBOM, 'boolean');
  assert(!defaultDecoder.ignoreBOM);
  assert(bomKeepingDecoder.ignoreBOM);
}
Loading