Skip to content

Commit

Permalink
test: update wpt encoding
Browse files Browse the repository at this point in the history
Refs: web-platform-tests/wpt#26385

PR-URL: #36659
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
  • Loading branch information
watilde authored and danielleadams committed Jan 12, 2021
1 parent 986d5ac commit 4acc273
Show file tree
Hide file tree
Showing 7 changed files with 374 additions and 2 deletions.
2 changes: 1 addition & 1 deletion test/fixtures/wpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.
Last update:

- console: https://github.com/web-platform-tests/wpt/tree/3b1f72e99a/console
- encoding: https://github.com/web-platform-tests/wpt/tree/1821fb5f77/encoding
- encoding: https://github.com/web-platform-tests/wpt/tree/3c9820d1cc/encoding
- url: https://github.com/web-platform-tests/wpt/tree/1783c9bccf/url
- resources: https://github.com/web-platform-tests/wpt/tree/001e50de41/resources
- interfaces: https://github.com/web-platform-tests/wpt/tree/8719553b2d/interfaces
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<!doctype html>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script src=resources/ranges.js></script>
<script>
// Registers one testharness test asserting that the byte values in `input`
// decode to the string `output` under both the "gb18030" and "gbk" labels.
// `desc` is appended to the "gb18030 decoder: " test-name prefix.
const decode = (input, output, desc) => {
  test(function() {
    // `const` keeps the loop variable block-scoped. The previous undeclared
    // `for (encoding of ...)` created an implicit global and would throw a
    // ReferenceError if this script were ever evaluated in strict mode.
    for (const encoding of ["gb18030", "gbk"]) {
      assert_equals(new TextDecoder(encoding).decode(new Uint8Array(input)), output);
    }
  }, "gb18030 decoder: " + desc);
};

// Hand-written decoder cases: decode(bytes, expected string, description).
decode([115], "s", "ASCII");
decode([0x80], "\u20AC", "euro");
decode([0xFF], "\uFFFD", "initial byte out of accepted ranges");
decode([0x81], "\uFFFD", "end of queue, gb18030 first not 0");
decode([0x81, 0x28], "\ufffd(", "two bytes 0x81 0x28");
decode([0x81, 0x40], "\u4E02", "two bytes 0x81 0x40");
decode([0x81, 0x7E], "\u4E8A", "two bytes 0x81 0x7e");
decode([0x81, 0x7F], "\ufffd\u007f", "two bytes 0x81 0x7f");
decode([0x81, 0x80], "\u4E90", "two bytes 0x81 0x80");
decode([0x81, 0xFE], "\u4FA2", "two bytes 0x81 0xFE");
decode([0x81, 0xFF], "\ufffd", "two bytes 0x81 0xFF");
decode([0xFE, 0x40], "\uFA0C", "two bytes 0xFE 0x40");
decode([0xFE, 0xFE], "\uE4C5", "two bytes 0xFE 0xFE");
decode([0xFE, 0xFF], "\ufffd", "two bytes 0xFE 0xFF");
decode([0x81, 0x30], "\ufffd", "two bytes 0x81 0x30");
decode([0x81, 0x30, 0xFE], "\ufffd", "three bytes 0x81 0x30 0xFE");
decode([0x81, 0x30, 0xFF], "\ufffd0\ufffd", "three bytes 0x81 0x30 0xFF");
decode([0x81, 0x30, 0xFE, 0x29], "\ufffd0\ufffd)", "four bytes 0x81 0x30 0xFE 0x29");
decode([0xFE, 0x39, 0xFE, 0x39], "\ufffd", "four bytes 0xFE 0x39 0xFE 0x39");
// Four-byte cases labelled by pointer, where
//   pointer = (b1 - 0x81) * 12600 + (b2 - 0x30) * 1260 + (b3 - 0x81) * 10 + (b4 - 0x30)
// (the inverse of the byte computation used for the range cases in this file).
// The labels for 7456, 7458 and 188999 previously stated pointers that did
// not match their byte sequences.
decode([0x81, 0x35, 0xF4, 0x36], "\u1E3E", "pointer 7456");
decode([0x81, 0x35, 0xF4, 0x37], "\ue7c7", "pointer 7457");
decode([0x81, 0x35, 0xF4, 0x38], "\u1E40", "pointer 7458");
decode([0x84, 0x31, 0xA4, 0x39], "\uffff", "pointer 39419");
decode([0x84, 0x31, 0xA5, 0x30], "\ufffd", "pointer 39420");
decode([0x8F, 0x39, 0xFE, 0x39], "\ufffd", "pointer 188999");
decode([0x90, 0x30, 0x81, 0x30], "\u{10000}", "pointer 189000");
decode([0xE3, 0x32, 0x9A, 0x35], "\u{10FFFF}", "pointer 1237575");
decode([0xE3, 0x32, 0x9A, 0x36], "\ufffd", "pointer 1237576");
decode([0x83, 0x36, 0xC8, 0x30], "\uE7C8", "legacy ICU special case 1");
decode([0xA1, 0xAD], "\u2026", "legacy ICU special case 2");
decode([0xA1, 0xAB], "\uFF5E", "legacy ICU special case 3");

// One decoder test per entry of `ranges`: the entry's pointer is expanded into
// its four-byte form and must decode to the entry's string.
let i = 0;
ranges.forEach(([pointer, expected]) => {
  const first = Math.floor(pointer / 12600) + 0x81;
  const second = Math.floor((pointer % 12600) / 1260) + 0x30;
  const third = Math.floor((pointer % 1260) / 10) + 0x81;
  const fourth = pointer % 10 + 0x30;
  decode([first, second, third, fourth], expected, "range " + i++);
});
</script>
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<!doctype html>
<meta charset=gb18030>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script src=resources/ranges.js></script>
<script>
// Registers one testharness test that assigns `input` as the query of an
// <a> element's URL and asserts that the serialized query (without its
// leading "?") equals `output`. `desc` is appended to the
// "gb18030 encoder: " test-name prefix.
const encode = (input, output, desc) => {
  test(function() {
    const a = document.createElement("a"); // <a> uses document encoding for URL's query
    a.href = "https://example.com/?" + input;
    // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
    assert_equals(a.search.slice(1), output);
  }, "gb18030 encoder: " + desc);
};

// Hand-written encoder cases: [input, expected query string, description].
for (const [input, output, desc] of [
  ["s", "s", "very basic"],
  ["\u20AC", "%A2%E3", "Euro"],
  ["\u4E02", "%81@", "character"],
  ["\uE4C6", "%A1@", "PUA"],
  ["\uE4C5", "%FE%FE", "PUA #2"],
  ["\uE5E5", "%26%2358853%3B", "PUA #3"],
  ["\ud83d\udca9", "%949%DA3", "poo"],
  ["\uE7C7", "%815%F47", "Ranges pointer special case"],
  ["\uE7C8", "%836%C80", "legacy ICU special case 1"],
  ["\u2026", "%A1%AD", "legacy ICU special case 2"],
  ["\uFF5E", "%A1%AB", "legacy ICU special case 3"],
]) {
  encode(input, output, desc);
}

// Returns the floor of `x` written in upper-case hexadecimal.
const upperCaseNibble = x => Math.floor(x).toString(16).toUpperCase();

// Builds the expected query string for a four-byte pointer: the first and
// third bytes are written as "%XX" (upper-case hex), the second and fourth
// bytes as the literal character with that char code.
const encodePointer = pointer => {
  const percentEncode = byte =>
    "%" + upperCaseNibble(byte / 16) + upperCaseNibble(byte % 16);

  const first = Math.floor(pointer / 12600) + 0x81;
  const second = Math.floor((pointer % 12600) / 1260) + 0x30;
  const third = Math.floor((pointer % 1260) / 10) + 0x81;
  const fourth = pointer % 10 + 0x30;

  return [
    percentEncode(first),
    String.fromCharCode(second),
    percentEncode(third),
    String.fromCharCode(fourth),
  ].join("");
};

// One encoder test per entry of `ranges`: the entry's string must encode to
// the query string derived from the entry's pointer.
let i = 0;
ranges.forEach(([pointer, text]) => {
  encode(text, encodePointer(pointer), "range " + i++);
});
</script>
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Based on https://encoding.spec.whatwg.org/index-gb18030-ranges.txt
//
// Each entry is a two-element array [pointer, string]: the tests that load
// this file read element 0 as a numeric pointer (which they expand into a
// four-byte sequence) and element 1 as the string expected for that pointer.
// Entries are listed in ascending pointer order.
const ranges = [
[0, "\u0080"],
[36, "\u00A5"],
[38, "\u00A9"],
[45, "\u00B2"],
[50, "\u00B8"],
[81, "\u00D8"],
[89, "\u00E2"],
[95, "\u00EB"],
[96, "\u00EE"],
[100, "\u00F4"],
[103, "\u00F8"],
[104, "\u00FB"],
[105, "\u00FD"],
[109, "\u0102"],
[126, "\u0114"],
[133, "\u011C"],
[148, "\u012C"],
[172, "\u0145"],
[175, "\u0149"],
[179, "\u014E"],
[208, "\u016C"],
[306, "\u01CF"],
[307, "\u01D1"],
[308, "\u01D3"],
[309, "\u01D5"],
[310, "\u01D7"],
[311, "\u01D9"],
[312, "\u01DB"],
[313, "\u01DD"],
[341, "\u01FA"],
[428, "\u0252"],
[443, "\u0262"],
[544, "\u02C8"],
[545, "\u02CC"],
[558, "\u02DA"],
[741, "\u03A2"],
[742, "\u03AA"],
[749, "\u03C2"],
[750, "\u03CA"],
[805, "\u0402"],
[819, "\u0450"],
[820, "\u0452"],
[7922, "\u2011"],
[7924, "\u2017"],
[7925, "\u201A"],
[7927, "\u201E"],
[7934, "\u2027"],
[7943, "\u2031"],
[7944, "\u2034"],
[7945, "\u2036"],
[7950, "\u203C"],
[8062, "\u20AD"],
[8148, "\u2104"],
[8149, "\u2106"],
[8152, "\u210A"],
[8164, "\u2117"],
[8174, "\u2122"],
[8236, "\u216C"],
[8240, "\u217A"],
[8262, "\u2194"],
[8264, "\u219A"],
[8374, "\u2209"],
[8380, "\u2210"],
[8381, "\u2212"],
[8384, "\u2216"],
[8388, "\u221B"],
[8390, "\u2221"],
[8392, "\u2224"],
[8393, "\u2226"],
[8394, "\u222C"],
[8396, "\u222F"],
[8401, "\u2238"],
[8406, "\u223E"],
[8416, "\u2249"],
[8419, "\u224D"],
[8424, "\u2253"],
[8437, "\u2262"],
[8439, "\u2268"],
[8445, "\u2270"],
[8482, "\u2296"],
[8485, "\u229A"],
[8496, "\u22A6"],
[8521, "\u22C0"],
[8603, "\u2313"],
[8936, "\u246A"],
[8946, "\u249C"],
[9046, "\u254C"],
[9050, "\u2574"],
[9063, "\u2590"],
[9066, "\u2596"],
[9076, "\u25A2"],
[9092, "\u25B4"],
[9100, "\u25BE"],
[9108, "\u25C8"],
[9111, "\u25CC"],
[9113, "\u25D0"],
[9131, "\u25E6"],
[9162, "\u2607"],
[9164, "\u260A"],
[9218, "\u2641"],
[9219, "\u2643"],
[11329, "\u2E82"],
[11331, "\u2E85"],
[11334, "\u2E89"],
[11336, "\u2E8D"],
[11346, "\u2E98"],
[11361, "\u2EA8"],
[11363, "\u2EAB"],
[11366, "\u2EAF"],
[11370, "\u2EB4"],
[11372, "\u2EB8"],
[11375, "\u2EBC"],
[11389, "\u2ECB"],
[11682, "\u2FFC"],
[11686, "\u3004"],
[11687, "\u3018"],
[11692, "\u301F"],
[11694, "\u302A"],
[11714, "\u303F"],
[11716, "\u3094"],
[11723, "\u309F"],
[11725, "\u30F7"],
[11730, "\u30FF"],
[11736, "\u312A"],
[11982, "\u322A"],
[11989, "\u3232"],
[12102, "\u32A4"],
[12336, "\u3390"],
[12348, "\u339F"],
[12350, "\u33A2"],
[12384, "\u33C5"],
[12393, "\u33CF"],
[12395, "\u33D3"],
[12397, "\u33D6"],
[12510, "\u3448"],
[12553, "\u3474"],
[12851, "\u359F"],
[12962, "\u360F"],
[12973, "\u361B"],
[13738, "\u3919"],
[13823, "\u396F"],
[13919, "\u39D1"],
[13933, "\u39E0"],
[14080, "\u3A74"],
[14298, "\u3B4F"],
[14585, "\u3C6F"],
[14698, "\u3CE1"],
[15583, "\u4057"],
[15847, "\u4160"],
[16318, "\u4338"],
[16434, "\u43AD"],
[16438, "\u43B2"],
[16481, "\u43DE"],
[16729, "\u44D7"],
[17102, "\u464D"],
[17122, "\u4662"],
[17315, "\u4724"],
[17320, "\u472A"],
[17402, "\u477D"],
[17418, "\u478E"],
[17859, "\u4948"],
[17909, "\u497B"],
[17911, "\u497E"],
[17915, "\u4984"],
[17916, "\u4987"],
[17936, "\u499C"],
[17939, "\u49A0"],
[17961, "\u49B8"],
[18664, "\u4C78"],
[18703, "\u4CA4"],
[18814, "\u4D1A"],
[18962, "\u4DAF"],
[19043, "\u9FA6"],
[33469, "\uE76C"],
[33470, "\uE7C8"],
[33471, "\uE7E7"],
[33484, "\uE815"],
[33485, "\uE819"],
[33490, "\uE81F"],
[33497, "\uE827"],
[33501, "\uE82D"],
[33505, "\uE833"],
[33513, "\uE83C"],
[33520, "\uE844"],
[33536, "\uE856"],
[33550, "\uE865"],
[37845, "\uF92D"],
[37921, "\uF97A"],
[37948, "\uF996"],
[38029, "\uF9E8"],
[38038, "\uF9F2"],
[38064, "\uFA10"],
[38065, "\uFA12"],
[38066, "\uFA15"],
[38069, "\uFA19"],
[38075, "\uFA22"],
[38076, "\uFA25"],
[38078, "\uFA2A"],
[39108, "\uFE32"],
[39109, "\uFE45"],
[39113, "\uFE53"],
[39114, "\uFE58"],
[39115, "\uFE67"],
[39116, "\uFE6C"],
[39265, "\uFF5F"],
[39394, "\uFFE6"],
[189000, "\u{10000}"]
];
33 changes: 33 additions & 0 deletions test/fixtures/wpt/encoding/legacy-mb-schinese/gbk/gbk-decoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!doctype html>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script>
// Pointer values exercised below; entry i pairs with codePoints[i].
const gbkPointers = [
  6432, 7533, 7536, 7672, 7673, 7674, 7675, 7676, 7677, 7678, 7679, 7680, 7681, 7682, 7683, 7684,
  23766, 23770, 23771, 23772, 23773, 23774, 23776, 23777, 23778, 23779, 23780, 23781, 23782, 23784, 23785, 23786,
  23787, 23790, 23791, 23792, 23793, 23796, 23797, 23798, 23799, 23800, 23801, 23802, 23803, 23805, 23806, 23807,
  23808, 23809, 23810, 23811, 23813, 23814, 23815, 23816, 23817, 23818, 23819, 23820, 23821, 23822, 23823, 23824,
  23825, 23826, 23827, 23828, 23831, 23832, 23833, 23834, 23835, 23836, 23837, 23838, 23839, 23840, 23841, 23842,
  23843, 23844
];
// Code point expected when decoding the pointer at the same index of gbkPointers.
const codePoints = [
  0x20ac, 0x1e3f, 0x01f9, 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
  0x2e81, 0x2e84, 0x3473, 0x3447, 0x2e88, 0x2e8b, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e, 0x3918, 0x39cf, 0x39df, 0x3a73,
  0x39d0, 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x2eaa, 0x4056, 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x43b1, 0x43ac, 0x2ebb,
  0x43dd, 0x44d6, 0x4661, 0x464c, 0x4723, 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982, 0x4983, 0x4985, 0x4986,
  0x499f, 0x499b, 0x49b7, 0x49b6, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13, 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18,
  0x4d19, 0x4dae
];

// For every pointer, build its two-byte sequence and check that the "GBK"
// decoder produces the paired code point.
for (let i = 0; i < gbkPointers.length; i++) {
  const pointer = gbkPointers[i];
  test(function() {
    // Floor the quotient so the lead byte is an integer in its own right.
    // The unfloored value only worked because Uint8Array truncates
    // fractional numbers on conversion.
    const lead = Math.floor(pointer / 190) + 0x81;
    const trail = pointer % 190;
    // Trail bytes skip 0x7F: trails below 0x3F start at 0x40, the rest at 0x41.
    const offset = trail < 0x3F ? 0x40 : 0x41;
    const encoded = [lead, trail + offset];
    const decoded = new TextDecoder("GBK").decode(new Uint8Array(encoded)).charCodeAt(0);
    assert_equals(decoded, codePoints[i]);
  }, "gbk pointer: " + pointer);
}
</script>
26 changes: 26 additions & 0 deletions test/fixtures/wpt/encoding/legacy-mb-schinese/gbk/gbk-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!doctype html>
<meta charset=gbk> <!-- these tests rely on the document encoding being gbk; a charset override from the server would invalidate them -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script>
// Registers one testharness test that assigns `input` as the query of an
// <a> element's URL and asserts that the serialized query (without its
// leading "?") equals `output`. `desc` is appended to the "gbk encoder: "
// test-name prefix.
function encode(input, output, desc) {
  test(function() {
    const a = document.createElement("a"); // <a> uses document encoding for URL's query
    a.href = "https://example.com/?" + input;
    // slice(1) drops the leading "?"; it replaces the deprecated substr(1).
    assert_equals(a.search.slice(1), output);
  }, "gbk encoder: " + desc);
}

// Hand-written gbk encoder cases: [input, expected query string, description].
for (const [input, output, desc] of [
  ["s", "s", "very basic"],
  ["\u20AC", "%80", "Euro"],
  ["\u4E02", "%81@", "character"],
  ["\uE4C6", "%A1@", "PUA"],
  ["\uE4C5", "%FE%FE", "PUA #2"],
  ["\ud83d\udca9", "%26%23128169%3B", "poo"],
  ["\uE7C8", "%26%2359336%3B", "legacy ICU special case 1"],
  ["\u2026", "%A1%AD", "legacy ICU special case 2"],
  ["\uFF5E", "%A1%AB", "legacy ICU special case 3"],
  ["\u00A5", "%26%23165%3B", "legacy WebKit case 1"],
  ["\u22EF", "%26%238943%3B", "legacy WebKit case 2"],
  ["\u301C", "%26%2312316%3B", "legacy WebKit case 3"],
]) {
  encode(input, output, desc);
}
</script>
2 changes: 1 addition & 1 deletion test/fixtures/wpt/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"path": "console"
},
"encoding": {
"commit": "1821fb5f77723b5361058c6a8ed0b71f9d2d6b8d",
"commit": "3c9820d1cc5d9d2627c26ef1268b6d54a35adf22",
"path": "encoding"
},
"url": {
Expand Down

0 comments on commit 4acc273

Please sign in to comment.