Use TextEncoder (utf8EncodeTE) for long strings in the Encoder.

What this patch does:
  * src/utils/utf8.ts
      - renames utf8Encode to utf8EncodeJs (the hand-written encoder);
      - adds utf8EncodeTE, which calls TextEncoder#encodeInto when the
        runtime provides it and otherwise falls back to encode() followed
        by Uint8Array#set;
      - replaces TEXT_DECODER_AVAILABLE with TEXT_ENCODING_AVAILABLE, which
        is true only when TEXT_ENCODING is not "never" and both TextEncoder
        and TextDecoder are defined;
      - adds TEXT_ENCODER_THRESHOLD (200, or 0 when TEXT_ENCODING=force).
  * src/Encoder.ts: strings whose length (UTF-16 code units) exceeds
    TEXT_ENCODER_THRESHOLD are written with utf8EncodeTE; the WASM and
    pure-JS paths are kept as fallbacks.
  * src/Decoder.ts: follows the TEXT_ENCODING_AVAILABLE rename.
  * package.json: the TEXT_DECODER environment variable becomes
    TEXT_ENCODING, and the test:td / test:cover:td scripts become
    test:te / test:cover:te.
  * benchmark/: decode-string.ts follows the rename; encode-string.ts is a
    new benchmark comparing utf8EncodeJs with utf8EncodeTE.

Review fixes folded into this revision:
  1. benchmark/encode-string.ts labelled the utf8EncodeTE case
     "utf8DecodeTE"; the label now matches the function being measured.
  2. TEXT_DECODER_THRESHOLD still read process.env.TEXT_DECODER, which no
     script sets any more, so TEXT_ENCODING=force did not force the
     TextDecoder path for short strings. It now reads TEXT_ENCODING, in
     line with TEXT_ENCODER_THRESHOLD.

Notes for whoever applies this:
  * Leading whitespace was reconstructed assuming the project's 2-space
    style; use "git apply --ignore-whitespace" if context does not match.
  * Because of the two fixes above, the post-image blob ids on the "index"
    lines of benchmark/encode-string.ts and src/utils/utf8.ts are stale.

diff --git a/benchmark/decode-string.ts b/benchmark/decode-string.ts
index ed4813ee..1cde203d 100644
--- a/benchmark/decode-string.ts
+++ b/benchmark/decode-string.ts
@@ -1,5 +1,5 @@
 /* eslint-disable no-console */
-import { utf8Encode, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
+import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
 import { utf8DecodeWasm } from "../src/wasmFunctions";
 
 // @ts-ignore
@@ -13,7 +13,7 @@ for (const baseStr of ["A", "あ", "🌏"]) {
   for (const str of dataSet) {
     const byteLength = utf8Count(str);
     const bytes = new Uint8Array(new ArrayBuffer(byteLength));
-    utf8Encode(str, bytes, 0);
+    utf8EncodeJs(str, bytes, 0);
 
     console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
 
diff --git a/benchmark/encode-string.ts b/benchmark/encode-string.ts
new file mode 100644
index 00000000..47662dd1
--- /dev/null
+++ b/benchmark/encode-string.ts
@@ -0,0 +1,33 @@
+/* eslint-disable no-console */
+import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";
+
+// @ts-ignore
+import Benchmark from "benchmark";
+
+for (const baseStr of ["A", "あ", "🌏"]) {
+  const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
+    return baseStr.repeat(n);
+  });
+
+  for (const str of dataSet) {
+    const byteLength = utf8Count(str);
+    const buffer = new Uint8Array(byteLength);
+
+    console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
+
+    const suite = new Benchmark.Suite();
+
+    suite.add("utf8EncodeJs", () => {
+      utf8EncodeJs(str, buffer, 0);
+    });
+
+    suite.add("utf8EncodeTE", () => {
+      utf8EncodeTE(str, buffer, 0);
+    });
+    suite.on("cycle", (event: any) => {
+      console.log(String(event.target));
+    });
+
+    suite.run();
+  }
+}
diff --git a/package.json b/package.json
index 23e253f6..482a6c9b 100644
--- a/package.json
+++ b/package.json
@@ -14,15 +14,15 @@
     "prepublishOnly": "run-p 'test:dist:*' && npm run test:browser",
     "clean": "rimraf build dist dist.*",
     "test": "mocha 'test/**/*.test.ts'",
-    "test:purejs": "TEXT_DECODER=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
-    "test:wasm": "npm run asbuild:production && TEXT_DECODER=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
-    "test:td": "TEXT_DECODER=force mocha 'test/**/*.test.ts'",
+    "test:purejs": "TEXT_ENCODING=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
+    "test:wasm": "npm run asbuild:production && TEXT_ENCODING=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
+    "test:te": "TEXT_ENCODING=force mocha 'test/**/*.test.ts'",
     "test:dist:purejs": "TS_NODE_PROJECT=tsconfig.test-dist-es5-purejs.json npm run test:purejs -- --reporter=dot",
     "test:dist:wasm": "TS_NODE_PROJECT=tsconfig.test-dist-es5-wasm.json npm run test:wasm -- --reporter=dot",
     "test:cover": "npm run cover:clean && npm-run-all 'test:cover:*' && npm run cover:report",
     "test:cover:purejs": "npx nyc --no-clean npm run test:purejs",
     "test:cover:wasm": "npx nyc --no-clean npm run test:wasm",
-    "test:cover:td": "npx nyc --no-clean npm run test:td",
+    "test:cover:te": "npx nyc --no-clean npm run test:te",
     "cover:clean": "rimraf .nyc_output coverage/",
     "cover:report": "nyc report --reporter=text-summary --reporter=html --reporter=json",
     "test:browser": "karma start --single-run",
diff --git a/src/Decoder.ts b/src/Decoder.ts
index becdbefa..246f8b9a 100644
--- a/src/Decoder.ts
+++ b/src/Decoder.ts
@@ -1,7 +1,7 @@
 import { prettyByte } from "./utils/prettyByte";
 import { ExtensionCodec } from "./ExtensionCodec";
 import { getInt64, getUint64 } from "./utils/int";
-import { utf8DecodeJs, TEXT_DECODER_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
+import { utf8DecodeJs, TEXT_ENCODING_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
 import { createDataView, ensureUint8Array } from "./utils/typedArrays";
 import { WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions";
 
@@ -482,7 +482,7 @@ export class Decoder {
 
     const offset = this.pos + headerOffset;
     let object: string;
-    if (TEXT_DECODER_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
+    if (TEXT_ENCODING_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
       object = utf8DecodeTD(this.bytes, offset, byteLength);
     } else if (WASM_AVAILABLE && byteLength > WASM_STR_THRESHOLD) {
       object = utf8DecodeWasm(this.bytes, offset, byteLength);
diff --git a/src/Encoder.ts b/src/Encoder.ts
index 2ff7bf41..d4b2c58c 100644
--- a/src/Encoder.ts
+++ b/src/Encoder.ts
@@ -1,4 +1,4 @@
-import { utf8Encode, utf8Count } from "./utils/utf8";
+import { utf8EncodeJs, utf8Count, TEXT_ENCODING_AVAILABLE, TEXT_ENCODER_THRESHOLD, utf8EncodeTE } from "./utils/utf8";
 import { ExtensionCodec } from "./ExtensionCodec";
 import { setInt64, setUint64 } from "./utils/int";
 import { ensureUint8Array } from "./utils/typedArrays";
@@ -148,7 +148,13 @@ export class Encoder {
     const maxHeaderSize = 1 + 4;
     const strLength = object.length;
 
-    if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
+    if (TEXT_ENCODING_AVAILABLE && strLength > TEXT_ENCODER_THRESHOLD) {
+      const byteLength = utf8Count(object);
+      this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
+      this.writeStringHeader(byteLength);
+      utf8EncodeTE(object, this.bytes, this.pos);
+      this.pos += byteLength;
+    } else if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
       // ensure max possible size
       const maxSize = maxHeaderSize + strLength * 4;
       this.ensureBufferSizeToWrite(maxSize);
@@ -161,7 +167,7 @@ export class Encoder {
       const byteLength = utf8Count(object);
       this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
       this.writeStringHeader(byteLength);
-      utf8Encode(object, this.bytes, this.pos);
+      utf8EncodeJs(object, this.bytes, this.pos);
       this.pos += byteLength;
     }
   }
diff --git a/src/utils/utf8.ts b/src/utils/utf8.ts
index 78241647..68df4516 100644
--- a/src/utils/utf8.ts
+++ b/src/utils/utf8.ts
@@ -1,3 +1,6 @@
+export const TEXT_ENCODING_AVAILABLE =
+  process.env.TEXT_ENCODING !== "never" && (typeof TextEncoder !== "undefined" && typeof TextDecoder !== "undefined");
+
 export function utf8Count(str: string): number {
   const strLength = str.length;
 
@@ -38,7 +41,7 @@ export function utf8Count(str: string): number {
   return byteLength;
 }
 
-export function utf8Encode(str: string, output: Uint8Array, outputOffset: number): void {
+export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: number): void {
   const strLength = str.length;
   let offset = outputOffset;
   let pos = 0;
@@ -81,6 +84,22 @@ export function utf8Encode(str: string, output: Uint8Array, outputOffset: number
   }
 }
 
+const sharedTextEncoder = TEXT_ENCODING_AVAILABLE ? new TextEncoder() : undefined;
+export const TEXT_ENCODER_THRESHOLD = process.env.TEXT_ENCODING !== "force" ? 200 : 0;
+
+function utf8EncodeTEencode(str: string, output: Uint8Array, outputOffset: number): void {
+  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+  output.set(sharedTextEncoder!.encode(str), outputOffset);
+}
+
+function utf8EncodeTEencodeInto(str: string, output: Uint8Array, outputOffset: number): void {
+  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+  sharedTextEncoder!.encodeInto(str, output.subarray(outputOffset));
+}
+
+export const utf8EncodeTE =
+  sharedTextEncoder && sharedTextEncoder.encodeInto ? utf8EncodeTEencodeInto : utf8EncodeTEencode;
+
 const CHUNK_SIZE = 0x10_000;
 
 export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
@@ -132,8 +151,7 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
   return result;
 }
 
-const sharedTextDecoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : null;
-export const TEXT_DECODER_AVAILABLE = process.env.TEXT_DECODER !== "never" && !!sharedTextDecoder;
-export const TEXT_DECODER_THRESHOLD = process.env.TEXT_DECODER !== "force" ? 200 : 0;
+const sharedTextDecoder = TEXT_ENCODING_AVAILABLE ? new TextDecoder() : null;
+export const TEXT_DECODER_THRESHOLD = process.env.TEXT_ENCODING !== "force" ? 200 : 0;
 
 export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {