-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
TextDecoder.ts
111 lines (100 loc) · 3.65 KB
/
TextDecoder.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
* Copyright (C) 2021 Klaus Reimer <k@ailis.de>
* See LICENSE.md for licensing information.
*/
import { ByteBuffer } from "./ByteBuffer";
import { DEFAULT_ENCODING, FINISHED } from "./constants";
import { Decoder } from "./Decoder";
import { Encoding, getEncoding } from "./Encoding";
/**
 * The TextDecoder represents a decoder for a specific text encoding, such as UTF-8, ISO-8859-2, KOI8-R, GBK, etc.
 * A decoder takes a stream of bytes as input and emits a stream of code points.
 *
 * The class implements the shape of the platform `TextDecoder` interface, so it can be used wherever the
 * native one is expected.
 */
export class TextDecoder implements globalThis.TextDecoder {
    /** Names of the encodings for which a leading U+FEFF is stripped from the output. */
    private static readonly BOM_ENCODINGS: readonly string[] = [ "utf-8", "utf-16le", "utf-16be" ];

    /**
     * True if the byte order mark is to be left alone. When false, a leading U+FEFF is removed from the
     * decoded output of the UTF-8 and UTF-16 encodings.
     */
    public readonly ignoreBOM: boolean;

    /** True if error mode is fatal. The flag is handed to the encoding when a decoder is created. */
    public readonly fatal: boolean;

    /** The encoding resolved from the label passed to the constructor. */
    private readonly enc: Encoding;

    /** True once the start of the current decode sequence has been checked for a byte order mark. */
    private seenBOM = false;

    /** Decoder of the current decode sequence. Null when the next call has to start a new sequence. */
    private decoder: Decoder | null = null;

    /**
     * Creates text decoder for the given encoding.
     *
     * @param label             - The label of the encoding. Defaults to 'utf-8'
     * @param options           - The decoder options.
     * @param options.fatal     - True to create decoders in fatal error mode. Defaults to false.
     * @param options.ignoreBOM - True to keep a leading byte order mark in the output. Defaults to false.
     */
    public constructor(label = DEFAULT_ENCODING, { fatal = false, ignoreBOM = false }: TextDecoderOptions = {}) {
        this.enc = getEncoding(label);
        this.fatal = fatal;
        this.ignoreBOM = ignoreBOM;
    }

    /** @return The name of the encoding. */
    public get encoding(): string {
        return this.enc.getName();
    }

    /**
     * Decodes the given input into a string and returns it.
     *
     * With `stream` set to true the decoder state is kept, so a byte sequence split over several calls is
     * continued by the next call. Without it the decoder is flushed and discarded, and the next call starts
     * a new decode sequence. The decoder is also discarded when a non-streaming call fails with an exception
     * (presumably raised by decoders in fatal mode on malformed input - confirm against the Decoder
     * implementations), so a failed call can not leak state into the next one.
     *
     * @param input          - The input to decode. Missing input is treated as zero bytes.
     * @param options        - The decoding options.
     * @param options.stream - True if more input follows in subsequent calls. Defaults to false.
     * @return The decoded string.
     */
    public decode(input?: BufferSource, { stream = false }: TextDecodeOptions = {}): string {
        const bytes = TextDecoder.toBytes(input);

        // Initialize decoder if not already done
        if (this.decoder == null) {
            this.decoder = this.enc.createDecoder(this.fatal);
            this.seenBOM = false;
        }
        const decoder = this.decoder;

        const inputStream = new ByteBuffer(bytes);
        let output = "";
        try {
            // Decode the input bytes
            while (!inputStream.isEndOfBuffer()) {
                const result = decoder.decode(inputStream);
                if (result === FINISHED) {
                    break;
                }
                output += TextDecoder.toText(result);
            }

            if (!stream) {
                // Flush: give the decoder at least one call at the end of the input so it can report a
                // pending incomplete sequence, and keep going as long as the buffer still holds bytes.
                do {
                    const result = decoder.decode(inputStream);
                    if (result === FINISHED) {
                        break;
                    }
                    output += TextDecoder.toText(result);
                } while (!inputStream.isEndOfBuffer());
            }
        } finally {
            // A non-streaming call always ends the decode sequence, even when decoding failed.
            if (!stream) {
                this.decoder = null;
            }
        }

        // Remove BOM header from output if ignoreBOM flag is not set
        if (TextDecoder.BOM_ENCODINGS.includes(this.encoding) && !this.ignoreBOM && !this.seenBOM) {
            if (output.length > 0) {
                // Only the very first character of a decode sequence can be a BOM
                this.seenBOM = true;
                if (output[0] === "\uFEFF") {
                    return output.substring(1);
                }
            }
        }
        return output;
    }

    /**
     * Creates a byte view on the given input without copying the underlying data.
     *
     * @param input - The input buffer or buffer view. May be undefined.
     * @return The bytes of the input. Empty when input is neither an ArrayBuffer nor a buffer view.
     */
    private static toBytes(input?: BufferSource): Uint8Array {
        if (input instanceof ArrayBuffer) {
            return new Uint8Array(input);
        }
        if (ArrayBuffer.isView(input)) {
            return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
        }
        return new Uint8Array(0);
    }

    /**
     * Converts a token emitted by a decoder into text.
     *
     * @param result - A single code point, a list of code points or null/undefined for "nothing emitted".
     * @return The text for the given code points. Empty string when nothing was emitted.
     */
    private static toText(result: number | number[] | null | undefined): string {
        if (result == null) {
            return "";
        }
        return typeof result === "number" ? String.fromCodePoint(result) : String.fromCodePoint(...result);
    }
}