This repository has been archived by the owner on Apr 10, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 305
/
TextDecoder.cpp
263 lines (233 loc) · 7.37 KB
/
TextDecoder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/TextDecoder.h"
#include "mozilla/dom/EncodingUtils.h"
#include "nsContentUtils.h"
#include "nsICharsetConverterManager.h"
#include "nsServiceManagerUtils.h"
namespace mozilla {
namespace dom {
// U+FFFD REPLACEMENT CHARACTER: appended to the decoded output in place of
// input that cannot be decoded when the decoder is not in fatal mode.
static const PRUnichar kReplacementChar = 0xFFFD;
/**
 * Initializes this decoder from an encoding label and the constructor options.
 *
 * The label "utf-16" (case-insensitive, after trimming) postpones decoder
 * creation: the converter is picked later, once the leading bytes of the
 * input have been inspected for a BOM. Any other label is resolved through
 * EncodingUtils::FindEncodingForLabel() and a decoder is created right away.
 *
 * @param aEncoding Encoding label supplied by the caller; space characters
 *                  are trimmed before matching.
 * @param aFatal    Constructor options dictionary; only `fatal` is read.
 * @param aRv       Set to NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR when the
 *                  label cannot be resolved; may also be set by
 *                  CreateDecoder().
 */
void
TextDecoder::Init(const nsAString& aEncoding,
                  const TextDecoderOptions& aFatal,
                  ErrorResult& aRv)
{
  nsAutoString label(aEncoding);
  EncodingUtils::TrimSpaceCharacters(label);

  // If the constructor is called with an options argument, and the fatal
  // property of the dictionary is set, set the internal fatal flag.
  // This must happen before the "utf-16" early return below: that path
  // creates its decoder later (from HandleBOM), and CreateDecoder() consults
  // mFatal at that point.
  mFatal = aFatal.fatal;

  // If label is a case-insensitive match for "utf-16"
  // then set the internal useBOM flag.
  if (label.LowerCaseEqualsLiteral("utf-16")) {
    mUseBOM = true;
    mIsUTF16Family = true;
    mEncoding = "utf-16le";
    // If BOM is used, we can't determine the converter yet.
    return;
  }

  // Run the steps to get an encoding from Encoding.
  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
    // If the steps result in failure,
    // throw a "EncodingError" exception and terminate these steps.
    aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
    return;
  }

  mIsUTF16Family = !strcmp(mEncoding, "utf-16le") ||
                   !strcmp(mEncoding, "utf-16be");

  CreateDecoder(aRv);
}
/**
 * Instantiates the Unicode decoder for mEncoding and stores it in mDecoder.
 *
 * When mFatal is set, the new decoder is switched to signal input errors.
 *
 * @param aRv Set to NS_ERROR_UNEXPECTED when the charset converter manager
 *            service or the decoder itself cannot be obtained.
 */
void
TextDecoder::CreateDecoder(ErrorResult& aRv)
{
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID);

  if (converterManager) {
    converterManager->GetUnicodeDecoder(mEncoding, getter_AddRefs(mDecoder));
    if (mDecoder) {
      if (mFatal) {
        mDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
      }
      return;
    }
  }

  // Either the service or the decoder was unavailable.
  aRv.Throw(NS_ERROR_UNEXPECTED);
}
/**
 * Resets the underlying decoder's state.
 *
 * @param aResetOffset When true, mOffset (the count of initial bytes already
 *                     collected for BOM handling) is also set back to 0.
 */
void
TextDecoder::ResetDecoder(bool aResetOffset)
{
  mDecoder->Reset();

  if (!aResetOffset) {
    return;
  }
  mOffset = 0;
}
void
TextDecoder::Decode(const ArrayBufferView* aView,
const TextDecodeOptions& aOptions,
nsAString& aOutDecodedString,
ErrorResult& aRv)
{
const char* data;
uint32_t length;
// If view is not specified, let view be a Uint8Array of length 0.
if (!aView) {
data = EmptyCString().BeginReading();
length = EmptyCString().Length();
} else {
data = reinterpret_cast<const char*>(aView->Data());
length = aView->Length();
}
aOutDecodedString.Truncate();
if (mIsUTF16Family && mOffset < 2) {
HandleBOM(data, length, aOptions, aOutDecodedString, aRv);
if (aRv.Failed() || mOffset < 2) {
return;
}
}
// Run or resume the decoder algorithm of the decoder object's encoder.
int32_t outLen;
nsresult rv = mDecoder->GetMaxLength(data, length, &outLen);
if (NS_FAILED(rv)) {
aRv.Throw(rv);
return;
}
// Need a fallible allocator because the caller may be a content
// and the content can specify the length of the string.
static const fallible_t fallible = fallible_t();
nsAutoArrayPtr<PRUnichar> buf(new (fallible) PRUnichar[outLen + 1]);
if (!buf) {
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
return;
}
for (;;) {
int32_t srcLen = length;
int32_t dstLen = outLen;
rv = mDecoder->Convert(data, &srcLen, buf, &dstLen);
// Convert will convert the input partially even if the status
// indicates a failure.
buf[dstLen] = 0;
aOutDecodedString.Append(buf, dstLen);
if (mFatal || rv != NS_ERROR_ILLEGAL_INPUT) {
break;
}
// Emit a decode error manually because some decoders
// do not support kOnError_Recover (bug 638379)
if (srcLen == -1) {
ResetDecoder();
} else {
data += srcLen + 1;
length -= srcLen + 1;
aOutDecodedString.Append(kReplacementChar);
}
}
// If the internal streaming flag of the decoder object is not set,
// then reset the encoding algorithm state to the default values
if (!aOptions.stream) {
ResetDecoder();
if (rv == NS_OK_UDEC_MOREINPUT) {
if (mFatal) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
} else {
// Need to emit a decode error manually
// to simulate the EOF handling of the Encoding spec.
aOutDecodedString.Append(kReplacementChar);
}
}
}
if (NS_FAILED(rv)) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
}
}
void
TextDecoder::HandleBOM(const char*& aData, uint32_t& aLength,
const TextDecodeOptions& aOptions,
nsAString& aOutString, ErrorResult& aRv)
{
if (aLength < 2u - mOffset) {
if (aOptions.stream) {
memcpy(mInitialBytes + mOffset, aData, aLength);
mOffset += aLength;
} else if (mFatal) {
aRv.Throw(NS_ERROR_DOM_ENCODING_DECODE_ERR);
} else {
aOutString.Append(kReplacementChar);
}
return;
}
memcpy(mInitialBytes + mOffset, aData, 2 - mOffset);
// copied data will be fed later.
aData += 2 - mOffset;
aLength -= 2 - mOffset;
mOffset = 2;
const char* encoding = "";
if (!EncodingUtils::IdentifyDataOffset(mInitialBytes, 2, encoding) ||
strcmp(encoding, mEncoding)) {
// If the stream doesn't start with BOM or the BOM doesn't match the
// encoding, feed a BOM to workaround decoder's bug (bug 634541).
if (!mUseBOM) {
FeedBytes(!strcmp(mEncoding, "utf-16le") ? "\xFF\xFE" : "\xFE\xFF");
}
}
if (mUseBOM) {
// Select a decoder corresponding to the BOM.
if (!*encoding) {
encoding = "utf-16le";
}
// If the endian has not been changed, reuse the decoder.
if (mDecoder && !strcmp(encoding, mEncoding)) {
ResetDecoder(false);
} else {
mEncoding = encoding;
CreateDecoder(aRv);
}
}
FeedBytes(mInitialBytes, &aOutString);
}
void
TextDecoder::FeedBytes(const char* aBytes, nsAString* aOutString)
{
PRUnichar buf[3];
int32_t srcLen = mOffset;
int32_t dstLen = mozilla::ArrayLength(buf);
DebugOnly<nsresult> rv =
mDecoder->Convert(aBytes, &srcLen, buf, &dstLen);
MOZ_ASSERT(NS_SUCCEEDED(rv));
MOZ_ASSERT(srcLen == mOffset);
if (aOutString) {
aOutString->Assign(buf, dstLen);
}
}
/**
 * Reports the public name of the decoder's encoding.
 *
 * Internal converter names that exist only as workarounds are mapped back to
 * the names exposed through the API; every other name is returned as is.
 *
 * @param aEncoding Receives the encoding name.
 */
void
TextDecoder::GetEncoding(nsAString& aEncoding)
{
  // The utf-16le converter stands in for the label "utf-16" because our
  // utf-16 converter does not comply with the Encoding Standard. That detail
  // must not leak through the public API, so "utf-16" is reported whenever
  // BOM detection is in use or the internal name is "utf-16le".
  const bool reportAsUtf16 = mUseBOM || strcmp(mEncoding, "utf-16le") == 0;

  if (reportAsUtf16) {
    aEncoding.AssignLiteral("utf-16");
  } else if (strcmp(mEncoding, "x-windows-949") == 0) {
    // Likewise, "x-windows-949" is the internal converter for the "euc-kr"
    // family.
    aEncoding.AssignLiteral("euc-kr");
  } else {
    aEncoding.AssignASCII(mEncoding);
  }
}
// Reference-counting, interface-map and cycle-collection boilerplate for
// TextDecoder, generated by macros.
// NOTE(review): the macros are defined outside this file. Judging by their
// arguments, the interface map exposes nsISupports and the wrapper-cache
// cycle-collection macro is handed the mGlobal member; confirm against the
// macro definitions before relying on the details.
NS_IMPL_CYCLE_COLLECTING_ADDREF(TextDecoder)
NS_IMPL_CYCLE_COLLECTING_RELEASE(TextDecoder)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(TextDecoder)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(TextDecoder, mGlobal)
} // dom
} // mozilla