From 145c909e46481847b5f5e82d7ea639b477bb8f7d Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Sat, 29 Nov 2025 11:29:29 +0400 Subject: [PATCH 1/3] Revert "test: convert test_encoding_binding.cc to a JS test" This reverts commit 3c105b6e21d9880d4d5ad28f8c5edb07e2e59299. --- test/cctest/test_encoding_binding.cc | 176 ++++++++++++++++++ .../test-internal-encoding-binding.js | 48 ----- 2 files changed, 176 insertions(+), 48 deletions(-) create mode 100644 test/cctest/test_encoding_binding.cc delete mode 100644 test/parallel/test-internal-encoding-binding.js diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc new file mode 100644 index 00000000000000..d5d14c60fedf7e --- /dev/null +++ b/test/cctest/test_encoding_binding.cc @@ -0,0 +1,176 @@ +#include "encoding_binding.h" +#include "env-inl.h" +#include "gtest/gtest.h" +#include "node_test_fixture.h" +#include "v8.h" + +namespace node { +namespace encoding_binding { + +bool RunDecodeLatin1(Environment* env, + Local args[], + bool ignore_bom, + bool has_fatal, + Local* result) { + Isolate* isolate = env->isolate(); + TryCatch try_catch(isolate); + + Local ignoreBOMValue = Boolean::New(isolate, ignore_bom); + Local fatalValue = Boolean::New(isolate, has_fatal); + + Local updatedArgs[] = {args[0], ignoreBOMValue, fatalValue}; + + BindingData::DecodeLatin1(FunctionCallbackInfo(updatedArgs)); + + if (try_catch.HasCaught()) { + return false; + } + + *result = args[0]; + return true; +} + +class EncodingBindingTest : public NodeTestFixture {}; + +TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local ab = ArrayBuffer::New(isolate, 0); + Local array = Uint8Array::New(ab, 0, 0); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, ""); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + Local args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF}; + Local ab = ArrayBuffer::New(isolate, sizeof(invalid_data)); + memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(invalid_data)); + Local args[] = {array}; + + Local result; + EXPECT_FALSE(RunDecodeLatin1(env, args, false, true, &result)); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xFF, 0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +TEST_F(EncodingBindingTest, DecodeLatin1_ReturnsString) { + Environment* env = CreateEnvironment(); + Isolate* isolate = env->isolate(); + HandleScope handle_scope(isolate); + + const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; + Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); + memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); + + Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); + Local args[] = {array}; + + Local result; + ASSERT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); + + ASSERT_TRUE(result->IsString()); + + String::Utf8Value utf8_result(isolate, result); + EXPECT_STREQ(*utf8_result, "Áéó"); +} + +} // namespace encoding_binding +} // namespace node diff --git a/test/parallel/test-internal-encoding-binding.js b/test/parallel/test-internal-encoding-binding.js deleted file mode 100644 index b7483bf1d22820..00000000000000 --- a/test/parallel/test-internal-encoding-binding.js +++ /dev/null @@ -1,48 +0,0 @@ -// Flags: --expose-internals - -'use strict'; - -require('../common'); - -const assert = require('node:assert'); -const { internalBinding } = require('internal/test/binding'); -const binding = internalBinding('encoding_binding'); - -{ - // Valid input - const buf = Uint8Array.from([0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeLatin1(buf, false, false), 'Áéó'); -} - -{ - // Empty input - const buf = Uint8Array.from([]); - assert.strictEqual(binding.decodeLatin1(buf, false, false), ''); -} - -{ - // Invalid input, but Latin1 has no invalid chars and should never throw. - const buf = new TextEncoder().encode('Invalid Latin1 🧑‍🧑‍🧒‍🧒'); - assert.strictEqual( - binding.decodeLatin1(buf, false, false), - 'Invalid Latin1 ð\x9F§\x91â\x80\x8Dð\x9F§\x91â\x80\x8Dð\x9F§\x92â\x80\x8Dð\x9F§\x92' - ); -} - -{ - // IgnoreBOM with BOM - const buf = Uint8Array.from([0xFE, 0xFF, 0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeLatin1(buf, true, false), 'þÿÁéó'); -} - -{ - // Fatal and InvalidInput, but Latin1 has no invalid chars and should never throw. - const buf = Uint8Array.from([0xFF, 0xFF, 0xFF]); - assert.strictEqual(binding.decodeLatin1(buf, false, true), 'ÿÿÿ'); -} - -{ - // IgnoreBOM and Fatal, but Latin1 has no invalid chars and should never throw. - const buf = Uint8Array.from([0xFE, 0xFF, 0xC1, 0xE9, 0xF3]); - assert.strictEqual(binding.decodeLatin1(buf, true, true), 'þÿÁéó'); -} From f037c2d2ac959b80e58cb5431081e8239061a6cc Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Sat, 29 Nov 2025 11:31:05 +0400 Subject: [PATCH 2/3] Revert "util: add fast path for Latin1 decoding" This reverts commit 20bcaa081be2a573474564966b492969e692571f. --- benchmark/util/text-decoder.js | 2 +- lib/internal/encoding.js | 10 +- src/encoding_binding.cc | 49 -------- src/encoding_binding.h | 1 - test/cctest/test_encoding_binding.cc | 176 --------------------------- 5 files changed, 2 insertions(+), 236 deletions(-) delete mode 100644 test/cctest/test_encoding_binding.cc diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js index 1aa60f2dd0bcd6..dd4f02016df077 100644 --- a/benchmark/util/text-decoder.js +++ b/benchmark/util/text-decoder.js @@ -3,7 +3,7 @@ const common = require('../common.js'); const bench = common.createBenchmark(main, { - encoding: ['utf-8', 'windows-1252', 'iso-8859-3'], + encoding: ['utf-8', 'latin1', 'iso-8859-3'], ignoreBOM: [0, 1], fatal: [0, 1], len: [256, 1024 * 16, 1024 * 128], diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index e054ca4dd5202d..1d4d562dfe35ee 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -28,7 +28,6 @@ const kEncoding = Symbol('encoding'); const kDecoder = Symbol('decoder'); const kFatal = Symbol('kFatal'); const kUTF8FastPath = Symbol('kUTF8FastPath'); -const kLatin1FastPath = Symbol('kLatin1FastPath'); const kIgnoreBOM = Symbol('kIgnoreBOM'); const { @@ -55,7 +54,6 @@ const { encodeIntoResults, encodeUtf8String, decodeUTF8, - decodeLatin1, } = binding; const { Buffer } = require('buffer'); @@ -420,10 +418,9 @@ function makeTextDecoderICU() { this[kFatal] = Boolean(options?.fatal); // Only support fast path for UTF-8. this[kUTF8FastPath] = enc === 'utf-8'; - this[kLatin1FastPath] = enc === 'windows-1252'; this[kHandle] = undefined; - if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) { + if (!this[kUTF8FastPath]) { this.#prepareConverter(); } } @@ -440,16 +437,11 @@ function makeTextDecoderICU() { validateDecoder(this); this[kUTF8FastPath] &&= !(options?.stream); - this[kLatin1FastPath] &&= !(options?.stream); if (this[kUTF8FastPath]) { return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); } - if (this[kLatin1FastPath]) { - return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]); - } - this.#prepareConverter(); validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 877ae8a18f6b8f..ad3674599690c8 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -1,7 +1,6 @@ #include "encoding_binding.h" #include "ada.h" #include "env-inl.h" -#include "node_buffer.h" #include "node_errors.h" #include "node_external_reference.h" #include "simdutf.h" @@ -221,7 +220,6 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8); SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII); SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode); - SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1); } void BindingData::CreatePerContextProperties(Local target, @@ -239,53 +237,6 @@ void BindingData::RegisterTimerExternalReferences( registry->Register(DecodeUTF8); registry->Register(ToASCII); registry->Register(ToUnicode); - registry->Register(DecodeLatin1); -} - -void BindingData::DecodeLatin1(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - - CHECK_GE(args.Length(), 1); - if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || - args[0]->IsArrayBufferView())) { - return node::THROW_ERR_INVALID_ARG_TYPE( - env->isolate(), - "The \"input\" argument must be an instance of ArrayBuffer, " - "SharedArrayBuffer, or ArrayBufferView."); - } - - bool ignore_bom = args[1]->IsTrue(); - bool has_fatal = args[2]->IsTrue(); - - ArrayBufferViewContents buffer(args[0]); - const uint8_t* data = buffer.data(); - size_t length = buffer.length(); - - if (ignore_bom && length > 0 && data[0] == 0xFF) { - data++; - length--; - } - - if (length == 0) { - return args.GetReturnValue().SetEmptyString(); - } - - std::string result(length * 2, '\0'); - - size_t written = simdutf::convert_latin1_to_utf8( - reinterpret_cast(data), length, result.data()); - - if (has_fatal && written == 0) { - return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( - env->isolate(), "The encoded data was not valid for encoding latin1"); - } - - std::string_view view(result.c_str(), written); - - Local ret; - if (ToV8Value(env->context(), view, env->isolate()).ToLocal(&ret)) { - args.GetReturnValue().Set(ret); - } } } // namespace encoding_binding diff --git a/src/encoding_binding.h b/src/encoding_binding.h index 97f55394d27641..2690cb74f8a05b 100644 --- a/src/encoding_binding.h +++ b/src/encoding_binding.h @@ -31,7 +31,6 @@ class BindingData : public SnapshotableObject { static void EncodeInto(const v8::FunctionCallbackInfo& args); static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); static void DecodeUTF8(const v8::FunctionCallbackInfo& args); - static void DecodeLatin1(const v8::FunctionCallbackInfo& args); static void ToASCII(const v8::FunctionCallbackInfo& args); static void ToUnicode(const v8::FunctionCallbackInfo& args); diff --git a/test/cctest/test_encoding_binding.cc b/test/cctest/test_encoding_binding.cc deleted file mode 100644 index d5d14c60fedf7e..00000000000000 --- a/test/cctest/test_encoding_binding.cc +++ /dev/null @@ -1,176 +0,0 @@ -#include "encoding_binding.h" -#include "env-inl.h" -#include "gtest/gtest.h" -#include "node_test_fixture.h" -#include "v8.h" - -namespace node { -namespace encoding_binding { - -bool RunDecodeLatin1(Environment* env, - Local args[], - bool ignore_bom, - bool has_fatal, - Local* result) { - Isolate* isolate = env->isolate(); - TryCatch try_catch(isolate); - - Local ignoreBOMValue = Boolean::New(isolate, ignore_bom); - Local fatalValue = Boolean::New(isolate, has_fatal); - - Local updatedArgs[] = {args[0], ignoreBOMValue, fatalValue}; - - BindingData::DecodeLatin1(FunctionCallbackInfo(updatedArgs)); - - if (try_catch.HasCaught()) { - return false; - } - - *result = args[0]; - return true; -} - -class EncodingBindingTest : public NodeTestFixture {}; - -TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; - Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); - memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); - Local args[] = {array}; - - Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, "Áéó"); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - Local ab = ArrayBuffer::New(isolate, 0); - Local array = Uint8Array::New(ab, 0, 0); - Local args[] = {array}; - - Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, ""); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - Local args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")}; - - Local result; - EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result)); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; - Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); - memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); - Local args[] = {array}; - - Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, "Áéó"); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF}; - Local ab = ArrayBuffer::New(isolate, sizeof(invalid_data)); - memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(invalid_data)); - Local args[] = {array}; - - Local result; - EXPECT_FALSE(RunDecodeLatin1(env, args, false, true, &result)); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3}; - Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); - memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); - Local args[] = {array}; - - Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result)); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, "Áéó"); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t latin1_data[] = {0xFF, 0xC1, 0xE9, 0xF3}; - Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); - memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); - Local args[] = {array}; - - Local result; - EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result)); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, "Áéó"); -} - -TEST_F(EncodingBindingTest, DecodeLatin1_ReturnsString) { - Environment* env = CreateEnvironment(); - Isolate* isolate = env->isolate(); - HandleScope handle_scope(isolate); - - const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3}; - Local ab = ArrayBuffer::New(isolate, sizeof(latin1_data)); - memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data)); - - Local array = Uint8Array::New(ab, 0, sizeof(latin1_data)); - Local args[] = {array}; - - Local result; - ASSERT_TRUE(RunDecodeLatin1(env, args, false, false, &result)); - - ASSERT_TRUE(result->IsString()); - - String::Utf8Value utf8_result(isolate, result); - EXPECT_STREQ(*utf8_result, "Áéó"); -} - -} // namespace encoding_binding -} // namespace node From a76afd801b5d4dc0177d0b44addf84bb0d0a30db Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Sat, 29 Nov 2025 11:31:19 +0400 Subject: [PATCH 3/3] fix typings --- typings/internalBinding/encoding_binding.d.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/typings/internalBinding/encoding_binding.d.ts b/typings/internalBinding/encoding_binding.d.ts index 6e1d48efd81529..0774a21f25e21f 100644 --- a/typings/internalBinding/encoding_binding.d.ts +++ b/typings/internalBinding/encoding_binding.d.ts @@ -4,5 +4,4 @@ export interface EncodingBinding { decodeUTF8(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; toASCII(input: string): string; toUnicode(input: string): string; - decodeLatin1(buffer: ArrayBufferView | ArrayBuffer | SharedArrayBuffer, ignoreBOM?: boolean, hasFatal?: boolean): string; }