Permalink
Browse files

src: allow UTF-16 in generic StringBytes decode call

This allows removing a number of special cases in other
parts of the code, at least one of which was incorrect
in expecting aligned input when that was not guaranteed.

Fixes: #22358

PR-URL: #22622
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
  • Loading branch information...
addaleax committed Aug 31, 2018
1 parent 6455bea commit 71f633a32f1f5617c1c7b60ad936342a579bff74
Showing with 40 additions and 96 deletions.
  1. +0 −1 src/node.h
  2. +0 −59 src/node_buffer.cc
  3. +32 −25 src/string_bytes.cc
  4. +0 −1 src/string_bytes.h
  5. +0 −10 src/string_decoder.cc
  6. +8 −0 test/parallel/test-string-decoder.js
View
@@ -410,7 +410,6 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)",
return FatalException(v8::Isolate::GetCurrent(), try_catch);
})
// Don't call with encoding=UCS2.
NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t len,
View
@@ -467,65 +467,6 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
}
template <>
void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
Environment* env = Environment::GetCurrent(isolate);
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
SPREAD_BUFFER_ARG(args.This(), ts_obj);
if (ts_obj_length == 0)
return args.GetReturnValue().SetEmptyString();
SLICE_START_END(args[0], args[1], ts_obj_length)
length /= 2;
const char* data = ts_obj_data + start;
const uint16_t* buf;
bool release = false;
// Node's "ucs2" encoding expects LE character data inside a Buffer, so we
// need to reorder on BE platforms. See https://nodejs.org/api/buffer.html
// regarding Node's "ucs2" encoding specification.
const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0);
if (IsLittleEndian() && !aligned) {
// Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
// This applies ONLY to little endian platforms, as misalignment will be
// handled by a byte-swapping operation in StringBytes::Encode on
// big endian platforms.
uint16_t* copy = new uint16_t[length];
for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
// Assumes that the input is little endian.
const uint8_t lo = static_cast<uint8_t>(data[k + 0]);
const uint8_t hi = static_cast<uint8_t>(data[k + 1]);
copy[i] = lo | hi << 8;
}
buf = copy;
release = true;
} else {
buf = reinterpret_cast<const uint16_t*>(data);
}
Local<Value> error;
MaybeLocal<Value> ret =
StringBytes::Encode(isolate,
buf,
length,
&error);
if (release)
delete[] buf;
if (ret.IsEmpty()) {
CHECK(!error.IsEmpty());
isolate->ThrowException(error);
return;
}
args.GetReturnValue().Set(ret.ToLocalChecked());
}
// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
void Copy(const FunctionCallbackInfo<Value> &args) {
Environment* env = Environment::GetCurrent(args);
View
@@ -625,7 +625,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
size_t buflen,
enum encoding encoding,
Local<Value>* error) {
CHECK_NE(encoding, UCS2);
CHECK_BUFLEN_IN_RANGE(buflen);
if (!buflen && encoding != BUFFER) {
@@ -703,6 +702,37 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
return ExternOneByteString::New(isolate, dst, dlen, error);
}
case UCS2: {
if (IsBigEndian()) {
uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}
for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) {
// The input is in *little endian*, because that's what Node.js
// expects, so the high byte comes after the low byte.
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
}
return ExternTwoByteString::New(isolate, dst, buflen / 2, error);
}
if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
// Unaligned data still means we can't directly pass it to V8.
char* dst = node::UncheckedMalloc(buflen);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}
memcpy(dst, buf, buflen);
return ExternTwoByteString::New(
isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error);
}
return ExternTwoByteString::NewFromCopy(
isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error);
}
default:
CHECK(0 && "unknown encoding");
break;
@@ -742,30 +772,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
enum encoding encoding,
Local<Value>* error) {
const size_t len = strlen(buf);
MaybeLocal<Value> ret;
if (encoding == UCS2) {
// In Node, UCS2 means utf16le. The data must be in little-endian
// order and must be aligned on 2-bytes. This returns an empty
// value if it's not aligned and ensures the appropriate byte order
// on big endian architectures.
const bool be = IsBigEndian();
if (len % 2 != 0)
return ret;
std::vector<uint16_t> vec(len / 2);
for (size_t i = 0, k = 0; i < len; i += 2, k += 1) {
const uint8_t hi = static_cast<uint8_t>(buf[i + 0]);
const uint8_t lo = static_cast<uint8_t>(buf[i + 1]);
vec[k] = be ?
static_cast<uint16_t>(hi) << 8 | lo
: static_cast<uint16_t>(lo) << 8 | hi;
}
ret = vec.empty() ?
static_cast< Local<Value> >(String::Empty(isolate))
: StringBytes::Encode(isolate, &vec[0], vec.size(), error);
} else {
ret = StringBytes::Encode(isolate, buf, len, encoding, error);
}
return ret;
return Encode(isolate, buf, len, encoding, error);
}
} // namespace node
View
@@ -90,7 +90,6 @@ class StringBytes {
int* chars_written = nullptr);
// Take the bytes in the src, and turn it into a Buffer or String.
// Don't call with encoding=UCS2.
static v8::MaybeLocal<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t buflen,
View
@@ -30,16 +30,6 @@ MaybeLocal<String> MakeString(Isolate* isolate,
data,
v8::NewStringType::kNormal,
length);
} else if (encoding == UCS2) {
#ifdef DEBUG
CHECK_EQ(reinterpret_cast<uintptr_t>(data) % 2, 0);
CHECK_EQ(length % 2, 0);
#endif
ret = StringBytes::Encode(
isolate,
reinterpret_cast<const uint16_t*>(data),
length / 2,
&error);
} else {
ret = StringBytes::Encode(
isolate,
@@ -143,6 +143,14 @@ decoder = new StringDecoder('utf16le');
assert.strictEqual(decoder.write(Buffer.from('3DD84D', 'hex')), '\ud83d');
assert.strictEqual(decoder.end(), '');
// Regression test for https://github.com/nodejs/node/issues/22358
// (unaligned UTF-16 access).
decoder = new StringDecoder('utf16le');
assert.strictEqual(decoder.write(Buffer.alloc(1)), '');
assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
assert.strictEqual(decoder.end(), '');
common.expectsError(
() => new StringDecoder(1),
{

0 comments on commit 71f633a

Please sign in to comment.