Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: unify implementations of Utf8Value, TwoByteValue, etc. #6357

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 15 additions & 29 deletions src/string_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,14 @@
#include "node.h"
#include "env.h"
#include "env-inl.h"
#include "util.h"

namespace node {

class StringBytes {
public:
class InlineDecoder {
class InlineDecoder : public MaybeStackBuffer<char> {
public:
InlineDecoder() : out_(nullptr) {
}

~InlineDecoder() {
if (out_ != out_st_)
delete[] out_;
out_ = nullptr;
}

inline bool Decode(Environment* env,
v8::Local<v8::String> string,
v8::Local<v8::Value> encoding,
Expand All @@ -33,28 +25,22 @@ class StringBytes {
return false;
}

size_t buflen = StringBytes::StorageSize(env->isolate(), string, enc);
if (buflen > sizeof(out_st_))
out_ = new char[buflen];
else
out_ = out_st_;
size_ = StringBytes::Write(env->isolate(),
out_,
buflen,
string,
enc);
const size_t storage = StringBytes::StorageSize(env->isolate(),
string,
enc);
AllocateSufficientStorage(storage);
const size_t length = StringBytes::Write(env->isolate(),
out(),
storage,
string,
enc);

// No zero terminator is included when using this method.
SetLength(length);
return true;
}

inline const char* out() const { return out_; }
inline size_t size() const { return size_; }

private:
static const int kStorageSize = 1024;

char out_st_[kStorageSize];
char* out_;
size_t size_;
inline size_t size() const { return length(); }
};

// Does the string match the encoding? Quick but non-exhaustive.
Expand Down
75 changes: 34 additions & 41 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,76 +10,69 @@ using v8::Local;
using v8::String;
using v8::Value;

static int MakeUtf8String(Isolate* isolate,
Local<Value> value,
char** dst,
const size_t size) {
template <typename T>
static void MakeUtf8String(Isolate* isolate,
Local<Value> value,
T* target) {
Local<String> string = value->ToString(isolate);
if (string.IsEmpty())
return 0;
size_t len = StringBytes::StorageSize(isolate, string, UTF8) + 1;
if (len > size) {
*dst = static_cast<char*>(malloc(len));
CHECK_NE(*dst, nullptr);
}
return;

const size_t storage = StringBytes::StorageSize(isolate, string, UTF8) + 1;
target->AllocateSufficientStorage(storage);
const int flags =
String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8;
const int length = string->WriteUtf8(*dst, len, 0, flags);
(*dst)[length] = '\0';
return length;
const int length = string->WriteUtf8(target->out(), storage, 0, flags);
target->SetLengthAndZeroTerminate(length);
}

Utf8Value::Utf8Value(Isolate* isolate, Local<Value> value)
: length_(0), str_(str_st_) {
Utf8Value::Utf8Value(Isolate* isolate, Local<Value> value) {
if (value.IsEmpty())
return;
length_ = MakeUtf8String(isolate, value, &str_, sizeof(str_st_));

MakeUtf8String(isolate, value, this);
}


TwoByteValue::TwoByteValue(Isolate* isolate, Local<Value> value)
: length_(0), str_(str_st_) {
if (value.IsEmpty())
TwoByteValue::TwoByteValue(Isolate* isolate, Local<Value> value) {
if (value.IsEmpty()) {
return;
}

Local<String> string = value->ToString(isolate);
if (string.IsEmpty())
return;

// Allocate enough space to include the null terminator
size_t len =
StringBytes::StorageSize(isolate, string, UCS2) +
sizeof(uint16_t);
if (len > sizeof(str_st_)) {
str_ = static_cast<uint16_t*>(malloc(len));
CHECK_NE(str_, nullptr);
}
const size_t storage = string->Length() + 1;
AllocateSufficientStorage(storage);

const int flags =
String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8;
length_ = string->Write(str_, 0, len, flags);
str_[length_] = '\0';
const int length = string->Write(out(), 0, storage, flags);
SetLengthAndZeroTerminate(length);
}

BufferValue::BufferValue(Isolate* isolate, Local<Value> value)
: str_(str_st_), fail_(true) {
BufferValue::BufferValue(Isolate* isolate, Local<Value> value) {
// Slightly different take on Utf8Value. If value is a String,
// it will return a Utf8 encoded string. If value is a Buffer,
// it will copy the data out of the Buffer as is.
if (value.IsEmpty())
if (value.IsEmpty()) {
// Dereferencing this object will return nullptr.
Invalidate();
return;
}

if (value->IsString()) {
MakeUtf8String(isolate, value, &str_, sizeof(str_st_));
fail_ = false;
MakeUtf8String(isolate, value, this);
} else if (Buffer::HasInstance(value)) {
size_t len = Buffer::Length(value) + 1;
if (len > sizeof(str_st_)) {
str_ = static_cast<char*>(malloc(len));
CHECK_NE(str_, nullptr);
}
memcpy(str_, Buffer::Data(value), len);
str_[len - 1] = '\0';
fail_ = false;
const size_t len = Buffer::Length(value);
// Leave place for the terminating '\0' byte.
AllocateSufficientStorage(len + 1);
memcpy(out(), Buffer::Data(value), len);
SetLengthAndZeroTerminate(len);
} else {
Invalidate();
}
}

Expand Down
123 changes: 74 additions & 49 deletions src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,77 +178,102 @@ inline TypeName* Unwrap(v8::Local<v8::Object> object);

inline void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen);

class Utf8Value {
// Allocates an array of member type T. For up to kStackStorageSize items,
// the stack is used, otherwise malloc().
template <typename T, size_t kStackStorageSize = 1024>
class MaybeStackBuffer {
public:
explicit Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value);
const T* out() const {
return buf_;
}

~Utf8Value() {
if (str_ != str_st_)
free(str_);
T* out() {
return buf_;
}

char* operator*() {
return str_;
};
// operator* for compatibility with `v8::String::(Utf8)Value`
T* operator*() {
return buf_;
}

const char* operator*() const {
return str_;
};
const T* operator*() const {
return buf_;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I suggest you call this method e.g. buffer() and stay away from operator overloading?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bnoordhuis 1000 % yes, but this is basically just the operator* from the original classes, as it is used in ~100 places in the rest of the source code. If you’re okay with that, I’m happy to rename it (but maybe would prefer .data() as the name?).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I guess that would make it a much more intrusive change. I mentioned it because whenever I see code with **this in it, I get itchy.

(but maybe would prefer .data() as the name?)

.buffer() hints more at mutability, IMO, but maybe that's just me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about just “standardizing” the out() method that StringBytes::InlineDecoder already has, and using that for the internal stuff too (instead of **this)?

I have nothing against .buffer(), and on second thought that sounds like a good name, but I’d like to keep this changeset small and pointing in the direction of a more consistent API.


size_t length() const {
return length_;
};

private:
size_t length_;
char* str_;
char str_st_[1024];
};
}

class TwoByteValue {
public:
explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
// Call to make sure enough space for `storage` entries is available.
// There can only be 1 call to AllocateSufficientStorage or Invalidate
// per instance.
void AllocateSufficientStorage(size_t storage) {
if (storage <= kStackStorageSize) {
buf_ = buf_st_;
} else {
// Guard against overflow.
CHECK_LE(storage, sizeof(T) * storage);

buf_ = static_cast<T*>(malloc(sizeof(T) * storage));
CHECK_NE(buf_, nullptr);
}

// Remember how much was allocated to check against that in SetLength().
length_ = storage;
}

~TwoByteValue() {
if (str_ != str_st_)
free(str_);
void SetLength(size_t length) {
// length_ stores how much memory was allocated.
CHECK_LE(length, length_);
length_ = length;
}

uint16_t* operator*() {
return str_;
};
void SetLengthAndZeroTerminate(size_t length) {
// length_ stores how much memory was allocated.
CHECK_LE(length + 1, length_);
SetLength(length);

const uint16_t* operator*() const {
return str_;
};
// T() is 0 for integer types, nullptr for pointers, etc.
buf_[length] = T();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this write past the end of the buffer in most cases? The SetLengthAndZeroTerminate(len) call sites all call EnsureSufficientStorage(len) first.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They call EnsureSufficientStorage() with a value that’s at least len+1. But if both you and @trevnorris stumble over that, I’ll take that as more than a clear sign to do some variable renaming in the callers (having both len and length with different meanings is probably bad anyway, so this is a good chance to clean that up).

}

size_t length() const {
return length_;
};
// Make derefencing this object return nullptr.
// Calling this is mutually exclusive with calling
// AllocateSufficientStorage.
void Invalidate() {
CHECK_EQ(buf_, buf_st_);
length_ = 0;
buf_ = nullptr;
}

MaybeStackBuffer() : length_(0), buf_(buf_st_) {
// Default to a zero-length, null-terminated buffer.
buf_[0] = T();
}

~MaybeStackBuffer() {
if (buf_ != buf_st_)
free(buf_);
}
private:
size_t length_;
uint16_t* str_;
uint16_t str_st_[1024];
T* buf_;
T buf_st_[kStackStorageSize];
};

class BufferValue {
class Utf8Value : public MaybeStackBuffer<char> {
public:
explicit BufferValue(v8::Isolate* isolate, v8::Local<v8::Value> value);

~BufferValue() {
if (str_ != str_st_)
free(str_);
}
explicit Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value);
};

const char* operator*() const {
return fail_ ? nullptr : str_;
};
class TwoByteValue : public MaybeStackBuffer<uint16_t> {
public:
explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
};

private:
char* str_;
char str_st_[1024];
bool fail_;
class BufferValue : public MaybeStackBuffer<char> {
public:
explicit BufferValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
};

} // namespace node
Expand Down