Skip to content

Commit

Permalink
buffer: add encoding parameter to fill()
Browse files Browse the repository at this point in the history
Can now call fill() using following parameters if value is a String:

    fill(string[, start[, end]][, encoding])

And with the following if value is a Buffer:

    fill(buffer[, start[, end]])

The encoding is ignored if value is not a String. All other non-Buffer
values are coerced to a uint32.

Multibyte strings will simply be copied into the Buffer until the
available bytes run out, meaning partial characters can be left behind:

    Buffer(3).fill('\u0222');
    // returns: <Buffer c8 a2 c8>

In some encoding cases, such as 'hex', fill() will throw if the input
string is not valid.

PR-URL: #4935
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
trevnorris authored and rvagg committed Feb 15, 2016
1 parent b1a16d1 commit eb6d073
Show file tree
Hide file tree
Showing 6 changed files with 411 additions and 32 deletions.
27 changes: 21 additions & 6 deletions doc/api/buffer.markdown
Expand Up @@ -471,23 +471,38 @@ console.log(buf1.equals(buf3));
// Prints: false
```

### buf.fill(value[, offset[, end]])
### buf.fill(value[, offset[, end]][, encoding])

* `value` {String|Number}
* `value` {String|Buffer|Number}
* `offset` {Number} Default: 0
* `end` {Number} Default: `buffer.length`
* `end` {Number} Default: `buf.length`
* `encoding` {String} Default: `'utf8'`
* Return: {Buffer}

Fills the Buffer with the specified value. If the `offset` and `end` are not
given it will fill the entire Buffer. The method returns a reference to the
Buffer so calls can be chained.
Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
and `end` (defaults to `buf.length`) are not given the entire buffer will be
filled. The method returns a reference to the Buffer, so calls can be chained.
This is meant as a small simplification to creating a Buffer, allowing the
creation and fill of the Buffer to be done on a single line:

```js
const b = new Buffer(50).fill('h');
console.log(b.toString());
// Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
```

`encoding` is only relevant if `value` is a string. Otherwise it is ignored.
`value` is coerced to a `uint32` value if it is not a String or Number.

The `fill()` operation writes bytes into the Buffer without regard for
character boundaries. If the final write falls in the middle of a multi-byte
character then only the bytes that fit into the buffer are written.

```js
Buffer(3).fill('\u0222');
// Prints: <Buffer c8 a2 c8>
```

### buf.indexOf(value[, byteOffset][, encoding])

* `value` {String|Buffer|Number}
Expand Down
46 changes: 35 additions & 11 deletions lib/buffer.js
Expand Up @@ -494,24 +494,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
};


Buffer.prototype.fill = function fill(val, start, end) {
start = start >> 0;
end = (end === undefined) ? this.length : end >> 0;
// Usage:
// buffer.fill(number[, offset[, end]])
// buffer.fill(buffer[, offset[, end]])
// buffer.fill(string[, offset[, end]][, encoding])
Buffer.prototype.fill = function fill(val, start, end, encoding) {
// Handle string cases:
if (typeof val === 'string') {
if (typeof start === 'string') {
encoding = start;
start = 0;
end = this.length;
} else if (typeof end === 'string') {
encoding = end;
end = this.length;
}
if (val.length === 1) {
var code = val.charCodeAt(0);
if (code < 256)
val = code;
}
if (encoding !== undefined && typeof encoding !== 'string') {
throw new TypeError('encoding must be a string');
}
if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
throw new TypeError('Unknown encoding: ' + encoding);
}

} else if (typeof val === 'number') {
val = val & 255;
}

// Invalid ranges are not set to a default, so can range check early.
if (start < 0 || end > this.length)
throw new RangeError('out of range index');

if (end <= start)
return this;

if (typeof val !== 'string') {
val = val >>> 0;
} else if (val.length === 1) {
var code = val.charCodeAt(0);
if (code < 256)
val = code;
}
start = start >>> 0;
end = end === undefined ? this.length : end >>> 0;

binding.fill(this, val, start, end);
binding.fill(this, val, start, end, encoding);

return this;
};
Expand Down
79 changes: 64 additions & 15 deletions src/node_buffer.cc
Expand Up @@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo<Value> &args) {


void Fill(const FunctionCallbackInfo<Value>& args) {
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
Environment* env = Environment::GetCurrent(args);

THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj);

size_t start = args[2]->Uint32Value();
size_t end = args[3]->Uint32Value();
size_t length = end - start;
CHECK(length + start <= ts_obj_length);
size_t fill_length = end - start;
Local<String> str_obj;
size_t str_length;
enum encoding enc;
CHECK(fill_length + start <= ts_obj_length);

// First check if Buffer has been passed.
if (Buffer::HasInstance(args[1])) {
SPREAD_ARG(args[1], fill_obj);
str_length = fill_obj_length;
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
goto start_fill;
}

if (args[1]->IsNumber()) {
// Then coerce everything that's not a string.
if (!args[1]->IsString()) {
int value = args[1]->Uint32Value() & 255;
memset(ts_obj_data + start, value, length);
memset(ts_obj_data + start, value, fill_length);
return;
}

node::Utf8Value str(args.GetIsolate(), args[1]);
size_t str_length = str.length();
size_t in_there = str_length;
char* ptr = ts_obj_data + start + str_length;
str_obj = args[1]->ToString(env->isolate());
enc = ParseEncoding(env->isolate(), args[4], UTF8);
str_length =
enc == UTF8 ? str_obj->Utf8Length() :
enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();

if (enc == HEX && str_length % 2 != 0)
return env->ThrowTypeError("Invalid hex string");

if (str_length == 0)
return;

memcpy(ts_obj_data + start, *str, MIN(str_length, length));
// Can't use StringBytes::Write() in all cases. For example if attempting
// to write a two byte character into a one byte Buffer.
if (enc == UTF8) {
node::Utf8Value str(env->isolate(), args[1]);
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

if (str_length >= length)
} else if (enc == UCS2) {
node::TwoByteValue str(env->isolate(), args[1]);
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

} else {
// Write initial String to Buffer, then use that memory to copy remainder
// of string. Correct the string length for cases like HEX where less than
// the total string length is written.
str_length = StringBytes::Write(env->isolate(),
ts_obj_data + start,
fill_length,
str_obj,
enc,
nullptr);
// This check is also needed in case Write() returns that no bytes could
// be written.
// TODO(trevnorris): Should this throw? Because if the string length was
// greater than 0 but couldn't be written then the string was invalid.
if (str_length == 0)
return;
}

start_fill:

if (str_length >= fill_length)
return;

while (in_there < length - in_there) {

size_t in_there = str_length;
char* ptr = ts_obj_data + start + str_length;

while (in_there < fill_length - in_there) {
memcpy(ptr, ts_obj_data + start, in_there);
ptr += in_there;
in_there *= 2;
}

if (in_there < length) {
memcpy(ptr, ts_obj_data + start, length - in_there);
in_there = length;
if (in_there < fill_length) {
memcpy(ptr, ts_obj_data + start, fill_length - in_there);
}
}

Expand Down
23 changes: 23 additions & 0 deletions src/util.cc
Expand Up @@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
str_[length_] = '\0';
}


// Converts |value| to a string and stores its UTF-16 code units, followed by
// a terminating zero code unit, in a buffer owned by this object.
//
// If |value| is empty, or its string conversion yields an empty handle, the
// object is left with length() == 0 and nothing is written to the buffer.
//
// The inline array str_st_ is used when the string plus terminator fits in
// it; otherwise a heap buffer is obtained with malloc() and the process
// aborts (CHECK_NE) if the allocation fails.
TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
    : length_(0), str_(str_st_) {
  if (value.IsEmpty())
    return;

  v8::Local<v8::String> string = value->ToString(isolate);
  if (string.IsEmpty())
    return;

  // Number of UTF-16 code units in the string (not bytes).
  const int char_count = string->Length();

  // Size the buffer in bytes and reserve a *whole* uint16_t for the
  // terminator. Reserving a single extra byte is not enough: the terminator
  // written below is two bytes wide, so its second byte would land past the
  // end of a heap allocation.
  const size_t len =
      (static_cast<size_t>(char_count) + 1) * sizeof(*str_);
  if (len > sizeof(str_st_)) {
    str_ = static_cast<uint16_t*>(malloc(len));
    CHECK_NE(str_, nullptr);
  }

  // String::Write() takes its length argument in code units, so pass the
  // character count rather than the byte size. Termination is done by hand
  // below, hence NO_NULL_TERMINATION.
  const int flags =
      v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
  length_ = string->Write(str_, 0, char_count, flags);
  str_[length_] = '\0';
}

} // namespace node
27 changes: 27 additions & 0 deletions src/util.h
Expand Up @@ -205,6 +205,33 @@ class Utf8Value {
char str_st_[1024];
};

// Holds a two-byte (uint16_t) string representation of a V8 value.
//
// str_ points either at the inline array str_st_ or at a separately
// allocated buffer; in the latter case the destructor releases it with
// free(). Because the object owns that pointer (and may point into its own
// inline storage), copying is disabled: a member-wise copy would either free
// the same buffer twice or leave the copy pointing into another object's
// str_st_.
class TwoByteValue {
 public:
  // Builds the two-byte representation of |value| (defined out of line).
  explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);

  ~TwoByteValue() {
    // Only release storage that is not the inline array.
    if (str_ != str_st_)
      free(str_);
  }

  TwoByteValue(const TwoByteValue&) = delete;
  TwoByteValue& operator=(const TwoByteValue&) = delete;

  // Pointer to the first code unit of the stored string.
  uint16_t* operator*() {
    return str_;
  }

  const uint16_t* operator*() const {
    return str_;
  }

  // Value of length_ as set by the constructor.
  size_t length() const {
    return length_;
  }

 private:
  size_t length_;
  uint16_t* str_;
  // Inline storage used when no separate allocation is needed.
  uint16_t str_st_[1024];
};

} // namespace node

#endif // SRC_UTIL_H_

0 comments on commit eb6d073

Please sign in to comment.