Skip to content

Commit

Permalink
buffer: optimize from() and byteLength()
Browse files Browse the repository at this point in the history
PR-URL: #12361
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
  • Loading branch information
mscdex authored and addaleax committed Apr 14, 2017
1 parent 46f2026 commit 4a86803
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 86 deletions.
34 changes: 20 additions & 14 deletions benchmark/buffers/buffer-bytelength.js
Expand Up @@ -2,7 +2,7 @@
var common = require('../common');

var bench = common.createBenchmark(main, {
encoding: ['utf8', 'base64'],
encoding: ['utf8', 'base64', 'buffer'],
len: [1, 2, 4, 16, 64, 256], // x16
n: [5e6]
});
Expand All @@ -21,21 +21,27 @@ function main(conf) {
var encoding = conf.encoding;

var strings = [];
for (var string of chars) {
// Strings must be built differently, depending on encoding
var data = buildString(string, len);
if (encoding === 'utf8') {
strings.push(data);
} else if (encoding === 'base64') {
// Base64 strings will be much longer than their UTF8 counterparts
strings.push(Buffer.from(data, 'utf8').toString('base64'));
var results;
if (encoding === 'buffer') {
strings = [ Buffer.alloc(len * 16, 'a') ];
results = [ len * 16 ];
} else {
for (var string of chars) {
// Strings must be built differently, depending on encoding
var data = buildString(string, len);
if (encoding === 'utf8') {
strings.push(data);
} else if (encoding === 'base64') {
// Base64 strings will be much longer than their UTF8 counterparts
strings.push(Buffer.from(data, 'utf8').toString('base64'));
}
}
}

// Check the result to ensure it is *properly* optimized
var results = strings.map(function(val) {
return Buffer.byteLength(val, encoding);
});
// Check the result to ensure it is *properly* optimized
results = strings.map(function(val) {
return Buffer.byteLength(val, encoding);
});
}

bench.start();
for (var i = 0; i < n; i++) {
Expand Down
10 changes: 9 additions & 1 deletion benchmark/buffers/buffer-from.js
Expand Up @@ -10,11 +10,12 @@ const bench = common.createBenchmark(main, {
'buffer',
'uint8array',
'string',
'string-utf8',
'string-base64',
'object'
],
len: [10, 2048],
n: [1024]
n: [2048]
});

function main(conf) {
Expand Down Expand Up @@ -75,6 +76,13 @@ function main(conf) {
}
bench.end(n);
break;
case 'string-utf8':
bench.start();
for (i = 0; i < n * 1024; i++) {
Buffer.from(str, 'utf8');
}
bench.end(n);
break;
case 'string-base64':
bench.start();
for (i = 0; i < n * 1024; i++) {
Expand Down
156 changes: 90 additions & 66 deletions lib/buffer.js
Expand Up @@ -23,8 +23,7 @@

const binding = process.binding('buffer');
const { compare: compare_, compareOffset } = binding;
const { isArrayBuffer, isSharedArrayBuffer, isUint8Array } =
process.binding('util');
const { isAnyArrayBuffer, isUint8Array } = process.binding('util');
const bindingObj = {};
const internalUtil = require('internal/util');

Expand Down Expand Up @@ -116,16 +115,19 @@ function Buffer(arg, encodingOrOffset, length) {
* Buffer.from(arrayBuffer[, byteOffset[, length]])
**/
Buffer.from = function(value, encodingOrOffset, length) {
if (typeof value === 'number')
throw new TypeError('"value" argument must not be a number');
if (typeof value === 'string')
return fromString(value, encodingOrOffset);

if (isArrayBuffer(value) || isSharedArrayBuffer(value))
if (isAnyArrayBuffer(value))
return fromArrayBuffer(value, encodingOrOffset, length);

if (typeof value === 'string')
return fromString(value, encodingOrOffset);
var b = fromObject(value);
if (b)
return b;

return fromObject(value);
if (typeof value === 'number')
throw new TypeError('"value" argument must not be a number');
throw new TypeError(kFromErrorMsg);
};

Object.setPrototypeOf(Buffer, Uint8Array);
Expand Down Expand Up @@ -218,24 +220,27 @@ function allocate(size) {


function fromString(string, encoding) {
if (typeof encoding !== 'string' || encoding === '')
var length;
if (typeof encoding !== 'string' || encoding.length === 0) {
encoding = 'utf8';

if (!Buffer.isEncoding(encoding))
throw new TypeError('"encoding" must be a valid string encoding');

if (string.length === 0)
return new FastBuffer();

var length = byteLength(string, encoding);
if (string.length === 0)
return new FastBuffer();
length = binding.byteLengthUtf8(string);
} else {
length = byteLength(string, encoding, true);
if (length === -1)
throw new TypeError('"encoding" must be a valid string encoding');
if (string.length === 0)
return new FastBuffer();
}

if (length >= (Buffer.poolSize >>> 1))
return binding.createFromString(string, encoding);

if (length > (poolSize - poolOffset))
createPool();
var b = new FastBuffer(allocPool, poolOffset, length);
var actual = b.write(string, encoding);
const actual = b.write(string, encoding);
if (actual !== length) {
// byteLength() may overestimate. That's a rare case, though.
b = new FastBuffer(allocPool, poolOffset, actual);
Expand All @@ -255,8 +260,14 @@ function fromArrayLike(obj) {

function fromArrayBuffer(obj, byteOffset, length) {
// convert byteOffset to integer
byteOffset = +byteOffset;
byteOffset = byteOffset ? Math.trunc(byteOffset) : 0;
if (byteOffset === undefined) {
byteOffset = 0;
} else {
byteOffset = +byteOffset;
// check for NaN
if (byteOffset !== byteOffset)
byteOffset = 0;
}

const maxLength = obj.byteLength - byteOffset;

Expand All @@ -268,11 +279,17 @@ function fromArrayBuffer(obj, byteOffset, length) {
} else {
// convert length to non-negative integer
length = +length;
length = length ? Math.trunc(length) : 0;
length = length <= 0 ? 0 : Math.min(length, Number.MAX_SAFE_INTEGER);

if (length > maxLength)
throw new RangeError("'length' is out of bounds");
// Check for NaN
if (length !== length) {
length = 0;
} else if (length > 0) {
length = (length < Number.MAX_SAFE_INTEGER ?
length : Number.MAX_SAFE_INTEGER);
if (length > maxLength)
throw new RangeError("'length' is out of bounds");
} else {
length = 0;
}
}

return new FastBuffer(obj, byteOffset, length);
Expand All @@ -289,9 +306,8 @@ function fromObject(obj) {
return b;
}

if (obj) {
if (obj.length !== undefined || isArrayBuffer(obj.buffer) ||
isSharedArrayBuffer(obj.buffer)) {
if (obj != undefined) {
if (obj.length !== undefined || isAnyArrayBuffer(obj.buffer)) {
if (typeof obj.length !== 'number' || obj.length !== obj.length) {
return new FastBuffer();
}
Expand All @@ -302,8 +318,6 @@ function fromObject(obj) {
return fromArrayLike(obj.data);
}
}

throw new TypeError(kFromErrorMsg);
}


Expand Down Expand Up @@ -388,53 +402,63 @@ function base64ByteLength(str, bytes) {

function byteLength(string, encoding) {
if (typeof string !== 'string') {
if (ArrayBuffer.isView(string) || isArrayBuffer(string) ||
isSharedArrayBuffer(string)) {
if (ArrayBuffer.isView(string) || isAnyArrayBuffer(string)) {
return string.byteLength;
}

throw new TypeError('"string" must be a string, Buffer, or ArrayBuffer');
}

var len = string.length;
if (len === 0)
const len = string.length;
const mustMatch = (arguments.length > 2 && arguments[2] === true);
if (!mustMatch && len === 0)
return 0;

// Use a for loop to avoid recursion
var loweredCase = false;
for (;;) {
switch (encoding) {
case 'ascii':
case 'latin1':
case 'binary':
return len;

case 'utf8':
case 'utf-8':
case undefined:
return binding.byteLengthUtf8(string);

case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
if (!encoding)
return (mustMatch ? -1 : binding.byteLengthUtf8(string));

encoding += '';
switch (encoding.length) {
case 4:
if (encoding === 'utf8') return binding.byteLengthUtf8(string);
if (encoding === 'ucs2') return len * 2;
encoding = encoding.toLowerCase();
if (encoding === 'utf8') return binding.byteLengthUtf8(string);
if (encoding === 'ucs2') return len * 2;
break;
case 5:
if (encoding === 'utf-8') return binding.byteLengthUtf8(string);
if (encoding === 'ascii') return len;
if (encoding === 'ucs-2') return len * 2;
encoding = encoding.toLowerCase();
if (encoding === 'utf-8') return binding.byteLengthUtf8(string);
if (encoding === 'ascii') return len;
if (encoding === 'ucs-2') return len * 2;
break;
case 7:
if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
return len * 2;

case 'hex':
break;
case 8:
if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
return len * 2;
break;
case 6:
if (encoding === 'latin1' || encoding === 'binary') return len;
if (encoding === 'base64') return base64ByteLength(string, len);
encoding = encoding.toLowerCase();
if (encoding === 'latin1' || encoding === 'binary') return len;
if (encoding === 'base64') return base64ByteLength(string, len);
break;
case 3:
if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
return len >>> 1;

case 'base64':
return base64ByteLength(string, len);

default:
// The C++ binding defaulted to UTF8, we should too.
if (loweredCase)
return binding.byteLengthUtf8(string);

encoding = ('' + encoding).toLowerCase();
loweredCase = true;
}
break;
}
if (mustMatch)
throw new TypeError('Unknown encoding: ' + encoding);
else
return binding.byteLengthUtf8(string);
}

Buffer.byteLength = byteLength;
Expand Down
5 changes: 2 additions & 3 deletions lib/util.js
Expand Up @@ -452,7 +452,7 @@ function formatValue(ctx, value, recurseTimes) {
// Fast path for ArrayBuffer and SharedArrayBuffer.
// Can't do the same for DataView because it has a non-primitive
// .buffer property that we need to recurse for.
if (binding.isArrayBuffer(value) || binding.isSharedArrayBuffer(value)) {
if (binding.isAnyArrayBuffer(value)) {
return `${constructor.name}` +
` { byteLength: ${formatNumber(ctx, value.byteLength)} }`;
}
Expand Down Expand Up @@ -494,8 +494,7 @@ function formatValue(ctx, value, recurseTimes) {
keys.unshift('size');
empty = value.size === 0;
formatter = formatMap;
} else if (binding.isArrayBuffer(value) ||
binding.isSharedArrayBuffer(value)) {
} else if (binding.isAnyArrayBuffer(value)) {
braces = ['{', '}'];
keys.unshift('byteLength');
visibleKeys.byteLength = true;
Expand Down
10 changes: 8 additions & 2 deletions src/node_util.cc
Expand Up @@ -20,7 +20,6 @@ using v8::Value;


#define VALUE_METHOD_MAP(V) \
V(isArrayBuffer, IsArrayBuffer) \
V(isDataView, IsDataView) \
V(isDate, IsDate) \
V(isExternal, IsExternal) \
Expand All @@ -30,7 +29,6 @@ using v8::Value;
V(isRegExp, IsRegExp) \
V(isSet, IsSet) \
V(isSetIterator, IsSetIterator) \
V(isSharedArrayBuffer, IsSharedArrayBuffer) \
V(isTypedArray, IsTypedArray) \
V(isUint8Array, IsUint8Array)

Expand All @@ -44,6 +42,12 @@ using v8::Value;
VALUE_METHOD_MAP(V)
#undef V

static void IsAnyArrayBuffer(const FunctionCallbackInfo<Value>& args) {
CHECK_EQ(1, args.Length());
args.GetReturnValue().Set(
args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer());
}

static void GetPromiseDetails(const FunctionCallbackInfo<Value>& args) {
// Return undefined if it's not a Promise.
if (!args[0]->IsPromise())
Expand Down Expand Up @@ -151,6 +155,8 @@ void Initialize(Local<Object> target,
VALUE_METHOD_MAP(V)
#undef V

env->SetMethod(target, "isAnyArrayBuffer", IsAnyArrayBuffer);

#define V(name, _) \
target->Set(context, \
FIXED_ONE_BYTE_STRING(env->isolate(), #name), \
Expand Down

0 comments on commit 4a86803

Please sign in to comment.