buffer: optimize from() and byteLength()

PR-URL: #12361
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
nodejs · Apr 14, 2017 · 4a86803 · 4a86803
1 parent 46f2026
commit 4a86803
Show file tree

Hide file tree

Showing 5 changed files with 129 additions and 86 deletions.
diff --git a/benchmark/buffers/buffer-bytelength.js b/benchmark/buffers/buffer-bytelength.js
@@ -2,7 +2,7 @@
 var common = require('../common');
 
 var bench = common.createBenchmark(main, {
-  encoding: ['utf8', 'base64'],
+  encoding: ['utf8', 'base64', 'buffer'],  // 'buffer' passes a Buffer input
   len: [1, 2, 4, 16, 64, 256], // x16
   n: [5e6]
 });
@@ -21,21 +21,27 @@ function main(conf) {
   var encoding = conf.encoding;
 
   var strings = [];
-  for (var string of chars) {
-    // Strings must be built differently, depending on encoding
-    var data = buildString(string, len);
-    if (encoding === 'utf8') {
-      strings.push(data);
-    } else if (encoding === 'base64') {
-      // Base64 strings will be much longer than their UTF8 counterparts
-      strings.push(Buffer.from(data, 'utf8').toString('base64'));
+  var results;
+  if (encoding === 'buffer') {
+    strings = [ Buffer.alloc(len * 16, 'a') ];
+    results = [ len * 16 ];
+  } else {
+    for (var string of chars) {
+      // Strings must be built differently, depending on encoding
+      var data = buildString(string, len);
+      if (encoding === 'utf8') {
+        strings.push(data);
+      } else if (encoding === 'base64') {
+        // Base64 strings will be much longer than their UTF8 counterparts
+        strings.push(Buffer.from(data, 'utf8').toString('base64'));
+      }
     }
-  }
 
-  // Check the result to ensure it is *properly* optimized
-  var results = strings.map(function(val) {
-    return Buffer.byteLength(val, encoding);
-  });
+    // Check the result to ensure it is *properly* optimized
+    results = strings.map(function(val) {
+      return Buffer.byteLength(val, encoding);
+    });
+  }
 
   bench.start();
   for (var i = 0; i < n; i++) {

diff --git a/benchmark/buffers/buffer-from.js b/benchmark/buffers/buffer-from.js
@@ -10,11 +10,12 @@ const bench = common.createBenchmark(main, {
     'buffer',
     'uint8array',
     'string',
+    'string-utf8',
     'string-base64',
     'object'
   ],
   len: [10, 2048],
-  n: [1024]
+  n: [2048]
 });
 
 function main(conf) {
@@ -75,6 +76,13 @@ function main(conf) {
       }
       bench.end(n);
       break;
+    case 'string-utf8':
+      bench.start();
+      for (i = 0; i < n * 1024; i++) {
+        Buffer.from(str, 'utf8');
+      }
+      bench.end(n);
+      break;
     case 'string-base64':
       bench.start();
       for (i = 0; i < n * 1024; i++) {

diff --git a/lib/buffer.js b/lib/buffer.js
@@ -23,8 +23,7 @@
 
 const binding = process.binding('buffer');
 const { compare: compare_, compareOffset } = binding;
-const { isArrayBuffer, isSharedArrayBuffer, isUint8Array } =
-    process.binding('util');
+const { isAnyArrayBuffer, isUint8Array } = process.binding('util');
 const bindingObj = {};
 const internalUtil = require('internal/util');
 
@@ -116,16 +115,19 @@ function Buffer(arg, encodingOrOffset, length) {
  * Buffer.from(arrayBuffer[, byteOffset[, length]])
  **/
 Buffer.from = function(value, encodingOrOffset, length) {
-  if (typeof value === 'number')
-    throw new TypeError('"value" argument must not be a number');
+  if (typeof value === 'string')  // string check now runs first
+    return fromString(value, encodingOrOffset);
 
-  if (isArrayBuffer(value) || isSharedArrayBuffer(value))
+  if (isAnyArrayBuffer(value))  // ArrayBuffer or SharedArrayBuffer
     return fromArrayBuffer(value, encodingOrOffset, length);
 
-  if (typeof value === 'string')
-    return fromString(value, encodingOrOffset);
+  var b = fromObject(value);  // falls through (no value) if unsupported
+  if (b)
+    return b;
 
-  return fromObject(value);
+  if (typeof value === 'number')  // number check moved after the other paths
+    throw new TypeError('"value" argument must not be a number');
+  throw new TypeError(kFromErrorMsg);
 };
 
 Object.setPrototypeOf(Buffer, Uint8Array);
@@ -218,24 +220,27 @@ function allocate(size) {
 
 
 function fromString(string, encoding) {
-  if (typeof encoding !== 'string' || encoding === '')
+  var length;
+  if (typeof encoding !== 'string' || encoding.length === 0) {
     encoding = 'utf8';
-
-  if (!Buffer.isEncoding(encoding))
-    throw new TypeError('"encoding" must be a valid string encoding');
-
-  if (string.length === 0)
-    return new FastBuffer();
-
-  var length = byteLength(string, encoding);
+    if (string.length === 0)
+      return new FastBuffer();
+    length = binding.byteLengthUtf8(string);
+  } else {
+    length = byteLength(string, encoding, true);
+    if (length === -1)
+      throw new TypeError('"encoding" must be a valid string encoding');
+    if (string.length === 0)
+      return new FastBuffer();
+  }
 
   if (length >= (Buffer.poolSize >>> 1))
     return binding.createFromString(string, encoding);
 
   if (length > (poolSize - poolOffset))
     createPool();
   var b = new FastBuffer(allocPool, poolOffset, length);
-  var actual = b.write(string, encoding);
+  const actual = b.write(string, encoding);
   if (actual !== length) {
     // byteLength() may overestimate. That's a rare case, though.
     b = new FastBuffer(allocPool, poolOffset, actual);
@@ -255,8 +260,14 @@ function fromArrayLike(obj) {
 
 function fromArrayBuffer(obj, byteOffset, length) {
   // convert byteOffset to integer
-  byteOffset = +byteOffset;
-  byteOffset = byteOffset ? Math.trunc(byteOffset) : 0;
+  if (byteOffset === undefined) {
+    byteOffset = 0;
+  } else {
+    byteOffset = +byteOffset;
+    // check for NaN
+    if (byteOffset !== byteOffset)
+      byteOffset = 0;
+  }
 
   const maxLength = obj.byteLength - byteOffset;
 
@@ -268,11 +279,17 @@ function fromArrayBuffer(obj, byteOffset, length) {
   } else {
     // convert length to non-negative integer
     length = +length;
-    length = length ? Math.trunc(length) : 0;
-    length = length <= 0 ? 0 : Math.min(length, Number.MAX_SAFE_INTEGER);
-
-    if (length > maxLength)
-      throw new RangeError("'length' is out of bounds");
+    // Check for NaN
+    if (length !== length) {
+      length = 0;
+    } else if (length > 0) {
+      length = (length < Number.MAX_SAFE_INTEGER ?
+                length : Number.MAX_SAFE_INTEGER);
+      if (length > maxLength)
+        throw new RangeError("'length' is out of bounds");
+    } else {
+      length = 0;
+    }
   }
 
   return new FastBuffer(obj, byteOffset, length);
@@ -289,9 +306,8 @@ function fromObject(obj) {
     return b;
   }
 
-  if (obj) {
-    if (obj.length !== undefined || isArrayBuffer(obj.buffer) ||
-        isSharedArrayBuffer(obj.buffer)) {
+  if (obj != undefined) {
+    if (obj.length !== undefined || isAnyArrayBuffer(obj.buffer)) {
       if (typeof obj.length !== 'number' || obj.length !== obj.length) {
         return new FastBuffer();
       }
@@ -302,8 +318,6 @@ function fromObject(obj) {
       return fromArrayLike(obj.data);
     }
   }
-
-  throw new TypeError(kFromErrorMsg);
 }
 
 
@@ -388,53 +402,63 @@ function base64ByteLength(str, bytes) {
 
 function byteLength(string, encoding) {
   if (typeof string !== 'string') {
-    if (ArrayBuffer.isView(string) || isArrayBuffer(string) ||
-        isSharedArrayBuffer(string)) {
+    if (ArrayBuffer.isView(string) || isAnyArrayBuffer(string)) {
       return string.byteLength;
     }
 
     throw new TypeError('"string" must be a string, Buffer, or ArrayBuffer');
   }
 
-  var len = string.length;
-  if (len === 0)
+  const len = string.length;
+  const mustMatch = (arguments.length > 2 && arguments[2] === true);
+  if (!mustMatch && len === 0)
     return 0;
 
-  // Use a for loop to avoid recursion
-  var loweredCase = false;
-  for (;;) {
-    switch (encoding) {
-      case 'ascii':
-      case 'latin1':
-      case 'binary':
-        return len;
-
-      case 'utf8':
-      case 'utf-8':
-      case undefined:
-        return binding.byteLengthUtf8(string);
-
-      case 'ucs2':
-      case 'ucs-2':
-      case 'utf16le':
-      case 'utf-16le':
+  if (!encoding)
+    return (mustMatch ? -1 : binding.byteLengthUtf8(string));
+
+  encoding += '';
+  switch (encoding.length) {
+    case 4:
+      if (encoding === 'utf8') return binding.byteLengthUtf8(string);
+      if (encoding === 'ucs2') return len * 2;
+      encoding = encoding.toLowerCase();
+      if (encoding === 'utf8') return binding.byteLengthUtf8(string);
+      if (encoding === 'ucs2') return len * 2;
+      break;
+    case 5:
+      if (encoding === 'utf-8') return binding.byteLengthUtf8(string);
+      if (encoding === 'ascii') return len;
+      if (encoding === 'ucs-2') return len * 2;
+      encoding = encoding.toLowerCase();
+      if (encoding === 'utf-8') return binding.byteLengthUtf8(string);
+      if (encoding === 'ascii') return len;
+      if (encoding === 'ucs-2') return len * 2;
+      break;
+    case 7:
+      if (encoding === 'utf16le' || encoding.toLowerCase() === 'utf16le')
         return len * 2;
-
-      case 'hex':
+      break;
+    case 8:
+      if (encoding === 'utf-16le' || encoding.toLowerCase() === 'utf-16le')
+        return len * 2;
+      break;
+    case 6:
+      if (encoding === 'latin1' || encoding === 'binary') return len;
+      if (encoding === 'base64') return base64ByteLength(string, len);
+      encoding = encoding.toLowerCase();
+      if (encoding === 'latin1' || encoding === 'binary') return len;
+      if (encoding === 'base64') return base64ByteLength(string, len);
+      break;
+    case 3:
+      if (encoding === 'hex' || encoding.toLowerCase() === 'hex')
         return len >>> 1;
-
-      case 'base64':
-        return base64ByteLength(string, len);
-
-      default:
-        // The C++ binding defaulted to UTF8, we should too.
-        if (loweredCase)
-          return binding.byteLengthUtf8(string);
-
-        encoding = ('' + encoding).toLowerCase();
-        loweredCase = true;
-    }
+      break;
   }
+  if (mustMatch)
+    throw new TypeError('Unknown encoding: ' + encoding);
+  else
+    return binding.byteLengthUtf8(string);
 }
 
 Buffer.byteLength = byteLength;

diff --git a/lib/util.js b/lib/util.js
@@ -452,7 +452,7 @@ function formatValue(ctx, value, recurseTimes) {
     // Fast path for ArrayBuffer and SharedArrayBuffer.
     // Can't do the same for DataView because it has a non-primitive
     // .buffer property that we need to recurse for.
-    if (binding.isArrayBuffer(value) || binding.isSharedArrayBuffer(value)) {
+    if (binding.isAnyArrayBuffer(value)) {
       return `${constructor.name}` +
              ` { byteLength: ${formatNumber(ctx, value.byteLength)} }`;
     }
@@ -494,8 +494,7 @@ function formatValue(ctx, value, recurseTimes) {
       keys.unshift('size');
     empty = value.size === 0;
     formatter = formatMap;
-  } else if (binding.isArrayBuffer(value) ||
-             binding.isSharedArrayBuffer(value)) {
+  } else if (binding.isAnyArrayBuffer(value)) {
     braces = ['{', '}'];
     keys.unshift('byteLength');
     visibleKeys.byteLength = true;

diff --git a/src/node_util.cc b/src/node_util.cc
@@ -20,7 +20,6 @@ using v8::Value;
 
 
 #define VALUE_METHOD_MAP(V)                                                   \
-  V(isArrayBuffer, IsArrayBuffer)                                             \
   V(isDataView, IsDataView)                                                   \
   V(isDate, IsDate)                                                           \
   V(isExternal, IsExternal)                                                   \
@@ -30,7 +29,6 @@ using v8::Value;
   V(isRegExp, IsRegExp)                                                       \
   V(isSet, IsSet)                                                             \
   V(isSetIterator, IsSetIterator)                                             \
-  V(isSharedArrayBuffer, IsSharedArrayBuffer)                                 \
   V(isTypedArray, IsTypedArray)                                               \
   V(isUint8Array, IsUint8Array)
 
@@ -44,6 +42,12 @@ using v8::Value;
   VALUE_METHOD_MAP(V)
 #undef V
 
+static void IsAnyArrayBuffer(const FunctionCallbackInfo<Value>& args) {
+  CHECK_EQ(1, args.Length());  // exactly one argument is required
+  args.GetReturnValue().Set(  // true for ArrayBuffer or SharedArrayBuffer
+    args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer());
+}
+
 static void GetPromiseDetails(const FunctionCallbackInfo<Value>& args) {
   // Return undefined if it's not a Promise.
   if (!args[0]->IsPromise())
@@ -151,6 +155,8 @@ void Initialize(Local<Object> target,
   VALUE_METHOD_MAP(V)
 #undef V
 
+  env->SetMethod(target, "isAnyArrayBuffer", IsAnyArrayBuffer);
+
 #define V(name, _)                                                            \
   target->Set(context,                                                        \
               FIXED_ONE_BYTE_STRING(env->isolate(), #name),                   \