buffer: improve buff.toString(encoding, start, end) #10575

Open
wants to merge 1 commit into
from

Projects

None yet

6 participants

@JacksonTian
Contributor
JacksonTian commented Jan 2, 2017 edited

When buff.toString() without parameter will call
buff.utf8Slice(0, this.length) directly. It skips the start/end
check, even thougth the buffer length is zero.

The utf8Slice is a C++ binding method, when the buffer length is
zero, the case is unexpected.

And, the slowToString.apply(this, arguments) is slowly also,
change to slowToString.call(this, encoding, start, end).

Checklist
  • make -j4 test (UNIX), or vcbuild test (Windows) passes
  • commit message follows commit guidelines
Affected core subsystem(s)
@JacksonTian JacksonTian buffer: improve buff.toString(encoding, start, end)
When buff.toString() without parameter will call
buff.utf8Slice(0, this.length) directly. It skips the start/end
check, even thougth the buffer length is zero.

The utf8Slice is a C++ binding method, when the buffer length is
zero, the case is unexpected.

And, the slowToString.apply(this, arguments) is slowly also,
change to slowToString.call(this, encoding, start, end).
2f05f82
@JacksonTian
Contributor
JacksonTian commented Jan 2, 2017 edited

Following is benchmark results:

$ ./node benchmark/compare.js --new ./node --old ~/.tnvm/versions/node/v7.3.0/bin/node --filter tostring --runs 1 -- buffers
"binary", "filename", "configuration", "rate", "time"
"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 53075107.789838344, 0.188412241
"old", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""true""", 6970222.753242602, 1.434674379
"old", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""true""", 5725362.379855517, 1.746614334
"old", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""true""", 3047914.7889390728, 3.280931618
"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 18727663.41050718, 0.533969443
"old", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""false""", 8294553.510086126, 1.205610403
"old", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""false""", 6364356.123134775, 1.571250855
"old", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""false""", 3221132.404577592, 3.104498277
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 74184534.29612593, 0.134798986
"new", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""true""", 7833625.1081898045, 1.276548196
"new", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""true""", 6383079.577419681, 1.566641913
"new", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""true""", 3235248.73062808, 3.090952453
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 76114013.64713609, 0.131381851
"new", "buffers/buffer-tostring.js", "n=10000000 len=1 arg=""false""", 7597410.922092946, 1.316237874
"new", "buffers/buffer-tostring.js", "n=10000000 len=64 arg=""false""", 6336344.836457308, 1.578196935
"new", "buffers/buffer-tostring.js", "n=10000000 len=1024 arg=""false""", 3238942.7802110813, 3.087427188

The new binary is better than old binary(v7.3.0, close to master).

When buffer length is zero, the improve is more clearly.

"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 53075107.789838344, 0.188412241
"old", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 18727663.41050718, 0.533969443
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""true""", 74184534.29612593, 0.134798986
"new", "buffers/buffer-tostring.js", "n=10000000 len=0 arg=""false""", 76114013.64713609, 0.131381851
@mscdex mscdex added performance and removed dont-land-on-v7.x labels Jan 2, 2017
@thefourtheye
Contributor

It skips the start/end check, even thougth the buffer length is zero.

Do you mean to say, the checks are not skipped, even though the length is zero?

@JacksonTian
Contributor

@thefourtheye

If no parameters passed into toString, the C++ binding method was called, the start/end checks are skipped.

When buffer is zero length, call C++ binding method is unnecessary.

@thefourtheye
Contributor

If no parameters passed into toString, the C++ binding method was called, the start/end checks are skipped.

We don't have to do the start and end checks, right? Because we know that the start will be zero and the end will be the length of the Buffer object.

But, if your only concern is about the zero length Buffers, you might want to try special casing that, like this

  if (this.length === 0) {
    return '';
  } else if (arguments.length === 0) {
    result = this.utf8Slice(0, this.length);
  } else {
    result = slowToString.apply(this, arguments);
  }
@JacksonTian
Contributor

The slowToString imply the zero check logic.

And, the another thing is .apply(this, arguments) is slowly.

@jasnell
Member
jasnell commented Jan 10, 2017
@mscdex
Contributor
mscdex commented Jan 11, 2017 edited

Ok so I ran the benchmarks with the changes this PR currently makes and I see this:

                                                            improvement significant      p.value
 buffers/buffer-tostring.js n=10000000 len=0 arg="false"       321.01 %         *** 4.523480e-21
 buffers/buffer-tostring.js n=10000000 len=0 arg="true"         53.44 %         *** 1.065349e-10
 buffers/buffer-tostring.js n=10000000 len=1 arg="false"       -10.71 %         *** 5.716243e-15
 buffers/buffer-tostring.js n=10000000 len=1 arg="true"          6.51 %          ** 1.326955e-03
 buffers/buffer-tostring.js n=10000000 len=1024 arg="false"     -3.22 %             6.904522e-02
 buffers/buffer-tostring.js n=10000000 len=1024 arg="true"       2.49 %         *** 5.648533e-08
 buffers/buffer-tostring.js n=10000000 len=64 arg="false"       -9.83 %         *** 9.958990e-04
 buffers/buffer-tostring.js n=10000000 len=64 arg="true"         7.26 %           * 1.565197e-02

However, if we both move and re-arrange (to avoid unnecessary conditionals) the start and end checks into .toString() we can get results like this:

                                                            improvement significant      p.value
 buffers/buffer-tostring.js n=10000000 len=0 arg="false"      1618.83 %         *** 6.399117e-46
 buffers/buffer-tostring.js n=10000000 len=0 arg="true"        542.12 %         *** 5.889205e-44
 buffers/buffer-tostring.js n=10000000 len=1 arg="false"        -0.98 %             1.011628e-01
 buffers/buffer-tostring.js n=10000000 len=1 arg="true"         10.12 %         *** 2.606760e-10
 buffers/buffer-tostring.js n=10000000 len=1024 arg="false"     -0.70 %           * 3.349085e-02
 buffers/buffer-tostring.js n=10000000 len=1024 arg="true"       2.74 %         *** 1.017629e-08
 buffers/buffer-tostring.js n=10000000 len=64 arg="false"       -1.90 %          ** 3.225796e-03
 buffers/buffer-tostring.js n=10000000 len=64 arg="true"         7.29 %         *** 5.686678e-15

I'm not sure what's up with the len=64 arg="false" case.

I also changed slowToString() to take the instance as a parameter instead of using .call(), so that helped a little performance-wise too. Lastly, when I moved the checks, I moved the comments to the top of .toString() to keep the function inlineable.

@targos
Member
targos commented Jan 11, 2017

I thought V8 stopped looking at the length of a function to determine inlinability. Maybe it's in a later version...

@mscdex
Contributor
mscdex commented Jan 11, 2017 edited

@targos AFAIK it definitely still takes it into account with at least V8 5.4 from my experience. It's certainly not the only factor, but it is one of them.

@thefourtheye
Contributor

@mscdex Can you share the second patch you tried?

@mscdex
Contributor
mscdex commented Jan 11, 2017 edited

@thefourtheye

Here's my patch on top of this PR:

diff --git a/lib/buffer.js b/lib/buffer.js
index 4a69db6..cfd976b 100644
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -432,63 +432,27 @@ Object.defineProperty(Buffer.prototype, 'offset', {
 });
 
 
-function slowToString(encoding, start, end) {
+function slowToString(buf, encoding, start, end) {
   var loweredCase = false;
-
-  // No need to verify that "this.length <= MAX_UINT32" since it's a read-only
-  // property of a typed array.
-
-  // This behaves neither like String nor Uint8Array in that we set start/end
-  // to their upper/lower bounds if the value passed is out of range.
-  // undefined is handled specially as per ECMA-262 6th Edition,
-  // Section 13.3.3.7 Runtime Semantics: KeyedBindingInitialization.
-  if (start === undefined || start < 0)
-    start = 0;
-  // Return early if start > this.length. Done here to prevent potential uint32
-  // coercion fail below.
-  if (start > this.length)
-    return '';
-
-  if (end === undefined || end > this.length)
-    end = this.length;
-
-  if (end <= 0)
-    return '';
-
-  // Force coersion to uint32. This will also coerce falsey/NaN values to 0.
-  end >>>= 0;
-  start >>>= 0;
-
-  if (end <= start)
-    return '';
-
-  if (!encoding) encoding = 'utf8';
-
   while (true) {
     switch (encoding) {
-      case 'hex':
-        return this.hexSlice(start, end);
-
       case 'utf8':
       case 'utf-8':
-        return this.utf8Slice(start, end);
-
+        return buf.utf8Slice(start, end);
+      case 'hex':
+        return buf.hexSlice(start, end);
       case 'ascii':
-        return this.asciiSlice(start, end);
-
+        return buf.asciiSlice(start, end);
       case 'latin1':
       case 'binary':
-        return this.latin1Slice(start, end);
-
+        return buf.latin1Slice(start, end);
       case 'base64':
-        return this.base64Slice(start, end);
-
+        return buf.base64Slice(start, end);
       case 'ucs2':
       case 'ucs-2':
       case 'utf16le':
       case 'utf-16le':
-        return this.ucs2Slice(start, end);
-
+        return buf.ucs2Slice(start, end);
       default:
         if (loweredCase)
           throw new TypeError('Unknown encoding: ' + encoding);
@@ -498,19 +462,47 @@ function slowToString(encoding, start, end) {
   }
 }
 
-Buffer.prototype.copy = function(target, targetStart, sourceStart, sourceEnd) {
-  return binding.copy(this, target, targetStart, sourceStart, sourceEnd);
-};
-
-
+// When comparing `start` and `end` with `buffer.length`, there is no need to
+// verify that "this.length <= MAX_UINT32" since it's a read-only property of a
+// typed array.
+// The bounds checking performed by `toString()` behaves neither like String nor
+// Uint8Array in that we set start/end to their upper/lower bounds if the value
+// passed is out of range.
+// `undefined` is handled specially as per ECMA-262 6th Edition,
+// Section 13.3.3.7 Runtime Semantics: KeyedBindingInitialization.
+// Additionally, we return early if `start > this.length`. This is done to
+// prevent potential uint32 coercion failures.
 Buffer.prototype.toString = function(encoding, start, end) {
-  const result = slowToString.call(this, encoding, start, end);
+  if (start === undefined || start < 0)
+    start = 0;
+  else if (start > this.length)
+    return '';
+  else
+    start >>>= 0;
+  if (end === undefined || end > this.length)
+    end = this.length;
+  else if (end <= 0)
+    return '';
+  else
+    end >>>= 0;
+  if (end <= start)
+    return '';
+  var result;
+  if (!encoding)
+    result = this.utf8Slice(start, end);
+  else
+    result = slowToString(this, encoding, start, end);
   if (result === undefined)
     throw new Error('"toString()" failed');
   return result;
 };
 
 
+Buffer.prototype.copy = function(target, targetStart, sourceStart, sourceEnd) {
+  return binding.copy(this, target, targetStart, sourceStart, sourceEnd);
+};
+
+
 Buffer.prototype.equals = function equals(b) {
   if (!isUint8Array(b))
     throw new TypeError('Argument must be a Buffer or Uint8Array');

@targos
Member
targos commented Jan 11, 2017

@mscdex Found the commit: v8/v8@1b33028
So it is only for TurboFan, in V8 >= 5.5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment