Permalink
Browse files

Optimization for mat4.multiply under Firefox, thanks to Fabrice Bazzaro

This closes #29.
  • Loading branch information...
1 parent 1a302bc commit 38572e0c53d8a32a1c88a52860905524877e4b13 @sinisterchipmunk sinisterchipmunk committed May 17, 2012
Showing with 192 additions and 26 deletions.
  1. +38 −26 gl-matrix.js
  2. +25 −0 spec/benchmarks/mat4-multiplication.htm
  3. +116 −0 spec/benchmarks/mat4-multiplication.js
  4. +13 −0 spec/javascripts/mat4_spec.js
View
@@ -1231,32 +1231,44 @@
if (!dest) { dest = mat; }
// Cache the matrix values (makes for huge speed increases!)
- var a00 = mat[0], a01 = mat[1], a02 = mat[2], a03 = mat[3],
- a10 = mat[4], a11 = mat[5], a12 = mat[6], a13 = mat[7],
- a20 = mat[8], a21 = mat[9], a22 = mat[10], a23 = mat[11],
- a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15],
-
- b00 = mat2[0], b01 = mat2[1], b02 = mat2[2], b03 = mat2[3],
- b10 = mat2[4], b11 = mat2[5], b12 = mat2[6], b13 = mat2[7],
- b20 = mat2[8], b21 = mat2[9], b22 = mat2[10], b23 = mat2[11],
- b30 = mat2[12], b31 = mat2[13], b32 = mat2[14], b33 = mat2[15];
-
- dest[0] = b00 * a00 + b01 * a10 + b02 * a20 + b03 * a30;
- dest[1] = b00 * a01 + b01 * a11 + b02 * a21 + b03 * a31;
- dest[2] = b00 * a02 + b01 * a12 + b02 * a22 + b03 * a32;
- dest[3] = b00 * a03 + b01 * a13 + b02 * a23 + b03 * a33;
- dest[4] = b10 * a00 + b11 * a10 + b12 * a20 + b13 * a30;
- dest[5] = b10 * a01 + b11 * a11 + b12 * a21 + b13 * a31;
- dest[6] = b10 * a02 + b11 * a12 + b12 * a22 + b13 * a32;
- dest[7] = b10 * a03 + b11 * a13 + b12 * a23 + b13 * a33;
- dest[8] = b20 * a00 + b21 * a10 + b22 * a20 + b23 * a30;
- dest[9] = b20 * a01 + b21 * a11 + b22 * a21 + b23 * a31;
- dest[10] = b20 * a02 + b21 * a12 + b22 * a22 + b23 * a32;
- dest[11] = b20 * a03 + b21 * a13 + b22 * a23 + b23 * a33;
- dest[12] = b30 * a00 + b31 * a10 + b32 * a20 + b33 * a30;
- dest[13] = b30 * a01 + b31 * a11 + b32 * a21 + b33 * a31;
- dest[14] = b30 * a02 + b31 * a12 + b32 * a22 + b33 * a32;
- dest[15] = b30 * a03 + b31 * a13 + b32 * a23 + b33 * a33;
+ var a00 = mat[ 0], a01 = mat[ 1], a02 = mat[ 2], a03 = mat[3];
+ var a10 = mat[ 4], a11 = mat[ 5], a12 = mat[ 6], a13 = mat[7];
+ var a20 = mat[ 8], a21 = mat[ 9], a22 = mat[10], a23 = mat[11];
+ var a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15];
+
+ // Cache only the current line of the second matrix
+ var b0 = mat2[0], b1 = mat2[1], b2 = mat2[2], b3 = mat2[3];
+ dest[0] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[1] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[2] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[3] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[4];
+ b1 = mat2[5];
+ b2 = mat2[6];
+ b3 = mat2[7];
+ dest[4] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[5] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[6] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[7] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[8];
+ b1 = mat2[9];
+ b2 = mat2[10];
+ b3 = mat2[11];
+ dest[8] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[9] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[10] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[11] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[12];
+ b1 = mat2[13];
+ b2 = mat2[14];
+ b3 = mat2[15];
+ dest[12] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[13] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[14] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[15] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
return dest;
};
@@ -0,0 +1,25 @@
+<html>
+ <head>
+ <script>
+ function require(path) {
+ // No-op stand-in so the require() calls in mat4-multiplication.js do not throw in the browser; the libraries are loaded by the script tags below instead.
+ }
+
+ console.log = function(text) { // Replace console.log so each message is appended to the page; document.body must exist by the time this is called
+ text = document.createTextNode(text); // the parameter is reused to hold the new text node
+ document.body.appendChild(text);
+ document.body.appendChild(document.createElement('br')); // line break after every message
+ };
+ </script>
+ <script src="../../node_modules/benchmark/benchmark.js"></script>
+ <script src="../../gl-matrix.js"></script>
+ <script src="mat4-multiplication.js"></script>
+ <style>
+ body { font-family: monospace; }
+ </style>
+ </head>
+ <body>
+ Running benchmark...<br/>
+ </body>
+</html>
+
@@ -0,0 +1,116 @@
+/**
+ Optimization for mat4.multiply. Doesn't show any difference to speak of in node.js / Chrome,
+ but Firefox shows an approx 10% improvement:
+
+ current mat4.multiply x 3,956,052 ops/sec ±0.45% (64 runs sampled)
+ optimized mat4.multiply x 4,379,678 ops/sec ±0.71% (62 runs sampled)
+
+ **/
+
+require("gl-matrix"); // return value is discarded; presumably gl-matrix installs mat4 / setMatrixArrayType as globals — TODO confirm
+Benchmark = typeof(Benchmark) === 'undefined' ? require('benchmark') : Benchmark; // reuse an already-defined Benchmark, otherwise require it; NOTE(review): assigned without var
+var suite = new Benchmark.Suite; // suite that both benchmark cases are added to
+
+setMatrixArrayType(Array); // NOTE(review): presumably makes mat4.create() return plain Arrays — confirm against gl-matrix
+var matA = mat4.identity(mat4.create()), matB = mat4.identity(mat4.create()), matC = mat4.create(); // matA and matB are the operands, matC is the shared destination
+
+var multiply = function (mat, mat2, dest) { // Baseline for the benchmark: multiplies two 16-entry matrices with every entry of both operands cached in locals
+ if (!dest) { dest = mat; } // no destination given: the product is written into mat
+
+ // Cache all 16 entries of both matrices before any write, so dest may safely alias mat or mat2 (original note: makes for huge speed increases!)
+ var a00 = mat[0], a01 = mat[1], a02 = mat[2], a03 = mat[3],
+ a10 = mat[4], a11 = mat[5], a12 = mat[6], a13 = mat[7],
+ a20 = mat[8], a21 = mat[9], a22 = mat[10], a23 = mat[11],
+ a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15],
+
+ b00 = mat2[0], b01 = mat2[1], b02 = mat2[2], b03 = mat2[3],
+ b10 = mat2[4], b11 = mat2[5], b12 = mat2[6], b13 = mat2[7],
+ b20 = mat2[8], b21 = mat2[9], b22 = mat2[10], b23 = mat2[11],
+ b30 = mat2[12], b31 = mat2[13], b32 = mat2[14], b33 = mat2[15];
+
+ dest[0] = b00 * a00 + b01 * a10 + b02 * a20 + b03 * a30; // pattern for all 16 lines: dest[4*i + j] = sum over k of mat2[4*i + k] * mat[4*k + j]
+ dest[1] = b00 * a01 + b01 * a11 + b02 * a21 + b03 * a31;
+ dest[2] = b00 * a02 + b01 * a12 + b02 * a22 + b03 * a32;
+ dest[3] = b00 * a03 + b01 * a13 + b02 * a23 + b03 * a33;
+ dest[4] = b10 * a00 + b11 * a10 + b12 * a20 + b13 * a30;
+ dest[5] = b10 * a01 + b11 * a11 + b12 * a21 + b13 * a31;
+ dest[6] = b10 * a02 + b11 * a12 + b12 * a22 + b13 * a32;
+ dest[7] = b10 * a03 + b11 * a13 + b12 * a23 + b13 * a33;
+ dest[8] = b20 * a00 + b21 * a10 + b22 * a20 + b23 * a30;
+ dest[9] = b20 * a01 + b21 * a11 + b22 * a21 + b23 * a31;
+ dest[10] = b20 * a02 + b21 * a12 + b22 * a22 + b23 * a32;
+ dest[11] = b20 * a03 + b21 * a13 + b22 * a23 + b23 * a33;
+ dest[12] = b30 * a00 + b31 * a10 + b32 * a20 + b33 * a30;
+ dest[13] = b30 * a01 + b31 * a11 + b32 * a21 + b33 * a31;
+ dest[14] = b30 * a02 + b31 * a12 + b32 * a22 + b33 * a32;
+ dest[15] = b30 * a03 + b31 * a13 + b32 * a23 + b33 * a33;
+
+ return dest; // the array that was written (mat itself when dest was omitted)
+};
+
+var multiplyOpt = function (mat, mat2, dest) { // Candidate for the benchmark: same sums as multiply, but only four mat2 entries are held in locals at a time
+ if (!dest) { dest = mat; } // no destination given: the product is written into mat
+
+ // Cache all 16 entries of mat up front, so writes to dest cannot disturb them even when dest is mat
+ var a00 = mat[ 0], a01 = mat[ 1], a02 = mat[ 2], a03 = mat[3];
+ var a10 = mat[ 4], a11 = mat[ 5], a12 = mat[ 6], a13 = mat[7];
+ var a20 = mat[ 8], a21 = mat[ 9], a22 = mat[10], a23 = mat[11];
+ var a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15];
+
+ // Cache only the current group of four mat2 entries; each group is read in full before the matching four dest entries are written, so dest may also alias mat2
+ var b0 = mat2[0], b1 = mat2[1], b2 = mat2[2], b3 = mat2[3];
+ dest[0] = b0*a00 + b1*a10 + b2*a20 + b3*a30; // pattern for all 16 lines: dest[4*i + j] = sum over k of mat2[4*i + k] * mat[4*k + j]
+ dest[1] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[2] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[3] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[4]; // second group: mat2[4..7] feeds dest[4..7]
+ b1 = mat2[5];
+ b2 = mat2[6];
+ b3 = mat2[7];
+ dest[4] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[5] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[6] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[7] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[8]; // third group: mat2[8..11] feeds dest[8..11]
+ b1 = mat2[9];
+ b2 = mat2[10];
+ b3 = mat2[11];
+ dest[8] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[9] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[10] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[11] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ b0 = mat2[12]; // fourth group: mat2[12..15] feeds dest[12..15]
+ b1 = mat2[13];
+ b2 = mat2[14];
+ b3 = mat2[15];
+ dest[12] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
+ dest[13] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
+ dest[14] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
+ dest[15] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
+
+ return dest; // the array that was written (mat itself when dest was omitted)
+};
+
+// Register both implementations with identical operands (matA, matB) and the same destination (matC)
+suite.add('current mat4.multiply', function() {
+ multiply(matA, matB, matC);
+});
+
+suite.add('optimized mat4.multiply', function() {
+ multiplyOpt(matA, matB, matC);
+});
+
+// Listeners: print each benchmark's summary on 'cycle', then the name of the fastest one on 'complete'
+suite.on('cycle', function(event, bench) { // NOTE(review): relies on the benchmark being passed as the second argument — confirm against the Benchmark.js version in use
+ console.log(String(bench));
+});
+
+suite.on('complete', function() {
+ console.log('Fastest is ' + this.filter('fastest').pluck('name')); // `this` is used as the suite here
+});
+
+// Start the suite with the 'async' option enabled
+suite.run({'async': true});
@@ -0,0 +1,13 @@
+describe("mat4", function() {
+ var result, a, b, dest; // NOTE(review): only a is ever assigned in this spec; result and b are unused and dest stays undefined
+
+ beforeEach(function() {
+ a = mat4.identity(mat4.create()); // fresh identity matrix before every example
+ });
+
+ describe("multiply", function() {
+ it("an identity with itself should produce an identity", function() {
+ expect(mat4.multiply(a, a, dest)).toBeEqualish([1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]); // dest is undefined, so this exercises the call form without a destination; toBeEqualish is presumably a tolerance-based custom matcher — TODO confirm
+ });
+ });
+});

0 comments on commit 38572e0

Please sign in to comment.