# toji/gl-matrix

Optimization for mat4.multiply under Firefox, thanks to Fabrice Bazzaro

`This closes #29.`
1 parent 1a302bc commit 38572e0c53d8a32a1c88a52860905524877e4b13 sinisterchipmunk committed May 17, 2012
Showing with 192 additions and 26 deletions.
1. +38 −26 gl-matrix.js
2. +25 −0 spec/benchmarks/mat4-multiplication.htm
3. +116 −0 spec/benchmarks/mat4-multiplication.js
4. +13 −0 spec/javascripts/mat4_spec.js
 @@ -1231,32 +1231,44 @@ if (!dest) { dest = mat; } // Cache the matrix values (makes for huge speed increases!) - var a00 = mat[0], a01 = mat[1], a02 = mat[2], a03 = mat[3], - a10 = mat[4], a11 = mat[5], a12 = mat[6], a13 = mat[7], - a20 = mat[8], a21 = mat[9], a22 = mat[10], a23 = mat[11], - a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15], - - b00 = mat2[0], b01 = mat2[1], b02 = mat2[2], b03 = mat2[3], - b10 = mat2[4], b11 = mat2[5], b12 = mat2[6], b13 = mat2[7], - b20 = mat2[8], b21 = mat2[9], b22 = mat2[10], b23 = mat2[11], - b30 = mat2[12], b31 = mat2[13], b32 = mat2[14], b33 = mat2[15]; - - dest[0] = b00 * a00 + b01 * a10 + b02 * a20 + b03 * a30; - dest[1] = b00 * a01 + b01 * a11 + b02 * a21 + b03 * a31; - dest[2] = b00 * a02 + b01 * a12 + b02 * a22 + b03 * a32; - dest[3] = b00 * a03 + b01 * a13 + b02 * a23 + b03 * a33; - dest[4] = b10 * a00 + b11 * a10 + b12 * a20 + b13 * a30; - dest[5] = b10 * a01 + b11 * a11 + b12 * a21 + b13 * a31; - dest[6] = b10 * a02 + b11 * a12 + b12 * a22 + b13 * a32; - dest[7] = b10 * a03 + b11 * a13 + b12 * a23 + b13 * a33; - dest[8] = b20 * a00 + b21 * a10 + b22 * a20 + b23 * a30; - dest[9] = b20 * a01 + b21 * a11 + b22 * a21 + b23 * a31; - dest[10] = b20 * a02 + b21 * a12 + b22 * a22 + b23 * a32; - dest[11] = b20 * a03 + b21 * a13 + b22 * a23 + b23 * a33; - dest[12] = b30 * a00 + b31 * a10 + b32 * a20 + b33 * a30; - dest[13] = b30 * a01 + b31 * a11 + b32 * a21 + b33 * a31; - dest[14] = b30 * a02 + b31 * a12 + b32 * a22 + b33 * a32; - dest[15] = b30 * a03 + b31 * a13 + b32 * a23 + b33 * a33; + var a00 = mat[ 0], a01 = mat[ 1], a02 = mat[ 2], a03 = mat[3]; + var a10 = mat[ 4], a11 = mat[ 5], a12 = mat[ 6], a13 = mat[7]; + var a20 = mat[ 8], a21 = mat[ 9], a22 = mat[10], a23 = mat[11]; + var a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15]; + + // Cache only the current line of the second matrix + var b0 = mat2[0], b1 = mat2[1], b2 = mat2[2], b3 = mat2[3]; + dest[0] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[1] 
= b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[2] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[3] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[4]; + b1 = mat2[5]; + b2 = mat2[6]; + b3 = mat2[7]; + dest[4] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[5] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[6] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[7] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[8]; + b1 = mat2[9]; + b2 = mat2[10]; + b3 = mat2[11]; + dest[8] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[9] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[10] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[11] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[12]; + b1 = mat2[13]; + b2 = mat2[14]; + b3 = mat2[15]; + dest[12] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[13] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[14] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[15] = b0*a03 + b1*a13 + b2*a23 + b3*a33; return dest; };
 @@ -0,0 +1,25 @@ + + + + + + + + + + Running benchmark...
+ + +
 @@ -0,0 +1,116 @@ +/** + Optimization for mat4.multiply. Doesn't show any difference to speak of in node.js / Chrome, + but Firefox shows an approx 10% improvement: + + current mat4.multiply x 3,956,052 ops/sec ±0.45% (64 runs sampled) + optimized mat4.multiply x 4,379,678 ops/sec ±0.71% (62 runs sampled) + + **/ + +require("gl-matrix"); +Benchmark = typeof(Benchmark) === 'undefined' ? require('benchmark') : Benchmark; +var suite = new Benchmark.Suite; + +setMatrixArrayType(Array); +var matA = mat4.identity(mat4.create()), matB = mat4.identity(mat4.create()), matC = mat4.create(); + +var multiply = function (mat, mat2, dest) { + if (!dest) { dest = mat; } + + // Cache the matrix values (makes for huge speed increases!) + var a00 = mat[0], a01 = mat[1], a02 = mat[2], a03 = mat[3], + a10 = mat[4], a11 = mat[5], a12 = mat[6], a13 = mat[7], + a20 = mat[8], a21 = mat[9], a22 = mat[10], a23 = mat[11], + a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15], + + b00 = mat2[0], b01 = mat2[1], b02 = mat2[2], b03 = mat2[3], + b10 = mat2[4], b11 = mat2[5], b12 = mat2[6], b13 = mat2[7], + b20 = mat2[8], b21 = mat2[9], b22 = mat2[10], b23 = mat2[11], + b30 = mat2[12], b31 = mat2[13], b32 = mat2[14], b33 = mat2[15]; + + dest[0] = b00 * a00 + b01 * a10 + b02 * a20 + b03 * a30; + dest[1] = b00 * a01 + b01 * a11 + b02 * a21 + b03 * a31; + dest[2] = b00 * a02 + b01 * a12 + b02 * a22 + b03 * a32; + dest[3] = b00 * a03 + b01 * a13 + b02 * a23 + b03 * a33; + dest[4] = b10 * a00 + b11 * a10 + b12 * a20 + b13 * a30; + dest[5] = b10 * a01 + b11 * a11 + b12 * a21 + b13 * a31; + dest[6] = b10 * a02 + b11 * a12 + b12 * a22 + b13 * a32; + dest[7] = b10 * a03 + b11 * a13 + b12 * a23 + b13 * a33; + dest[8] = b20 * a00 + b21 * a10 + b22 * a20 + b23 * a30; + dest[9] = b20 * a01 + b21 * a11 + b22 * a21 + b23 * a31; + dest[10] = b20 * a02 + b21 * a12 + b22 * a22 + b23 * a32; + dest[11] = b20 * a03 + b21 * a13 + b22 * a23 + b23 * a33; + dest[12] = b30 * a00 + b31 * a10 + b32 * a20 + b33 * a30; 
+ dest[13] = b30 * a01 + b31 * a11 + b32 * a21 + b33 * a31; + dest[14] = b30 * a02 + b31 * a12 + b32 * a22 + b33 * a32; + dest[15] = b30 * a03 + b31 * a13 + b32 * a23 + b33 * a33; + + return dest; +}; + +var multiplyOpt = function (mat, mat2, dest) { + if (!dest) { dest = mat; } + + // Cache the matrix values (makes for huge speed increases!) + var a00 = mat[ 0], a01 = mat[ 1], a02 = mat[ 2], a03 = mat[3]; + var a10 = mat[ 4], a11 = mat[ 5], a12 = mat[ 6], a13 = mat[7]; + var a20 = mat[ 8], a21 = mat[ 9], a22 = mat[10], a23 = mat[11]; + var a30 = mat[12], a31 = mat[13], a32 = mat[14], a33 = mat[15]; + + // Cache only the current line of the second matrix + var b0 = mat2[0], b1 = mat2[1], b2 = mat2[2], b3 = mat2[3]; + dest[0] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[1] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[2] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[3] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[4]; + b1 = mat2[5]; + b2 = mat2[6]; + b3 = mat2[7]; + dest[4] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[5] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[6] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[7] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[8]; + b1 = mat2[9]; + b2 = mat2[10]; + b3 = mat2[11]; + dest[8] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[9] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[10] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[11] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + b0 = mat2[12]; + b1 = mat2[13]; + b2 = mat2[14]; + b3 = mat2[15]; + dest[12] = b0*a00 + b1*a10 + b2*a20 + b3*a30; + dest[13] = b0*a01 + b1*a11 + b2*a21 + b3*a31; + dest[14] = b0*a02 + b1*a12 + b2*a22 + b3*a32; + dest[15] = b0*a03 + b1*a13 + b2*a23 + b3*a33; + + return dest; +}; + +// add tests +suite.add('current mat4.multiply', function() { + multiply(matA, matB, matC); +}); + +suite.add('optimized mat4.multiply', function() { + multiplyOpt(matA, matB, matC); +}); + +// add listeners +suite.on('cycle', function(event, bench) { + console.log(String(bench)); +}); + 
+suite.on('complete', function() { + console.log('Fastest is ' + this.filter('fastest').pluck('name')); +}); + +// run async +suite.run({'async': true});
 @@ -0,0 +1,13 @@ +describe("mat4", function() { + var result, a, b, dest; + + beforeEach(function() { + a = mat4.identity(mat4.create()); + }); + + describe("multiply", function() { + it("an identity with itself should produce an identity", function() { + expect(mat4.multiply(a, a, dest)).toBeEqualish([1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]); + }); + }); +});