From f4c0c3c98709ee76a20fc8c68f203489d03366db Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Wed, 24 Jun 2020 13:15:56 -0700 Subject: [PATCH 1/4] fix functions ops summary --- tfjs-converter/tools/model_summary.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tfjs-converter/tools/model_summary.ts b/tfjs-converter/tools/model_summary.ts index 3acbc644c18..dde785e6f72 100644 --- a/tfjs-converter/tools/model_summary.ts +++ b/tfjs-converter/tools/model_summary.ts @@ -36,7 +36,9 @@ function summarize(argv: string[]) { if (library != null) { const functions = library['function']; // tslint:disable-next-line: no-any - functions.forEach((func: any) => nodes.concat(func['nodeDef'])); + if (functions != null) { + functions.forEach((func: any) => nodes.concat(func['nodeDef'])); + } } const opCount: {[key: string]: number} = {}; From 0bea1d2bbaa2c566511d0acd9c5e73f2208e3720 Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Thu, 25 Jun 2020 20:24:35 -0700 Subject: [PATCH 2/4] rewrite the cumsum op with parallel algorithm --- tfjs-backend-webgl/src/backend_webgl.ts | 18 +++++++++++-- tfjs-backend-webgl/src/cumsum_gpu.ts | 36 ++++++++++++++----------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts index fe848478d54..a74ac159b03 100644 --- a/tfjs-backend-webgl/src/backend_webgl.ts +++ b/tfjs-backend-webgl/src/backend_webgl.ts @@ -1180,8 +1180,22 @@ export class MathBackendWebGL extends KernelBackend { `WebGL cumsum shader expects an inner-most axis=${x.rank - 1} ` + `but got axis=${axis}`); } - const program = new CumSumProgram(x.shape, exclusive, reverse); - return this.compileAndRun(program, [x]); + const size = x.shape[x.shape.length - 1]; + let result = x; + // Use cumsum parallel algorithm, ref: + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + for (let i = 0; i <= Math.ceil(Math.log2(size)) - 1; i++) { + const program = new CumSumProgram(x.shape, false, reverse, i); + result = this.compileAndRun(program, [result]); + } + // For exclusive cumsum, shift the end result in the direction of sum and + // add 0 to the front index. + if (exclusive) { + const program = new CumSumProgram(x.shape, exclusive, reverse, 0); + result = this.compileAndRun(program, [result]); + } + + return result; } equal(a: Tensor, b: Tensor): Tensor { diff --git a/tfjs-backend-webgl/src/cumsum_gpu.ts b/tfjs-backend-webgl/src/cumsum_gpu.ts index d986e012555..9586376218a 100644 --- a/tfjs-backend-webgl/src/cumsum_gpu.ts +++ b/tfjs-backend-webgl/src/cumsum_gpu.ts @@ -23,29 +23,33 @@ export class CumSumProgram implements GPGPUProgram { outputShape: number[]; userCode: string; - constructor(shape: number[], exclusive: boolean, reverse: boolean) { + constructor( + shape: number[], exclusive: boolean, reverse: boolean, index: number) { this.outputShape = shape; const rank = shape.length; - const finalDim = shape[shape.length - 1]; - const comparator = reverse ? '<' : '>'; + const val = exclusive ? '0.0' : `getX(${getCoords(rank, 'coords')})`; + const length = shape[shape.length - 1]; + let condition = ''; + let idxString = ''; + // When exclusive is set, the cumsum op becomes roll op that copies the + // value from the previous index based on the direction specified by the + // reverse flag. + if (exclusive) { + condition = reverse ? `end != ${length - 1}` : 'end != 0'; + idxString = reverse ? 'end + 1' : 'end - 1'; + } else { + condition = reverse ? `end + pow2 < ${length}` : 'end >= pow2'; + idxString = (reverse ? 'end + pow2' : 'end - pow2'); + } this.userCode = ` - int getIndex(int i) { - ${reverse ? `return ${finalDim} -i - 1;` : 'return i;'} - } - void main() { ${getCoordsDataType(rank)} coords = getOutputCoords(); int end = ${getFinalCoord(rank, 'coords')}; - float val = 0.0; - for (int i = ${finalDim} - 1; i >= 0; i -= 1) { - int idx = getIndex(i); - if (idx ${comparator} end) { - continue; - } - if (idx == end && ${exclusive}) { - continue; - } + float val = ${val}; + int pow2 = int(pow(2.0, float(${index}))); + if (${condition}) { + int idx = ${idxString}; ${getFinalCoord(rank, 'coords')} = idx; val += getX(${getCoords(rank, 'coords')}); } From 7146e5d922111b523eea83e3573a1a3456a5df58 Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Fri, 26 Jun 2020 08:43:55 -0700 Subject: [PATCH 3/4] address comments --- tfjs-backend-webgl/src/backend_webgl.ts | 9 +++++---- tfjs-backend-webgl/src/cumsum_gpu.ts | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts index 1660ea9b2ac..ea686c8f21f 100644 --- a/tfjs-backend-webgl/src/backend_webgl.ts +++ b/tfjs-backend-webgl/src/backend_webgl.ts @@ -1181,18 +1181,19 @@ export class MathBackendWebGL extends KernelBackend { `WebGL cumsum shader expects an inner-most axis=${x.rank - 1} ` + `but got axis=${axis}`); } - const size = x.shape[x.shape.length - 1]; + const size = x.shape[axis]; let result = x; // Use cumsum parallel algorithm, ref: // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda for (let i = 0; i <= Math.ceil(Math.log2(size)) - 1; i++) { - const program = new CumSumProgram(x.shape, false, reverse, i); - result = this.compileAndRun(program, [result]); + const program = new CumSumProgram(x.shape, false, reverse); + const customSetup = program.getCustomSetupFunc(i); + result = this.compileAndRun(program, [result], result.dtype, customSetup); } // For exclusive cumsum, shift the end result in the direction of sum and // add 0 to the front index. if (exclusive) { - const program = new CumSumProgram(x.shape, exclusive, reverse, 0); + const program = new CumSumProgram(x.shape, exclusive, reverse); result = this.compileAndRun(program, [result]); } diff --git a/tfjs-backend-webgl/src/cumsum_gpu.ts b/tfjs-backend-webgl/src/cumsum_gpu.ts index f08d34b2501..58253e39e83 100644 --- a/tfjs-backend-webgl/src/cumsum_gpu.ts +++ b/tfjs-backend-webgl/src/cumsum_gpu.ts @@ -14,7 +14,7 @@ * limitations under the License. * ============================================================================= */ - +import {GPGPUContext} from './gpgpu_context'; import {GPGPUProgram} from './gpgpu_math'; import {getCoordsDataType} from './shader_compiler'; @@ -23,8 +23,10 @@ export class CumSumProgram implements GPGPUProgram { outputShape: number[]; userCode: string; - constructor( - shape: number[], exclusive: boolean, reverse: boolean, index: number) { + // Caching uniform location for speed. + index: WebGLUniformLocation; + + constructor(shape: number[], exclusive: boolean, reverse: boolean) { this.outputShape = shape; const rank = shape.length; const val = exclusive ? '0.0' : `getX(${getCoords(rank, 'coords')})`; @@ -43,11 +45,12 @@ export class CumSumProgram implements GPGPUProgram { } this.userCode = ` + uniform float index; void main() { ${getCoordsDataType(rank)} coords = getOutputCoords(); int end = ${getFinalCoord(rank, 'coords')}; float val = ${val}; - int pow2 = int(pow(2.0, float(${index}))); + int pow2 = int(pow(2.0, index)); if (${condition}) { int idx = ${idxString}; ${getFinalCoord(rank, 'coords')} = idx; @@ -57,6 +60,15 @@ export class CumSumProgram implements GPGPUProgram { } `; } + + getCustomSetupFunc(index: number) { + return (gpgpu: GPGPUContext, webGLProgram: WebGLProgram) => { + if (this.index == null) { + this.index = gpgpu.getUniformLocation(webGLProgram, 'index'); + } + gpgpu.gl.uniform1f(this.index, index); + }; + } } function getCoords(rank: number, name: string): string { From 65b7bb0ad9694c564908bdf2839b9d48f4d4bc5c Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Fri, 26 Jun 2020 09:09:01 -0700 Subject: [PATCH 4/4] sorted the ops --- tfjs-converter/tools/model_summary.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tfjs-converter/tools/model_summary.ts b/tfjs-converter/tools/model_summary.ts index dde785e6f72..c6fddf384e6 100644 --- a/tfjs-converter/tools/model_summary.ts +++ b/tfjs-converter/tools/model_summary.ts @@ -31,13 +31,14 @@ function summarize(argv: string[]) { return; } // tslint:disable-next-line: no-any - const nodes: any[] = model['modelTopology']['node']; + let nodes: any[] = model['modelTopology']['node']; const library = model['modelTopology']['library']; if (library != null) { const functions = library['function']; + // tslint:disable-next-line: no-any if (functions != null) { - functions.forEach((func: any) => nodes.concat(func['nodeDef'])); + functions.forEach((func: any) => nodes = nodes.concat(func['nodeDef'])); } } @@ -51,7 +52,8 @@ function summarize(argv: string[]) { opCount[op] = count + 1; }); - console.log(opCount); + const keys = Object.keys(opCount).sort(); + keys.forEach(key => console.log(`${key}: ${opCount[key]}`)); console.log(`Total ops = ${nodes.length}`); }