From d1325db2037749532555dc274459ff0d3bc6fba7 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 21 Oct 2019 14:05:15 -0400 Subject: [PATCH 01/17] remove docs --- tfjs-core/src/ops/fused_ops.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 4052d4df83e..1a88446655b 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -48,7 +48,6 @@ import {Activation} from './fused_util'; * - `activation` Name of activation kernel (defaults to `linear`). * - `preluActivationWeights` Tensor of prelu weights. */ -/** @doc {heading: 'Operations', subheading: 'Matrices', namespace: 'fused'} */ function matMul_({ a, b, @@ -273,9 +272,6 @@ function matMul_({ * @param preluActivationWeights Tensor of prelu weights to be applied as part * of a `prelu` activation, typically the same shape as `x`. */ -/** - * @doc {heading: 'Operations', subheading: 'Convolution', namespace: 'fused'} - */ function conv2d_({ x, filter, @@ -481,9 +477,6 @@ function conv2d_({ * @param preluActivationWeights Tensor of prelu weights to be applied as part * of a `prelu` activation, typically the same shape as `x`. */ -/** - * @doc {heading: 'Operations', subheading: 'Convolution', namespace: 'fused'} - */ function depthwiseConv2d_({ x, filter, From 41abb5b7932715714272098b9b607ef00701865d Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 21 Oct 2019 14:09:18 -0400 Subject: [PATCH 02/17] add relu6 gradient support --- tfjs-core/src/ops/fused_ops.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 1a88446655b..52b2328c83b 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -132,6 +132,9 @@ function matMul_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); + } else if (activation === 'relu6') { + const mask = y.lessEqual(6).mul(y.step()); + dyActivation = dy.mul(mask); } else { throw new Error( `Gradient for activation ${activation} has not been ` + @@ -358,6 +361,9 @@ function conv2d_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); + } else if (activation === 'relu6') { + const mask = y.lessEqual(6).mul(y.step()); + dyActivation = dy.mul(mask); } else { throw new Error( `Gradient for activation ${activation} has not been ` + @@ -569,6 +575,9 @@ function depthwiseConv2d_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); + } else if (activation === 'relu6') { + const mask = y.lessEqual(6).mul(y.step()); + dyActivation = dy.mul(mask); } else { throw new Error( `Gradient for activation ${activation} has not been ` + From dc575a11cf7deb207b5182fefe1f53795a2ff3d7 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 21 Oct 2019 15:04:44 -0400 Subject: [PATCH 03/17] add elu --- tfjs-core/src/ops/fused_ops.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 52b2328c83b..b5a89f455f8 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -135,6 +135,10 @@ function matMul_({ } else if (activation === 'relu6') { const mask = y.lessEqual(6).mul(y.step()); dyActivation = dy.mul(mask); + } else if (activation === 'elu') { + const eluDer = + ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); + dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not 
been ` + @@ -364,6 +368,10 @@ function conv2d_({ } else if (activation === 'relu6') { const mask = y.lessEqual(6).mul(y.step()); dyActivation = dy.mul(mask); + } else if (activation === 'elu') { + const eluDer = + ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); + dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not been ` + @@ -578,6 +586,10 @@ function depthwiseConv2d_({ } else if (activation === 'relu6') { const mask = y.lessEqual(6).mul(y.step()); dyActivation = dy.mul(mask); + } else if (activation === 'elu') { + const eluDer = + ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); + dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not been ` + From 7fbe71e80f9987d145703f2008165e125408e925 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 21 Oct 2019 17:17:20 -0400 Subject: [PATCH 04/17] remove relu6elu --- tfjs-core/src/ops/fused_ops.ts | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index b5a89f455f8..1a88446655b 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -132,13 +132,6 @@ function matMul_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); - } else if (activation === 'relu6') { - const mask = y.lessEqual(6).mul(y.step()); - dyActivation = dy.mul(mask); - } else if (activation === 'elu') { - const eluDer = - ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); - dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not been ` + @@ -365,13 +358,6 @@ function conv2d_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); - } else if (activation === 'relu6') { - const mask = y.lessEqual(6).mul(y.step()); - dyActivation = dy.mul(mask); - } else if (activation === 'elu') { - const eluDer = - ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); - dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not been ` + @@ -583,13 +569,6 @@ function depthwiseConv2d_({ dyActivation = dy; } else if (activation === 'relu') { dyActivation = dy.mul(y.step()); - } else if (activation === 'relu6') { - const mask = y.lessEqual(6).mul(y.step()); - dyActivation = dy.mul(mask); - } else if (activation === 'elu') { - const eluDer = - ENGINE.runKernelFunc(backend => backend.eluDer(dy, y), {dy, y}); - dyActivation = dy.mul(eluDer); } else { throw new Error( `Gradient for activation ${activation} has not been ` + From 05844fc6dbf68232eeb3cd0fe4395c310bbaea26 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 09:43:33 -0400 Subject: [PATCH 05/17] refactor --- tfjs-core/src/ops/fused_ops.ts | 45 ++++++++++----------------------- tfjs-core/src/ops/fused_util.ts | 14 ++++++++++ 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 1a88446655b..ad8316aa190 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -16,6 +16,7 @@ */ import {ENGINE} from '../engine'; +import * as tf from '../index'; import {conv2dDerFilter, conv2dDerInput, depthwiseConv2dDerFilter, depthwiseConv2dDerInput} from '../ops/conv'; import * as conv_util from '../ops/conv_util'; import {op} from '../ops/operation'; @@ -26,7 +27,7 @@ import {TensorLike} from '../types'; import * as util 
from '../util'; import * as broadcast_util from './broadcast_util'; -import {Activation} from './fused_util'; +import {Activation, getDyActivation} from './fused_util'; /** * Computes the dot product of two matrices with optional activation and bias. @@ -110,6 +111,14 @@ function matMul_({ const b3D = transposeB ? $b.as3D(batchDimB, outerShapeB, innerShapeB) : $b.as3D(batchDimB, innerShapeB, outerShapeB); + const gradientMode = ENGINE.state.gradientDepth > 0; + if (gradientMode) { + // const product = tf.matMul(a, b); + // return tf.relu(product); + console.log('gradient mode'); + console.log(tf); + } + let $bias: Tensor; if (bias != null) { $bias = convertToTensor(bias, 'bias', 'fused matMul'); @@ -126,17 +135,7 @@ function matMul_({ const grad = (dy: Tensor3D, saved: Tensor[]) => { const [a3D, b3D, y] = saved; - - let dyActivation: Tensor3D; - if (activation == null || activation === 'linear') { - dyActivation = dy; - } else if (activation === 'relu') { - dyActivation = dy.mul(y.step()); - } else { - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); - } + const dyActivation = getDyActivation(dy, y, activation); let biasGradient = {}; if (bias != null) { @@ -353,16 +352,7 @@ function conv2d_({ const grad = (dy: Tensor4D, saved: Tensor[]) => { const [$filter, x4D, y] = saved as [Tensor4D, Tensor4D, Tensor4D]; - let dyActivation: Tensor4D; - if (activation == null || activation === 'linear') { - dyActivation = dy; - } else if (activation === 'relu') { - dyActivation = dy.mul(y.step()); - } else { - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); - } + const dyActivation = getDyActivation(dy, y, activation) as Tensor4D; util.assert( conv_util.tupleValuesAreOne(dilations), @@ -564,16 +554,7 @@ function depthwiseConv2d_({ `'${dilations}'`); const [x4D, $filter, y] = saved; - let dyActivation: Tensor4D; - if (activation == null || activation === 'linear') { - dyActivation = dy; - } else if (activation === 'relu') { - dyActivation = dy.mul(y.step()); - } else { - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); - } + const dyActivation = getDyActivation(dy, y, activation) as Tensor4D; let biasGradient = {}; if (bias != null) { diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index c2ad2d2d3f6..45ae1faaa09 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -38,3 +38,17 @@ export type FusedConv2DConfig = { activation?: Activation, preluActivationWeights?: Tensor }; + +// Returns gradient for fused activation. 
+export const getDyActivation = + (dy: Tensor, y: Tensor, activation: Activation): Tensor => { + if (activation == null || activation === 'linear') { + return dy; + } else if (activation === 'relu') { + return dy.mul(y.step()); + } else { + throw new Error( + `Gradient for activation ${activation} has not been ` + + `implemented yet.`); + } + }; From 6f72f3ce7aafc563cce050095ba709bf0c5bb3c3 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 11:13:14 -0400 Subject: [PATCH 06/17] more cleaning --- tfjs-core/src/ops/fused_ops.ts | 41 ++++----------------------------- tfjs-core/src/ops/fused_util.ts | 23 ++++++++++++++---- 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index ad8316aa190..c3e20a95d4e 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -27,7 +27,7 @@ import {TensorLike} from '../types'; import * as util from '../util'; import * as broadcast_util from './broadcast_util'; -import {Activation, getDyActivation} from './fused_util'; +import {Activation, getBiasGradient, getDyActivation} from './fused_util'; /** * Computes the dot product of two matrices with optional activation and bias. @@ -139,20 +139,7 @@ function matMul_({ let biasGradient = {}; if (bias != null) { - biasGradient = { - $bias: () => { - let res = dyActivation; - // Using dyActivation as reference shape because outputShape does not - // account for the fact that we temporarily reshape inputs to 3D as - // part of batched matMul. - const reduceAxes = - broadcast_util.getReductionAxes($bias.shape, dyActivation.shape); - if (reduceAxes.length > 0) { - res = res.sum(reduceAxes); - } - return res.reshape($bias.shape); - } - }; + biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; } if (!transposeA && !transposeB) { @@ -362,17 +349,7 @@ function conv2d_({ let biasGradient = {}; if (bias != null) { - biasGradient = { - $bias: () => { - let res = dyActivation; - const reduceAxes = - broadcast_util.getReductionAxes($bias.shape, dyActivation.shape); - if (reduceAxes.length > 0) { - res = res.sum(reduceAxes); - } - return res.reshape($bias.shape); - } - }; + biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; } return Object.assign( @@ -558,17 +535,7 @@ function depthwiseConv2d_({ let biasGradient = {}; if (bias != null) { - biasGradient = { - $bias: () => { - let res = dyActivation; - const reduceAxes = - broadcast_util.getReductionAxes($bias.shape, dyActivation.shape); - if (reduceAxes.length > 0) { - res = res.sum(reduceAxes); - } - return res.reshape($bias.shape); - } - }; + biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; } return Object.assign( diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 45ae1faaa09..0013549fcca 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -16,6 +16,8 @@ */ import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import * as broadcast_util from './broadcast_util'; + import {Conv2DInfo} from './conv_util'; export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; @@ -44,11 +46,22 @@ export const getDyActivation = (dy: Tensor, y: Tensor, activation: Activation): Tensor => { if (activation == null || activation === 'linear') { return dy; - } else if (activation === 'relu') { + } + if (activation === 'relu') { return dy.mul(y.step()); - } else { - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); } + 
throw new Error( + `Gradient for activation ${activation} has not been ` + + `implemented yet.`); }; + +// Returns gradient for fused bias. +export const getBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { + let res = dyActivation; + const reduceAxes = + broadcast_util.getReductionAxes(bias.shape, dyActivation.shape); + if (reduceAxes.length > 0) { + res = res.sum(reduceAxes); + } + return res.reshape(bias.shape); +}; From df7fc2b0535ac67db3eef167aac1fc0eb49d9a44 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 11:34:30 -0400 Subject: [PATCH 07/17] forwarding --- tfjs-core/src/ops/fused_ops.ts | 15 ++++++++------- tfjs-core/src/ops/fused_util.ts | 25 ++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index c3e20a95d4e..87f67e96a6a 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -27,7 +27,7 @@ import {TensorLike} from '../types'; import * as util from '../util'; import * as broadcast_util from './broadcast_util'; -import {Activation, getBiasGradient, getDyActivation} from './fused_util'; +import {Activation, applyActivation, getBiasGradient, getDyActivation, shouldNotFuse} from './fused_util'; /** * Computes the dot product of two matrices with optional activation and bias. @@ -111,12 +111,13 @@ function matMul_({ const b3D = transposeB ? $b.as3D(batchDimB, outerShapeB, innerShapeB) : $b.as3D(batchDimB, innerShapeB, outerShapeB); - const gradientMode = ENGINE.state.gradientDepth > 0; - if (gradientMode) { - // const product = tf.matMul(a, b); - // return tf.relu(product); - console.log('gradient mode'); - console.log(tf); + if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + let result = tf.matMul(a, b, transposeA, transposeB); + if (bias != null) { + result = tf.add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; } let $bias: Tensor; diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 0013549fcca..f5e8f9f7822 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -15,9 +15,10 @@ * ============================================================================= */ +import * as tf from '../index'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import * as broadcast_util from './broadcast_util'; +import * as broadcast_util from './broadcast_util'; import {Conv2DInfo} from './conv_util'; export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; @@ -65,3 +66,25 @@ export const getBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { } return res.reshape(bias.shape); }; + +// Whether we should call non-fused ops instead. 
+export const shouldNotFuse = + (gradientDepth: number, activation: Activation) => { + const gradientMode = gradientDepth > 0; + return gradientMode && activation !== 'linear' && activation !== 'relu'; + }; + +export const applyActivation = + (x: Tensor, activation: Activation, preluActivationWeights?: Tensor): + Tensor => { + if (activation === 'relu') { + return tf.relu(x); + } else if (activation === 'elu') { + return tf.elu(x); + } else if (activation === 'relu6') { + return tf.relu6(x); + } else if (activation === 'prelu') { + return tf.prelu(x, preluActivationWeights); + } + throw new Error(`Unknown fused activation ${activation}.`); + }; From 8011e50900ebd01378e442dc94c32ea6f070f1b6 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 11:40:45 -0400 Subject: [PATCH 08/17] forwarding --- tfjs-core/src/ops/fused_ops.ts | 38 +++++++++++++++++++++++++-------- tfjs-core/src/ops/fused_util.ts | 4 +++- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 87f67e96a6a..a653e2f636a 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -66,6 +66,15 @@ function matMul_({ activation?: Activation, preluActivationWeights?: Tensor }): T { + if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + let result = tf.matMul(a, b, transposeA, transposeB); + if (bias != null) { + result = tf.add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + let $a = convertToTensor(a, 'a', 'fused matMul'); let $b = convertToTensor(b, 'b', 'fused matMul'); [$a, $b] = makeTypesMatch($a, $b); @@ -111,15 +120,6 @@ function matMul_({ const b3D = transposeB ? $b.as3D(batchDimB, outerShapeB, innerShapeB) : $b.as3D(batchDimB, innerShapeB, outerShapeB); - if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { - let result = tf.matMul(a, b, transposeA, transposeB); - if (bias != null) { - result = tf.add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - let $bias: Tensor; if (bias != null) { $bias = convertToTensor(bias, 'bias', 'fused matMul'); @@ -282,6 +282,16 @@ function conv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { + if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + let result = tf.conv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = tf.add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + const $x = convertToTensor(x, 'x', 'conv2d'); const $filter = convertToTensor(filter, 'filter', 'conv2d'); @@ -468,6 +478,16 @@ function depthwiseConv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { + if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + let result = tf.depthwiseConv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = tf.add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + const $x = convertToTensor(x, 'x', 'depthwiseConv2d'); const $filter = convertToTensor(filter, 'filter', 'depthwiseConv2d'); diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index f5e8f9f7822..a57c2d1a227 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -77,7 +77,9 @@ export const shouldNotFuse = export const applyActivation = (x: Tensor, activation: Activation, 
preluActivationWeights?: Tensor): Tensor => { - if (activation === 'relu') { + if (activation === 'linear') { + return x; + } else if (activation === 'relu') { return tf.relu(x); } else if (activation === 'elu') { return tf.elu(x); From 9f8373241caea7d032800a9ff833bb19f115b45b Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 11:54:36 -0400 Subject: [PATCH 09/17] add test --- tfjs-core/src/ops/fused_test.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts index 0ca1ede5678..8bf7403125a 100644 --- a/tfjs-core/src/ops/fused_test.ts +++ b/tfjs-core/src/ops/fused_test.ts @@ -948,4 +948,18 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { expectArraysClose(await dfilterFused.array(), await dfilter.array()); expectArraysClose(await dbiasFused.array(), await dbias.array()); }); + + fit('non fusable activations in gradient mode --> non fused', async () => { + spyOn(tf, 'matMul').and.callThrough(); + + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + const fusedGrads = tf.grads( + (a, b) => tf.fused.matMul({a, b, bias: null, activation: 'relu6'})); + + fusedGrads([a, b], dy); + expect((tf.matMul as jasmine.Spy).calls.count()).toBe(1); + }); }); From ba444416ae75575f8796fa4efc7e3e48df831b2c Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 13:03:49 -0400 Subject: [PATCH 10/17] add test --- tfjs-core/src/ops/fused_test.ts | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts index 8bf7403125a..2a38e078ef3 100644 --- a/tfjs-core/src/ops/fused_test.ts +++ b/tfjs-core/src/ops/fused_test.ts @@ -949,17 +949,21 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { expectArraysClose(await dbiasFused.array(), await dbias.array()); }); - fit('non fusable activations in gradient mode --> non fused', async () => { - spyOn(tf, 'matMul').and.callThrough(); + // tslint:disable-next-line:max-line-length + it('calling fused op in gradient mode with activation that does not support fused gradients forwards to unfused ops instead', + async () => { + spyOn(tf, 'matMul').and.callThrough(); + spyOn(tf, 'relu6').and.callThrough(); - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - const fusedGrads = tf.grads( - (a, b) => tf.fused.matMul({a, b, bias: null, activation: 'relu6'})); + const fusedGrads = tf.grads( + (a, b) => tf.fused.matMul({a, b, bias: null, activation: 'relu6'})); - fusedGrads([a, b], dy); - expect((tf.matMul as jasmine.Spy).calls.count()).toBe(1); - }); + fusedGrads([a, b], dy); + expect((tf.matMul as jasmine.Spy).calls.count()).toBe(1); + expect((tf.relu6 as jasmine.Spy).calls.count()).toBe(1); + }); }); From d59d3637db6135176a667c78f8f5ec4d19fb57c4 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 13:20:29 -0400 Subject: [PATCH 11/17] remove circular imports --- tfjs-core/src/ops/fused_ops.ts | 37 ++++++++++++++++++++++++++------- tfjs-core/src/ops/fused_util.ts | 18 ---------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts 
b/tfjs-core/src/ops/fused_ops.ts index a653e2f636a..1f74f842dca 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -16,7 +16,6 @@ */ import {ENGINE} from '../engine'; -import * as tf from '../index'; import {conv2dDerFilter, conv2dDerInput, depthwiseConv2dDerFilter, depthwiseConv2dDerInput} from '../ops/conv'; import * as conv_util from '../ops/conv_util'; import {op} from '../ops/operation'; @@ -26,8 +25,30 @@ import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; import * as util from '../util'; +import {add} from './binary_ops'; import * as broadcast_util from './broadcast_util'; -import {Activation, applyActivation, getBiasGradient, getDyActivation, shouldNotFuse} from './fused_util'; +import {conv2d as unfusedConv2d, depthwiseConv2d as unfusedDepthwiseConv2d} from './conv'; +import {Activation, getBiasGradient, getDyActivation, shouldNotFuse} from './fused_util'; +import {matMul as unfusedMatMul} from './matmul'; + +import {elu, prelu, relu, relu6} from './relu_ops'; + +const applyActivation = + (x: Tensor, activation: Activation, preluActivationWeights?: Tensor): + Tensor => { + if (activation === 'linear') { + return x; + } else if (activation === 'relu') { + return relu(x); + } else if (activation === 'elu') { + return elu(x); + } else if (activation === 'relu6') { + return relu6(x); + } else if (activation === 'prelu') { + return prelu(x, preluActivationWeights); + } + throw new Error(`Unknown fused activation ${activation}.`); + }; /** * Computes the dot product of two matrices with optional activation and bias. @@ -67,9 +88,9 @@ function matMul_({ preluActivationWeights?: Tensor }): T { if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { - let result = tf.matMul(a, b, transposeA, transposeB); + let result = unfusedMatMul(a, b, transposeA, transposeB); if (bias != null) { - result = tf.add(result, bias); + result = add(result, bias); } return applyActivation(result, activation, preluActivationWeights) as T; @@ -283,10 +304,10 @@ function conv2d_({ preluActivationWeights?: Tensor }): T { if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { - let result = tf.conv2d( + let result = unfusedConv2d( x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); if (bias != null) { - result = tf.add(result, bias); + result = add(result, bias); } return applyActivation(result, activation, preluActivationWeights) as T; @@ -479,10 +500,10 @@ function depthwiseConv2d_({ preluActivationWeights?: Tensor }): T { if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { - let result = tf.depthwiseConv2d( + let result = unfusedDepthwiseConv2d( x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); if (bias != null) { - result = tf.add(result, bias); + result = add(result, bias); } return applyActivation(result, activation, preluActivationWeights) as T; diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index a57c2d1a227..5d76845ee9e 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -15,7 +15,6 @@ * ============================================================================= */ -import * as tf from '../index'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; import * as broadcast_util from './broadcast_util'; @@ -73,20 +72,3 @@ export const shouldNotFuse = const gradientMode = gradientDepth > 0; return gradientMode && activation !== 'linear' && activation !== 'relu'; }; - -export const applyActivation = - (x: Tensor, activation: 
Activation, preluActivationWeights?: Tensor): - Tensor => { - if (activation === 'linear') { - return x; - } else if (activation === 'relu') { - return tf.relu(x); - } else if (activation === 'elu') { - return tf.elu(x); - } else if (activation === 'relu6') { - return tf.relu6(x); - } else if (activation === 'prelu') { - return tf.prelu(x, preluActivationWeights); - } - throw new Error(`Unknown fused activation ${activation}.`); - }; From debc9706013020709f98aca4280a64c98daadb55 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 13:46:43 -0400 Subject: [PATCH 12/17] update test --- tfjs-core/src/ops/fused_test.ts | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts index 2a38e078ef3..94d1823aaa9 100644 --- a/tfjs-core/src/ops/fused_test.ts +++ b/tfjs-core/src/ops/fused_test.ts @@ -950,20 +950,27 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { }); // tslint:disable-next-line:max-line-length - it('calling fused op in gradient mode with activation that does not support fused gradients forwards to unfused ops instead', + it('calling fused op in gradient mode with activation that does not support fused gradients works', async () => { - spyOn(tf, 'matMul').and.callThrough(); - spyOn(tf, 'relu6').and.callThrough(); - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const transposeA = false; + const transposeB = false; - const fusedGrads = tf.grads( - (a, b) => tf.fused.matMul({a, b, bias: null, activation: 'relu6'})); + const fusedGrads = tf.grads((a, b) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); + }); + const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + + const grads = tf.grads((a, b) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + return tf.relu6(prod); + }); + const [da, db] = grads([a, b], dy); - fusedGrads([a, b], dy); - expect((tf.matMul as jasmine.Spy).calls.count()).toBe(1); - expect((tf.relu6 as jasmine.Spy).calls.count()).toBe(1); + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.data(), await fusedDb.array()); }); }); From dea21203767e41a5e8ee1367ad6092e2f3942bba Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 14:58:14 -0400 Subject: [PATCH 13/17] switch --- tfjs-core/src/ops/fused_ops.ts | 8 ++++---- tfjs-core/src/ops/fused_util.ts | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 1f74f842dca..9fac2dce4cf 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -28,7 +28,7 @@ import * as util from '../util'; import {add} from './binary_ops'; import * as broadcast_util from './broadcast_util'; import {conv2d as unfusedConv2d, depthwiseConv2d as unfusedDepthwiseConv2d} from './conv'; -import {Activation, getBiasGradient, getDyActivation, shouldNotFuse} from './fused_util'; +import {Activation, getBiasGradient, getDyActivation, shouldFuse} from './fused_util'; import {matMul as unfusedMatMul} from './matmul'; import {elu, prelu, relu, relu6} from './relu_ops'; @@ -87,7 +87,7 @@ function matMul_({ activation?: Activation, preluActivationWeights?: Tensor }): T { - if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { let result = 
unfusedMatMul(a, b, transposeA, transposeB); if (bias != null) { result = add(result, bias); @@ -303,7 +303,7 @@ function conv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { - if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { let result = unfusedConv2d( x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); if (bias != null) { @@ -499,7 +499,7 @@ function depthwiseConv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { - if (shouldNotFuse(ENGINE.state.gradientDepth, activation)) { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { let result = unfusedDepthwiseConv2d( x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); if (bias != null) { diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 5d76845ee9e..631f64d3aa2 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -66,9 +66,8 @@ export const getBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { return res.reshape(bias.shape); }; -// Whether we should call non-fused ops instead. -export const shouldNotFuse = - (gradientDepth: number, activation: Activation) => { - const gradientMode = gradientDepth > 0; - return gradientMode && activation !== 'linear' && activation !== 'relu'; - }; +// Whether we should call fused ops. +export const shouldFuse = (gradientDepth: number, activation: Activation) => { + const gradientMode = gradientDepth > 0; + return !gradientMode && (activation === 'linear' || activation === 'relu'); +}; From c9ec4d8cce3b3dfc9fbc9dc71d746ab4f3e9614b Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 15:01:37 -0400 Subject: [PATCH 14/17] tests --- tfjs-core/src/ops/fused_test.ts | 87 ++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts index 94d1823aaa9..8d324393298 100644 --- a/tfjs-core/src/ops/fused_test.ts +++ b/tfjs-core/src/ops/fused_test.ts @@ -904,7 +904,7 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { expectArraysClose(await dbiasFused.array(), await dbias.array()); }); - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and activation', + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and relu', async () => { const inputDepth = 1; const outputDepth = 1; @@ -949,28 +949,69 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { expectArraysClose(await dbiasFused.array(), await dbias.array()); }); - // tslint:disable-next-line:max-line-length - it('calling fused op in gradient mode with activation that does not support fused gradients works', - async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - const transposeA = false; - const transposeB = false; - - const fusedGrads = tf.grads((a, b) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); - }); - const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and elu', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; - const grads = tf.grads((a, b) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - return 
tf.relu6(prod); - }); - const [da, db] = grads([a, b], dy); + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.data(), await fusedDb.array()); - }); + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = + tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b, + activation: 'elu' + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.conv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return tf.elu(sum); + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await dbias.array()); + }); + + it('fused matmul with relu6', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const transposeA = false; + const transposeB = false; + + const fusedGrads = tf.grads((a, b) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); + }); + const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + + const grads = tf.grads((a, b) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + return tf.relu6(prod); + }); + const [da, db] = grads([a, b], dy); + + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.data(), await fusedDb.array()); + }); }); From d5f330e2a54c63af9884038b961df7754c805fff Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 15:04:10 -0400 Subject: [PATCH 15/17] reorganize --- tfjs-core/src/ops/fused_ops.ts | 39 +++++++++++++++++++++++++++------ tfjs-core/src/ops/fused_util.ts | 26 ---------------------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 9fac2dce4cf..a08d17b6061 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -28,11 +28,36 @@ import * as util from '../util'; import {add} from './binary_ops'; import * as broadcast_util from './broadcast_util'; import {conv2d as unfusedConv2d, depthwiseConv2d as unfusedDepthwiseConv2d} from './conv'; -import {Activation, getBiasGradient, getDyActivation, shouldFuse} from './fused_util'; +import {Activation, shouldFuse} from './fused_util'; import {matMul as unfusedMatMul} from './matmul'; import {elu, prelu, relu, relu6} from './relu_ops'; +// Returns gradient for fused activation. +const getFusedDyActivation = + (dy: Tensor, y: Tensor, activation: Activation): Tensor => { + if (activation == null || activation === 'linear') { + return dy; + } + if (activation === 'relu') { + return dy.mul(y.step()); + } + throw new Error( + `Gradient for activation ${activation} has not been ` + + `implemented yet.`); + }; + +// Returns gradient for fused bias. 
+const getFusedBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { + let res = dyActivation; + const reduceAxes = + broadcast_util.getReductionAxes(bias.shape, dyActivation.shape); + if (reduceAxes.length > 0) { + res = res.sum(reduceAxes); + } + return res.reshape(bias.shape); +}; + const applyActivation = (x: Tensor, activation: Activation, preluActivationWeights?: Tensor): Tensor => { @@ -157,11 +182,11 @@ function matMul_({ const grad = (dy: Tensor3D, saved: Tensor[]) => { const [a3D, b3D, y] = saved; - const dyActivation = getDyActivation(dy, y, activation); + const dyActivation = getFusedDyActivation(dy, y, activation); let biasGradient = {}; if (bias != null) { - biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; + biasGradient = {$bias: () => getFusedBiasGradient($bias, dyActivation)}; } if (!transposeA && !transposeB) { @@ -371,7 +396,7 @@ function conv2d_({ const grad = (dy: Tensor4D, saved: Tensor[]) => { const [$filter, x4D, y] = saved as [Tensor4D, Tensor4D, Tensor4D]; - const dyActivation = getDyActivation(dy, y, activation) as Tensor4D; + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; util.assert( conv_util.tupleValuesAreOne(dilations), @@ -381,7 +406,7 @@ function conv2d_({ let biasGradient = {}; if (bias != null) { - biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; + biasGradient = {$bias: () => getFusedBiasGradient($bias, dyActivation)}; } return Object.assign( @@ -573,11 +598,11 @@ function depthwiseConv2d_({ `'${dilations}'`); const [x4D, $filter, y] = saved; - const dyActivation = getDyActivation(dy, y, activation) as Tensor4D; + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; let biasGradient = {}; if (bias != null) { - biasGradient = {$bias: () => getBiasGradient($bias, dyActivation)}; + biasGradient = {$bias: () => getFusedBiasGradient($bias, dyActivation)}; } return Object.assign( diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 631f64d3aa2..73f1fc6d9dc 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -17,7 +17,6 @@ import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import * as broadcast_util from './broadcast_util'; import {Conv2DInfo} from './conv_util'; export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; @@ -41,31 +40,6 @@ export type FusedConv2DConfig = { preluActivationWeights?: Tensor }; -// Returns gradient for fused activation. -export const getDyActivation = - (dy: Tensor, y: Tensor, activation: Activation): Tensor => { - if (activation == null || activation === 'linear') { - return dy; - } - if (activation === 'relu') { - return dy.mul(y.step()); - } - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); - }; - -// Returns gradient for fused bias. -export const getBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { - let res = dyActivation; - const reduceAxes = - broadcast_util.getReductionAxes(bias.shape, dyActivation.shape); - if (reduceAxes.length > 0) { - res = res.sum(reduceAxes); - } - return res.reshape(bias.shape); -}; - // Whether we should call fused ops. 
export const shouldFuse = (gradientDepth: number, activation: Activation) => { const gradientMode = gradientDepth > 0; From bc8e7c4d7991e694e0e497d9bf2b96e0970fceee Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 15:13:17 -0400 Subject: [PATCH 16/17] save From fab0e46829e646742bee0aaa0acee1be959ff93e Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 22 Oct 2019 15:25:44 -0400 Subject: [PATCH 17/17] save
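
For reference, a minimal usage sketch of the behavior this patch series implements (this example is not part of the patches themselves; it assumes a tfjs-core build that includes them, and the tensor values simply mirror the relu6 gradient test added above). Inside a gradient scope, fused ops with activations whose fused gradients are not implemented (relu6, elu, prelu) forward to the unfused ops, so the resulting gradients match the unfused composition:

import * as tf from '@tensorflow/tfjs-core';

// Same illustrative values as the relu6 gradient test in PATCH 14.
const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]);
const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]);
const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]);

// Inside tf.grads the engine's gradientDepth is > 0, so shouldFuse(...) returns
// false for 'relu6' and fused.matMul forwards to unfused matMul + relu6.
const fusedGrads = tf.grads(
    (a, b) => tf.fused.matMul({a, b, bias: null, activation: 'relu6'}));
const [da, db] = fusedGrads([a, b], dy);

// Equivalent unfused composition; its gradients should match da and db.
const refGrads = tf.grads((a, b) => tf.relu6(tf.matMul(a, b)));
const [daRef, dbRef] = refGrads([a, b], dy);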