From f4c9762540def5c2e4b88101a49fb63583cf7aad Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Mon, 13 Jul 2020 09:48:08 -0400 Subject: [PATCH 01/12] split out fused ops into their own files --- tfjs-core/src/kernel_names.ts | 35 ++ tfjs-core/src/ops/fused/conv2d.ts | 298 +++++++++ tfjs-core/src/ops/fused/depthwise_conv2d.ts | 234 +++++++ tfjs-core/src/ops/fused/mat_mul.ts | 208 ++++++ tfjs-core/src/ops/fused_ops.ts | 664 +------------------- tfjs-core/src/ops/fused_util.ts | 48 ++ 6 files changed, 828 insertions(+), 659 deletions(-) create mode 100644 tfjs-core/src/ops/fused/conv2d.ts create mode 100644 tfjs-core/src/ops/fused/depthwise_conv2d.ts create mode 100644 tfjs-core/src/ops/fused/mat_mul.ts diff --git a/tfjs-core/src/kernel_names.ts b/tfjs-core/src/kernel_names.ts index 70cf4cb01fa..237ccca263d 100644 --- a/tfjs-core/src/kernel_names.ts +++ b/tfjs-core/src/kernel_names.ts @@ -21,6 +21,7 @@ import {ExplicitPadding} from '../src/ops/conv_util'; import {NamedTensorInfoMap, TensorInfo} from './kernel_registry'; +import {Activation} from './ops/fused_util'; import {DataType, PixelData} from './types'; export const Abs = 'Abs'; @@ -642,3 +643,37 @@ export interface FromPixelsInputs { export interface FromPixelsAttrs { numChannels: number; } + +export const _FusedMatMul = '_FusedMatMul'; +export type _FusedMatMulInputs = + Pick; +// tslint:disable-next-line: class-name +export interface _FusedMatMulAttrs { + transposeA: number; + transposeB: number; + activation: Activation; +} + +export const FusedConv2D = 'FusedConv2D'; +export type FusedConv2DInputs = + Pick; +export interface FusedConv2DAttrs { + strides: [number, number]|number; + pad: 'valid'|'same'|number|ExplicitPadding; + dataFormat: 'NHWC'|'NCHW'; + dilations: [number, number]|number; + dimRoundingMode: 'floor'|'round'|'ceil'; + activation: Activation; +} + +export const FusedDepthwiseConv2D = 'FusedDepthwiseConv2D'; +export type FusedDepthwiseConv2DInputs = + Pick; +export interface FusedDepthwiseConv2DAttrs { + strides: [number, number]|number; + pad: 'valid'|'same'|number; + dataFormat: 'NHWC'|'NCHW'; + dilations: [number, number]|number; + dimRoundingMode: 'floor'|'round'|'ceil'; + activation: Activation; +} diff --git a/tfjs-core/src/ops/fused/conv2d.ts b/tfjs-core/src/ops/fused/conv2d.ts new file mode 100644 index 00000000000..e3504076aa3 --- /dev/null +++ b/tfjs-core/src/ops/fused/conv2d.ts @@ -0,0 +1,298 @@ +/** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import {ENGINE} from '../../engine'; +import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; +import {makeTypesMatch} from '../../tensor_util'; +import {convertToTensor} from '../../tensor_util_env'; +import {TensorLike} from '../../types'; +import * as util from '../../util'; +import {add} from '../add'; +import * as broadcast_util from '../broadcast_util'; +import {conv2d as unfusedConv2d} from '../conv2d'; +import {conv2DBackpropFilter} from '../conv2d_backprop_filter'; +import {conv2DBackpropInput} from '../conv2d_backprop_input'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation} from '../fused_util'; +import {Activation, shouldFuse} from '../fused_util'; +import * as conv_util from '../ops/../conv_util'; +import {op} from '../ops/../operation'; + +/** + * Computes a 2D convolution over the input x, optionally fused with adding a + * bias and applying an activation. + * + * ```js + * const inputDepth = 2; + * const inShape = [2, 2, 2, inputDepth]; + * const outputDepth = 2; + * const fSize = 1; + * const pad = 0; + * const strides = 1; + * + * const x = tf.tensor4d( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + * 16], inShape); + * const w = tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, + * outputDepth]); + * + * tf.fused.conv2d({ x, filter: w, strides, pad, dataFormat: 'NHWC', + * dilations: [1, 1], bias: tf.scalar(5), activation: 'relu' }).print(); + * ``` + * + * @param obj An object with the following properties: + * @param x The input tensor, of rank 4 or rank 3, of shape + * `[batch, height, width, inChannels]`. If rank 3, batch of 1 is + * assumed. + * @param filter The filter, rank 4, of shape + * `[filterHeight, filterWidth, inDepth, outDepth]`. + * @param strides The strides of the convolution: `[strideHeight, + * strideWidth]`. + * @param pad The type of padding algorithm. + * - `same` and stride 1: output will be of same size as input, + * regardless of filter size. + * - `valid` output will be smaller than input if filter is larger + * than 1x1. + * - For more info, see this guide: + * [https://www.tensorflow.org/api_guides/python/nn#Convolution]( + * https://www.tensorflow.org/api_guides/python/nn#Convolution) + * @param dataFormat An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. With the + * default format "NHWC", the data is stored in the order of: [batch, + * height, width, channels]. Only "NHWC" is currently supported. + * @param dilations The dilation rates: `[dilationHeight, dilationWidth]` + * in which we sample input values across the height and width dimensions + * in atrous convolution. Defaults to `[1, 1]`. If `dilations` is a single + * number, then `dilationHeight == dilationWidth`. If it is greater than + * 1, then all values of `strides` must be 1. + * @param dimRoundingMode The rounding mode used when computing output + * dimensions if pad is a number. If none is provided, it will not round + * and error if the output is of fractional size. + * @param bias Tensor to be added to the result. + * @param activation Name of activation kernel (defaults to `linear`) to be + * applied + * after biasAdd. + * @param preluActivationWeights Tensor of prelu weights to be applied as part + * of a `prelu` activation, typically the same shape as `x`. + */ +/** + * Computes a 2D convolution over the input x, optionally fused with adding a + * bias and applying an activation. 
+ */ +function fusedConv2d_({ + x, + filter, + strides, + pad, + dataFormat = 'NHWC', + dilations = [1, 1], + dimRoundingMode, + bias, + activation = 'linear', + preluActivationWeights +}: { + x: T|TensorLike, + filter: Tensor4D|TensorLike, + strides: [number, number]|number, + pad: 'valid'|'same'|number|conv_util.ExplicitPadding, + dataFormat?: 'NHWC'|'NCHW', + dilations?: [number, number]|number, + dimRoundingMode?: 'floor'|'round'|'ceil', + bias?: Tensor|TensorLike, + activation?: Activation, + preluActivationWeights?: Tensor +}): T { + activation = activation || 'linear'; + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedConv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + + const $x = convertToTensor(x, 'x', 'conv2d'); + const $filter = convertToTensor(filter, 'filter', 'conv2d'); + + let x4D = $x as Tensor4D; + let reshapedTo4D = false; + + if ($x.rank === 3) { + reshapedTo4D = true; + x4D = $x.as4D(1, $x.shape[0], $x.shape[1], $x.shape[2]); + } + util.assert( + x4D.rank === 4, + () => `Error in fused conv2d: input must be rank 4, but got rank ` + + `${x4D.rank}.`); + util.assert( + $filter.rank === 4, + () => `Error in fused conv2d: filter must be rank 4, but got rank ` + + `${$filter.rank}.`); + if (dimRoundingMode != null) { + util.assert( + util.isInt(pad as number), + () => `Error in fused conv2d: pad must be an integer when using, ` + + `dimRoundingMode ${dimRoundingMode} but got pad ${pad}.`); + } + + util.assert( + x4D.shape[3] === $filter.shape[2], + () => `Error in conv2d: depth of input (${x4D.shape[3]}) must match ` + + `input depth for filter ${$filter.shape[2]}.`); + util.assert( + conv_util.eitherStridesOrDilationsAreOne(strides, dilations), + () => 'Error in conv2D: Either strides or dilations must be 1. ' + + `Got strides ${strides} and dilations '${dilations}'`); + util.assert( + dataFormat === 'NHWC', + () => `Error in conv2d: got dataFormat of ${ + dataFormat} but only NHWC is currently supported.`); + + const convInfo = conv_util.computeConv2DInfo( + x4D.shape, $filter.shape, strides, dilations, pad, dimRoundingMode); + + let $bias: Tensor; + if (bias != null) { + $bias = convertToTensor(bias, 'bias', 'fused conv2d'); + [$bias] = makeTypesMatch($bias, $x); + + broadcast_util.assertAndGetBroadcastShape(convInfo.outShape, $bias.shape); + } + + let $preluActivationWeights: Tensor; + if (preluActivationWeights != null) { + $preluActivationWeights = convertToTensor( + preluActivationWeights, 'prelu weights', 'fused conv2d'); + } + + const grad = (dy: Tensor4D, saved: Tensor[]) => { + const [$filter, x4D, y] = saved as [Tensor4D, Tensor4D, Tensor4D]; + + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; + + util.assert( + conv_util.tupleValuesAreOne(dilations), + () => 'Error in gradient of fused conv2D: ' + + `dilation rates greater than 1 ` + + `are not yet supported in gradients. 
Got dilations '${dilations}'`); + + let biasGradient = {}; + if (bias != null) { + biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; + } + + return Object.assign( + { + x: () => conv2DBackpropInput( + x4D.shape, dyActivation, $filter, strides, pad), + filter: () => conv2DBackpropFilter( + x4D, dyActivation, $filter.shape, strides, pad) + }, + biasGradient); + }; + + const inputs: { + x: Tensor, + filter: Tensor, + bias?: Tensor, + preluActivationWeights?: Tensor + } = {x: x4D, filter: $filter}; + if (bias != null) { + inputs.bias = $bias; + } + if (preluActivationWeights != null) { + inputs.preluActivationWeights = $preluActivationWeights; + } + + const inputsToSave = [$filter, x4D]; + const outputsToSave = [true]; // Save the only output. + const res = ENGINE.runKernelFunc( + (backend, save) => { + const res = backend.fusedConv2d({ + input: x4D, + filter: $filter, + convInfo, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + save([$filter, x4D, res]); + return res; + }, + inputs, grad, 'FusedConv2D', {convInfo, activation}, inputsToSave, + outputsToSave); + + if (reshapedTo4D) { + return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + + return res as T; +} +export const conv2d = op({fusedConv2d_}); diff --git a/tfjs-core/src/ops/fused/depthwise_conv2d.ts b/tfjs-core/src/ops/fused/depthwise_conv2d.ts new file mode 100644 index 00000000000..cadefd6d964 --- /dev/null +++ b/tfjs-core/src/ops/fused/depthwise_conv2d.ts @@ -0,0 +1,234 @@ +/** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {ENGINE} from '../../engine'; +import * as conv_util from '../../ops/conv_util'; +import {op} from '../../ops/operation'; +import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; +import {makeTypesMatch} from '../../tensor_util'; +import {convertToTensor} from '../../tensor_util_env'; +import {TensorLike} from '../../types'; +import * as util from '../../util'; +import {add} from '../add'; +import * as broadcast_util from '../broadcast_util'; +import {depthwiseConv2d as unfusedDepthwiseConv2d} from '../depthwise_conv2d'; +import {depthwiseConv2dNativeBackpropFilter} from '../depthwise_conv2d_native_backprop_filter'; +import {depthwiseConv2dNativeBackpropInput} from '../depthwise_conv2d_native_backprop_input'; +import {Activation, applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; + +/** + * Computes depthwise 2D convolution, optionally fused with adding a + * bias and applying an activation. 
+ * + * Given a 4D `input` array and a `filter` array of shape + * `[filterHeight, filterWidth, inChannels, channelMultiplier]` containing + * `inChannels` convolutional filters of depth 1, this op applies a + * different filter to each input channel (expanding from 1 channel to + * `channelMultiplier` channels for each), then concatenates the results + * together. The output has `inChannels * channelMultiplier` channels. + * + * See + * [https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d]( + * https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d) + * for more details. + * + * @param obj An object with the following properties: + * @param x The input tensor, of rank 4 or rank 3, of shape + * `[batch, height, width, inChannels]`. If rank 3, batch of 1 is + * assumed. + * @param filter The filter tensor, rank 4, of shape + * `[filterHeight, filterWidth, inChannels, channelMultiplier]`. + * @param strides The strides of the convolution: `[strideHeight, + * strideWidth]`. If strides is a single number, then `strideHeight == + * strideWidth`. + * @param pad The type of padding algorithm. + * - `same` and stride 1: output will be of same size as input, + * regardless of filter size. + * - `valid`: output will be smaller than input if filter is larger + * than 1x1. + * - For more info, see this guide: + * [https://www.tensorflow.org/api_guides/python/nn#Convolution]( + * https://www.tensorflow.org/api_guides/python/nn#Convolution) + * @param dilations The dilation rates: `[dilationHeight, dilationWidth]` + * in which we sample input values across the height and width dimensions + * in atrous convolution. Defaults to `[1, 1]`. If `rate` is a single + * number, then `dilationHeight == dilationWidth`. If it is greater than + * 1, then all values of `strides` must be 1. + * @param dataFormat: An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. With the + * default format "NHWC", the data is stored in the order of: [batch, + * height, width, channels]. Only "NHWC" is currently supported. + * @param dimRoundingMode The rounding mode used when computing output + * dimensions if pad is a number. If none is provided, it will not round + * and error if the output is of fractional size. + * @param bias Tensor to be added to the result. + * @param activation Name of activation kernel (defaults to `linear`). + * @param preluActivationWeights Tensor of prelu weights to be applied as part + * of a `prelu` activation, typically the same shape as `x`. 
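+ *
+ * A minimal usage sketch (the shapes and values below are illustrative only,
+ * not taken from the original docs): a 1x1 depthwise filter with channel
+ * multiplier 2, fused with a scalar bias add and a `relu` activation.
+ *
+ * ```js
+ * const x = tf.tensor4d([1, 2, 3, 4], [1, 2, 2, 1]);
+ * const filter = tf.tensor4d([1, 0.5], [1, 1, 1, 2]);
+ *
+ * tf.fused.depthwiseConv2d({ x, filter, strides: 1, pad: 'valid',
+ *   bias: tf.scalar(1), activation: 'relu' }).print();
+ * ```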
+ */ +function fusedDepthwiseConv2d_({ + x, + filter, + strides, + pad, + dataFormat = 'NHWC', + dilations = [1, 1], + dimRoundingMode, + bias, + activation = 'linear', + preluActivationWeights +}: { + x: T|TensorLike, + filter: Tensor4D|TensorLike, + strides: [number, number]|number, + pad: 'valid'|'same'|number, + dataFormat?: 'NHWC'|'NCHW', + dilations?: [number, number]|number, + dimRoundingMode?: 'floor'|'round'|'ceil', + bias?: Tensor|TensorLike, + activation?: Activation, + preluActivationWeights?: Tensor +}): T { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedDepthwiseConv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + + const $x = convertToTensor(x, 'x', 'depthwiseConv2d'); + const $filter = convertToTensor(filter, 'filter', 'depthwiseConv2d'); + + let x4D = $x as Tensor4D; + let reshapedTo4D = false; + if ($x.rank === 3) { + reshapedTo4D = true; + x4D = $x.as4D(1, $x.shape[0], $x.shape[1], $x.shape[2]); + } + util.assert( + x4D.rank === 4, + () => `Error in fused depthwiseConv2d: input must be rank 4, but got ` + + `rank ${x4D.rank}.`); + util.assert( + $filter.rank === 4, + () => `Error in fused depthwiseConv2d: filter must be rank 4, ` + + `but got rank ${$filter.rank}.`); + util.assert( + x4D.shape[3] === $filter.shape[2], + () => `Error in fused depthwiseConv2d: number of input channels ` + + `(${x4D.shape[3]}) must match the inChannels dimension in ` + + `filter ${$filter.shape[2]}.`); + if (dilations == null) { + dilations = [1, 1]; + } + util.assert( + conv_util.eitherStridesOrDilationsAreOne(strides, dilations), + () => + 'Error in fused depthwiseConv2d: Either strides or dilations must ' + + `be 1. Got strides ${strides} and dilations '${dilations}'`); + + if (dimRoundingMode != null) { + util.assert( + util.isInt(pad as number), + () => `Error in fused depthwiseConv2d: pad must be an integer when ` + + `using dimRoundingMode ${dimRoundingMode} but got pad ${pad}.`); + } + + const convInfo = conv_util.computeConv2DInfo( + x4D.shape, $filter.shape, strides, dilations, pad, dimRoundingMode, + true /* depthwise */); + + let $bias: Tensor; + if (bias != null) { + $bias = convertToTensor(bias, 'bias', 'fused conv2d'); + [$bias] = makeTypesMatch($bias, $x); + + broadcast_util.assertAndGetBroadcastShape(convInfo.outShape, $bias.shape); + } + + let $preluActivationWeights: Tensor; + if (preluActivationWeights != null) { + $preluActivationWeights = convertToTensor( + preluActivationWeights, 'prelu weights', 'fused depthwiseConv2d'); + } + + const grad = (dy: Tensor4D, saved: Tensor[]) => { + util.assert( + conv_util.tupleValuesAreOne(dilations), + () => 'Error in gradient of fused depthwiseConv2d: dilation rates ' + + `greater than 1 are not yet supported. 
Got dilations ` + + `'${dilations}'`); + const [$filter, x4D, y] = saved; + + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; + + let biasGradient = {}; + if (bias != null) { + biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; + } + + return Object.assign( + { + x: () => depthwiseConv2dNativeBackpropInput( + (x4D as Tensor4D).shape, dyActivation, $filter as Tensor4D, + convInfo), + filter: () => depthwiseConv2dNativeBackpropFilter( + x4D as Tensor4D, dyActivation, ($filter as Tensor4D).shape, + convInfo), + }, + biasGradient); + }; + + const inputs: { + x: Tensor, + filter: Tensor, + bias?: Tensor, + preluActivationWeights?: Tensor + } = {x: x4D, filter: $filter}; + if (bias != null) { + inputs.bias = $bias; + } + if (preluActivationWeights != null) { + inputs.preluActivationWeights = $preluActivationWeights; + } + + const inputsToSave = [$filter, x4D]; + const outputsToSave = [true]; + const res = ENGINE.runKernelFunc( + (backend, save) => { + const res = backend.fusedDepthwiseConv2D({ + input: x4D, + filter: $filter, + convInfo, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + save([$filter, x4D, res]); + return res; + }, + inputs, grad, 'FusedDepthwiseConv2D', {convInfo, activation}, + inputsToSave, outputsToSave); + if (reshapedTo4D) { + return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + return res as T; +} +export const depthwiseConv2d = op({fusedDepthwiseConv2d_}); diff --git a/tfjs-core/src/ops/fused/mat_mul.ts b/tfjs-core/src/ops/fused/mat_mul.ts new file mode 100644 index 00000000000..db0a2ed2f95 --- /dev/null +++ b/tfjs-core/src/ops/fused/mat_mul.ts @@ -0,0 +1,208 @@ +/** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {ENGINE} from '../../engine'; +import {op} from '../../ops/operation'; +import {Tensor, Tensor3D} from '../../tensor'; +import {makeTypesMatch} from '../../tensor_util'; +import {convertToTensor} from '../../tensor_util_env'; +import {TensorLike} from '../../types'; +import * as util from '../../util'; +import {add} from '../add'; +import * as broadcast_util from '../broadcast_util'; +import {Activation, applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; +import {matMul as unfusedMatMul} from '../mat_mul'; + +/** + * Computes the dot product of two matrices with optional activation and bias. + * + * ```js + * const a = tf.tensor2d([-1, -2], [1, 2]); + * const b = tf.tensor2d([1, 2, 3, 4], [2, 2]); + * const bias = tf.tensor2d([1, 2], [1, 2]); + * + * tf.fused.matMul({a, b, bias, activation: 'relu'}).print(); + * ``` + * + * @param obj An object with the following properties: + * - `a` First matrix in dot product operation. + * - `b` Second matrix in dot product operation. + * - `transposeA` If true, `a` is transposed before multiplication. 
+ * - `transposeB` If true, `b` is transposed before multiplication. + * - `bias` Matrix to be added to the result. + * - `activation` Name of activation kernel (defaults to `linear`). + * - `preluActivationWeights` Tensor of prelu weights. + */ +function fusedMatMul_({ + a, + b, + transposeA = false, + transposeB = false, + bias, + activation = 'linear', + preluActivationWeights +}: { + a: T|TensorLike, + b: T|TensorLike, + transposeA?: boolean, + transposeB?: boolean, + bias?: Tensor|TensorLike, + activation?: Activation, + preluActivationWeights?: Tensor +}): T { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedMatMul(a, b, transposeA, transposeB); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + + let $a = convertToTensor(a, 'a', 'fused matMul'); + let $b = convertToTensor(b, 'b', 'fused matMul'); + [$a, $b] = makeTypesMatch($a, $b); + + const innerShapeA = + transposeA ? $a.shape[$a.rank - 2] : $a.shape[$a.rank - 1]; + const innerShapeB = + transposeB ? $b.shape[$b.rank - 1] : $b.shape[$b.rank - 2]; + + const outerShapeA = + transposeA ? $a.shape[$a.rank - 1] : $a.shape[$a.rank - 2]; + const outerShapeB = + transposeB ? $b.shape[$b.rank - 2] : $b.shape[$b.rank - 1]; + + const outerDimsA = $a.shape.slice(0, -2); + const outerDimsB = $b.shape.slice(0, -2); + const batchDimA = util.sizeFromShape(outerDimsA); + const batchDimB = util.sizeFromShape(outerDimsB); + + util.assert( + $a.rank >= 2 && $b.rank >= 2 && $a.rank === $b.rank, + () => + `Error in fused matMul: inputs must have the same rank of at least ` + + `2, got ranks ${$a.rank} and ${$b.rank}.`); + + util.assert( + util.arraysEqual(outerDimsA, outerDimsB), + () => `Error in fused matMul: outer dimensions (${outerDimsA}) and (` + + `${outerDimsB}) of Tensors with shapes ${$a.shape} and ` + + `${$b.shape} must match.`); + + util.assert( + innerShapeA === innerShapeB, + () => `Error in fused matMul: inner shapes (${innerShapeA}) and (` + + `${innerShapeB}) of Tensors with shapes ${$a.shape} and ` + + `${$b.shape} and transposeA=${transposeA}` + + ` and transposeB=${transposeB} must match.`); + + const outShape = $a.shape.slice(0, -2).concat([outerShapeA, outerShapeB]); + + const a3D = transposeA ? $a.as3D(batchDimA, innerShapeA, outerShapeA) : + $a.as3D(batchDimA, outerShapeA, innerShapeA); + const b3D = transposeB ? 
$b.as3D(batchDimB, outerShapeB, innerShapeB) : + $b.as3D(batchDimB, innerShapeB, outerShapeB); + + let $bias: Tensor; + if (bias != null) { + $bias = convertToTensor(bias, 'bias', 'fused matMul'); + [$bias] = makeTypesMatch($bias, $a); + + broadcast_util.assertAndGetBroadcastShape(outShape, $bias.shape); + } + + let $preluActivationWeights: Tensor; + if (preluActivationWeights != null) { + $preluActivationWeights = convertToTensor( + preluActivationWeights, 'prelu weights', 'fused matMul'); + } + + const grad = (dy: Tensor3D, saved: Tensor[]) => { + const [a3D, b3D, y] = saved; + const dyActivation = getFusedDyActivation(dy, y, activation); + + let biasGradient = {}; + if (bias != null) { + biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; + } + + if (!transposeA && !transposeB) { + return Object.assign( + { + a: () => dyActivation.matMul(b3D as Tensor3D, false, true), + b: () => a3D.matMul(dyActivation, true, false) + }, + biasGradient); + } else if (!transposeA && transposeB) { + return Object.assign( + { + a: () => dyActivation.matMul(b3D as Tensor3D, false, false), + b: () => dyActivation.matMul(a3D as Tensor3D, true, false) + }, + biasGradient); + } else if (transposeA && !transposeB) { + return Object.assign( + { + a: () => b3D.matMul(dyActivation, false, true), + b: () => a3D.matMul(dyActivation, false, false) + }, + biasGradient); + } else { + return Object.assign( + { + a: () => b3D.matMul(dyActivation, true, true), + b: () => dyActivation.matMul(a3D as Tensor3D, true, true) + }, + biasGradient); + } + }; + + const inputs: + {a: Tensor, b: Tensor, + bias?: Tensor, + preluActivationWeights?: Tensor} = {a: a3D, b: b3D}; + if (bias != null) { + inputs.bias = $bias; + } + if (preluActivationWeights != null) { + inputs.preluActivationWeights = $preluActivationWeights; + } + + const inputsToSave = [a3D, b3D]; + const outputsToSave = [true]; + + const res = ENGINE.runKernelFunc( + (backend, save) => { + const y = backend.fusedBatchMatMul({ + a: a3D, + b: b3D, + transposeA, + transposeB, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + save([a3D, b3D, y]); + return y; + }, + inputs, grad, '_FusedMatMul', {transposeA, transposeB, activation}, + inputsToSave, outputsToSave); + return res.reshape(outShape); +} + +export const matMul = op({fusedMatMul_}); diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index f416d224a7d..ba930839e3c 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -15,663 +15,9 @@ * ============================================================================= */ -import {ENGINE} from '../engine'; -import * as conv_util from '../ops/conv_util'; -import {op} from '../ops/operation'; -import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import {makeTypesMatch} from '../tensor_util'; -import {convertToTensor} from '../tensor_util_env'; -import {TensorLike} from '../types'; -import * as util from '../util'; +import {conv2d} from './fused/conv2d'; +import {depthwiseConv2d} from './fused/depthwise_conv2d'; +import {matMul} from './fused/mat_mul'; +import {Activation} from './fused_util'; -import {add} from './add'; -import * as broadcast_util from './broadcast_util'; -import {conv2d as unfusedConv2d} from './conv2d'; -import {conv2DBackpropFilter} from './conv2d_backprop_filter'; -import {conv2DBackpropInput} from './conv2d_backprop_input'; -import {depthwiseConv2d as unfusedDepthwiseConv2d} from './depthwise_conv2d'; -import 
{depthwiseConv2dNativeBackpropFilter} from './depthwise_conv2d_native_backprop_filter'; -import {depthwiseConv2dNativeBackpropInput} from './depthwise_conv2d_native_backprop_input'; -import {elu} from './elu'; -import {Activation, shouldFuse} from './fused_util'; -import {matMul as unfusedMatMul} from './mat_mul'; -import {prelu} from './prelu'; -import {relu} from './relu'; -import {relu6} from './relu6'; - -// Returns gradient for fused activation. -const getFusedDyActivation = - (dy: Tensor, y: Tensor, activation: Activation): Tensor => { - if (activation == null || activation === 'linear') { - return dy; - } - if (activation === 'relu') { - return dy.mul(y.step()); - } - throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); - }; - -// Returns gradient for fused bias. -const getFusedBiasGradient = (bias: Tensor, dyActivation: Tensor): Tensor => { - let res = dyActivation; - const reduceAxes = - broadcast_util.getReductionAxes(bias.shape, dyActivation.shape); - if (reduceAxes.length > 0) { - res = res.sum(reduceAxes); - } - return res.reshape(bias.shape); -}; - -const applyActivation = - (x: Tensor, activation: Activation, preluActivationWeights?: Tensor): - Tensor => { - if (activation === 'linear') { - return x; - } else if (activation === 'relu') { - return relu(x); - } else if (activation === 'elu') { - return elu(x); - } else if (activation === 'relu6') { - return relu6(x); - } else if (activation === 'prelu') { - return prelu(x, preluActivationWeights); - } - throw new Error(`Unknown fused activation ${activation}.`); - }; - -/** - * Computes the dot product of two matrices with optional activation and bias. - * - * ```js - * const a = tf.tensor2d([-1, -2], [1, 2]); - * const b = tf.tensor2d([1, 2, 3, 4], [2, 2]); - * const bias = tf.tensor2d([1, 2], [1, 2]); - * - * tf.fused.matMul({a, b, bias, activation: 'relu'}).print(); - * ``` - * - * @param obj An object with the following properties: - * - `a` First matrix in dot product operation. - * - `b` Second matrix in dot product operation. - * - `transposeA` If true, `a` is transposed before multiplication. - * - `transposeB` If true, `b` is transposed before multiplication. - * - `bias` Matrix to be added to the result. - * - `activation` Name of activation kernel (defaults to `linear`). - * - `preluActivationWeights` Tensor of prelu weights. - */ -function fusedMatMul_({ - a, - b, - transposeA = false, - transposeB = false, - bias, - activation = 'linear', - preluActivationWeights -}: { - a: T|TensorLike, - b: T|TensorLike, - transposeA?: boolean, - transposeB?: boolean, - bias?: Tensor|TensorLike, - activation?: Activation, - preluActivationWeights?: Tensor -}): T { - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedMatMul(a, b, transposeA, transposeB); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - - let $a = convertToTensor(a, 'a', 'fused matMul'); - let $b = convertToTensor(b, 'b', 'fused matMul'); - [$a, $b] = makeTypesMatch($a, $b); - - const innerShapeA = - transposeA ? $a.shape[$a.rank - 2] : $a.shape[$a.rank - 1]; - const innerShapeB = - transposeB ? $b.shape[$b.rank - 1] : $b.shape[$b.rank - 2]; - - const outerShapeA = - transposeA ? $a.shape[$a.rank - 1] : $a.shape[$a.rank - 2]; - const outerShapeB = - transposeB ? 
$b.shape[$b.rank - 2] : $b.shape[$b.rank - 1]; - - const outerDimsA = $a.shape.slice(0, -2); - const outerDimsB = $b.shape.slice(0, -2); - const batchDimA = util.sizeFromShape(outerDimsA); - const batchDimB = util.sizeFromShape(outerDimsB); - - util.assert( - $a.rank >= 2 && $b.rank >= 2 && $a.rank === $b.rank, - () => - `Error in fused matMul: inputs must have the same rank of at least ` + - `2, got ranks ${$a.rank} and ${$b.rank}.`); - - util.assert( - util.arraysEqual(outerDimsA, outerDimsB), - () => `Error in fused matMul: outer dimensions (${outerDimsA}) and (` + - `${outerDimsB}) of Tensors with shapes ${$a.shape} and ` + - `${$b.shape} must match.`); - - util.assert( - innerShapeA === innerShapeB, - () => `Error in fused matMul: inner shapes (${innerShapeA}) and (` + - `${innerShapeB}) of Tensors with shapes ${$a.shape} and ` + - `${$b.shape} and transposeA=${transposeA}` + - ` and transposeB=${transposeB} must match.`); - - const outShape = $a.shape.slice(0, -2).concat([outerShapeA, outerShapeB]); - - const a3D = transposeA ? $a.as3D(batchDimA, innerShapeA, outerShapeA) : - $a.as3D(batchDimA, outerShapeA, innerShapeA); - const b3D = transposeB ? $b.as3D(batchDimB, outerShapeB, innerShapeB) : - $b.as3D(batchDimB, innerShapeB, outerShapeB); - - let $bias: Tensor; - if (bias != null) { - $bias = convertToTensor(bias, 'bias', 'fused matMul'); - [$bias] = makeTypesMatch($bias, $a); - - broadcast_util.assertAndGetBroadcastShape(outShape, $bias.shape); - } - - let $preluActivationWeights: Tensor; - if (preluActivationWeights != null) { - $preluActivationWeights = convertToTensor( - preluActivationWeights, 'prelu weights', 'fused matMul'); - } - - const grad = (dy: Tensor3D, saved: Tensor[]) => { - const [a3D, b3D, y] = saved; - const dyActivation = getFusedDyActivation(dy, y, activation); - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - if (!transposeA && !transposeB) { - return Object.assign( - { - a: () => dyActivation.matMul(b3D as Tensor3D, false, true), - b: () => a3D.matMul(dyActivation, true, false) - }, - biasGradient); - } else if (!transposeA && transposeB) { - return Object.assign( - { - a: () => dyActivation.matMul(b3D as Tensor3D, false, false), - b: () => dyActivation.matMul(a3D as Tensor3D, true, false) - }, - biasGradient); - } else if (transposeA && !transposeB) { - return Object.assign( - { - a: () => b3D.matMul(dyActivation, false, true), - b: () => a3D.matMul(dyActivation, false, false) - }, - biasGradient); - } else { - return Object.assign( - { - a: () => b3D.matMul(dyActivation, true, true), - b: () => dyActivation.matMul(a3D as Tensor3D, true, true) - }, - biasGradient); - } - }; - - const inputs: - {a: Tensor, b: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor} = {a: a3D, b: b3D}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } - - const inputsToSave = [a3D, b3D]; - const outputsToSave = [true]; - - const res = ENGINE.runKernelFunc( - (backend, save) => { - const y = backend.fusedBatchMatMul({ - a: a3D, - b: b3D, - transposeA, - transposeB, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - save([a3D, b3D, y]); - return y; - }, - inputs, grad, '_FusedMatMul', {transposeA, transposeB, activation}, - inputsToSave, outputsToSave); - return res.reshape(outShape); -} - -/** - * Computes a 2D convolution over the input x, optionally fused 
with adding a - * bias and applying an activation. - * - * ```js - * const inputDepth = 2; - * const inShape = [2, 2, 2, inputDepth]; - * const outputDepth = 2; - * const fSize = 1; - * const pad = 0; - * const strides = 1; - * - * const x = tf.tensor4d( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - * 16], inShape); - * const w = tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, - * outputDepth]); - * - * tf.fused.conv2d({ x, filter: w, strides, pad, dataFormat: 'NHWC', - * dilations: [1, 1], bias: tf.scalar(5), activation: 'relu' }).print(); - * ``` - * - * @param obj An object with the following properties: - * @param x The input tensor, of rank 4 or rank 3, of shape - * `[batch, height, width, inChannels]`. If rank 3, batch of 1 is - * assumed. - * @param filter The filter, rank 4, of shape - * `[filterHeight, filterWidth, inDepth, outDepth]`. - * @param strides The strides of the convolution: `[strideHeight, - * strideWidth]`. - * @param pad The type of padding algorithm. - * - `same` and stride 1: output will be of same size as input, - * regardless of filter size. - * - `valid` output will be smaller than input if filter is larger - * than 1x1. - * - For more info, see this guide: - * [https://www.tensorflow.org/api_guides/python/nn#Convolution]( - * https://www.tensorflow.org/api_guides/python/nn#Convolution) - * @param dataFormat An optional string from: "NHWC", "NCHW". Defaults to - * "NHWC". Specify the data format of the input and output data. With the - * default format "NHWC", the data is stored in the order of: [batch, - * height, width, channels]. Only "NHWC" is currently supported. - * @param dilations The dilation rates: `[dilationHeight, dilationWidth]` - * in which we sample input values across the height and width dimensions - * in atrous convolution. Defaults to `[1, 1]`. If `dilations` is a single - * number, then `dilationHeight == dilationWidth`. If it is greater than - * 1, then all values of `strides` must be 1. - * @param dimRoundingMode The rounding mode used when computing output - * dimensions if pad is a number. If none is provided, it will not round - * and error if the output is of fractional size. - * @param bias Tensor to be added to the result. - * @param activation Name of activation kernel (defaults to `linear`) to be - * applied - * after biasAdd. - * @param preluActivationWeights Tensor of prelu weights to be applied as part - * of a `prelu` activation, typically the same shape as `x`. 
- */ -function fusedConv2d_({ - x, - filter, - strides, - pad, - dataFormat = 'NHWC', - dilations = [1, 1], - dimRoundingMode, - bias, - activation = 'linear', - preluActivationWeights -}: { - x: T|TensorLike, - filter: Tensor4D|TensorLike, - strides: [number, number]|number, - pad: 'valid'|'same'|number|conv_util.ExplicitPadding, - dataFormat?: 'NHWC'|'NCHW', - dilations?: [number, number]|number, - dimRoundingMode?: 'floor'|'round'|'ceil', - bias?: Tensor|TensorLike, - activation?: Activation, - preluActivationWeights?: Tensor -}): T { - activation = activation || 'linear'; - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedConv2d( - x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - - const $x = convertToTensor(x, 'x', 'conv2d'); - const $filter = convertToTensor(filter, 'filter', 'conv2d'); - - let x4D = $x as Tensor4D; - let reshapedTo4D = false; - - if ($x.rank === 3) { - reshapedTo4D = true; - x4D = $x.as4D(1, $x.shape[0], $x.shape[1], $x.shape[2]); - } - util.assert( - x4D.rank === 4, - () => `Error in fused conv2d: input must be rank 4, but got rank ` + - `${x4D.rank}.`); - util.assert( - $filter.rank === 4, - () => `Error in fused conv2d: filter must be rank 4, but got rank ` + - `${$filter.rank}.`); - if (dimRoundingMode != null) { - util.assert( - util.isInt(pad as number), - () => `Error in fused conv2d: pad must be an integer when using, ` + - `dimRoundingMode ${dimRoundingMode} but got pad ${pad}.`); - } - - util.assert( - x4D.shape[3] === $filter.shape[2], - () => `Error in conv2d: depth of input (${x4D.shape[3]}) must match ` + - `input depth for filter ${$filter.shape[2]}.`); - util.assert( - conv_util.eitherStridesOrDilationsAreOne(strides, dilations), - () => 'Error in conv2D: Either strides or dilations must be 1. ' + - `Got strides ${strides} and dilations '${dilations}'`); - util.assert( - dataFormat === 'NHWC', - () => `Error in conv2d: got dataFormat of ${ - dataFormat} but only NHWC is currently supported.`); - - const convInfo = conv_util.computeConv2DInfo( - x4D.shape, $filter.shape, strides, dilations, pad, dimRoundingMode); - - let $bias: Tensor; - if (bias != null) { - $bias = convertToTensor(bias, 'bias', 'fused conv2d'); - [$bias] = makeTypesMatch($bias, $x); - - broadcast_util.assertAndGetBroadcastShape(convInfo.outShape, $bias.shape); - } - - let $preluActivationWeights: Tensor; - if (preluActivationWeights != null) { - $preluActivationWeights = convertToTensor( - preluActivationWeights, 'prelu weights', 'fused conv2d'); - } - - const grad = (dy: Tensor4D, saved: Tensor[]) => { - const [$filter, x4D, y] = saved as [Tensor4D, Tensor4D, Tensor4D]; - - const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; - - util.assert( - conv_util.tupleValuesAreOne(dilations), - () => 'Error in gradient of fused conv2D: ' + - `dilation rates greater than 1 ` + - `are not yet supported in gradients. 
Got dilations '${dilations}'`); - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - return Object.assign( - { - x: () => conv2DBackpropInput( - x4D.shape, dyActivation, $filter, strides, pad), - filter: () => conv2DBackpropFilter( - x4D, dyActivation, $filter.shape, strides, pad) - }, - biasGradient); - }; - - const inputs: { - x: Tensor, - filter: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor - } = {x: x4D, filter: $filter}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } - - const inputsToSave = [$filter, x4D]; - const outputsToSave = [true]; // Save the only output. - const res = ENGINE.runKernelFunc( - (backend, save) => { - const res = backend.fusedConv2d({ - input: x4D, - filter: $filter, - convInfo, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - save([$filter, x4D, res]); - return res; - }, - inputs, grad, 'FusedConv2D', {convInfo, activation}, inputsToSave, - outputsToSave); - - if (reshapedTo4D) { - return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; - } - - return res as T; -} - -/** - * Computes depthwise 2D convolution, optionally fused with adding a - * bias and applying an activation. - * - * Given a 4D `input` array and a `filter` array of shape - * `[filterHeight, filterWidth, inChannels, channelMultiplier]` containing - * `inChannels` convolutional filters of depth 1, this op applies a - * different filter to each input channel (expanding from 1 channel to - * `channelMultiplier` channels for each), then concatenates the results - * together. The output has `inChannels * channelMultiplier` channels. - * - * See - * [https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d]( - * https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d) - * for more details. - * - * @param obj An object with the following properties: - * @param x The input tensor, of rank 4 or rank 3, of shape - * `[batch, height, width, inChannels]`. If rank 3, batch of 1 is - * assumed. - * @param filter The filter tensor, rank 4, of shape - * `[filterHeight, filterWidth, inChannels, channelMultiplier]`. - * @param strides The strides of the convolution: `[strideHeight, - * strideWidth]`. If strides is a single number, then `strideHeight == - * strideWidth`. - * @param pad The type of padding algorithm. - * - `same` and stride 1: output will be of same size as input, - * regardless of filter size. - * - `valid`: output will be smaller than input if filter is larger - * than 1x1. - * - For more info, see this guide: - * [https://www.tensorflow.org/api_guides/python/nn#Convolution]( - * https://www.tensorflow.org/api_guides/python/nn#Convolution) - * @param dilations The dilation rates: `[dilationHeight, dilationWidth]` - * in which we sample input values across the height and width dimensions - * in atrous convolution. Defaults to `[1, 1]`. If `rate` is a single - * number, then `dilationHeight == dilationWidth`. If it is greater than - * 1, then all values of `strides` must be 1. - * @param dataFormat: An optional string from: "NHWC", "NCHW". Defaults to - * "NHWC". Specify the data format of the input and output data. With the - * default format "NHWC", the data is stored in the order of: [batch, - * height, width, channels]. Only "NHWC" is currently supported. 
- * @param dimRoundingMode The rounding mode used when computing output - * dimensions if pad is a number. If none is provided, it will not round - * and error if the output is of fractional size. - * @param bias Tensor to be added to the result. - * @param activation Name of activation kernel (defaults to `linear`). - * @param preluActivationWeights Tensor of prelu weights to be applied as part - * of a `prelu` activation, typically the same shape as `x`. - */ -function fusedDepthwiseConv2d_({ - x, - filter, - strides, - pad, - dataFormat = 'NHWC', - dilations = [1, 1], - dimRoundingMode, - bias, - activation = 'linear', - preluActivationWeights -}: { - x: T|TensorLike, - filter: Tensor4D|TensorLike, - strides: [number, number]|number, - pad: 'valid'|'same'|number, - dataFormat?: 'NHWC'|'NCHW', - dilations?: [number, number]|number, - dimRoundingMode?: 'floor'|'round'|'ceil', - bias?: Tensor|TensorLike, - activation?: Activation, - preluActivationWeights?: Tensor -}): T { - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedDepthwiseConv2d( - x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - - const $x = convertToTensor(x, 'x', 'depthwiseConv2d'); - const $filter = convertToTensor(filter, 'filter', 'depthwiseConv2d'); - - let x4D = $x as Tensor4D; - let reshapedTo4D = false; - if ($x.rank === 3) { - reshapedTo4D = true; - x4D = $x.as4D(1, $x.shape[0], $x.shape[1], $x.shape[2]); - } - util.assert( - x4D.rank === 4, - () => `Error in fused depthwiseConv2d: input must be rank 4, but got ` + - `rank ${x4D.rank}.`); - util.assert( - $filter.rank === 4, - () => `Error in fused depthwiseConv2d: filter must be rank 4, ` + - `but got rank ${$filter.rank}.`); - util.assert( - x4D.shape[3] === $filter.shape[2], - () => `Error in fused depthwiseConv2d: number of input channels ` + - `(${x4D.shape[3]}) must match the inChannels dimension in ` + - `filter ${$filter.shape[2]}.`); - if (dilations == null) { - dilations = [1, 1]; - } - util.assert( - conv_util.eitherStridesOrDilationsAreOne(strides, dilations), - () => - 'Error in fused depthwiseConv2d: Either strides or dilations must ' + - `be 1. Got strides ${strides} and dilations '${dilations}'`); - - if (dimRoundingMode != null) { - util.assert( - util.isInt(pad as number), - () => `Error in fused depthwiseConv2d: pad must be an integer when ` + - `using dimRoundingMode ${dimRoundingMode} but got pad ${pad}.`); - } - - const convInfo = conv_util.computeConv2DInfo( - x4D.shape, $filter.shape, strides, dilations, pad, dimRoundingMode, - true /* depthwise */); - - let $bias: Tensor; - if (bias != null) { - $bias = convertToTensor(bias, 'bias', 'fused conv2d'); - [$bias] = makeTypesMatch($bias, $x); - - broadcast_util.assertAndGetBroadcastShape(convInfo.outShape, $bias.shape); - } - - let $preluActivationWeights: Tensor; - if (preluActivationWeights != null) { - $preluActivationWeights = convertToTensor( - preluActivationWeights, 'prelu weights', 'fused depthwiseConv2d'); - } - - const grad = (dy: Tensor4D, saved: Tensor[]) => { - util.assert( - conv_util.tupleValuesAreOne(dilations), - () => 'Error in gradient of fused depthwiseConv2d: dilation rates ' + - `greater than 1 are not yet supported. 
Got dilations ` + - `'${dilations}'`); - const [$filter, x4D, y] = saved; - - const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - return Object.assign( - { - x: () => depthwiseConv2dNativeBackpropInput( - (x4D as Tensor4D).shape, dyActivation, $filter as Tensor4D, - convInfo), - filter: () => depthwiseConv2dNativeBackpropFilter( - x4D as Tensor4D, dyActivation, ($filter as Tensor4D).shape, - convInfo), - }, - biasGradient); - }; - - const inputs: { - x: Tensor, - filter: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor - } = {x: x4D, filter: $filter}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } - - const inputsToSave = [$filter, x4D]; - const outputsToSave = [true]; - const res = ENGINE.runKernelFunc( - (backend, save) => { - const res = backend.fusedDepthwiseConv2D({ - input: x4D, - filter: $filter, - convInfo, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - save([$filter, x4D, res]); - return res; - }, - inputs, grad, 'FusedDepthwiseConv2D', {convInfo, activation}, - inputsToSave, outputsToSave); - if (reshapedTo4D) { - return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; - } - return res as T; -} - -export const matMul = op({fusedMatMul_}); -export const conv2d = op({fusedConv2d_}); -export const depthwiseConv2d = op({fusedDepthwiseConv2d_}); - -export {Activation}; +export {Activation, conv2d, depthwiseConv2d, matMul}; diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index d9f16133966..ff90c751c0f 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -16,8 +16,13 @@ */ import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import * as broadcast_util from './broadcast_util'; import {Conv2DInfo} from './conv_util'; +import {elu} from './elu'; +import {prelu} from './prelu'; +import {relu} from './relu'; +import {relu6} from './relu6'; export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; @@ -45,3 +50,46 @@ export const shouldFuse = (gradientDepth: number, activation: Activation) => { const gradientMode = gradientDepth > 0; return !gradientMode || activation === 'linear'; }; + +// Returns gradient for fused activation. +export function getFusedDyActivation( + dy: Tensor, y: Tensor, activation: Activation): Tensor { + if (activation == null || activation === 'linear') { + return dy; + } + if (activation === 'relu') { + return dy.mul(y.step()); + } + throw new Error( + `Gradient for activation ${activation} has not been ` + + `implemented yet.`); +} + +// Returns gradient for fused bias. 
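+// A broadcasted bias contributes to every output element it was added to, so
+// its gradient is the incoming gradient summed over the broadcast (reduction)
+// axes and reshaped back to the bias shape.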
+export function getFusedBiasGradient( + bias: Tensor, dyActivation: Tensor): Tensor { + let res = dyActivation; + const reduceAxes = + broadcast_util.getReductionAxes(bias.shape, dyActivation.shape); + if (reduceAxes.length > 0) { + res = res.sum(reduceAxes); + } + return res.reshape(bias.shape); +} + +export function applyActivation( + x: Tensor, activation: Activation, + preluActivationWeights?: Tensor): Tensor { + if (activation === 'linear') { + return x; + } else if (activation === 'relu') { + return relu(x); + } else if (activation === 'elu') { + return elu(x); + } else if (activation === 'relu6') { + return relu6(x); + } else if (activation === 'prelu') { + return prelu(x, preluActivationWeights); + } + throw new Error(`Unknown fused activation ${activation}.`); +} From 2c0cf0dd602c229531a9d19cc02f8d30694ffb03 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Mon, 13 Jul 2020 13:23:13 -0400 Subject: [PATCH 02/12] remove unused kernel interfaces --- tfjs-core/src/kernel_names.ts | 39 ++++++----------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/tfjs-core/src/kernel_names.ts b/tfjs-core/src/kernel_names.ts index efe84f8ac85..2020d63ee8a 100644 --- a/tfjs-core/src/kernel_names.ts +++ b/tfjs-core/src/kernel_names.ts @@ -21,7 +21,6 @@ import {ExplicitPadding} from '../src/ops/conv_util'; import {NamedTensorInfoMap, TensorInfo} from './kernel_registry'; -import {Activation} from './ops/fused_util'; import {DataType, PixelData} from './types'; export const Abs = 'Abs'; @@ -734,36 +733,10 @@ export interface FromPixelsAttrs { numChannels: number; } -export const _FusedMatMul = '_FusedMatMul'; -export type _FusedMatMulInputs = - Pick; -// tslint:disable-next-line: class-name -export interface _FusedMatMulAttrs { - transposeA: number; - transposeB: number; - activation: Activation; -} - -export const FusedConv2D = 'FusedConv2D'; -export type FusedConv2DInputs = - Pick; -export interface FusedConv2DAttrs { - strides: [number, number]|number; - pad: 'valid'|'same'|number|ExplicitPadding; - dataFormat: 'NHWC'|'NCHW'; - dilations: [number, number]|number; - dimRoundingMode: 'floor'|'round'|'ceil'; - activation: Activation; -} - -export const FusedDepthwiseConv2D = 'FusedDepthwiseConv2D'; -export type FusedDepthwiseConv2DInputs = - Pick; -export interface FusedDepthwiseConv2DAttrs { - strides: [number, number]|number; - pad: 'valid'|'same'|number; - dataFormat: 'NHWC'|'NCHW'; - dilations: [number, number]|number; - dimRoundingMode: 'floor'|'round'|'ceil'; - activation: Activation; +export const RotateWithOffset = 'RotateWithOffset'; +export type RotateWithOffsetInputs = Pick; +export interface RotateWithOffsetAttrs { + radians: number; + fillValue: number|[number, number, number]; + center: number|[number, number]; } From c5a81b01b6c25dc2bddb3b91dd754850c20f3ed9 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Mon, 13 Jul 2020 15:00:28 -0400 Subject: [PATCH 03/12] fix circular deps --- tfjs-core/src/backends/backend.ts | 2 +- tfjs-core/src/backends/backend_util.ts | 3 +- tfjs-core/src/ops/fused/conv2d.ts | 3 +- tfjs-core/src/ops/fused/depthwise_conv2d.ts | 3 +- tfjs-core/src/ops/fused/mat_mul.ts | 3 +- tfjs-core/src/ops/fused/types.ts | 40 +++++++++++++++++++++ tfjs-core/src/ops/fused_ops.ts | 2 +- tfjs-core/src/ops/fused_util.ts | 27 ++------------ 8 files changed, 53 insertions(+), 30 deletions(-) create mode 100644 tfjs-core/src/ops/fused/types.ts diff --git a/tfjs-core/src/backends/backend.ts b/tfjs-core/src/backends/backend.ts index 
a93f3552b15..23cf94a0ca5 100644 --- a/tfjs-core/src/backends/backend.ts +++ b/tfjs-core/src/backends/backend.ts @@ -16,7 +16,7 @@ */ import {Conv2DInfo, Conv3DInfo} from '../ops/conv_util'; -import {FusedBatchMatMulConfig, FusedConv2DConfig} from '../ops/fused_util'; +import {FusedBatchMatMulConfig, FusedConv2DConfig} from '../ops/fused/types'; import {Backend, DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; import {BackendValues, DataType, Rank, ShapeMap} from '../types'; diff --git a/tfjs-core/src/backends/backend_util.ts b/tfjs-core/src/backends/backend_util.ts index 6baa8b629f5..6f43b966f12 100644 --- a/tfjs-core/src/backends/backend_util.ts +++ b/tfjs-core/src/backends/backend_util.ts @@ -31,7 +31,8 @@ export * from '../ops/axis_util'; export * from '../ops/broadcast_util'; export * from '../ops/concat_util'; export * from '../ops/conv_util'; -export {Activation, FusedConv2DConfig} from '../ops/fused_util'; +export * from '../ops/fused_util'; +export * from '../ops/fused/types'; export * from '../ops/reduce_util'; export {BackendValues, TypedArray, upcastType, PixelData} from '../types'; diff --git a/tfjs-core/src/ops/fused/conv2d.ts b/tfjs-core/src/ops/fused/conv2d.ts index e3504076aa3..6a051c36ae4 100644 --- a/tfjs-core/src/ops/fused/conv2d.ts +++ b/tfjs-core/src/ops/fused/conv2d.ts @@ -27,9 +27,10 @@ import {conv2d as unfusedConv2d} from '../conv2d'; import {conv2DBackpropFilter} from '../conv2d_backprop_filter'; import {conv2DBackpropInput} from '../conv2d_backprop_input'; import {applyActivation, getFusedBiasGradient, getFusedDyActivation} from '../fused_util'; -import {Activation, shouldFuse} from '../fused_util'; +import {shouldFuse} from '../fused_util'; import * as conv_util from '../ops/../conv_util'; import {op} from '../ops/../operation'; +import {Activation} from './types'; /** * Computes a 2D convolution over the input x, optionally fused with adding a diff --git a/tfjs-core/src/ops/fused/depthwise_conv2d.ts b/tfjs-core/src/ops/fused/depthwise_conv2d.ts index cadefd6d964..4b9b8067399 100644 --- a/tfjs-core/src/ops/fused/depthwise_conv2d.ts +++ b/tfjs-core/src/ops/fused/depthwise_conv2d.ts @@ -28,7 +28,8 @@ import * as broadcast_util from '../broadcast_util'; import {depthwiseConv2d as unfusedDepthwiseConv2d} from '../depthwise_conv2d'; import {depthwiseConv2dNativeBackpropFilter} from '../depthwise_conv2d_native_backprop_filter'; import {depthwiseConv2dNativeBackpropInput} from '../depthwise_conv2d_native_backprop_input'; -import {Activation, applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; +import {Activation} from './types'; /** * Computes depthwise 2D convolution, optionally fused with adding a diff --git a/tfjs-core/src/ops/fused/mat_mul.ts b/tfjs-core/src/ops/fused/mat_mul.ts index db0a2ed2f95..66e8a9fcff6 100644 --- a/tfjs-core/src/ops/fused/mat_mul.ts +++ b/tfjs-core/src/ops/fused/mat_mul.ts @@ -24,8 +24,9 @@ import {TensorLike} from '../../types'; import * as util from '../../util'; import {add} from '../add'; import * as broadcast_util from '../broadcast_util'; -import {Activation, applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; import {matMul as unfusedMatMul} from '../mat_mul'; +import {Activation} from './types'; /** * 
Computes the dot product of two matrices with optional activation and bias. diff --git a/tfjs-core/src/ops/fused/types.ts b/tfjs-core/src/ops/fused/types.ts new file mode 100644 index 00000000000..ffd5c423a4e --- /dev/null +++ b/tfjs-core/src/ops/fused/types.ts @@ -0,0 +1,40 @@ +/** + * @license + * Copyright 2020 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; +import {Conv2DInfo} from '../conv_util'; + +export type FusedConv2DConfig = { + input: Tensor4D, + filter: Tensor4D, + convInfo: Conv2DInfo, + bias?: Tensor, + activation?: Activation, + preluActivationWeights?: Tensor +}; + +export type FusedBatchMatMulConfig = { + a: Tensor3D, + b: Tensor3D, + transposeA: boolean, + transposeB: boolean, + bias?: Tensor, + activation?: Activation, + preluActivationWeights?: Tensor +}; + +export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index ba930839e3c..bf458da5c9e 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -18,6 +18,6 @@ import {conv2d} from './fused/conv2d'; import {depthwiseConv2d} from './fused/depthwise_conv2d'; import {matMul} from './fused/mat_mul'; -import {Activation} from './fused_util'; +import {Activation} from './fused/types'; export {Activation, conv2d, depthwiseConv2d, matMul}; diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index ff90c751c0f..8784be9af29 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -15,36 +15,15 @@ * ============================================================================= */ -import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import * as broadcast_util from './broadcast_util'; +import {Tensor} from '../tensor'; -import {Conv2DInfo} from './conv_util'; +import * as broadcast_util from './broadcast_util'; import {elu} from './elu'; +import {Activation} from './fused/types'; import {prelu} from './prelu'; import {relu} from './relu'; import {relu6} from './relu6'; -export type Activation = 'linear'|'relu'|'prelu'|'elu'|'relu6'; - -export type FusedBatchMatMulConfig = { - a: Tensor3D, - b: Tensor3D, - transposeA: boolean, - transposeB: boolean, - bias?: Tensor, - activation?: Activation, - preluActivationWeights?: Tensor -}; - -export type FusedConv2DConfig = { - input: Tensor4D, - filter: Tensor4D, - convInfo: Conv2DInfo, - bias?: Tensor, - activation?: Activation, - preluActivationWeights?: Tensor -}; - // Whether we should call fused ops. export const shouldFuse = (gradientDepth: number, activation: Activation) => { const gradientMode = gradientDepth > 0; From e737248d91f4bfb93edee5313369211bebae19c3 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Tue, 14 Jul 2020 17:13:51 -0400 Subject: [PATCH 04/12] move fused files to top level. 
delete gradients --- tfjs-core/src/backends/backend.ts | 2 +- tfjs-core/src/backends/backend_util.ts | 2 +- .../ops/{fused/conv2d.ts => fused_conv2d.ts} | 71 +- tfjs-core/src/ops/fused_conv2d_test.ts | 864 ++++++++++ ...se_conv2d.ts => fused_depthwise_conv2d.ts} | 68 +- .../src/ops/fused_depthwise_conv2d_test.ts | 253 +++ .../{fused/mat_mul.ts => fused_mat_mul.ts} | 78 +- tfjs-core/src/ops/fused_mat_mul_test.ts | 310 ++++ tfjs-core/src/ops/fused_ops.ts | 8 +- tfjs-core/src/ops/fused_test.ts | 1391 ----------------- .../ops/{fused/types.ts => fused_types.ts} | 4 +- tfjs-core/src/ops/fused_util.ts | 8 +- tfjs-core/src/tests.ts | 4 +- 13 files changed, 1481 insertions(+), 1582 deletions(-) rename tfjs-core/src/ops/{fused/conv2d.ts => fused_conv2d.ts} (81%) create mode 100644 tfjs-core/src/ops/fused_conv2d_test.ts rename tfjs-core/src/ops/{fused/depthwise_conv2d.ts => fused_depthwise_conv2d.ts} (75%) create mode 100644 tfjs-core/src/ops/fused_depthwise_conv2d_test.ts rename tfjs-core/src/ops/{fused/mat_mul.ts => fused_mat_mul.ts} (67%) create mode 100644 tfjs-core/src/ops/fused_mat_mul_test.ts delete mode 100644 tfjs-core/src/ops/fused_test.ts rename tfjs-core/src/ops/{fused/types.ts => fused_types.ts} (92%) diff --git a/tfjs-core/src/backends/backend.ts b/tfjs-core/src/backends/backend.ts index 23cf94a0ca5..66907f2fa8d 100644 --- a/tfjs-core/src/backends/backend.ts +++ b/tfjs-core/src/backends/backend.ts @@ -16,7 +16,7 @@ */ import {Conv2DInfo, Conv3DInfo} from '../ops/conv_util'; -import {FusedBatchMatMulConfig, FusedConv2DConfig} from '../ops/fused/types'; +import {FusedBatchMatMulConfig, FusedConv2DConfig} from '../ops/fused_types'; import {Backend, DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; import {BackendValues, DataType, Rank, ShapeMap} from '../types'; diff --git a/tfjs-core/src/backends/backend_util.ts b/tfjs-core/src/backends/backend_util.ts index 6f43b966f12..c1302627922 100644 --- a/tfjs-core/src/backends/backend_util.ts +++ b/tfjs-core/src/backends/backend_util.ts @@ -32,7 +32,7 @@ export * from '../ops/broadcast_util'; export * from '../ops/concat_util'; export * from '../ops/conv_util'; export * from '../ops/fused_util'; -export * from '../ops/fused/types'; +export * from '../ops/fused_types'; export * from '../ops/reduce_util'; export {BackendValues, TypedArray, upcastType, PixelData} from '../types'; diff --git a/tfjs-core/src/ops/fused/conv2d.ts b/tfjs-core/src/ops/fused_conv2d.ts similarity index 81% rename from tfjs-core/src/ops/fused/conv2d.ts rename to tfjs-core/src/ops/fused_conv2d.ts index 6a051c36ae4..5d507b6f415 100644 --- a/tfjs-core/src/ops/fused/conv2d.ts +++ b/tfjs-core/src/ops/fused_conv2d.ts @@ -15,22 +15,19 @@ * ============================================================================= */ -import {ENGINE} from '../../engine'; -import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; -import {makeTypesMatch} from '../../tensor_util'; -import {convertToTensor} from '../../tensor_util_env'; -import {TensorLike} from '../../types'; -import * as util from '../../util'; -import {add} from '../add'; -import * as broadcast_util from '../broadcast_util'; -import {conv2d as unfusedConv2d} from '../conv2d'; -import {conv2DBackpropFilter} from '../conv2d_backprop_filter'; -import {conv2DBackpropInput} from '../conv2d_backprop_input'; -import {applyActivation, getFusedBiasGradient, getFusedDyActivation} from '../fused_util'; -import {shouldFuse} from '../fused_util'; -import * as conv_util from '../ops/../conv_util'; 
-import {op} from '../ops/../operation'; -import {Activation} from './types'; +import {ENGINE} from '../engine'; +import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import {makeTypesMatch} from '../tensor_util'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; +import * as util from '../util'; + +import * as broadcast_util from './broadcast_util'; +import * as conv_util from './conv_util'; +import {Activation} from './fused_types'; +import {op} from './operation'; + + /** * Computes a 2D convolution over the input x, optionally fused with adding a @@ -168,15 +165,6 @@ function fusedConv2d_({ preluActivationWeights?: Tensor }): T { activation = activation || 'linear'; - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedConv2d( - x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } const $x = convertToTensor(x, 'x', 'conv2d'); const $filter = convertToTensor(filter, 'filter', 'conv2d'); @@ -233,32 +221,6 @@ function fusedConv2d_({ preluActivationWeights, 'prelu weights', 'fused conv2d'); } - const grad = (dy: Tensor4D, saved: Tensor[]) => { - const [$filter, x4D, y] = saved as [Tensor4D, Tensor4D, Tensor4D]; - - const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; - - util.assert( - conv_util.tupleValuesAreOne(dilations), - () => 'Error in gradient of fused conv2D: ' + - `dilation rates greater than 1 ` + - `are not yet supported in gradients. Got dilations '${dilations}'`); - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - return Object.assign( - { - x: () => conv2DBackpropInput( - x4D.shape, dyActivation, $filter, strides, pad), - filter: () => conv2DBackpropFilter( - x4D, dyActivation, $filter.shape, strides, pad) - }, - biasGradient); - }; - const inputs: { x: Tensor, filter: Tensor, @@ -275,7 +237,7 @@ function fusedConv2d_({ const inputsToSave = [$filter, x4D]; const outputsToSave = [true]; // Save the only output. const res = ENGINE.runKernelFunc( - (backend, save) => { + (backend) => { const res = backend.fusedConv2d({ input: x4D, filter: $filter, @@ -284,11 +246,10 @@ function fusedConv2d_({ activation, preluActivationWeights: $preluActivationWeights }); - save([$filter, x4D, res]); return res; }, - inputs, grad, 'FusedConv2D', {convInfo, activation}, inputsToSave, - outputsToSave); + inputs, null /* grad */, 'FusedConv2D', {convInfo, activation}, + inputsToSave, outputsToSave); if (reshapedTo4D) { return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; diff --git a/tfjs-core/src/ops/fused_conv2d_test.ts b/tfjs-core/src/ops/fused_conv2d_test.ts new file mode 100644 index 00000000000..1f2b34cc84d --- /dev/null +++ b/tfjs-core/src/ops/fused_conv2d_test.ts @@ -0,0 +1,864 @@ +/** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import * as tf from '../index'; +import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysClose} from '../test_util'; + +function generateCaseInputs(totalSizeTensor: number, totalSizeFilter: number) { + const inp = new Array(totalSizeTensor); + const filt = new Array(totalSizeFilter); + + for (let i = 0; i < totalSizeTensor; i++) { + inp[i] = i * 0.001 - totalSizeTensor * 0.001 / 2; + } + for (let i = 0; i < totalSizeFilter; i++) { + const sign = i % 2 === 0 ? -1 : 1; + filt[i] = i * 0.001 * sign; + } + + return {input: inp, filter: filt}; +} + +describeWithFlags('fused conv2d', ALL_ENVS, () => { + it('basic', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({x, filter: w, strides: stride, pad}); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = + [-5, 2, -11, 5, -17, 8, -23, 11, -29, 14, -35, 17, -41, 20, -47, 23]; + + expectArraysClose(await result.data(), expected); + }); + + it('basic with relu', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'relu' + }); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = [0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23]; + + expectArraysClose(await result.data(), expected); + }); + + it('relu with stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=1 p=same', + async () => { + const inputDepth = 16; + const xSize = 8; + const inputShape: [number, number, number, number] = + [1, xSize, xSize, inputDepth]; + const outputDepth = 1; + const fSize = 3; + const pad = 'same'; + const stride: [number, number] = [2, 2]; + + // TODO(annxingyuan): Make this test work with large inputs + // https://github.com/tensorflow/tfjs/issues/3143 + const inputData = []; + for (let i = 0; i < xSize * xSize * inputDepth; i++) { + inputData.push(i % 5); + } + + const wData = []; + for (let i = 0; i < fSize * fSize * inputDepth * outputDepth; i++) { + wData.push(i % 5); + } + + const x = tf.tensor4d(inputData, inputShape); + const w = tf.tensor4d(wData, [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'relu' + }); + expect(result.shape).toEqual([1, 4, 4, 1]); + expectArraysClose(await result.data(), new Float32Array([ + 854, 431, 568, 382, 580, 427, 854, 288, 431, 568, + 580, 289, 285, 570, 285, 258 + ])); + }); + + it('relu bias stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=8 p=same', + async () => { + const inputDepth = 16; + const xSize = 8; + const inputShape: [number, number, number, 
number] = + [1, xSize, xSize, inputDepth]; + const outputDepth = 8; + const fSize = 3; + const pad = 'same'; + const stride: [number, number] = [2, 2]; + + const inputs = generateCaseInputs( + 1 * xSize * xSize * inputDepth, + fSize * fSize * inputDepth * outputDepth); + const x = tf.tensor4d(inputs.input, inputShape); + const w = + tf.tensor4d(inputs.filter, [fSize, fSize, inputDepth, outputDepth]); + const bias = tf.tensor1d([1, 4, 2, 3, 9, 6, 5, 8]); + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'relu', + bias + }); + expect(result.shape).toEqual([1, 4, 4, 8]); + expectArraysClose(await result.data(), new Float32Array([ + 25.75398063659668, + 0, + 26.857805252075195, + 0, + 33.961631774902344, + 0, + 30.065458297729492, + 0, + 23.118206024169922, + 0, + 24.212820053100586, + 0, + 31.307422637939453, + 0, + 27.402034759521484, + 0, + 20.482431411743164, + 0, + 21.567821502685547, + 0, + 28.653217315673828, + 0, + 24.73861312866211, + 0, + 11.078080177307129, + 0, + 12.130399703979492, + 0, + 19.182720184326172, + 0, + 15.235037803649902, + 0, + 4.6677775382995605, + 0.31717729568481445, + 5.697869777679443, + 0, + 12.727968215942383, + 2.2569849491119385, + 8.758066177368164, + 4.226885795593262, + 2.0319995880126953, + 2.9575586318969727, + 3.052880048751831, + 1.9366796016693115, + 10.073760032653809, + 4.915799617767334, + 6.094639778137207, + 6.89492130279541, + 0, + 5.5979437828063965, + 0.4078875780105591, + 4.586280822753906, + 7.419551849365234, + 7.5746169090271, + 3.43121600151062, + 9.562952041625977, + 0, + 6.404943943023682, + 0, + 5.401776313781738, + 6.5998077392578125, + 8.398608207702637, + 2.602976083755493, + 10.395440101623535, + 0, + 21.440250396728516, + 0, + 20.483882904052734, + 0, + 23.527509689331055, + 0, + 25.571144104003906, + 0, + 24.080629348754883, + 0, + 23.133480072021484, + 0, + 26.186328887939453, + 0, + 28.239177703857422, + 0, + 26.721012115478516, + 0, + 25.783079147338867, + 0, + 28.84514808654785, + 0, + 30.907209396362305, + 0, + 18.914127349853516, + 0, + 17.960111618041992, + 0, + 21.006093978881836, + 0, + 23.052082061767578, + 0, + 17.89089584350586, + 0, + 16.95684814453125, + 0, + 20.022798538208008, + 0, + 22.088754653930664, + 0, + 19.06132698059082, + 0, + 18.133424758911133, + 0, + 21.205520629882812, + 0, + 23.27761459350586, + 0, + 20.23175811767578, + 0, + 19.309999465942383, + 0, + 22.388240814208984, + 0, + 24.46647834777832, + 0, + 13.584352493286133, + 0, + 12.6395845413208, + 0, + 15.694815635681152, + 0, + 17.750045776367188 + ])); + }); + + it('prelu bias stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=8 p=same', + async () => { + const inputDepth = 16; + const xSize = 8; + const inputShape: [number, number, number, number] = + [1, xSize, xSize, inputDepth]; + const outputDepth = 8; + const fSize = 3; + const pad = 'same'; + const stride: [number, number] = [2, 2]; + + const inputs = generateCaseInputs( + 1 * xSize * xSize * inputDepth, + fSize * fSize * inputDepth * outputDepth); + const x = tf.tensor4d(inputs.input, inputShape); + const w = + tf.tensor4d(inputs.filter, [fSize, fSize, inputDepth, outputDepth]); + const bias = tf.tensor1d([1, 4, 2, 3, 9, 6, 5, 8]); + const preluActivationWeights = tf.tensor1d([1, 2, 3, 4, 5, 6, 7, 8]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'prelu', + preluActivationWeights, + bias + }); + 
expect(result.shape).toEqual([1, 4, 4, 8]); + expectArraysClose( + await result.data(), new Float32Array([ + 25.75398063659668, -41.61178970336914, 26.857805252075195, + -87.63885498046875, 33.961631774902344, -114.0812759399414, + 30.065458297729492, -136.93893432617188, 23.118206024169922, + -36.33102035522461, 24.212820053100586, -77.04048156738281, + 31.307422637939453, -98.12835693359375, 27.402034759521484, + -115.5947265625, 20.482431411743164, -31.050262451171875, + 21.567821502685547, -66.44209289550781, 28.653217315673828, + -82.17544555664062, 24.73861312866211, -94.25041198730469, + 11.078080177307129, -12.208478927612305, 12.130399703979492, + -28.626232147216797, 19.182720184326172, -25.253299713134766, + 15.235037803649902, -18.08960723876953, 4.6677775382995605, + 0.31717729568481445, 5.697869777679443, -2.8516759872436523, + 12.727968215942383, 2.2569849491119385, 8.758066177368164, + 4.226885795593262, 2.0319995880126953, 2.9575586318969727, + 3.052880048751831, 1.9366796016693115, 10.073760032653809, + 4.915799617767334, 6.094639778137207, 6.89492130279541, + -0.6037763357162476, 5.5979437828063965, 0.4078875780105591, + 4.586280822753906, 7.419551849365234, 7.5746169090271, + 3.43121600151062, 9.562952041625977, -1.4065279960632324, + 6.404943943023682, -1.2100803852081299, 5.401776313781738, + 6.5998077392578125, 8.398608207702637, 2.602976083755493, + 10.395440101623535, -16.418434143066406, 21.440250396728516, + -46.38618850708008, 20.483882904052734, -42.52848815917969, + 23.527509689331055, -87.84530639648438, 25.571144104003906, + -19.054208755493164, 24.080629348754883, -54.32115936279297, + 23.133480072021484, -55.79951477050781, 26.186328887939453, + -106.48924255371094, 28.239177703857422, -21.689987182617188, + 26.721012115478516, -62.25614929199219, 25.783079147338867, + -69.070556640625, 28.84514808654785, -125.13325500488281, + 30.907209396362305, -13.891133308410645, 18.914127349853516, + -38.81135940551758, 17.960111618041992, -29.915504455566406, + 21.006093978881836, -70.20361328125, 23.052082061767578, + -12.857919692993164, 17.89089584350586, -35.771610260009766, + 16.95684814453125, -24.949115753173828, 20.022798538208008, + -63.39042282104492, 22.088754653930664, -14.02528190612793, + 19.06132698059082, -39.2921257019043, 18.133424758911133, + -30.847349166870117, 21.205520629882812, -71.69097137451172, + 23.27761459350586, -15.192638397216797, 20.23175811767578, + -42.8126335144043, 19.309999465942383, -36.74560546875, + 22.388240814208984, -79.99152374267578, 24.46647834777832, + -8.556736946105957, 13.584352493286133, -22.835901260375977, + 12.6395845413208, -3.336000442504883, 15.694815635681152, + -33.0570182800293, 17.750045776367188 + ])); + }); + + it('basic with bias', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: tf.tensor1d([5, 6]) + }); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = + [0, 8, -6, 11, -12, 14, -18, 17, -24, 20, -30, 23, -36, 26, -42, 29]; + + expectArraysClose(await result.data(), expected); + }); + + it('basic with explicit padding', async () => { + 
const inputDepth = 1; + const outputDepth = 1; + const pad = + [[0, 0], [1, 2], [0, 1], [0, 0]] as tf.backend_util.ExplicitPadding; + const stride = 1; + const dataFormat = 'NHWC'; + const dilation = 1; + + const x = tf.tensor3d([1, 2, 3, 4, 5, 6, 7, 8], [4, 2, inputDepth]); + const w = + tf.tensor4d([3, 1, 5, 0, 2, 7, 8, 9], [4, 2, inputDepth, outputDepth]); + + const result = tf.fused.conv2d( + {x, filter: w, strides: stride, pad, dataFormat, dilations: dilation}); + + const resultData = await result.data(); + expect(result.shape).toEqual([4, 2, 1]); + expectArraysClose(resultData, [133, 66, 200, 102, 108, 58, 56, 58]); + }); + + it('basic with elu', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'elu' + }); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = + [-0.99326, 2, -1, 5, -1, 8, -1, 11, -1, 14, -1, 17, -1, 20, -1, 23]; + + expectArraysClose(await result.data(), expected); + }); + + it('basic with prelu', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const alpha = tf.tensor3d([0.25, 0.75], [1, 1, 2]); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'prelu', + preluActivationWeights: alpha + }); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = [ + -1.25, 2, -2.75, 5, -4.25, 8, -5.75, 11, -7.25, 14, -8.75, 17, -10.25, 20, + -11.75, 23 + ]; + + expectArraysClose(await result.data(), expected); + }); + + it('basic with broadcasted bias and relu', async () => { + const inputDepth = 2; + const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; + const outputDepth = 2; + const fSize = 1; + const pad = 0; + const stride = 1; + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); + const w = + tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides: stride, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: tf.scalar(5), + activation: 'relu' + }); + expect(result.shape).toEqual([2, 2, 2, 2]); + const expected = [0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28]; + + expectArraysClose(await result.data(), expected); + }); + + it('im2row', async () => { + const inputDepth = 1; + const inputShape: [number, number, number] = [4, 4, inputDepth]; + const outputDepth = 3; + const fSize = 1; + const pad = 'same'; + const strides: [number, number] = [2, 2]; + + const x = tf.tensor3d( + [ + 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 + ], + inputShape); + const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({x, filter: w, strides, pad}); + + expectArraysClose( + 
await result.data(), + [10, 5, 10, 50, 25, 50, -10, -5, -10, -50, -25, -50]); + }); + + it('im2row with relu', async () => { + const inputDepth = 1; + const inputShape: [number, number, number] = [4, 4, inputDepth]; + const outputDepth = 3; + const fSize = 1; + const pad = 'same'; + const strides: [number, number] = [2, 2]; + + const x = tf.tensor3d( + [ + 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 + ], + inputShape); + const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'relu' + }); + + expectArraysClose( + await result.data(), [10, 5, 10, 50, 25, 50, 0, 0, 0, 0, 0, 0]); + }); + + it('im2row with prelu', async () => { + const inputDepth = 1; + const inputShape: [number, number, number] = [4, 4, inputDepth]; + const outputDepth = 3; + const fSize = 1; + const pad = 'same'; + const strides: [number, number] = [2, 2]; + + const x = tf.tensor3d( + [ + 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 + ], + inputShape); + const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); + const alpha = tf.tensor3d([0.5], [1, 1, inputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'prelu', + preluActivationWeights: alpha + }); + + expectArraysClose( + await result.data(), + [10, 5, 10, 50, 25, 50, -5, -2.5, -5, -25, -12.5, -25]); + }); + + it('pointwise with prelu', async () => { + const inputDepth = 1; + const inputShape: [number, number, number] = [4, 4, inputDepth]; + const outputDepth = 3; + const fSize = 1; + const pad = 'same'; + const strides: [number, number] = [1, 1]; + + const x = tf.tensor3d( + [ + 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 + ], + inputShape); + const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); + const alpha = tf.tensor3d([0.5], [1, 1, inputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + activation: 'prelu', + preluActivationWeights: alpha + }); + + expectArraysClose(await result.data(), [ + 10, 5, 10, 30, 15, 30, 50, 25, 50, 70, 35, 70, + 20, 10, 20, 40, 20, 40, 60, 30, 60, 80, 40, 80, + -5, -2.5, -5, -15, -7.5, -15, -25, -12.5, -25, -35, -17.5, -35, + -10, -5, -10, -20, -10, -20, -30, -15, -30, -40, -20, -40 + ]); + }); + + it('im2row with broadcasted bias and relu', async () => { + const inputDepth = 1; + const inputShape: [number, number, number] = [4, 4, inputDepth]; + const outputDepth = 3; + const fSize = 1; + const pad = 'same'; + const strides: [number, number] = [2, 2]; + + const x = tf.tensor3d( + [ + 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 + ], + inputShape); + const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); + + const result = tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: tf.scalar(5), + activation: 'relu' + }); + + expectArraysClose( + await result.data(), [15, 10, 15, 55, 30, 55, 0, 0, 0, 0, 0, 0]); + }); + + // it('backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = [2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: 
[number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const grads = tf.grads( + // (x: tf.Tensor4D) => tf.fused.conv2d({x, filter, strides, pad})); + // const [dx] = grads([x], dy); + + // expect(dx.shape).toEqual(x.shape); + // expectArraysClose( + // await dx.data(), + // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, + // 0]); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = [2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const grads = tf.grads( + // (x: tf.Tensor4D, filter: tf.Tensor4D) => + // tf.fused.conv2d({x, filter, strides, pad})); + // const [dx, dfilter] = grads([x, filter], dy); + + // expect(dx.shape).toEqual(x.shape); + // expectArraysClose( + // await dx.data(), + // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, + // 0]); + + // expect(dfilter.shape).toEqual(filterShape); + // expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = [2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + // const bias = tf.ones([2, 2, 2, 1]); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const fusedGrads = + // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + // x, + // filter: w, + // strides, + // pad, + // dataFormat: 'NHWC', + // dilations: [1, 1], + // bias: b + // })); + // const [dxFused, dfilterFused, dbiasFused] = + // fusedGrads([x, filter, bias], dy); + + // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + // const conv = tf.conv2d(x, filter, strides, pad); + // const sum = tf.add(conv, bias); + // return sum; + // }); + // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + // expectArraysClose(await dxFused.array(), await dx.array()); + // expectArraysClose(await dfilterFused.array(), await dfilter.array()); + // expectArraysClose(await dbiasFused.array(), await dbias.array()); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and relu', + // async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = + // [2, 3, 3, inputDepth]; + // const filterSize = 2; + // const strides = 1; + // const pad = 0; + + // const filterShape: [number, number, number, number] = 
+ // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + // const bias = tf.ones([2, 2, 2, 1]); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], + // inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const fusedGrads = + // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + // x, + // filter: w, + // strides, + // pad, + // dataFormat: 'NHWC', + // dilations: [1, 1], + // bias: b, + // activation: 'relu' + // })); + // const [dxFused, dfilterFused, dbiasFused] = + // fusedGrads([x, filter, bias], dy); + + // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) + // => { + // const conv = tf.conv2d(x, filter, strides, pad); + // const sum = tf.add(conv, bias); + // return tf.relu(sum); + // }); + // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + // expectArraysClose(await dxFused.array(), await dx.array()); + // expectArraysClose(await dfilterFused.array(), await dfilter.array()); + // expectArraysClose(await dbiasFused.array(), await dbias.array()); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and elu', async () + // => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = [2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + // const bias = tf.ones([2, 2, 2, 1]); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const fusedGrads = + // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + // x, + // filter: w, + // strides, + // pad, + // dataFormat: 'NHWC', + // dilations: [1, 1], + // bias: b, + // activation: 'elu' + // })); + // const [dxFused, dfilterFused, dbiasFused] = + // fusedGrads([x, filter, bias], dy); + + // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + // const conv = tf.conv2d(x, filter, strides, pad); + // const sum = tf.add(conv, bias); + // return tf.elu(sum); + // }); + // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + // expectArraysClose(await dxFused.array(), await dx.array()); + // expectArraysClose(await dfilterFused.array(), await dfilter.array()); + // expectArraysClose(await dbiasFused.array(), await dbias.array()); + // }); + + // it('fused matmul with relu6 and gradients', async () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + // const transposeA = false; + // const transposeB = false; + + // const fusedGrads = tf.grads((a, b) => { + // return tf.fused.matMul( + // {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); + // }); + // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + + // const grads = tf.grads((a, b) => { + // const prod = tf.matMul(a, b, transposeA, transposeB); + // return tf.relu6(prod); + // }); + // const [da, db] = grads([a, b], dy); + + // expectArraysClose(await da.array(), await fusedDa.array()); + // expectArraysClose(await db.data(), await fusedDb.array()); + // }); +}); diff --git 
a/tfjs-core/src/ops/fused/depthwise_conv2d.ts b/tfjs-core/src/ops/fused_depthwise_conv2d.ts similarity index 75% rename from tfjs-core/src/ops/fused/depthwise_conv2d.ts rename to tfjs-core/src/ops/fused_depthwise_conv2d.ts index 4b9b8067399..64180ac8ab0 100644 --- a/tfjs-core/src/ops/fused/depthwise_conv2d.ts +++ b/tfjs-core/src/ops/fused_depthwise_conv2d.ts @@ -15,21 +15,17 @@ * ============================================================================= */ -import {ENGINE} from '../../engine'; -import * as conv_util from '../../ops/conv_util'; -import {op} from '../../ops/operation'; -import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; -import {makeTypesMatch} from '../../tensor_util'; -import {convertToTensor} from '../../tensor_util_env'; -import {TensorLike} from '../../types'; -import * as util from '../../util'; -import {add} from '../add'; -import * as broadcast_util from '../broadcast_util'; -import {depthwiseConv2d as unfusedDepthwiseConv2d} from '../depthwise_conv2d'; -import {depthwiseConv2dNativeBackpropFilter} from '../depthwise_conv2d_native_backprop_filter'; -import {depthwiseConv2dNativeBackpropInput} from '../depthwise_conv2d_native_backprop_input'; -import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; -import {Activation} from './types'; +import {ENGINE} from '../engine'; +import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import {makeTypesMatch} from '../tensor_util'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; +import * as util from '../util'; + +import * as broadcast_util from './broadcast_util'; +import * as conv_util from './conv_util'; +import {Activation} from './fused_types'; +import {op} from './operation'; /** * Computes depthwise 2D convolution, optionally fused with adding a @@ -104,16 +100,6 @@ function fusedDepthwiseConv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedDepthwiseConv2d( - x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - const $x = convertToTensor(x, 'x', 'depthwiseConv2d'); const $filter = convertToTensor(filter, 'filter', 'depthwiseConv2d'); @@ -170,33 +156,6 @@ function fusedDepthwiseConv2d_({ preluActivationWeights, 'prelu weights', 'fused depthwiseConv2d'); } - const grad = (dy: Tensor4D, saved: Tensor[]) => { - util.assert( - conv_util.tupleValuesAreOne(dilations), - () => 'Error in gradient of fused depthwiseConv2d: dilation rates ' + - `greater than 1 are not yet supported. 
Got dilations ` + - `'${dilations}'`); - const [$filter, x4D, y] = saved; - - const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - return Object.assign( - { - x: () => depthwiseConv2dNativeBackpropInput( - (x4D as Tensor4D).shape, dyActivation, $filter as Tensor4D, - convInfo), - filter: () => depthwiseConv2dNativeBackpropFilter( - x4D as Tensor4D, dyActivation, ($filter as Tensor4D).shape, - convInfo), - }, - biasGradient); - }; - const inputs: { x: Tensor, filter: Tensor, @@ -213,7 +172,7 @@ function fusedDepthwiseConv2d_({ const inputsToSave = [$filter, x4D]; const outputsToSave = [true]; const res = ENGINE.runKernelFunc( - (backend, save) => { + (backend) => { const res = backend.fusedDepthwiseConv2D({ input: x4D, filter: $filter, @@ -222,10 +181,9 @@ function fusedDepthwiseConv2d_({ activation, preluActivationWeights: $preluActivationWeights }); - save([$filter, x4D, res]); return res; }, - inputs, grad, 'FusedDepthwiseConv2D', {convInfo, activation}, + inputs, null /* grad */, 'FusedDepthwiseConv2D', {convInfo, activation}, inputsToSave, outputsToSave); if (reshapedTo4D) { return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; diff --git a/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts b/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts new file mode 100644 index 00000000000..49e318a7844 --- /dev/null +++ b/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts @@ -0,0 +1,253 @@ +/** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import * as tf from '../index'; +import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysClose} from '../test_util'; + +describeWithFlags('fused depthwiseConv2D', ALL_ENVS, () => { + it('basic', async () => { + const fSize = 2; + const pad = 'valid'; + const strides = 1; + const chMul = 1; + const inDepth = 1; + + const x = tf.tensor4d( + [ + 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641, + 0.0741907, 0.409265, 0.351377 + ], + [1, 3, 3, inDepth]); + const w = tf.tensor4d( + [-0.303873, -0.229223, 0.144333, 0.803373], + [fSize, fSize, inDepth, chMul], + ); + + const result = tf.fused.depthwiseConv2d({x, filter: w, strides, pad}); + expect(result.shape).toEqual([1, 2, 2, 1]); + const expected = [0.47737, 0.40018, 0.00859, -0.09615]; + expectArraysClose(await result.data(), expected); + }); + + it('basic with relu', async () => { + const fSize = 2; + const pad = 'valid'; + const strides = 1; + const chMul = 1; + const inDepth = 1; + + const x = tf.tensor4d( + [ + 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641, + 0.0741907, 0.409265, 0.351377 + ], + [1, 3, 3, inDepth]); + const w = tf.tensor4d( + [-0.303873, -0.229223, 0.144333, 0.803373], + [fSize, fSize, inDepth, chMul], + ); + + const result = tf.fused.depthwiseConv2d( + {x, filter: w, strides, pad, activation: 'relu'}); + expect(result.shape).toEqual([1, 2, 2, 1]); + const expected = [0.47737, 0.40018, 0.00859, 0]; + expectArraysClose(await result.data(), expected); + }); + + it('basic with broadcasted bias and relu', async () => { + const fSize = 2; + const pad = 'valid'; + const strides = 1; + const chMul = 1; + const inDepth = 1; + + const x = tf.tensor4d( + [ + 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641, + 0.0741907, 0.409265, 0.351377 + ], + [1, 3, 3, inDepth]); + const w = tf.tensor4d( + [-0.303873, -0.229223, 0.144333, 0.803373], + [fSize, fSize, inDepth, chMul], + ); + + const result = tf.fused.depthwiseConv2d( + {x, filter: w, strides, pad, bias: tf.scalar(1), activation: 'relu'}); + expect(result.shape).toEqual([1, 2, 2, 1]); + const expected = [1.47737, 1.40018, 1.00859, 0.90385]; + expectArraysClose(await result.data(), expected); + }); + + it('prelu', async () => { + const fSize = 3; + const pad = 'valid'; + const strides = 1; + const chMul = 1; + const inDepth = 1; + + const x = tf.tensor4d( + [ + 0.149194, 0.089009, 0.654891, 0.083324, 0.537043, 0.644331, 0.563037, + 0.211859, 0.633501, 0.186427, 0.777034, 0.50001, 0.607341, 0.95303, + 0.696479, 0.050387, 0.62045, 0.728049, 0.028043, 0.437009, 0.712881, + 0.741935, 0.974474, 0.621102, 0.171411 + ], + [1, 5, 5, inDepth]); + const alpha = tf.tensor4d( + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], [1, 3, 3, 1]); + const w = tf.tensor4d( + [ + -0.125386, -0.975199, -0.640437, -0.281895, -0.990968, -0.347208, + -0.889702, -0.180695, -0.691992 + ], + [fSize, fSize, inDepth, chMul], + ); + + const result = tf.fused.depthwiseConv2d({ + x, + filter: w, + strides, + pad, + activation: 'prelu', + preluActivationWeights: alpha + }); + expect(result.shape).toEqual([1, 3, 3, 1]); + const expected = [ + -0.25400, -0.50118, -0.73622, -0.94068, -1.2298, -1.84585, -2.3089, + -2.7499, -2.64077 + ]; + expectArraysClose(await result.data(), expected); + }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = 
[2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const grads = tf.grads( + // (x: tf.Tensor4D, filter: tf.Tensor4D) => + // tf.fused.depthwiseConv2d({x, filter, strides, pad})); + // const [dx, dfilter] = grads([x, filter], dy); + + // expect(dx.shape).toEqual(x.shape); + // expectArraysClose( + // await dx.data(), + // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, + // 0]); + + // expect(dfilter.shape).toEqual(filterShape); + // expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = [2, 3, 3, + // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + // const bias = tf.ones([2, 2, 2, 1]); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const fusedGrads = tf.grads( + // (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ + // x, + // filter: w, + // strides, + // pad, + // dataFormat: 'NHWC', + // dilations: [1, 1], + // bias: b + // })); + // const [dxFused, dfilterFused, dbiasFused] = + // fusedGrads([x, filter, bias], dy); + + // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + // const conv = tf.depthwiseConv2d(x, filter, strides, pad); + // const sum = tf.add(conv, bias); + // return sum; + // }); + // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + // expectArraysClose(await dxFused.array(), await dx.array()); + // expectArraysClose(await dfilterFused.array(), await dfilter.array()); + // expectArraysClose(await dbiasFused.array(), await dbias.array()); + // }); + + // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and activation', + // async () => { + // const inputDepth = 1; + // const outputDepth = 1; + // const inputShape: [number, number, number, number] = + // [2, 3, 3, inputDepth]; + // const filterSize = 2; + // const strides = 1; + // const pad = 0; + + // const filterShape: [number, number, number, number] = + // [filterSize, filterSize, inputDepth, outputDepth]; + // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + // const bias = tf.ones([2, 2, 2, 1]); + + // const x = tf.tensor4d( + // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], + // inputShape); + // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + // const fusedGrads = tf.grads( + // (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ + // x, + // filter: w, + // strides, + // pad, + // dataFormat: 'NHWC', + // dilations: [1, 1], + // bias: b, + // activation: 'relu' + // })); + // const [dxFused, dfilterFused, dbiasFused] = + // fusedGrads([x, filter, bias], dy); + + // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) + // => { + // const conv = 
tf.depthwiseConv2d(x, filter, strides, pad); + // const sum = tf.add(conv, bias); + // return tf.relu(sum); + // }); + // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + // expectArraysClose(await dxFused.array(), await dx.array()); + // expectArraysClose(await dfilterFused.array(), await dfilter.array()); + // expectArraysClose(await dbiasFused.array(), await dbias.array()); + // }); +}); diff --git a/tfjs-core/src/ops/fused/mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts similarity index 67% rename from tfjs-core/src/ops/fused/mat_mul.ts rename to tfjs-core/src/ops/fused_mat_mul.ts index 66e8a9fcff6..3340bcfa102 100644 --- a/tfjs-core/src/ops/fused/mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -15,18 +15,16 @@ * ============================================================================= */ -import {ENGINE} from '../../engine'; -import {op} from '../../ops/operation'; -import {Tensor, Tensor3D} from '../../tensor'; -import {makeTypesMatch} from '../../tensor_util'; -import {convertToTensor} from '../../tensor_util_env'; -import {TensorLike} from '../../types'; -import * as util from '../../util'; -import {add} from '../add'; -import * as broadcast_util from '../broadcast_util'; -import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from '../fused_util'; -import {matMul as unfusedMatMul} from '../mat_mul'; -import {Activation} from './types'; +import {ENGINE} from '../engine'; +import {Tensor} from '../tensor'; +import {makeTypesMatch} from '../tensor_util'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; +import * as util from '../util'; + +import * as broadcast_util from './broadcast_util'; +import {Activation} from './fused_types'; +import {op} from './operation'; /** * Computes the dot product of two matrices with optional activation and bias. 
@@ -65,15 +63,6 @@ function fusedMatMul_({ activation?: Activation, preluActivationWeights?: Tensor }): T { - if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { - let result = unfusedMatMul(a, b, transposeA, transposeB); - if (bias != null) { - result = add(result, bias); - } - - return applyActivation(result, activation, preluActivationWeights) as T; - } - let $a = convertToTensor(a, 'a', 'fused matMul'); let $b = convertToTensor(b, 'b', 'fused matMul'); [$a, $b] = makeTypesMatch($a, $b); @@ -133,46 +122,6 @@ function fusedMatMul_({ preluActivationWeights, 'prelu weights', 'fused matMul'); } - const grad = (dy: Tensor3D, saved: Tensor[]) => { - const [a3D, b3D, y] = saved; - const dyActivation = getFusedDyActivation(dy, y, activation); - - let biasGradient = {}; - if (bias != null) { - biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; - } - - if (!transposeA && !transposeB) { - return Object.assign( - { - a: () => dyActivation.matMul(b3D as Tensor3D, false, true), - b: () => a3D.matMul(dyActivation, true, false) - }, - biasGradient); - } else if (!transposeA && transposeB) { - return Object.assign( - { - a: () => dyActivation.matMul(b3D as Tensor3D, false, false), - b: () => dyActivation.matMul(a3D as Tensor3D, true, false) - }, - biasGradient); - } else if (transposeA && !transposeB) { - return Object.assign( - { - a: () => b3D.matMul(dyActivation, false, true), - b: () => a3D.matMul(dyActivation, false, false) - }, - biasGradient); - } else { - return Object.assign( - { - a: () => b3D.matMul(dyActivation, true, true), - b: () => dyActivation.matMul(a3D as Tensor3D, true, true) - }, - biasGradient); - } - }; - const inputs: {a: Tensor, b: Tensor, bias?: Tensor, @@ -188,7 +137,7 @@ function fusedMatMul_({ const outputsToSave = [true]; const res = ENGINE.runKernelFunc( - (backend, save) => { + (backend) => { const y = backend.fusedBatchMatMul({ a: a3D, b: b3D, @@ -198,11 +147,10 @@ function fusedMatMul_({ activation, preluActivationWeights: $preluActivationWeights }); - save([a3D, b3D, y]); return y; }, - inputs, grad, '_FusedMatMul', {transposeA, transposeB, activation}, - inputsToSave, outputsToSave); + inputs, null /* grad */, '_FusedMatMul', + {transposeA, transposeB, activation}, inputsToSave, outputsToSave); return res.reshape(outShape); } diff --git a/tfjs-core/src/ops/fused_mat_mul_test.ts b/tfjs-core/src/ops/fused_mat_mul_test.ts new file mode 100644 index 00000000000..c85123cde11 --- /dev/null +++ b/tfjs-core/src/ops/fused_mat_mul_test.ts @@ -0,0 +1,310 @@ +/** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import * as tf from '../index'; +import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysClose} from '../test_util'; + +describeWithFlags('fused matmul', ALL_ENVS, () => { + it('fused A x B', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + + const c = tf.fused.matMul({a, b}); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 8, -3, 20]); + }); + + it('fused A x B with relu', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const transposeA = false; + const transposeB = false; + + const c = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 8, 0, 20]); + }); + + it('fused A x B with elu', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const transposeA = false; + const transposeB = false; + + const c = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'elu'}); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 8, -0.9502, 20]); + }); + + it('fused A x B with relu6', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const transposeA = false; + const transposeB = false; + + const c = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 6, 0, 6]); + }); + + it('fused A x B with prelu', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); + const transposeA = false; + const transposeB = false; + + const c = tf.fused.matMul({ + a, + b, + transposeA, + transposeB, + bias: null, + activation: 'prelu', + preluActivationWeights: alpha + }); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 8, -1.5, 20]); + }); + + it('fused A x B with relu transpose', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [2, 3]); + const transposeA = false; + const transposeB = true; + + const c = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); + + expect(c.shape).toEqual([2, 2]); + expectArraysClose(await c.data(), [0, 9, 0, 24]); + }); + + it('fused A x B with 2d bias and relu', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + + expect(d.shape).toEqual([2, 2]); + expectArraysClose(await d.data(), [1, 9, 0, 21]); + }); + + it('fused A x B with relu and broadcasted bias', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const c = tf.tensor1d([1, 1]); + const act: tf.fused.Activation = 'relu'; + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: act}); + + 
expect(d.shape).toEqual([2, 2]); + expectArraysClose(await d.data(), [1, 9, 0, 21]); + }); + + it('fused A x B with elu and broadcasted bias', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const c = tf.tensor1d([1, 1]); + const act: tf.fused.Activation = 'elu'; + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: act}); + + expect(d.shape).toEqual([2, 2]); + expectArraysClose(await d.data(), [1, 9, -0.8647, 21]); + }); + + it('fused A x B with relu and broadcasted bias different rank', async () => { + const a = tf.tensor3d([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [2, 2, 3]); + const b = tf.tensor3d([0, 1, -3, 2, 2, 1, 0, 1, -3, 2, 2, 1], [2, 3, 2]); + const c = tf.tensor2d([1, 2], [1, 2]); + const act: tf.fused.Activation = 'relu'; + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: act}); + + expect(d.shape).toEqual([2, 2, 2]); + expectArraysClose(await d.data(), [2, 6, 0, 18, 0, 30, 0, 42]); + }); + + it('fused A x B with 2d bias only', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'linear'}); + + expect(d.shape).toEqual([2, 2]); + expectArraysClose(await d.data(), [1, 9, -2, 21]); + }); + + // it('fused A x B with relu gradient', async () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + // const transposeA = false; + // const transposeB = false; + + // const grads = tf.grads((a, b) => { + // const prod = tf.matMul(a, b, transposeA, transposeB); + // return tf.relu(prod); + // }); + + // const fusedGrads = tf.grads((a, b) => { + // return tf.fused.matMul( + // {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); + // }); + + // const [da, db] = grads([a, b], dy); + // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + // expectArraysClose(await da.array(), await fusedDa.array()); + // expectArraysClose(await db.data(), await fusedDb.array()); + // }); + + // it('gradient with clones A x B with relu', () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + // const transposeA = false; + // const transposeB = false; + + // const fusedGrads = tf.grads((a, b) => { + // return tf.fused + // .matMul({ + // a: a.clone(), + // b: b.clone(), + // transposeA, + // transposeB, + // bias: null, + // activation: 'relu' + // }) + // .clone(); + // }); + + // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + // expect(fusedDa.shape).toEqual(a.shape); + // expect(fusedDb.shape).toEqual(b.shape); + // }); + + // it('fused A x B with relu bias gradient', async () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + // const transposeA = false; + // const transposeB = false; + + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + // const grads = tf.grads((a, b, c) => { + // const prod = tf.matMul(a, b, 
transposeA, transposeB); + // const sum = tf.add(prod, c); + // return tf.relu(sum); + // }); + + // const fusedGrads = tf.grads((a, b, c) => { + // return tf.fused.matMul( + // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + // }); + + // const [da, db, dc] = grads([a, b, c], dy); + // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + // expectArraysClose(await da.array(), await fusedDa.array()); + // expectArraysClose(await db.array(), await fusedDb.array()); + // expectArraysClose(await dc.array(), await fusedDc.array()); + // }); + + // it('fused A x B with relu bias gradient transpose', async () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [3, 2]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + // const transposeA = true; + // const transposeB = false; + + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + // const grads = tf.grads((a, b, c) => { + // const prod = tf.matMul(a, b, transposeA, transposeB); + // const sum = tf.add(prod, c); + // return tf.relu(sum); + // }); + + // const fusedGrads = tf.grads((a, b, c) => { + // return tf.fused.matMul( + // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + // }); + + // const [da, db, dc] = grads([a, b, c], dy); + // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + // expectArraysClose(await da.array(), await fusedDa.array()); + // expectArraysClose(await db.array(), await fusedDb.array()); + // expectArraysClose(await dc.array(), await fusedDc.array()); + // }); + + // it('fused A x B with relu and broadcasted bias gradient', async () => { + // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + // const c = tf.tensor2d([[1]]); + // const transposeA = false; + // const transposeB = false; + + // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + // const grads = tf.grads((a, b, c) => { + // const prod = tf.matMul(a, b, transposeA, transposeB); + // const sum = tf.add(prod, c); + // return tf.relu(sum); + // }); + + // const fusedGrads = tf.grads((a, b, c) => { + // return tf.fused.matMul( + // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + // }); + + // const [da, db, dc] = grads([a, b, c], dy); + // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + // expectArraysClose(await da.array(), await fusedDa.array()); + // expectArraysClose(await db.array(), await fusedDb.array()); + // expectArraysClose(await dc.array(), await fusedDc.array()); + // }); +}); diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index bf458da5c9e..32d8b26770c 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -15,9 +15,9 @@ * ============================================================================= */ -import {conv2d} from './fused/conv2d'; -import {depthwiseConv2d} from './fused/depthwise_conv2d'; -import {matMul} from './fused/mat_mul'; -import {Activation} from './fused/types'; +import {conv2d} from './fused_conv2d'; +import {depthwiseConv2d} from './fused_depthwise_conv2d'; +import {matMul} from './fused_mat_mul'; +import {Activation} from './fused_types'; export {Activation, conv2d, depthwiseConv2d, matMul}; diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts deleted file mode 100644 index 7003c3070de..00000000000 --- a/tfjs-core/src/ops/fused_test.ts +++ /dev/null @@ -1,1391 +0,0 @@ -/** - * @license - * Copyright 2019 Google LLC. 
All Rights Reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ============================================================================= - */ - -import * as tf from '../index'; -import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; -import {expectArraysClose} from '../test_util'; - -function generateCaseInputs(totalSizeTensor: number, totalSizeFilter: number) { - const inp = new Array(totalSizeTensor); - const filt = new Array(totalSizeFilter); - - for (let i = 0; i < totalSizeTensor; i++) { - inp[i] = i * 0.001 - totalSizeTensor * 0.001 / 2; - } - for (let i = 0; i < totalSizeFilter; i++) { - const sign = i % 2 === 0 ? -1 : 1; - filt[i] = i * 0.001 * sign; - } - - return {input: inp, filter: filt}; -} - -describeWithFlags('fused matmul', ALL_ENVS, () => { - it('fused A x B', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - - const c = tf.fused.matMul({a, b}); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 8, -3, 20]); - }); - - it('fused A x B with relu', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const transposeA = false; - const transposeB = false; - - const c = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 8, 0, 20]); - }); - - it('fused A x B with elu', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const transposeA = false; - const transposeB = false; - - const c = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'elu'}); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 8, -0.9502, 20]); - }); - - it('fused A x B with relu6', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const transposeA = false; - const transposeB = false; - - const c = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 6, 0, 6]); - }); - - it('fused A x B with prelu', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); - const transposeA = false; - const transposeB = false; - - const c = tf.fused.matMul({ - a, - b, - transposeA, - transposeB, - bias: null, - activation: 'prelu', - preluActivationWeights: alpha - }); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 8, -1.5, 20]); - }); - - it('fused A x B with relu transpose', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [2, 3]); - const transposeA = false; - const transposeB = true; - - const c = tf.fused.matMul( - {a, b, 
transposeA, transposeB, bias: null, activation: 'relu'}); - - expect(c.shape).toEqual([2, 2]); - expectArraysClose(await c.data(), [0, 9, 0, 24]); - }); - - it('fused A x B with 2d bias and relu', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - - expect(d.shape).toEqual([2, 2]); - expectArraysClose(await d.data(), [1, 9, 0, 21]); - }); - - it('fused A x B with relu and broadcasted bias', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.tensor1d([1, 1]); - const act: tf.fused.Activation = 'relu'; - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: act}); - - expect(d.shape).toEqual([2, 2]); - expectArraysClose(await d.data(), [1, 9, 0, 21]); - }); - - it('fused A x B with elu and broadcasted bias', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.tensor1d([1, 1]); - const act: tf.fused.Activation = 'elu'; - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: act}); - - expect(d.shape).toEqual([2, 2]); - expectArraysClose(await d.data(), [1, 9, -0.8647, 21]); - }); - - it('fused A x B with relu and broadcasted bias different rank', async () => { - const a = tf.tensor3d([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [2, 2, 3]); - const b = tf.tensor3d([0, 1, -3, 2, 2, 1, 0, 1, -3, 2, 2, 1], [2, 3, 2]); - const c = tf.tensor2d([1, 2], [1, 2]); - const act: tf.fused.Activation = 'relu'; - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: act}); - - expect(d.shape).toEqual([2, 2, 2]); - expectArraysClose(await d.data(), [2, 6, 0, 18, 0, 30, 0, 42]); - }); - - it('fused A x B with 2d bias only', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'linear'}); - - expect(d.shape).toEqual([2, 2]); - expectArraysClose(await d.data(), [1, 9, -2, 21]); - }); - - it('fused A x B with relu gradient', async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - const transposeA = false; - const transposeB = false; - - const grads = tf.grads((a, b) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - return tf.relu(prod); - }); - - const fusedGrads = tf.grads((a, b) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); - }); - - const [da, db] = grads([a, b], dy); - const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.data(), await fusedDb.array()); - }); - - it('gradient with clones A x B with relu', () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 
3], [3, 2]); - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - const transposeA = false; - const transposeB = false; - - const fusedGrads = tf.grads((a, b) => { - return tf.fused - .matMul({ - a: a.clone(), - b: b.clone(), - transposeA, - transposeB, - bias: null, - activation: 'relu' - }) - .clone(); - }); - - const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - expect(fusedDa.shape).toEqual(a.shape); - expect(fusedDb.shape).toEqual(b.shape); - }); - - it('fused A x B with relu bias gradient', async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - const transposeA = false; - const transposeB = false; - - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - const grads = tf.grads((a, b, c) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - const sum = tf.add(prod, c); - return tf.relu(sum); - }); - - const fusedGrads = tf.grads((a, b, c) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - }); - - const [da, db, dc] = grads([a, b, c], dy); - const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.array(), await fusedDb.array()); - expectArraysClose(await dc.array(), await fusedDc.array()); - }); - - it('fused A x B with relu bias gradient transpose', async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [3, 2]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - const transposeA = true; - const transposeB = false; - - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - const grads = tf.grads((a, b, c) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - const sum = tf.add(prod, c); - return tf.relu(sum); - }); - - const fusedGrads = tf.grads((a, b, c) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - }); - - const [da, db, dc] = grads([a, b, c], dy); - const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.array(), await fusedDb.array()); - expectArraysClose(await dc.array(), await fusedDc.array()); - }); - - it('fused A x B with relu and broadcasted bias gradient', async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const c = tf.tensor2d([[1]]); - const transposeA = false; - const transposeB = false; - - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - const grads = tf.grads((a, b, c) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - const sum = tf.add(prod, c); - return tf.relu(sum); - }); - - const fusedGrads = tf.grads((a, b, c) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - }); - - const [da, db, dc] = grads([a, b, c], dy); - const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.array(), await fusedDb.array()); - expectArraysClose(await dc.array(), await fusedDc.array()); - }); -}); - -describeWithFlags('fused depthwiseConv2D', ALL_ENVS, () => { - it('basic', async () => { - const fSize = 2; - const pad = 'valid'; - const strides = 1; - const chMul = 1; - const inDepth = 1; - - const x = tf.tensor4d( - [ - 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 
0.731641, - 0.0741907, 0.409265, 0.351377 - ], - [1, 3, 3, inDepth]); - const w = tf.tensor4d( - [-0.303873, -0.229223, 0.144333, 0.803373], - [fSize, fSize, inDepth, chMul], - ); - - const result = tf.fused.depthwiseConv2d({x, filter: w, strides, pad}); - expect(result.shape).toEqual([1, 2, 2, 1]); - const expected = [0.47737, 0.40018, 0.00859, -0.09615]; - expectArraysClose(await result.data(), expected); - }); - - it('basic with relu', async () => { - const fSize = 2; - const pad = 'valid'; - const strides = 1; - const chMul = 1; - const inDepth = 1; - - const x = tf.tensor4d( - [ - 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641, - 0.0741907, 0.409265, 0.351377 - ], - [1, 3, 3, inDepth]); - const w = tf.tensor4d( - [-0.303873, -0.229223, 0.144333, 0.803373], - [fSize, fSize, inDepth, chMul], - ); - - const result = tf.fused.depthwiseConv2d( - {x, filter: w, strides, pad, activation: 'relu'}); - expect(result.shape).toEqual([1, 2, 2, 1]); - const expected = [0.47737, 0.40018, 0.00859, 0]; - expectArraysClose(await result.data(), expected); - }); - - it('basic with broadcasted bias and relu', async () => { - const fSize = 2; - const pad = 'valid'; - const strides = 1; - const chMul = 1; - const inDepth = 1; - - const x = tf.tensor4d( - [ - 0.230664, 0.987388, 0.0685208, 0.419224, 0.887861, 0.731641, - 0.0741907, 0.409265, 0.351377 - ], - [1, 3, 3, inDepth]); - const w = tf.tensor4d( - [-0.303873, -0.229223, 0.144333, 0.803373], - [fSize, fSize, inDepth, chMul], - ); - - const result = tf.fused.depthwiseConv2d( - {x, filter: w, strides, pad, bias: tf.scalar(1), activation: 'relu'}); - expect(result.shape).toEqual([1, 2, 2, 1]); - const expected = [1.47737, 1.40018, 1.00859, 0.90385]; - expectArraysClose(await result.data(), expected); - }); - - it('prelu', async () => { - const fSize = 3; - const pad = 'valid'; - const strides = 1; - const chMul = 1; - const inDepth = 1; - - const x = tf.tensor4d( - [ - 0.149194, 0.089009, 0.654891, 0.083324, 0.537043, 0.644331, 0.563037, - 0.211859, 0.633501, 0.186427, 0.777034, 0.50001, 0.607341, 0.95303, - 0.696479, 0.050387, 0.62045, 0.728049, 0.028043, 0.437009, 0.712881, - 0.741935, 0.974474, 0.621102, 0.171411 - ], - [1, 5, 5, inDepth]); - const alpha = tf.tensor4d( - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], [1, 3, 3, 1]); - const w = tf.tensor4d( - [ - -0.125386, -0.975199, -0.640437, -0.281895, -0.990968, -0.347208, - -0.889702, -0.180695, -0.691992 - ], - [fSize, fSize, inDepth, chMul], - ); - - const result = tf.fused.depthwiseConv2d({ - x, - filter: w, - strides, - pad, - activation: 'prelu', - preluActivationWeights: alpha - }); - expect(result.shape).toEqual([1, 3, 3, 1]); - const expected = [ - -0.25400, -0.50118, -0.73622, -0.94068, -1.2298, -1.84585, -2.3089, - -2.7499, -2.64077 - ]; - expectArraysClose(await result.data(), expected); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const grads = tf.grads( - (x: tf.Tensor4D, filter: tf.Tensor4D) => - tf.fused.depthwiseConv2d({x, 
filter, strides, pad})); - const [dx, dfilter] = grads([x, filter], dy); - - expect(dx.shape).toEqual(x.shape); - expectArraysClose( - await dx.data(), - [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); - - expect(dfilter.shape).toEqual(filterShape); - expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - const bias = tf.ones([2, 2, 2, 1]); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const fusedGrads = tf.grads( - (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: b - })); - const [dxFused, dfilterFused, dbiasFused] = - fusedGrads([x, filter, bias], dy); - - const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - const conv = tf.depthwiseConv2d(x, filter, strides, pad); - const sum = tf.add(conv, bias); - return sum; - }); - const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - expectArraysClose(await dxFused.array(), await dx.array()); - expectArraysClose(await dfilterFused.array(), await dfilter.array()); - expectArraysClose(await dbiasFused.array(), await dbias.array()); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and activation', - async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = - [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - const bias = tf.ones([2, 2, 2, 1]); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const fusedGrads = tf.grads( - (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: b, - activation: 'relu' - })); - const [dxFused, dfilterFused, dbiasFused] = - fusedGrads([x, filter, bias], dy); - - const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - const conv = tf.depthwiseConv2d(x, filter, strides, pad); - const sum = tf.add(conv, bias); - return tf.relu(sum); - }); - const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - expectArraysClose(await dxFused.array(), await dx.array()); - expectArraysClose(await dfilterFused.array(), await dfilter.array()); - expectArraysClose(await dbiasFused.array(), await dbias.array()); - }); -}); - -describeWithFlags('fused conv2d', ALL_ENVS, () => { - it('basic', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const w = - tf.tensor4d([-1, 1, -2, 
0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({x, filter: w, strides: stride, pad}); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = - [-5, 2, -11, 5, -17, 8, -23, 11, -29, 14, -35, 17, -41, 20, -47, 23]; - - expectArraysClose(await result.data(), expected); - }); - - it('basic with relu', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const w = - tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'relu' - }); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = [0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23]; - - expectArraysClose(await result.data(), expected); - }); - - it('relu with stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=1 p=same', - async () => { - const inputDepth = 16; - const xSize = 8; - const inputShape: [number, number, number, number] = - [1, xSize, xSize, inputDepth]; - const outputDepth = 1; - const fSize = 3; - const pad = 'same'; - const stride: [number, number] = [2, 2]; - - // TODO(annxingyuan): Make this test work with large inputs - // https://github.com/tensorflow/tfjs/issues/3143 - const inputData = []; - for (let i = 0; i < xSize * xSize * inputDepth; i++) { - inputData.push(i % 5); - } - - const wData = []; - for (let i = 0; i < fSize * fSize * inputDepth * outputDepth; i++) { - wData.push(i % 5); - } - - const x = tf.tensor4d(inputData, inputShape); - const w = tf.tensor4d(wData, [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'relu' - }); - expect(result.shape).toEqual([1, 4, 4, 1]); - expectArraysClose(await result.data(), new Float32Array([ - 854, 431, 568, 382, 580, 427, 854, 288, 431, 568, - 580, 289, 285, 570, 285, 258 - ])); - }); - - it('relu bias stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=8 p=same', - async () => { - const inputDepth = 16; - const xSize = 8; - const inputShape: [number, number, number, number] = - [1, xSize, xSize, inputDepth]; - const outputDepth = 8; - const fSize = 3; - const pad = 'same'; - const stride: [number, number] = [2, 2]; - - const inputs = generateCaseInputs( - 1 * xSize * xSize * inputDepth, - fSize * fSize * inputDepth * outputDepth); - const x = tf.tensor4d(inputs.input, inputShape); - const w = - tf.tensor4d(inputs.filter, [fSize, fSize, inputDepth, outputDepth]); - const bias = tf.tensor1d([1, 4, 2, 3, 9, 6, 5, 8]); - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'relu', - bias - }); - expect(result.shape).toEqual([1, 4, 4, 8]); - expectArraysClose(await result.data(), new Float32Array([ - 25.75398063659668, - 0, - 26.857805252075195, - 0, - 33.961631774902344, - 0, - 30.065458297729492, - 0, - 23.118206024169922, - 0, - 24.212820053100586, - 0, - 31.307422637939453, - 0, - 27.402034759521484, - 0, - 20.482431411743164, - 0, - 21.567821502685547, - 0, - 28.653217315673828, - 0, - 24.73861312866211, - 0, - 11.078080177307129, - 0, - 12.130399703979492, - 0, - 19.182720184326172, - 0, - 15.235037803649902, - 0, - 4.6677775382995605, 
- 0.31717729568481445, - 5.697869777679443, - 0, - 12.727968215942383, - 2.2569849491119385, - 8.758066177368164, - 4.226885795593262, - 2.0319995880126953, - 2.9575586318969727, - 3.052880048751831, - 1.9366796016693115, - 10.073760032653809, - 4.915799617767334, - 6.094639778137207, - 6.89492130279541, - 0, - 5.5979437828063965, - 0.4078875780105591, - 4.586280822753906, - 7.419551849365234, - 7.5746169090271, - 3.43121600151062, - 9.562952041625977, - 0, - 6.404943943023682, - 0, - 5.401776313781738, - 6.5998077392578125, - 8.398608207702637, - 2.602976083755493, - 10.395440101623535, - 0, - 21.440250396728516, - 0, - 20.483882904052734, - 0, - 23.527509689331055, - 0, - 25.571144104003906, - 0, - 24.080629348754883, - 0, - 23.133480072021484, - 0, - 26.186328887939453, - 0, - 28.239177703857422, - 0, - 26.721012115478516, - 0, - 25.783079147338867, - 0, - 28.84514808654785, - 0, - 30.907209396362305, - 0, - 18.914127349853516, - 0, - 17.960111618041992, - 0, - 21.006093978881836, - 0, - 23.052082061767578, - 0, - 17.89089584350586, - 0, - 16.95684814453125, - 0, - 20.022798538208008, - 0, - 22.088754653930664, - 0, - 19.06132698059082, - 0, - 18.133424758911133, - 0, - 21.205520629882812, - 0, - 23.27761459350586, - 0, - 20.23175811767578, - 0, - 19.309999465942383, - 0, - 22.388240814208984, - 0, - 24.46647834777832, - 0, - 13.584352493286133, - 0, - 12.6395845413208, - 0, - 15.694815635681152, - 0, - 17.750045776367188 - ])); - }); - - it('prelu bias stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=8 p=same', - async () => { - const inputDepth = 16; - const xSize = 8; - const inputShape: [number, number, number, number] = - [1, xSize, xSize, inputDepth]; - const outputDepth = 8; - const fSize = 3; - const pad = 'same'; - const stride: [number, number] = [2, 2]; - - const inputs = generateCaseInputs( - 1 * xSize * xSize * inputDepth, - fSize * fSize * inputDepth * outputDepth); - const x = tf.tensor4d(inputs.input, inputShape); - const w = - tf.tensor4d(inputs.filter, [fSize, fSize, inputDepth, outputDepth]); - const bias = tf.tensor1d([1, 4, 2, 3, 9, 6, 5, 8]); - const preluActivationWeights = tf.tensor1d([1, 2, 3, 4, 5, 6, 7, 8]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'prelu', - preluActivationWeights, - bias - }); - expect(result.shape).toEqual([1, 4, 4, 8]); - expectArraysClose( - await result.data(), new Float32Array([ - 25.75398063659668, -41.61178970336914, 26.857805252075195, - -87.63885498046875, 33.961631774902344, -114.0812759399414, - 30.065458297729492, -136.93893432617188, 23.118206024169922, - -36.33102035522461, 24.212820053100586, -77.04048156738281, - 31.307422637939453, -98.12835693359375, 27.402034759521484, - -115.5947265625, 20.482431411743164, -31.050262451171875, - 21.567821502685547, -66.44209289550781, 28.653217315673828, - -82.17544555664062, 24.73861312866211, -94.25041198730469, - 11.078080177307129, -12.208478927612305, 12.130399703979492, - -28.626232147216797, 19.182720184326172, -25.253299713134766, - 15.235037803649902, -18.08960723876953, 4.6677775382995605, - 0.31717729568481445, 5.697869777679443, -2.8516759872436523, - 12.727968215942383, 2.2569849491119385, 8.758066177368164, - 4.226885795593262, 2.0319995880126953, 2.9575586318969727, - 3.052880048751831, 1.9366796016693115, 10.073760032653809, - 4.915799617767334, 6.094639778137207, 6.89492130279541, - -0.6037763357162476, 5.5979437828063965, 0.4078875780105591, - 4.586280822753906, 
7.419551849365234, 7.5746169090271, - 3.43121600151062, 9.562952041625977, -1.4065279960632324, - 6.404943943023682, -1.2100803852081299, 5.401776313781738, - 6.5998077392578125, 8.398608207702637, 2.602976083755493, - 10.395440101623535, -16.418434143066406, 21.440250396728516, - -46.38618850708008, 20.483882904052734, -42.52848815917969, - 23.527509689331055, -87.84530639648438, 25.571144104003906, - -19.054208755493164, 24.080629348754883, -54.32115936279297, - 23.133480072021484, -55.79951477050781, 26.186328887939453, - -106.48924255371094, 28.239177703857422, -21.689987182617188, - 26.721012115478516, -62.25614929199219, 25.783079147338867, - -69.070556640625, 28.84514808654785, -125.13325500488281, - 30.907209396362305, -13.891133308410645, 18.914127349853516, - -38.81135940551758, 17.960111618041992, -29.915504455566406, - 21.006093978881836, -70.20361328125, 23.052082061767578, - -12.857919692993164, 17.89089584350586, -35.771610260009766, - 16.95684814453125, -24.949115753173828, 20.022798538208008, - -63.39042282104492, 22.088754653930664, -14.02528190612793, - 19.06132698059082, -39.2921257019043, 18.133424758911133, - -30.847349166870117, 21.205520629882812, -71.69097137451172, - 23.27761459350586, -15.192638397216797, 20.23175811767578, - -42.8126335144043, 19.309999465942383, -36.74560546875, - 22.388240814208984, -79.99152374267578, 24.46647834777832, - -8.556736946105957, 13.584352493286133, -22.835901260375977, - 12.6395845413208, -3.336000442504883, 15.694815635681152, - -33.0570182800293, 17.750045776367188 - ])); - }); - - it('basic with bias', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const w = - tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: tf.tensor1d([5, 6]) - }); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = - [0, 8, -6, 11, -12, 14, -18, 17, -24, 20, -30, 23, -36, 26, -42, 29]; - - expectArraysClose(await result.data(), expected); - }); - - it('basic with explicit padding', async () => { - const inputDepth = 1; - const outputDepth = 1; - const pad = - [[0, 0], [1, 2], [0, 1], [0, 0]] as tf.backend_util.ExplicitPadding; - const stride = 1; - const dataFormat = 'NHWC'; - const dilation = 1; - - const x = tf.tensor3d([1, 2, 3, 4, 5, 6, 7, 8], [4, 2, inputDepth]); - const w = - tf.tensor4d([3, 1, 5, 0, 2, 7, 8, 9], [4, 2, inputDepth, outputDepth]); - - const result = tf.fused.conv2d( - {x, filter: w, strides: stride, pad, dataFormat, dilations: dilation}); - - const resultData = await result.data(); - expect(result.shape).toEqual([4, 2, 1]); - expectArraysClose(resultData, [133, 66, 200, 102, 108, 58, 56, 58]); - }); - - it('basic with elu', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const w = - tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 
'elu' - }); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = - [-0.99326, 2, -1, 5, -1, 8, -1, 11, -1, 14, -1, 17, -1, 20, -1, 23]; - - expectArraysClose(await result.data(), expected); - }); - - it('basic with prelu', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const alpha = tf.tensor3d([0.25, 0.75], [1, 1, 2]); - const w = - tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'prelu', - preluActivationWeights: alpha - }); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = [ - -1.25, 2, -2.75, 5, -4.25, 8, -5.75, 11, -7.25, 14, -8.75, 17, -10.25, 20, - -11.75, 23 - ]; - - expectArraysClose(await result.data(), expected); - }); - - it('basic with broadcasted bias and relu', async () => { - const inputDepth = 2; - const inShape: [number, number, number, number] = [2, 2, 2, inputDepth]; - const outputDepth = 2; - const fSize = 1; - const pad = 0; - const stride = 1; - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], inShape); - const w = - tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides: stride, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: tf.scalar(5), - activation: 'relu' - }); - expect(result.shape).toEqual([2, 2, 2, 2]); - const expected = [0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28]; - - expectArraysClose(await result.data(), expected); - }); - - it('im2row', async () => { - const inputDepth = 1; - const inputShape: [number, number, number] = [4, 4, inputDepth]; - const outputDepth = 3; - const fSize = 1; - const pad = 'same'; - const strides: [number, number] = [2, 2]; - - const x = tf.tensor3d( - [ - 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 - ], - inputShape); - const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({x, filter: w, strides, pad}); - - expectArraysClose( - await result.data(), - [10, 5, 10, 50, 25, 50, -10, -5, -10, -50, -25, -50]); - }); - - it('im2row with relu', async () => { - const inputDepth = 1; - const inputShape: [number, number, number] = [4, 4, inputDepth]; - const outputDepth = 3; - const fSize = 1; - const pad = 'same'; - const strides: [number, number] = [2, 2]; - - const x = tf.tensor3d( - [ - 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 - ], - inputShape); - const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'relu' - }); - - expectArraysClose( - await result.data(), [10, 5, 10, 50, 25, 50, 0, 0, 0, 0, 0, 0]); - }); - - it('im2row with prelu', async () => { - const inputDepth = 1; - const inputShape: [number, number, number] = [4, 4, inputDepth]; - const outputDepth = 3; - const fSize = 1; - const pad = 'same'; - const strides: [number, number] = [2, 2]; - - const x = tf.tensor3d( - [ - 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 - ], - inputShape); - const w = tf.tensor4d([1, 0.5, 1], 
[fSize, fSize, inputDepth, outputDepth]); - const alpha = tf.tensor3d([0.5], [1, 1, inputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'prelu', - preluActivationWeights: alpha - }); - - expectArraysClose( - await result.data(), - [10, 5, 10, 50, 25, 50, -5, -2.5, -5, -25, -12.5, -25]); - }); - - it('pointwise with prelu', async () => { - const inputDepth = 1; - const inputShape: [number, number, number] = [4, 4, inputDepth]; - const outputDepth = 3; - const fSize = 1; - const pad = 'same'; - const strides: [number, number] = [1, 1]; - - const x = tf.tensor3d( - [ - 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 - ], - inputShape); - const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); - const alpha = tf.tensor3d([0.5], [1, 1, inputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - activation: 'prelu', - preluActivationWeights: alpha - }); - - expectArraysClose(await result.data(), [ - 10, 5, 10, 30, 15, 30, 50, 25, 50, 70, 35, 70, - 20, 10, 20, 40, 20, 40, 60, 30, 60, 80, 40, 80, - -5, -2.5, -5, -15, -7.5, -15, -25, -12.5, -25, -35, -17.5, -35, - -10, -5, -10, -20, -10, -20, -30, -15, -30, -40, -20, -40 - ]); - }); - - it('im2row with broadcasted bias and relu', async () => { - const inputDepth = 1; - const inputShape: [number, number, number] = [4, 4, inputDepth]; - const outputDepth = 3; - const fSize = 1; - const pad = 'same'; - const strides: [number, number] = [2, 2]; - - const x = tf.tensor3d( - [ - 10, 30, 50, 70, 20, 40, 60, 80, -10, -30, -50, -70, -20, -40, -60, -80 - ], - inputShape); - const w = tf.tensor4d([1, 0.5, 1], [fSize, fSize, inputDepth, outputDepth]); - - const result = tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: tf.scalar(5), - activation: 'relu' - }); - - expectArraysClose( - await result.data(), [15, 10, 15, 55, 30, 55, 0, 0, 0, 0, 0, 0]); - }); - - it('backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const grads = tf.grads( - (x: tf.Tensor4D) => tf.fused.conv2d({x, filter, strides, pad})); - const [dx] = grads([x], dy); - - expect(dx.shape).toEqual(x.shape); - expectArraysClose( - await dx.data(), - [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - 
const grads = tf.grads( - (x: tf.Tensor4D, filter: tf.Tensor4D) => - tf.fused.conv2d({x, filter, strides, pad})); - const [dx, dfilter] = grads([x, filter], dy); - - expect(dx.shape).toEqual(x.shape); - expectArraysClose( - await dx.data(), - [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); - - expect(dfilter.shape).toEqual(filterShape); - expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - const bias = tf.ones([2, 2, 2, 1]); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const fusedGrads = - tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: b - })); - const [dxFused, dfilterFused, dbiasFused] = - fusedGrads([x, filter, bias], dy); - - const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - const conv = tf.conv2d(x, filter, strides, pad); - const sum = tf.add(conv, bias); - return sum; - }); - const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - expectArraysClose(await dxFused.array(), await dx.array()); - expectArraysClose(await dfilterFused.array(), await dfilter.array()); - expectArraysClose(await dbiasFused.array(), await dbias.array()); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and relu', - async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = - [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - const bias = tf.ones([2, 2, 2, 1]); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const fusedGrads = - tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: b, - activation: 'relu' - })); - const [dxFused, dfilterFused, dbiasFused] = - fusedGrads([x, filter, bias], dy); - - const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - const conv = tf.conv2d(x, filter, strides, pad); - const sum = tf.add(conv, bias); - return tf.relu(sum); - }); - const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - expectArraysClose(await dxFused.array(), await dx.array()); - expectArraysClose(await dfilterFused.array(), await dfilter.array()); - expectArraysClose(await dbiasFused.array(), await dbias.array()); - }); - - it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and elu', async () => { - const inputDepth = 1; - const outputDepth = 1; - const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; - const filterSize = 2; - const strides = 1; - const pad = 0; - - const filterShape: [number, number, number, number] = - [filterSize, filterSize, 
inputDepth, outputDepth]; - const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - const bias = tf.ones([2, 2, 2, 1]); - - const x = tf.tensor4d( - [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - const fusedGrads = - tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - x, - filter: w, - strides, - pad, - dataFormat: 'NHWC', - dilations: [1, 1], - bias: b, - activation: 'elu' - })); - const [dxFused, dfilterFused, dbiasFused] = - fusedGrads([x, filter, bias], dy); - - const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - const conv = tf.conv2d(x, filter, strides, pad); - const sum = tf.add(conv, bias); - return tf.elu(sum); - }); - const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - expectArraysClose(await dxFused.array(), await dx.array()); - expectArraysClose(await dfilterFused.array(), await dfilter.array()); - expectArraysClose(await dbiasFused.array(), await dbias.array()); - }); - - it('fused matmul with relu6 and gradients', async () => { - const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - const transposeA = false; - const transposeB = false; - - const fusedGrads = tf.grads((a, b) => { - return tf.fused.matMul( - {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); - }); - const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - - const grads = tf.grads((a, b) => { - const prod = tf.matMul(a, b, transposeA, transposeB); - return tf.relu6(prod); - }); - const [da, db] = grads([a, b], dy); - - expectArraysClose(await da.array(), await fusedDa.array()); - expectArraysClose(await db.data(), await fusedDb.array()); - }); -}); diff --git a/tfjs-core/src/ops/fused/types.ts b/tfjs-core/src/ops/fused_types.ts similarity index 92% rename from tfjs-core/src/ops/fused/types.ts rename to tfjs-core/src/ops/fused_types.ts index ffd5c423a4e..894e2708869 100644 --- a/tfjs-core/src/ops/fused/types.ts +++ b/tfjs-core/src/ops/fused_types.ts @@ -15,8 +15,8 @@ * ============================================================================= */ -import {Tensor, Tensor3D, Tensor4D} from '../../tensor'; -import {Conv2DInfo} from '../conv_util'; +import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import {Conv2DInfo} from './conv_util'; export type FusedConv2DConfig = { input: Tensor4D, diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 8784be9af29..1250b3460c5 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -19,17 +19,11 @@ import {Tensor} from '../tensor'; import * as broadcast_util from './broadcast_util'; import {elu} from './elu'; -import {Activation} from './fused/types'; +import {Activation} from './fused_types'; import {prelu} from './prelu'; import {relu} from './relu'; import {relu6} from './relu6'; -// Whether we should call fused ops. -export const shouldFuse = (gradientDepth: number, activation: Activation) => { - const gradientMode = gradientDepth > 0; - return !gradientMode || activation === 'linear'; -}; - // Returns gradient for fused activation. 
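 // The fused conv2d/matMul gradients call this to map dy (taken w.r.t. the
 // activated output y) back to a gradient w.r.t. the pre-activation value
 // before invoking the conv/matMul backprop kernels.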
export function getFusedDyActivation( dy: Tensor, y: Tensor, activation: Activation): Tensor { diff --git a/tfjs-core/src/tests.ts b/tfjs-core/src/tests.ts index 5b2cbfcbaff..0ed5690dd44 100644 --- a/tfjs-core/src/tests.ts +++ b/tfjs-core/src/tests.ts @@ -102,7 +102,9 @@ import './ops/fill_test'; import './ops/floor_test'; import './ops/frame_test'; import './ops/from_pixels_test'; -import './ops/fused_test'; +import './ops/fused_conv2d_test'; +import './ops/fused_depthwise_conv2d_test'; +import './ops/fused_mat_mul_test'; import './ops/gather_nd_test'; import './ops/gather_test'; import './ops/gram_schmidt_test'; From bb2e13e498e012603c3a2ecf2354e44bd66f0104 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Tue, 14 Jul 2020 17:56:56 -0400 Subject: [PATCH 05/12] modularise fused ops --- tfjs-core/src/kernel_names.ts | 48 +++++++++++++++++ tfjs-core/src/ops/fused_conv2d.ts | 57 ++++++++++----------- tfjs-core/src/ops/fused_depthwise_conv2d.ts | 55 ++++++++++---------- tfjs-core/src/ops/fused_mat_mul.ts | 55 ++++++++++---------- 4 files changed, 129 insertions(+), 86 deletions(-) diff --git a/tfjs-core/src/kernel_names.ts b/tfjs-core/src/kernel_names.ts index 2020d63ee8a..64fb16e672a 100644 --- a/tfjs-core/src/kernel_names.ts +++ b/tfjs-core/src/kernel_names.ts @@ -21,6 +21,7 @@ import {ExplicitPadding} from '../src/ops/conv_util'; import {NamedTensorInfoMap, TensorInfo} from './kernel_registry'; +import {Activation} from './ops/fused_types'; import {DataType, PixelData} from './types'; export const Abs = 'Abs'; @@ -740,3 +741,50 @@ export interface RotateWithOffsetAttrs { fillValue: number|[number, number, number]; center: number|[number, number]; } + +export const _FusedMatMul = '_FusedMatMul'; +// tslint:disable-next-line: class-name +export interface _FusedMatMulInputs extends NamedTensorInfoMap { + a: TensorInfo; + b: TensorInfo; + bias?: TensorInfo; + preluActivationWeights?: TensorInfo; +} +// tslint:disable-next-line: class-name +export interface _FusedMatMulAttrs { + transposeA: boolean; + transposeB: boolean; + activation: Activation; +} + +export const FusedConv2D = 'FusedConv2D'; +export interface FusedConv2DInputs extends NamedTensorInfoMap { + x: TensorInfo; + filter: TensorInfo; + bias?: TensorInfo; + preluActivationWeights?: TensorInfo; +} +export interface FusedConv2DAttrs { + strides: [number, number]|number; + pad: 'valid'|'same'|number|ExplicitPadding; + dataFormat: 'NHWC'|'NCHW'; + dilations: [number, number]|number; + dimRoundingMode: 'floor'|'round'|'ceil'; + activation: Activation; +} + +export const FusedDepthwiseConv2D = 'FusedDepthwiseConv2D'; +export interface FusedDepthwiseConv2DInputs extends NamedTensorInfoMap { + x: TensorInfo; + filter: TensorInfo; + bias?: TensorInfo; + preluActivationWeights?: TensorInfo; +} +export interface FusedDepthwiseConv2DAttrs { + strides: [number, number]|number; + pad: 'valid'|'same'|number; + dataFormat: 'NHWC'|'NCHW'; + dilations: [number, number]|number; + dimRoundingMode: 'floor'|'round'|'ceil'; + activation: Activation; +} diff --git a/tfjs-core/src/ops/fused_conv2d.ts b/tfjs-core/src/ops/fused_conv2d.ts index 5d507b6f415..e9c35c0a87f 100644 --- a/tfjs-core/src/ops/fused_conv2d.ts +++ b/tfjs-core/src/ops/fused_conv2d.ts @@ -15,8 +15,11 @@ * ============================================================================= */ -import {ENGINE} from '../engine'; +import {ENGINE, ForwardFunc} from '../engine'; +import {FusedConv2D, FusedConv2DAttrs, FusedConv2DInputs} from '../kernel_names'; +import {NamedAttrMap} from 
'../kernel_registry'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import {NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; @@ -27,8 +30,6 @@ import * as conv_util from './conv_util'; import {Activation} from './fused_types'; import {op} from './operation'; - - /** * Computes a 2D convolution over the input x, optionally fused with adding a * bias and applying an activation. @@ -221,35 +222,31 @@ function fusedConv2d_({ preluActivationWeights, 'prelu weights', 'fused conv2d'); } - const inputs: { - x: Tensor, - filter: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor - } = {x: x4D, filter: $filter}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } + const inputs: FusedConv2DInputs = { + x: x4D, + filter: $filter, + bias: $bias, + preluActivationWeights: $preluActivationWeights + }; + + const forward: ForwardFunc = (backend) => { + const res = backend.fusedConv2d({ + input: x4D, + filter: $filter, + convInfo, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + return res; + }; + + const attrs: FusedConv2DAttrs = + {strides, pad, dataFormat, dilations, dimRoundingMode, activation}; - const inputsToSave = [$filter, x4D]; - const outputsToSave = [true]; // Save the only output. const res = ENGINE.runKernelFunc( - (backend) => { - const res = backend.fusedConv2d({ - input: x4D, - filter: $filter, - convInfo, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - return res; - }, - inputs, null /* grad */, 'FusedConv2D', {convInfo, activation}, - inputsToSave, outputsToSave); + forward, inputs as {} as NamedTensorMap, null /* grad */, FusedConv2D, + attrs as {} as NamedAttrMap); if (reshapedTo4D) { return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; diff --git a/tfjs-core/src/ops/fused_depthwise_conv2d.ts b/tfjs-core/src/ops/fused_depthwise_conv2d.ts index 64180ac8ab0..f253757bf3d 100644 --- a/tfjs-core/src/ops/fused_depthwise_conv2d.ts +++ b/tfjs-core/src/ops/fused_depthwise_conv2d.ts @@ -15,8 +15,11 @@ * ============================================================================= */ -import {ENGINE} from '../engine'; +import {ENGINE, ForwardFunc} from '../engine'; +import {FusedDepthwiseConv2D, FusedDepthwiseConv2DAttrs, FusedDepthwiseConv2DInputs} from '../kernel_names'; +import {NamedAttrMap} from '../kernel_registry'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; +import {NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; @@ -156,35 +159,31 @@ function fusedDepthwiseConv2d_({ preluActivationWeights, 'prelu weights', 'fused depthwiseConv2d'); } - const inputs: { - x: Tensor, - filter: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor - } = {x: x4D, filter: $filter}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } + const forward: ForwardFunc = (backend) => { + const res = backend.fusedDepthwiseConv2D({ + input: x4D, + filter: $filter, + convInfo, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + return res; + }; + + const inputs: FusedDepthwiseConv2DInputs = { + x: x4D, + filter: $filter, + 
bias: $bias, + preluActivationWeights: $preluActivationWeights + }; + const attrs: FusedDepthwiseConv2DAttrs = + {strides, pad, dataFormat, dilations, dimRoundingMode, activation}; - const inputsToSave = [$filter, x4D]; - const outputsToSave = [true]; const res = ENGINE.runKernelFunc( - (backend) => { - const res = backend.fusedDepthwiseConv2D({ - input: x4D, - filter: $filter, - convInfo, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - return res; - }, - inputs, null /* grad */, 'FusedDepthwiseConv2D', {convInfo, activation}, - inputsToSave, outputsToSave); + forward, inputs as {} as NamedTensorMap, null /* grad */, + FusedDepthwiseConv2D, attrs as {} as NamedAttrMap); + if (reshapedTo4D) { return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; } diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index 3340bcfa102..a4254ea959b 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -15,8 +15,11 @@ * ============================================================================= */ -import {ENGINE} from '../engine'; +import {ENGINE, ForwardFunc} from '../engine'; +import {_FusedMatMul, _FusedMatMulAttrs, _FusedMatMulInputs} from '../kernel_names'; +import {NamedAttrMap} from '../kernel_registry'; import {Tensor} from '../tensor'; +import {NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; @@ -122,35 +125,31 @@ function fusedMatMul_({ preluActivationWeights, 'prelu weights', 'fused matMul'); } - const inputs: - {a: Tensor, b: Tensor, - bias?: Tensor, - preluActivationWeights?: Tensor} = {a: a3D, b: b3D}; - if (bias != null) { - inputs.bias = $bias; - } - if (preluActivationWeights != null) { - inputs.preluActivationWeights = $preluActivationWeights; - } - - const inputsToSave = [a3D, b3D]; - const outputsToSave = [true]; + const forward: ForwardFunc = (backend) => { + const y = backend.fusedBatchMatMul({ + a: a3D, + b: b3D, + transposeA, + transposeB, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + return y; + }; + + const inputs: _FusedMatMulInputs = { + a: a3D, + b: b3D, + bias: $bias, + preluActivationWeights: $preluActivationWeights + }; + const attrs: _FusedMatMulAttrs = {transposeA, transposeB, activation}; const res = ENGINE.runKernelFunc( - (backend) => { - const y = backend.fusedBatchMatMul({ - a: a3D, - b: b3D, - transposeA, - transposeB, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - return y; - }, - inputs, null /* grad */, '_FusedMatMul', - {transposeA, transposeB, activation}, inputsToSave, outputsToSave); + forward, inputs as {} as NamedTensorMap, null /* grad */, _FusedMatMul, + attrs as {} as NamedAttrMap); + return res.reshape(outShape); } From d541877eaf45411b2b51a3401174ee104fe37f6e Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Thu, 16 Jul 2020 22:59:14 -0400 Subject: [PATCH 06/12] restore gradients to fused ops with customGrad --- tfjs-core/src/ops/fused_conv2d.ts | 94 ++++- tfjs-core/src/ops/fused_conv2d_test.ts | 390 +++++++++--------- tfjs-core/src/ops/fused_depthwise_conv2d.ts | 81 +++- .../src/ops/fused_depthwise_conv2d_test.ts | 231 +++++------ tfjs-core/src/ops/fused_mat_mul.ts | 82 +++- tfjs-core/src/ops/fused_mat_mul_test.ts | 285 +++++++------ tfjs-core/src/ops/fused_util.ts | 6 + 7 files changed, 691 insertions(+), 478 deletions(-) diff 
--git a/tfjs-core/src/ops/fused_conv2d.ts b/tfjs-core/src/ops/fused_conv2d.ts index e9c35c0a87f..00317430a20 100644 --- a/tfjs-core/src/ops/fused_conv2d.ts +++ b/tfjs-core/src/ops/fused_conv2d.ts @@ -16,18 +16,24 @@ */ import {ENGINE, ForwardFunc} from '../engine'; +import {customGrad} from '../gradients'; import {FusedConv2D, FusedConv2DAttrs, FusedConv2DInputs} from '../kernel_names'; import {NamedAttrMap} from '../kernel_registry'; +import {conv2DBackpropFilter} from '../ops/conv2d_backprop_filter'; +import {conv2DBackpropInput} from '../ops/conv2d_backprop_input'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import {NamedTensorMap} from '../tensor_types'; +import {GradSaveFunc, NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; import * as util from '../util'; +import {add} from './add'; import * as broadcast_util from './broadcast_util'; +import {conv2d as unfusedConv2d} from './conv2d'; import * as conv_util from './conv_util'; import {Activation} from './fused_types'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from './fused_util'; import {op} from './operation'; /** @@ -167,6 +173,16 @@ function fusedConv2d_({ }): T { activation = activation || 'linear'; + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedConv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + const $x = convertToTensor(x, 'x', 'conv2d'); const $filter = convertToTensor(filter, 'filter', 'conv2d'); @@ -222,11 +238,29 @@ function fusedConv2d_({ preluActivationWeights, 'prelu weights', 'fused conv2d'); } - const inputs: FusedConv2DInputs = { - x: x4D, - filter: $filter, - bias: $bias, - preluActivationWeights: $preluActivationWeights + const grad = (dy: Tensor4D, saved: Tensor[]) => { + const [$filter, x4D, y, $bias] = + saved as [Tensor4D, Tensor4D, Tensor4D, Tensor]; + + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; + + util.assert( + conv_util.tupleValuesAreOne(dilations), + () => 'Error in gradient of fused conv2D: ' + + `dilation rates greater than 1 ` + + `are not yet supported in gradients. Got dilations '${dilations}'`); + + const xDer = + conv2DBackpropInput(x4D.shape, dyActivation, $filter, strides, pad); + const filterDer = + conv2DBackpropFilter(x4D, dyActivation, $filter.shape, strides, pad); + const der: Tensor[] = [xDer, filterDer]; + + if ($bias != null) { + const biasDer = getFusedBiasGradient($bias, dyActivation); + der.push(biasDer); + } + return der; }; const forward: ForwardFunc = (backend) => { @@ -241,17 +275,51 @@ function fusedConv2d_({ return res; }; + const inputs: FusedConv2DInputs = { + x: x4D, + filter: $filter, + bias: $bias, + preluActivationWeights: $preluActivationWeights + }; + const attrs: FusedConv2DAttrs = {strides, pad, dataFormat, dilations, dimRoundingMode, activation}; - const res = ENGINE.runKernelFunc( - forward, inputs as {} as NamedTensorMap, null /* grad */, FusedConv2D, - attrs as {} as NamedAttrMap); + // Depending on the the params passed in we will have different number of + // inputs and thus a a different number of elements in the gradient. 
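+  // With no bias we wrap (x4D, $filter) in customGrad and gradFunc returns
+  // [dx, dfilter]; with a bias we also pass $bias through so gradFunc can
+  // append the bias gradient computed by getFusedBiasGradient.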
+ if (bias == null) { + const customOp = + customGrad((x4D: Tensor4D, filter: Tensor4D, save: GradSaveFunc) => { + let res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + FusedConv2D, attrs as {} as NamedAttrMap); - if (reshapedTo4D) { - return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; - } + save([filter, x4D, res]); - return res as T; + if (reshapedTo4D) { + res = res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + + return {value: res, gradFunc: grad}; + }); + return customOp(x4D, $filter) as T; + } else { + const customOpWithBias = customGrad( + (x4D: Tensor4D, filter: Tensor4D, bias: Tensor, save: GradSaveFunc) => { + let res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + FusedConv2D, attrs as {} as NamedAttrMap); + + save([filter, x4D, res, bias]); + + if (reshapedTo4D) { + res = res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + + return {value: res, gradFunc: grad}; + }); + + return customOpWithBias(x4D, $filter, $bias) as T; + } } export const conv2d = op({fusedConv2d_}); diff --git a/tfjs-core/src/ops/fused_conv2d_test.ts b/tfjs-core/src/ops/fused_conv2d_test.ts index 1f2b34cc84d..fa20cd0d87d 100644 --- a/tfjs-core/src/ops/fused_conv2d_test.ts +++ b/tfjs-core/src/ops/fused_conv2d_test.ts @@ -656,209 +656,189 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { await result.data(), [15, 10, 15, 55, 30, 55, 0, 0, 0, 0, 0, 0]); }); - // it('backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const grads = tf.grads( - // (x: tf.Tensor4D) => tf.fused.conv2d({x, filter, strides, pad})); - // const [dx] = grads([x], dy); - - // expect(dx.shape).toEqual(x.shape); - // expectArraysClose( - // await dx.data(), - // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, - // 0]); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const grads = tf.grads( - // (x: tf.Tensor4D, filter: tf.Tensor4D) => - // tf.fused.conv2d({x, filter, strides, pad})); - // const [dx, dfilter] = grads([x, filter], dy); - - // expect(dx.shape).toEqual(x.shape); - // expectArraysClose( - // await dx.data(), - // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, - // 0]); - - // expect(dfilter.shape).toEqual(filterShape); - // expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 
with bias', async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - // const bias = tf.ones([2, 2, 2, 1]); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const fusedGrads = - // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - // x, - // filter: w, - // strides, - // pad, - // dataFormat: 'NHWC', - // dilations: [1, 1], - // bias: b - // })); - // const [dxFused, dfilterFused, dbiasFused] = - // fusedGrads([x, filter, bias], dy); - - // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - // const conv = tf.conv2d(x, filter, strides, pad); - // const sum = tf.add(conv, bias); - // return sum; - // }); - // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - // expectArraysClose(await dxFused.array(), await dx.array()); - // expectArraysClose(await dfilterFused.array(), await dfilter.array()); - // expectArraysClose(await dbiasFused.array(), await dbias.array()); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and relu', - // async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = - // [2, 3, 3, inputDepth]; - // const filterSize = 2; - // const strides = 1; - // const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - // const bias = tf.ones([2, 2, 2, 1]); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], - // inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const fusedGrads = - // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - // x, - // filter: w, - // strides, - // pad, - // dataFormat: 'NHWC', - // dilations: [1, 1], - // bias: b, - // activation: 'relu' - // })); - // const [dxFused, dfilterFused, dbiasFused] = - // fusedGrads([x, filter, bias], dy); - - // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) - // => { - // const conv = tf.conv2d(x, filter, strides, pad); - // const sum = tf.add(conv, bias); - // return tf.relu(sum); - // }); - // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - // expectArraysClose(await dxFused.array(), await dx.array()); - // expectArraysClose(await dfilterFused.array(), await dfilter.array()); - // expectArraysClose(await dbiasFused.array(), await dbias.array()); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and elu', async () - // => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - // const bias = tf.ones([2, 2, 2, 1]); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - 
// const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const fusedGrads = - // tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ - // x, - // filter: w, - // strides, - // pad, - // dataFormat: 'NHWC', - // dilations: [1, 1], - // bias: b, - // activation: 'elu' - // })); - // const [dxFused, dfilterFused, dbiasFused] = - // fusedGrads([x, filter, bias], dy); - - // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - // const conv = tf.conv2d(x, filter, strides, pad); - // const sum = tf.add(conv, bias); - // return tf.elu(sum); - // }); - // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - // expectArraysClose(await dxFused.array(), await dx.array()); - // expectArraysClose(await dfilterFused.array(), await dfilter.array()); - // expectArraysClose(await dbiasFused.array(), await dbias.array()); - // }); - - // it('fused matmul with relu6 and gradients', async () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - // const transposeA = false; - // const transposeB = false; - - // const fusedGrads = tf.grads((a, b) => { - // return tf.fused.matMul( - // {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); - // }); - // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - - // const grads = tf.grads((a, b) => { - // const prod = tf.matMul(a, b, transposeA, transposeB); - // return tf.relu6(prod); - // }); - // const [da, db] = grads([a, b], dy); - - // expectArraysClose(await da.array(), await fusedDa.array()); - // expectArraysClose(await db.data(), await fusedDb.array()); - // }); + it('backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const grads = tf.grads( + (x: tf.Tensor4D) => tf.fused.conv2d({x, filter, strides, pad})); + const [dx] = grads([x], dy); + + expect(dx.shape).toEqual(x.shape); + expectArraysClose( + await dx.data(), + [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const grads = tf.grads( + (x: tf.Tensor4D, filter: tf.Tensor4D) => + tf.fused.conv2d({x, filter, strides, pad})); + const [dx, dfilter] = grads([x, filter], dy); + + expect(dx.shape).toEqual(x.shape); + expectArraysClose( + await dx.data(), + [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); + + expect(dfilter.shape).toEqual(filterShape); + 
expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = + tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.conv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return sum; + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await dbias.array()); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and relu', + async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = + [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = + tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b, + activation: 'relu' + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.conv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return tf.relu(sum); + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await dbias.array()); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and elu', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = + tf.grads((x: tf.Tensor4D, w: tf.Tensor4D, b) => 
tf.fused.conv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b, + activation: 'elu' + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.conv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return tf.elu(sum); + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await dbias.array()); + }); }); diff --git a/tfjs-core/src/ops/fused_depthwise_conv2d.ts b/tfjs-core/src/ops/fused_depthwise_conv2d.ts index f253757bf3d..8b6f9f49b29 100644 --- a/tfjs-core/src/ops/fused_depthwise_conv2d.ts +++ b/tfjs-core/src/ops/fused_depthwise_conv2d.ts @@ -16,18 +16,24 @@ */ import {ENGINE, ForwardFunc} from '../engine'; +import {customGrad} from '../gradients'; import {FusedDepthwiseConv2D, FusedDepthwiseConv2DAttrs, FusedDepthwiseConv2DInputs} from '../kernel_names'; import {NamedAttrMap} from '../kernel_registry'; import {Tensor, Tensor3D, Tensor4D} from '../tensor'; -import {NamedTensorMap} from '../tensor_types'; +import {GradSaveFunc, NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; import * as util from '../util'; +import {add} from './add'; import * as broadcast_util from './broadcast_util'; import * as conv_util from './conv_util'; +import {depthwiseConv2d as unfusedDepthwiseConv2d} from './depthwise_conv2d'; +import {depthwiseConv2dNativeBackpropFilter} from './depthwise_conv2d_native_backprop_filter'; +import {depthwiseConv2dNativeBackpropInput} from './depthwise_conv2d_native_backprop_input'; import {Activation} from './fused_types'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from './fused_util'; import {op} from './operation'; /** @@ -103,6 +109,16 @@ function fusedDepthwiseConv2d_({ activation?: Activation, preluActivationWeights?: Tensor }): T { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedDepthwiseConv2d( + x, filter, strides, pad, dataFormat, dilations, dimRoundingMode); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + const $x = convertToTensor(x, 'x', 'depthwiseConv2d'); const $filter = convertToTensor(filter, 'filter', 'depthwiseConv2d'); @@ -159,6 +175,29 @@ function fusedDepthwiseConv2d_({ preluActivationWeights, 'prelu weights', 'fused depthwiseConv2d'); } + const grad = (dy: Tensor4D, saved: Tensor[]) => { + util.assert( + conv_util.tupleValuesAreOne(dilations), + () => 'Error in gradient of fused depthwiseConv2d: dilation rates ' + + `greater than 1 are not yet supported. 
Got dilations ` + + `'${dilations}'`); + const [$filter, x4D, y, bias] = saved; + + const dyActivation = getFusedDyActivation(dy, y, activation) as Tensor4D; + + const xDer = depthwiseConv2dNativeBackpropInput( + (x4D as Tensor4D).shape, dyActivation, $filter as Tensor4D, convInfo); + const filterDer = depthwiseConv2dNativeBackpropFilter( + x4D as Tensor4D, dyActivation, ($filter as Tensor4D).shape, convInfo); + + if (bias != null) { + const biasDer = getFusedBiasGradient($bias, dyActivation); + return [xDer, filterDer, biasDer]; + } else { + return [xDer, filterDer]; + } + }; + const forward: ForwardFunc = (backend) => { const res = backend.fusedDepthwiseConv2D({ input: x4D, @@ -180,13 +219,41 @@ function fusedDepthwiseConv2d_({ const attrs: FusedDepthwiseConv2DAttrs = {strides, pad, dataFormat, dilations, dimRoundingMode, activation}; - const res = ENGINE.runKernelFunc( - forward, inputs as {} as NamedTensorMap, null /* grad */, - FusedDepthwiseConv2D, attrs as {} as NamedAttrMap); + // Depending on the the params passed in we will have different number of + // inputs and thus a a different number of elements in the gradient. + if (bias == null) { + const customOp = + customGrad((x4D: Tensor4D, filter: Tensor4D, save: GradSaveFunc) => { + let res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + FusedDepthwiseConv2D, attrs as {} as NamedAttrMap); + + save([filter, x4D, res]); + + if (reshapedTo4D) { + res = res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + + return {value: res, gradFunc: grad}; + }); + return customOp(x4D, $filter) as T; + } else { + const customOpWithBias = customGrad( + (x4D: Tensor4D, filter: Tensor4D, bias: Tensor, save: GradSaveFunc) => { + let res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + FusedDepthwiseConv2D, attrs as {} as NamedAttrMap); + + save([filter, x4D, res, bias]); + + if (reshapedTo4D) { + res = res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + } + + return {value: res, gradFunc: grad}; + }); - if (reshapedTo4D) { - return res.as3D(res.shape[1], res.shape[2], res.shape[3]) as T; + return customOpWithBias(x4D, $filter, $bias) as T; } - return res as T; } export const depthwiseConv2d = op({fusedDepthwiseConv2d_}); diff --git a/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts b/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts index 49e318a7844..ada8531b9c8 100644 --- a/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts +++ b/tfjs-core/src/ops/fused_depthwise_conv2d_test.ts @@ -135,119 +135,120 @@ describeWithFlags('fused depthwiseConv2D', ALL_ENVS, () => { expectArraysClose(await result.data(), expected); }); - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const grads = tf.grads( - // (x: tf.Tensor4D, filter: tf.Tensor4D) => - // tf.fused.depthwiseConv2d({x, filter, strides, pad})); - // const [dx, dfilter] = grads([x, filter], dy); - - // expect(dx.shape).toEqual(x.shape); - // 
expectArraysClose( - // await dx.data(), - // [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, - // 0]); - - // expect(dfilter.shape).toEqual(filterShape); - // expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = [2, 3, 3, - // inputDepth]; const filterSize = 2; const strides = 1; const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - // const bias = tf.ones([2, 2, 2, 1]); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const fusedGrads = tf.grads( - // (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ - // x, - // filter: w, - // strides, - // pad, - // dataFormat: 'NHWC', - // dilations: [1, 1], - // bias: b - // })); - // const [dxFused, dfilterFused, dbiasFused] = - // fusedGrads([x, filter, bias], dy); - - // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { - // const conv = tf.depthwiseConv2d(x, filter, strides, pad); - // const sum = tf.add(conv, bias); - // return sum; - // }); - // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - // expectArraysClose(await dxFused.array(), await dx.array()); - // expectArraysClose(await dfilterFused.array(), await dfilter.array()); - // expectArraysClose(await dbiasFused.array(), await dbias.array()); - // }); - - // it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and activation', - // async () => { - // const inputDepth = 1; - // const outputDepth = 1; - // const inputShape: [number, number, number, number] = - // [2, 3, 3, inputDepth]; - // const filterSize = 2; - // const strides = 1; - // const pad = 0; - - // const filterShape: [number, number, number, number] = - // [filterSize, filterSize, inputDepth, outputDepth]; - // const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); - // const bias = tf.ones([2, 2, 2, 1]); - - // const x = tf.tensor4d( - // [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], - // inputShape); - // const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); - - // const fusedGrads = tf.grads( - // (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ - // x, - // filter: w, - // strides, - // pad, - // dataFormat: 'NHWC', - // dilations: [1, 1], - // bias: b, - // activation: 'relu' - // })); - // const [dxFused, dfilterFused, dbiasFused] = - // fusedGrads([x, filter, bias], dy); - - // const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) - // => { - // const conv = tf.depthwiseConv2d(x, filter, strides, pad); - // const sum = tf.add(conv, bias); - // return tf.relu(sum); - // }); - // const [dx, dfilter, dbias] = grads([x, filter, bias], dy); - - // expectArraysClose(await dxFused.array(), await dx.array()); - // expectArraysClose(await dfilterFused.array(), await dfilter.array()); - // expectArraysClose(await dbiasFused.array(), await dbias.array()); - // }); + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: 
[number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const grads = tf.grads( + (x: tf.Tensor4D, filter: tf.Tensor4D) => + tf.fused.depthwiseConv2d({x, filter, strides, pad})); + const [dx, dfilter] = grads([x, filter], dy); + + expect(dx.shape).toEqual(x.shape); + expectArraysClose( + await dx.data(), + [-3, 2, 1, -8, 1.5, 0.5, -4, 1, 0, -3, 2, 1, -8, 1.5, 0.5, -4, 1, 0]); + + expect(dfilter.shape).toEqual(filterShape); + expectArraysClose(await dfilter.data(), [26, 38, 62, 74]); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias', async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = tf.grads( + (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.depthwiseConv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return sum; + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await dbias.array()); + }); + + it('gradient x=[2,3,3,1] f=[2,2,1,1] s=1 p=0 with bias and activation', + async () => { + const inputDepth = 1; + const outputDepth = 1; + const inputShape: [number, number, number, number] = + [2, 3, 3, inputDepth]; + const filterSize = 2; + const strides = 1; + const pad = 0; + + const filterShape: [number, number, number, number] = + [filterSize, filterSize, inputDepth, outputDepth]; + const filter = tf.tensor4d([-1, 1, -2, 0.5], filterShape); + const bias = tf.ones([2, 2, 2, 1]); + + const x = tf.tensor4d( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9], inputShape); + const dy = tf.tensor4d([3, 1, 2, 0, 3, 1, 2, 0], [2, 2, 2, 1]); + + const fusedGrads = tf.grads( + (x: tf.Tensor4D, w: tf.Tensor4D, b) => tf.fused.depthwiseConv2d({ + x, + filter: w, + strides, + pad, + dataFormat: 'NHWC', + dilations: [1, 1], + bias: b, + activation: 'relu' + })); + const [dxFused, dfilterFused, dbiasFused] = + fusedGrads([x, filter, bias], dy); + + const grads = tf.grads((x: tf.Tensor4D, filter: tf.Tensor4D, bias) => { + const conv = tf.depthwiseConv2d(x, filter, strides, pad); + const sum = tf.add(conv, bias); + return tf.relu(sum); + }); + const [dx, dfilter, dbias] = grads([x, filter, bias], dy); + + expectArraysClose(await dxFused.array(), await dx.array()); + expectArraysClose(await dfilterFused.array(), await dfilter.array()); + expectArraysClose(await dbiasFused.array(), await 
dbias.array()); + }); }); diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index a4254ea959b..b2cf41505ec 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -16,17 +16,21 @@ */ import {ENGINE, ForwardFunc} from '../engine'; +import {customGrad} from '../gradients'; import {_FusedMatMul, _FusedMatMulAttrs, _FusedMatMulInputs} from '../kernel_names'; import {NamedAttrMap} from '../kernel_registry'; -import {Tensor} from '../tensor'; -import {NamedTensorMap} from '../tensor_types'; +import {Tensor, Tensor3D} from '../tensor'; +import {GradSaveFunc, NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; import * as util from '../util'; +import {add} from './add'; import * as broadcast_util from './broadcast_util'; import {Activation} from './fused_types'; +import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from './fused_util'; +import {matMul as unfusedMatMul} from './mat_mul'; import {op} from './operation'; /** @@ -66,6 +70,15 @@ function fusedMatMul_({ activation?: Activation, preluActivationWeights?: Tensor }): T { + if (shouldFuse(ENGINE.state.gradientDepth, activation) === false) { + let result = unfusedMatMul(a, b, transposeA, transposeB); + if (bias != null) { + result = add(result, bias); + } + + return applyActivation(result, activation, preluActivationWeights) as T; + } + let $a = convertToTensor(a, 'a', 'fused matMul'); let $b = convertToTensor(b, 'b', 'fused matMul'); [$a, $b] = makeTypesMatch($a, $b); @@ -125,6 +138,38 @@ function fusedMatMul_({ preluActivationWeights, 'prelu weights', 'fused matMul'); } + const grad = (dy: Tensor3D, saved: Tensor[]) => { + const [a3D, b3D, y, bias] = saved; + const dyActivation = getFusedDyActivation(dy, y, activation); + + let aDer: Tensor; + let bDer: Tensor; + + if (!transposeA && !transposeB) { + aDer = dyActivation.matMul(b3D as Tensor3D, false, true); + bDer = a3D.matMul(dyActivation, true, false); + + } else if (!transposeA && transposeB) { + aDer = dyActivation.matMul(b3D as Tensor3D, false, false); + bDer = dyActivation.matMul(a3D as Tensor3D, true, false); + + } else if (transposeA && !transposeB) { + aDer = b3D.matMul(dyActivation, false, true); + bDer = a3D.matMul(dyActivation, false, false); + + } else { + aDer = b3D.matMul(dyActivation, true, true); + bDer = dyActivation.matMul(a3D as Tensor3D, true, true); + } + + if (bias != null) { + const biasDer = getFusedBiasGradient(bias, dyActivation); + return [aDer, bDer, biasDer]; + } else { + return [aDer, bDer]; + } + }; + const forward: ForwardFunc = (backend) => { const y = backend.fusedBatchMatMul({ a: a3D, @@ -146,11 +191,34 @@ function fusedMatMul_({ }; const attrs: _FusedMatMulAttrs = {transposeA, transposeB, activation}; - const res = ENGINE.runKernelFunc( - forward, inputs as {} as NamedTensorMap, null /* grad */, _FusedMatMul, - attrs as {} as NamedAttrMap); - - return res.reshape(outShape); + // Depending on the the params passed in we will have different number of + // inputs and thus a a different number of elements in the gradient. 
+ if (bias == null) { + const customOp = + customGrad((a: Tensor3D, b: Tensor3D, save: GradSaveFunc) => { + const res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + _FusedMatMul, attrs as {} as NamedAttrMap); + + save([a, b, res]); + + return {value: res.reshape(outShape), gradFunc: grad}; + }); + return customOp(a3D, b3D) as T; + } else { + const customOpWithBias = customGrad( + (a: Tensor3D, b: Tensor3D, bias: Tensor, save: GradSaveFunc) => { + const res = ENGINE.runKernelFunc( + forward, inputs as {} as NamedTensorMap, null /* grad */, + _FusedMatMul, attrs as {} as NamedAttrMap); + + save([a, b, res, bias]); + + return {value: res.reshape(outShape), gradFunc: grad}; + }); + + return customOpWithBias(a3D, b3D, $bias) as T; + } } export const matMul = op({fusedMatMul_}); diff --git a/tfjs-core/src/ops/fused_mat_mul_test.ts b/tfjs-core/src/ops/fused_mat_mul_test.ts index c85123cde11..cb15f706b42 100644 --- a/tfjs-core/src/ops/fused_mat_mul_test.ts +++ b/tfjs-core/src/ops/fused_mat_mul_test.ts @@ -176,135 +176,158 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [1, 9, -2, 21]); }); - // it('fused A x B with relu gradient', async () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - // const transposeA = false; - // const transposeB = false; - - // const grads = tf.grads((a, b) => { - // const prod = tf.matMul(a, b, transposeA, transposeB); - // return tf.relu(prod); - // }); - - // const fusedGrads = tf.grads((a, b) => { - // return tf.fused.matMul( - // {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); - // }); - - // const [da, db] = grads([a, b], dy); - // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - // expectArraysClose(await da.array(), await fusedDa.array()); - // expectArraysClose(await db.data(), await fusedDb.array()); - // }); - - // it('gradient with clones A x B with relu', () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - // const transposeA = false; - // const transposeB = false; - - // const fusedGrads = tf.grads((a, b) => { - // return tf.fused - // .matMul({ - // a: a.clone(), - // b: b.clone(), - // transposeA, - // transposeB, - // bias: null, - // activation: 'relu' - // }) - // .clone(); - // }); - - // const [fusedDa, fusedDb] = fusedGrads([a, b], dy); - // expect(fusedDa.shape).toEqual(a.shape); - // expect(fusedDb.shape).toEqual(b.shape); - // }); - - // it('fused A x B with relu bias gradient', async () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - // const transposeA = false; - // const transposeB = false; - - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - // const grads = tf.grads((a, b, c) => { - // const prod = tf.matMul(a, b, transposeA, transposeB); - // const sum = tf.add(prod, c); - // return tf.relu(sum); - // }); - - // const fusedGrads = tf.grads((a, b, c) => { - // return tf.fused.matMul( - // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - // }); - - // const [da, db, dc] = grads([a, b, c], dy); - // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - // expectArraysClose(await da.array(), await fusedDa.array()); - // 
expectArraysClose(await db.array(), await fusedDb.array()); - // expectArraysClose(await dc.array(), await fusedDc.array()); - // }); - - // it('fused A x B with relu bias gradient transpose', async () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [3, 2]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); - // const transposeA = true; - // const transposeB = false; - - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - // const grads = tf.grads((a, b, c) => { - // const prod = tf.matMul(a, b, transposeA, transposeB); - // const sum = tf.add(prod, c); - // return tf.relu(sum); - // }); - - // const fusedGrads = tf.grads((a, b, c) => { - // return tf.fused.matMul( - // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - // }); - - // const [da, db, dc] = grads([a, b, c], dy); - // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - // expectArraysClose(await da.array(), await fusedDa.array()); - // expectArraysClose(await db.array(), await fusedDb.array()); - // expectArraysClose(await dc.array(), await fusedDc.array()); - // }); - - // it('fused A x B with relu and broadcasted bias gradient', async () => { - // const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); - // const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); - // const c = tf.tensor2d([[1]]); - // const transposeA = false; - // const transposeB = false; - - // const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); - - // const grads = tf.grads((a, b, c) => { - // const prod = tf.matMul(a, b, transposeA, transposeB); - // const sum = tf.add(prod, c); - // return tf.relu(sum); - // }); - - // const fusedGrads = tf.grads((a, b, c) => { - // return tf.fused.matMul( - // {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - // }); - - // const [da, db, dc] = grads([a, b, c], dy); - // const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); - - // expectArraysClose(await da.array(), await fusedDa.array()); - // expectArraysClose(await db.array(), await fusedDb.array()); - // expectArraysClose(await dc.array(), await fusedDc.array()); - // }); + it('fused A x B with relu gradient', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const transposeA = false; + const transposeB = false; + + const grads = tf.grads((a, b) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + return tf.relu(prod); + }); + + const fusedGrads = tf.grads((a, b) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu'}); + }); + + const [da, db] = grads([a, b], dy); + const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.data(), await fusedDb.array()); + }); + + it('gradient with clones A x B with relu', () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const transposeA = false; + const transposeB = false; + + const fusedGrads = tf.grads((a, b) => { + return tf.fused + .matMul({ + a: a.clone(), + b: b.clone(), + transposeA, + transposeB, + bias: null, + activation: 'relu' + }) + .clone(); + }); + + const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + expect(fusedDa.shape).toEqual(a.shape); + expect(fusedDb.shape).toEqual(b.shape); + }); + + it('fused A x B with relu bias 
gradient', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + const transposeA = false; + const transposeB = false; + + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + const grads = tf.grads((a, b, c) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + const sum = tf.add(prod, c); + return tf.relu(sum); + }); + + const fusedGrads = tf.grads((a, b, c) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + }); + + const [da, db, dc] = grads([a, b, c], dy); + const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.array(), await fusedDb.array()); + expectArraysClose(await dc.array(), await fusedDc.array()); + }); + + it('fused A x B with relu bias gradient transpose', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [3, 2]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); + const transposeA = true; + const transposeB = false; + + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + const grads = tf.grads((a, b, c) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + const sum = tf.add(prod, c); + return tf.relu(sum); + }); + + const fusedGrads = tf.grads((a, b, c) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + }); + + const [da, db, dc] = grads([a, b, c], dy); + const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.array(), await fusedDb.array()); + expectArraysClose(await dc.array(), await fusedDc.array()); + }); + + it('fused A x B with relu and broadcasted bias gradient', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const c = tf.tensor2d([[1]]); + const transposeA = false; + const transposeB = false; + + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + + const grads = tf.grads((a, b, c) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + const sum = tf.add(prod, c); + return tf.relu(sum); + }); + + const fusedGrads = tf.grads((a, b, c) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + }); + + const [da, db, dc] = grads([a, b, c], dy); + const [fusedDa, fusedDb, fusedDc] = fusedGrads([a, b, c], dy); + + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.array(), await fusedDb.array()); + expectArraysClose(await dc.array(), await fusedDc.array()); + }); + + it('fused matmul with relu6 and gradients', async () => { + const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); + const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); + const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); + const transposeA = false; + const transposeB = false; + + const fusedGrads = tf.grads((a, b) => { + return tf.fused.matMul( + {a, b, transposeA, transposeB, bias: null, activation: 'relu6'}); + }); + const [fusedDa, fusedDb] = fusedGrads([a, b], dy); + + const grads = tf.grads((a, b) => { + const prod = tf.matMul(a, b, transposeA, transposeB); + return tf.relu6(prod); + }); + const [da, db] = grads([a, b], dy); + + expectArraysClose(await da.array(), await fusedDa.array()); + expectArraysClose(await db.data(), await fusedDb.array()); + }); }); 
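[Editor's note] The fused_util.ts hunk that follows introduces the shouldFuse gate used by all three ops in this patch: fuse only when no gradient is being recorded or when the activation is linear, otherwise fall back to a composition of unfused ops that autograd already knows how to differentiate. A minimal TypeScript sketch of that caller-side pattern (illustration only, not part of the patch; `insideGradient` is a hypothetical stand-in for the engine's private gradientDepth counter):

import * as tf from '@tensorflow/tfjs-core';

function matMulMaybeFused(
    a: tf.Tensor2D, b: tf.Tensor2D, bias: tf.Tensor|null,
    activation: 'linear'|'relu', insideGradient: boolean): tf.Tensor {
  // Mirrors shouldFuse: fusing is safe outside a gradient tape, or for a
  // linear activation whose gradient needs no special handling.
  const canFuse = !insideGradient || activation === 'linear';
  if (!canFuse) {
    // Unfused composition: matMul -> add bias -> activation.
    let out: tf.Tensor = tf.matMul(a, b);
    if (bias != null) {
      out = tf.add(out, bias);
    }
    return activation === 'relu' ? tf.relu(out) : out;
  }
  // Fused kernel path.
  return tf.fused.matMul({a, b, bias, activation});
}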
diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 1250b3460c5..206cdaa4913 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -66,3 +66,9 @@ export function applyActivation( } throw new Error(`Unknown fused activation ${activation}.`); } + +// Whether we should call fused ops. +export const shouldFuse = (gradientDepth: number, activation: Activation) => { + const gradientMode = gradientDepth > 0; + return !gradientMode || activation === 'linear'; +}; From 4e126489a34eceacb22f946a6424575bc8c9c7bd Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Fri, 17 Jul 2020 00:19:28 -0400 Subject: [PATCH 07/12] fix gradient of fusedMatMul_ --- tfjs-core/src/ops/fused_mat_mul.ts | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index b2cf41505ec..2500731115e 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -139,31 +139,31 @@ function fusedMatMul_({ } const grad = (dy: Tensor3D, saved: Tensor[]) => { - const [a3D, b3D, y, bias] = saved; - const dyActivation = getFusedDyActivation(dy, y, activation); - + const [a3D, b3D, y, $bias] = saved; + // we reshape dy because the result of the forward is not + // necessarily going to be a 3d tensor due to a reshape done at the end of + // the customOp. + const dyActivation = + getFusedDyActivation(dy.reshape(y.shape), y, activation) let aDer: Tensor; let bDer: Tensor; if (!transposeA && !transposeB) { - aDer = dyActivation.matMul(b3D as Tensor3D, false, true); + aDer = dyActivation.matMul(b3D, false, true); bDer = a3D.matMul(dyActivation, true, false); - } else if (!transposeA && transposeB) { - aDer = dyActivation.matMul(b3D as Tensor3D, false, false); - bDer = dyActivation.matMul(a3D as Tensor3D, true, false); - + aDer = dyActivation.matMul(b3D, false, false); + bDer = dyActivation.matMul(a3D, true, false); } else if (transposeA && !transposeB) { aDer = b3D.matMul(dyActivation, false, true); bDer = a3D.matMul(dyActivation, false, false); - } else { aDer = b3D.matMul(dyActivation, true, true); - bDer = dyActivation.matMul(a3D as Tensor3D, true, true); + bDer = dyActivation.matMul(a3D, true, true); } if (bias != null) { - const biasDer = getFusedBiasGradient(bias, dyActivation); + const biasDer = getFusedBiasGradient($bias, dyActivation); return [aDer, bDer, biasDer]; } else { return [aDer, bDer]; @@ -195,24 +195,24 @@ function fusedMatMul_({ // inputs and thus a a different number of elements in the gradient. 
if (bias == null) { const customOp = - customGrad((a: Tensor3D, b: Tensor3D, save: GradSaveFunc) => { + customGrad((a3D: Tensor3D, b3D: Tensor3D, save: GradSaveFunc) => { const res = ENGINE.runKernelFunc( forward, inputs as {} as NamedTensorMap, null /* grad */, _FusedMatMul, attrs as {} as NamedAttrMap); - save([a, b, res]); + save([a3D, b3D, res]); return {value: res.reshape(outShape), gradFunc: grad}; }); return customOp(a3D, b3D) as T; } else { const customOpWithBias = customGrad( - (a: Tensor3D, b: Tensor3D, bias: Tensor, save: GradSaveFunc) => { + (a3D: Tensor3D, b3D: Tensor3D, $bias: Tensor, save: GradSaveFunc) => { const res = ENGINE.runKernelFunc( forward, inputs as {} as NamedTensorMap, null /* grad */, _FusedMatMul, attrs as {} as NamedAttrMap); - save([a, b, res, bias]); + save([a3D, b3D, res, $bias]); return {value: res.reshape(outShape), gradFunc: grad}; }); From 80d998e92adc5db9d017ac76c8cce15643f84cfd Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Fri, 17 Jul 2020 00:22:11 -0400 Subject: [PATCH 08/12] save --- tfjs-core/src/ops/fused_mat_mul.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index 2500731115e..9e65e3549f8 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -144,7 +144,7 @@ function fusedMatMul_({ // necessarily going to be a 3d tensor due to a reshape done at the end of // the customOp. const dyActivation = - getFusedDyActivation(dy.reshape(y.shape), y, activation) + getFusedDyActivation(dy.reshape(y.shape), y, activation); let aDer: Tensor; let bDer: Tensor; From a5059f9895c702cc2ddf3dc37022d8f8282c981e Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Fri, 17 Jul 2020 11:52:40 -0400 Subject: [PATCH 09/12] wasm fix --- tfjs-backend-wasm/src/kernels/FusedConv2D.ts | 27 +++++++++---------- .../src/kernels/FusedDepthwiseConv2D.ts | 27 +++++++++---------- tfjs-backend-wasm/src/kernels/_FusedMatMul.ts | 23 ++++------------ tfjs-backend-wasm/src/setup_test.ts | 3 ++- 4 files changed, 33 insertions(+), 47 deletions(-) diff --git a/tfjs-backend-wasm/src/kernels/FusedConv2D.ts b/tfjs-backend-wasm/src/kernels/FusedConv2D.ts index e22ef305a73..77e426a8f4c 100644 --- a/tfjs-backend-wasm/src/kernels/FusedConv2D.ts +++ b/tfjs-backend-wasm/src/kernels/FusedConv2D.ts @@ -15,18 +15,12 @@ * ============================================================================= */ -import {backend_util, KernelConfig, KernelFunc, NamedTensorInfoMap, TensorInfo} from '@tensorflow/tfjs-core'; +import {backend_util, FusedConv2D, FusedConv2DAttrs, FusedConv2DInputs, KernelConfig, KernelFunc, Tensor4D} from '@tensorflow/tfjs-core'; import {BackendWasm} from '../backend_wasm'; import {FusableActivation} from './types'; -interface FusedConv2DInputs extends NamedTensorInfoMap { - x: TensorInfo; - filter: TensorInfo; - bias?: TensorInfo; -} - let wasmFusedConv2d: ( xId: number, batchSize: number, inputHeight: number, inputWidth: number, filterId: number, filterHeight: number, filterWidth: number, biasId: number, @@ -66,11 +60,17 @@ function setup(backend: BackendWasm) { function fusedConv2d(args: { inputs: FusedConv2DInputs, backend: BackendWasm, - attrs: - {convInfo: backend_util.Conv2DInfo, activation: backend_util.Activation} + attrs: FusedConv2DAttrs }) { const {inputs, attrs, backend} = args; - const {convInfo, activation} = attrs; + const {x, filter, bias, preluActivationWeights} = inputs; + const {strides, pad, dilations, dataFormat, dimRoundingMode, 
activation} = + attrs; + + const convInfo = backend_util.computeConv2DInfo( + (x as Tensor4D).shape, (filter as Tensor4D).shape, strides, dilations, + pad, dimRoundingMode); + const fusedActivation = FusableActivation[activation as {} as keyof typeof FusableActivation]; if (fusedActivation == null) { @@ -79,7 +79,6 @@ function fusedConv2d(args: { `in the wasm backend.`); } - const {x, filter, bias, preluActivationWeights} = inputs; const xId = backend.dataIdMap.get(x.dataId).id; const filterId = backend.dataIdMap.get(filter.dataId).id; @@ -117,10 +116,10 @@ function fusedConv2d(args: { const inHeight = convInfo.inHeight; const inWidth = convInfo.inWidth; - if (convInfo.dataFormat !== 'channelsLast') { + if (dataFormat !== 'NHWC') { throw new Error( `wasm backend FusedConv2D does not support dataFormat:'` + - `${convInfo.dataFormat}'. Please use 'channelsLast'.`); + `${dataFormat}'. Please use 'NHWC'.`); } const out = backend.makeOutput(convInfo.outShape, 'float32'); @@ -137,7 +136,7 @@ function fusedConv2d(args: { } export const fusedConv2DConfig: KernelConfig = { - kernelName: 'FusedConv2D', + kernelName: FusedConv2D, backendName: 'wasm', setupFunc: setup, kernelFunc: fusedConv2d as {} as KernelFunc diff --git a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts index e0b95f65c06..208d1574ade 100644 --- a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts +++ b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts @@ -15,18 +15,12 @@ * ============================================================================= */ -import {backend_util, KernelConfig, KernelFunc, NamedTensorInfoMap, TensorInfo} from '@tensorflow/tfjs-core'; +import {backend_util, FusedDepthwiseConv2D, FusedDepthwiseConv2DAttrs, FusedDepthwiseConv2DInputs, KernelConfig, KernelFunc, Tensor4D} from '@tensorflow/tfjs-core'; import {BackendWasm} from '../backend_wasm'; import {FusableActivation} from './types'; -interface FusedDepthwiseConv2DInputs extends NamedTensorInfoMap { - x: TensorInfo; - filter: TensorInfo; - bias?: TensorInfo; -} - let wasmFusedDepthwiseConv2d: ( xId: number, batchSize: number, inputHeight: number, inputWidth: number, filterId: number, filterHeight: number, filterWidth: number, biasId: number, @@ -67,11 +61,17 @@ function setup(backend: BackendWasm) { function fusedDepthwiseConv2d(args: { inputs: FusedDepthwiseConv2DInputs, backend: BackendWasm, - attrs: - {convInfo: backend_util.Conv2DInfo, activation: backend_util.Activation} + attrs: FusedDepthwiseConv2DAttrs }) { const {inputs, attrs, backend} = args; - const {convInfo, activation} = attrs; + const {x, filter, bias, preluActivationWeights} = inputs; + const {strides, pad, dilations, dataFormat, dimRoundingMode, activation} = + attrs; + + const convInfo = backend_util.computeConv2DInfo( + (x as Tensor4D).shape, (filter as Tensor4D).shape, strides, dilations, + pad, dimRoundingMode); + const fusedActivation = FusableActivation[activation as {} as keyof typeof FusableActivation]; if (fusedActivation == null) { @@ -80,7 +80,6 @@ function fusedDepthwiseConv2d(args: { `in the wasm backend.`); } - const {x, filter, bias, preluActivationWeights} = inputs; const xId = backend.dataIdMap.get(x.dataId).id; const filterId = backend.dataIdMap.get(filter.dataId).id; @@ -118,10 +117,10 @@ function fusedDepthwiseConv2d(args: { const inHeight = convInfo.inHeight; const inWidth = convInfo.inWidth; - if (convInfo.dataFormat !== 'channelsLast') { + if (dataFormat !== 'NHWC') { throw new Error( `wasm backend 
FusedDepthwiseConv2D does not support dataFormat:'` + - `${convInfo.dataFormat}'. Please use 'channelsLast'.`); + `${dataFormat}'. Please use 'NHWC'.`); } const out = backend.makeOutput(convInfo.outShape, 'float32'); @@ -138,7 +137,7 @@ function fusedDepthwiseConv2d(args: { } export const fusedDepthwiseConv2DConfig: KernelConfig = { - kernelName: 'FusedDepthwiseConv2D', + kernelName: FusedDepthwiseConv2D, backendName: 'wasm', setupFunc: setup, kernelFunc: fusedDepthwiseConv2d as {} as KernelFunc diff --git a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts index 16349df81a9..3ae7e8ef977 100644 --- a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts @@ -15,25 +15,12 @@ * ============================================================================= */ -import {KernelConfig, NamedAttrMap, NamedTensorInfoMap, TensorInfo} from '@tensorflow/tfjs-core'; +import {_FusedMatMul, _FusedMatMulAttrs, _FusedMatMulInputs, KernelConfig, KernelFunc} from '@tensorflow/tfjs-core'; import {BackendWasm} from '../backend_wasm'; import {FusableActivation} from './types'; -interface FusedMatMulInputs extends NamedTensorInfoMap { - a: TensorInfo; - b: TensorInfo; - bias?: TensorInfo; - preluActivationWeights?: TensorInfo; -} - -interface FusedMatMulAttrs extends NamedAttrMap { - transposeA: boolean; - transposeB: boolean; - activation: FusableActivation; -} - let wasmFusedMatMul: ( aId: number, aShape: Uint8Array, aShapeSize: number, bId: number, bShape: Uint8Array, bShapeSize: number, transposeA: boolean, @@ -58,9 +45,9 @@ function setup(backend: BackendWasm) { } function fusedBatchMatMul(args: { - inputs: FusedMatMulInputs, + inputs: _FusedMatMulInputs, backend: BackendWasm, - attrs: FusedMatMulAttrs + attrs: _FusedMatMulAttrs }) { const {inputs, backend, attrs} = args; const {a, b, bias, preluActivationWeights} = inputs; @@ -114,8 +101,8 @@ function fusedBatchMatMul(args: { } export const fusedMatMulConfig: KernelConfig = { - kernelName: '_FusedMatMul', + kernelName: _FusedMatMul, backendName: 'wasm', setupFunc: setup, - kernelFunc: fusedBatchMatMul + kernelFunc: fusedBatchMatMul as {} as KernelFunc }; diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index c21a3382585..1f32a51601b 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -117,7 +117,8 @@ const TEST_FILTERS: TestFilter[] = [ 'basic with elu', // Only fused relu, relu6, prelu activations // supported. 'gradient', // Gradients not defined yet. - 'NCHW', // xnn pack does not support channels first. + 'backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', + 'NCHW', // xnn pack does not support channels first. // Issue: https://github.com/tensorflow/tfjs/issues/3104. // Actual != expected. 
'relu bias stride 2 x=[1,8,8,16] f=[3,3,16,1] s=[2,2] d=8 p=same', From 641cf58ba9319a0538607a7b52effc4d84a2c137 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Fri, 17 Jul 2020 13:18:20 -0400 Subject: [PATCH 10/12] remove chaining --- tfjs-core/src/ops/fused_mat_mul.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index 9e65e3549f8..8cf4a46e28a 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -32,6 +32,7 @@ import {Activation} from './fused_types'; import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from './fused_util'; import {matMul as unfusedMatMul} from './mat_mul'; import {op} from './operation'; +import {reshape} from './reshape'; /** * Computes the dot product of two matrices with optional activation and bias. @@ -144,7 +145,7 @@ function fusedMatMul_({ // necessarily going to be a 3d tensor due to a reshape done at the end of // the customOp. const dyActivation = - getFusedDyActivation(dy.reshape(y.shape), y, activation); + getFusedDyActivation(reshape(dy, y.shape), y, activation); let aDer: Tensor; let bDer: Tensor; From 52648c195820814917eb7877685c7504888096a0 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Mon, 20 Jul 2020 15:24:29 -0400 Subject: [PATCH 11/12] code review comments --- .../src/kernels/FusedDepthwiseConv2D.ts | 2 +- tfjs-backend-wasm/src/kernels/_FusedMatMul.ts | 2 +- tfjs-backend-wasm/src/setup_test.ts | 3 +- tfjs-core/src/ops/fused_conv2d.ts | 56 ------------------- tfjs-core/src/ops/fused_depthwise_conv2d.ts | 3 +- tfjs-core/src/ops/fused_mat_mul.ts | 20 +++---- 6 files changed, 15 insertions(+), 71 deletions(-) diff --git a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts index 208d1574ade..8231ccc521b 100644 --- a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts +++ b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts @@ -32,7 +32,7 @@ let wasmFusedDepthwiseConv2d: ( function setup(backend: BackendWasm) { wasmFusedDepthwiseConv2d = - backend.wasm.cwrap('FusedDepthwiseConv2D', null /* void */, [ + backend.wasm.cwrap(FusedDepthwiseConv2D, null /* void */, [ 'number', // xId 'number', // batchSize 'number', // inputHeight diff --git a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts index 3ae7e8ef977..3fcb21745a7 100644 --- a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts @@ -28,7 +28,7 @@ let wasmFusedMatMul: ( preluActivationWeightsId: number, outId: number) => void; function setup(backend: BackendWasm) { - wasmFusedMatMul = backend.wasm.cwrap('_FusedMatMul', null /* void */, [ + wasmFusedMatMul = backend.wasm.cwrap(_FusedMatMul, null /* void */, [ 'number', // a_id 'array', // a_shape 'number', // a_shape.length diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 1f32a51601b..507703a9298 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -117,7 +117,8 @@ const TEST_FILTERS: TestFilter[] = [ 'basic with elu', // Only fused relu, relu6, prelu activations // supported. 'gradient', // Gradients not defined yet. - 'backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', + 'backProp input x=[2,3,3,1] f=[2,2,1,1] s=1 p=0', // Gradients not + // defined. 'NCHW', // xnn pack does not support channels first. 
// Issue: https://github.com/tensorflow/tfjs/issues/3104. // Actual != expected. diff --git a/tfjs-core/src/ops/fused_conv2d.ts b/tfjs-core/src/ops/fused_conv2d.ts index 00317430a20..c1e86df2c6c 100644 --- a/tfjs-core/src/ops/fused_conv2d.ts +++ b/tfjs-core/src/ops/fused_conv2d.ts @@ -36,62 +36,6 @@ import {Activation} from './fused_types'; import {applyActivation, getFusedBiasGradient, getFusedDyActivation, shouldFuse} from './fused_util'; import {op} from './operation'; -/** - * Computes a 2D convolution over the input x, optionally fused with adding a - * bias and applying an activation. - * - * ```js - * const inputDepth = 2; - * const inShape = [2, 2, 2, inputDepth]; - * const outputDepth = 2; - * const fSize = 1; - * const pad = 0; - * const strides = 1; - * - * const x = tf.tensor4d( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - * 16], inShape); - * const w = tf.tensor4d([-1, 1, -2, 0.5], [fSize, fSize, inputDepth, - * outputDepth]); - * - * tf.fused.conv2d({ x, filter: w, strides, pad, dataFormat: 'NHWC', - * dilations: [1, 1], bias: tf.scalar(5), activation: 'relu' }).print(); - * ``` - * - * @param obj An object with the following properties: - * @param x The input tensor, of rank 4 or rank 3, of shape - * `[batch, height, width, inChannels]`. If rank 3, batch of 1 is - * assumed. - * @param filter The filter, rank 4, of shape - * `[filterHeight, filterWidth, inDepth, outDepth]`. - * @param strides The strides of the convolution: `[strideHeight, - * strideWidth]`. - * @param pad The type of padding algorithm. - * - `same` and stride 1: output will be of same size as input, - * regardless of filter size. - * - `valid` output will be smaller than input if filter is larger - * than 1x1. - * - For more info, see this guide: - * [https://www.tensorflow.org/api_guides/python/nn#Convolution]( - * https://www.tensorflow.org/api_guides/python/nn#Convolution) - * @param dataFormat An optional string from: "NHWC", "NCHW". Defaults to - * "NHWC". Specify the data format of the input and output data. With the - * default format "NHWC", the data is stored in the order of: [batch, - * height, width, channels]. Only "NHWC" is currently supported. - * @param dilations The dilation rates: `[dilationHeight, dilationWidth]` - * in which we sample input values across the height and width dimensions - * in atrous convolution. Defaults to `[1, 1]`. If `dilations` is a single - * number, then `dilationHeight == dilationWidth`. If it is greater than - * 1, then all values of `strides` must be 1. - * @param dimRoundingMode The rounding mode used when computing output - * dimensions if pad is a number. If none is provided, it will not round - * and error if the output is of fractional size. - * @param bias Tensor to be added to the result. - * @param activation Name of activation kernel (defaults to `linear`) to be - * applied - * after biasAdd. - * @param preluActivationWeights Tensor of prelu weights to be applied as part - * of a `prelu` activation, typically the same shape as `x`. - */ /** * Computes a 2D convolution over the input x, optionally fused with adding a * bias and applying an activation. 
diff --git a/tfjs-core/src/ops/fused_depthwise_conv2d.ts b/tfjs-core/src/ops/fused_depthwise_conv2d.ts index 8b6f9f49b29..5f62f1617c0 100644 --- a/tfjs-core/src/ops/fused_depthwise_conv2d.ts +++ b/tfjs-core/src/ops/fused_depthwise_conv2d.ts @@ -193,9 +193,8 @@ function fusedDepthwiseConv2d_({ if (bias != null) { const biasDer = getFusedBiasGradient($bias, dyActivation); return [xDer, filterDer, biasDer]; - } else { - return [xDer, filterDer]; } + return [xDer, filterDer]; }; const forward: ForwardFunc = (backend) => { diff --git a/tfjs-core/src/ops/fused_mat_mul.ts b/tfjs-core/src/ops/fused_mat_mul.ts index 8cf4a46e28a..7abf4e65e00 100644 --- a/tfjs-core/src/ops/fused_mat_mul.ts +++ b/tfjs-core/src/ops/fused_mat_mul.ts @@ -150,17 +150,17 @@ function fusedMatMul_({ let bDer: Tensor; if (!transposeA && !transposeB) { - aDer = dyActivation.matMul(b3D, false, true); - bDer = a3D.matMul(dyActivation, true, false); + aDer = unfusedMatMul(dyActivation, b3D, false, true); + bDer = unfusedMatMul(a3D, dyActivation, true, false); } else if (!transposeA && transposeB) { - aDer = dyActivation.matMul(b3D, false, false); - bDer = dyActivation.matMul(a3D, true, false); + aDer = unfusedMatMul(dyActivation, b3D, false, false); + bDer = unfusedMatMul(dyActivation, a3D, true, false); } else if (transposeA && !transposeB) { - aDer = b3D.matMul(dyActivation, false, true); - bDer = a3D.matMul(dyActivation, false, false); + aDer = unfusedMatMul(b3D, dyActivation, false, true); + bDer = unfusedMatMul(a3D, dyActivation, false, false); } else { - aDer = b3D.matMul(dyActivation, true, true); - bDer = dyActivation.matMul(a3D, true, true); + aDer = unfusedMatMul(b3D, dyActivation, true, true); + bDer = unfusedMatMul(dyActivation, a3D, true, true); } if (bias != null) { @@ -203,7 +203,7 @@ function fusedMatMul_({ save([a3D, b3D, res]); - return {value: res.reshape(outShape), gradFunc: grad}; + return {value: reshape(res, outShape), gradFunc: grad}; }); return customOp(a3D, b3D) as T; } else { @@ -215,7 +215,7 @@ function fusedMatMul_({ save([a3D, b3D, res, $bias]); - return {value: res.reshape(outShape), gradFunc: grad}; + return {value: reshape(res, outShape), gradFunc: grad}; }); return customOpWithBias(a3D, b3D, $bias) as T; From bb340988cb894d07b936a9bd5577f382f2a3ad24 Mon Sep 17 00:00:00 2001 From: Yannick Assogba Date: Wed, 22 Jul 2020 11:50:29 -0400 Subject: [PATCH 12/12] update error message --- tfjs-core/src/ops/fused_util.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tfjs-core/src/ops/fused_util.ts b/tfjs-core/src/ops/fused_util.ts index 206cdaa4913..a41c7a574bb 100644 --- a/tfjs-core/src/ops/fused_util.ts +++ b/tfjs-core/src/ops/fused_util.ts @@ -34,8 +34,7 @@ export function getFusedDyActivation( return dy.mul(y.step()); } throw new Error( - `Gradient for activation ${activation} has not been ` + - `implemented yet.`); + `Cannot compute gradient for fused activation ${activation}.`); } // Returns gradient for fused bias.
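// Cross-check of the gradient rules that the unfusedMatMul calls in the
// fused_mat_mul.ts hunk above encode (standard matrix-calculus identities,
// unchanged by the refactor). With y the pre-bias product and dY the
// activation-adjusted upstream gradient (dyActivation):
//
//   y = A   * B     =>  dA = dY  * B^T     dB = A^T  * dY
//   y = A   * B^T   =>  dA = dY  * B       dB = dY^T * A
//   y = A^T * B     =>  dA = B   * dY^T    dB = A    * dY
//   y = A^T * B^T   =>  dA = B^T * dY^T    dB = dY^T * A^T
//
// The first case maps onto the new code as:
//   aDer = unfusedMatMul(dyActivation, b3D, false, true);   // dY * B^T
//   bDer = unfusedMatMul(a3D, dyActivation, true, false);   // A^T * dY
// and the remaining three branches follow the same table.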
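// Context for the fused_util.ts error-message change: the visible context
// lines show getFusedDyActivation masking the upstream gradient with
// step(y) for 'relu', and throwing the (now shorter) error for activations
// whose gradient is not implemented; the 'linear' pass-through branch is
// assumed, not shown in the hunk. A minimal sketch of the relu branch only:
import {Tensor} from '../tensor';

function reluDyActivation(dy: Tensor, y: Tensor): Tensor {
  // step(y) is 1 where y > 0 and 0 elsewhere, so units that were inactive
  // in the forward pass receive no gradient through the fused activation.
  return dy.mul(y.step());
}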