From 22ba0f2126041013cfd03c44d94afa075dcc0513 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 24 Feb 2020 15:51:22 -0500 Subject: [PATCH 01/35] initial --- tfjs-backend-wasm/src/cc/BUILD | 10 ++++ tfjs-backend-wasm/src/cc/kernels/Pow.cc | 61 ++++++++++++++++++++ tfjs-backend-wasm/src/kernels/Pow.ts | 20 +++++++ tfjs-backend-wasm/src/kernels/all_kernels.ts | 1 + tfjs-backend-wasm/src/setup_test.ts | 1 + tfjs-core/src/ops/binary_ops.ts | 4 +- 6 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/Pow.cc create mode 100644 tfjs-backend-wasm/src/kernels/Pow.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 0126adfa16e..ff6cce26bcb 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -182,6 +182,7 @@ tfjs_cc_library( ":NonMaxSuppressionV5", ":NotEqual", ":PadV2", + ":Pow", ":Prelu", ":Relu", ":Relu6", @@ -593,6 +594,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "Pow", + srcs = ["kernels/Pow.cc"], + deps = [ + ":backend", + ":util", + ], +) + tfjs_cc_library( name = "Prelu", srcs = ["kernels/Prelu.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc new file mode 100644 index 00000000000..d6080d1ca56 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -0,0 +1,61 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include + +#include "src/cc/binary.h" +#include "src/cc/util.h" + +namespace { +template +inline T pow(T a, T b) { + return pow(a, b); +} +} // namespace + +namespace tfjs { +namespace wasm { +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif +void Pow(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, + const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, + const DType dtype, const size_t out_id) { + switch (dtype) { + case DType::float32: + binary_f32(a_id, b_id, out_id, pow); + break; + case DType::int32: + binary_i32(a_id, b_id, out_id, pow); + break; + case DType::boolean: + binary_bool(a_id, b_id, out_id, pow); + break; + default: + util::warn("Pow for tensor ids %d and %d failed. Unknown dtype %d", a_id, + b_id, dtype); + } +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/Pow.ts b/tfjs-backend-wasm/src/kernels/Pow.ts new file mode 100644 index 00000000000..fae7489c2e6 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/Pow.ts @@ -0,0 +1,20 @@ +/** + * @license + * Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {registerBinaryKernel} from './binary_kernel'; +const supportsFullBroadcast = false; +registerBinaryKernel('Pow', supportsFullBroadcast); diff --git a/tfjs-backend-wasm/src/kernels/all_kernels.ts b/tfjs-backend-wasm/src/kernels/all_kernels.ts index 157a4cbd8fa..be3f297fdb2 100644 --- a/tfjs-backend-wasm/src/kernels/all_kernels.ts +++ b/tfjs-backend-wasm/src/kernels/all_kernels.ts @@ -56,6 +56,7 @@ import './NonMaxSuppressionV3'; import './NonMaxSuppressionV5'; import './NotEqual'; import './PadV2'; +import './Pow'; import './Prelu'; import './Relu'; import './Relu6'; diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 8c4d289b7e1..212b63cea95 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -35,6 +35,7 @@ const TEST_FILTERS: TestFilter[] = [ ] }, {include: 'softmax'}, + {include: 'pow'}, { include: 'add ', excludes: [ diff --git a/tfjs-core/src/ops/binary_ops.ts b/tfjs-core/src/ops/binary_ops.ts index e6d7e241429..3e34249e008 100644 --- a/tfjs-core/src/ops/binary_ops.ts +++ b/tfjs-core/src/ops/binary_ops.ts @@ -276,13 +276,13 @@ function pow_(base: T|TensorLike, exp: Tensor|TensorLike): T { } return res.reshape($exp.shape); }; - return {$base: derBase, $exp: derExp}; + return {base: derBase, exp: derExp}; }; return ENGINE.runKernelFunc((backend, save) => { const y = backend.pow($base, $exp); save([$base, $exp, y]); return y; - }, {$base, $exp}, grad) as T; + }, {base: $base, exp: $exp}, grad, 'Pow') as T; } /** From 20049d142e49ae19b75e301625afe5285e813880 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 25 Feb 2020 08:53:29 -0500 Subject: [PATCH 02/35] build --- tfjs-backend-wasm/src/cc/BUILD | 1 + tfjs-backend-wasm/src/index_test.ts | 13 +++++++++++-- tfjs-core/src/ops/binary_ops.ts | 8 ++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index ff6cce26bcb..b32b78b2f76 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -599,6 +599,7 @@ tfjs_cc_library( srcs = ["kernels/Pow.cc"], deps = [ ":backend", + ":binary", ":util", ], ) diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index 370615f2cf3..3c97cf9457a 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -58,8 +58,8 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { }, 100); // Silences backend registration warnings. - spyOn(console, 'warn'); - spyOn(console, 'log'); + // spyOn(console, 'warn'); + // spyOn(console, 'log'); }); afterEach(() => { @@ -92,4 +92,13 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { expect(() => setWasmPath('too/late')) .toThrowError(/The WASM backend was already initialized. Make sure/); }); + + fit('pow', async () => { + const a = tf.tensor2d([1, -2, -3, 0, 7, 1], [2, 3]); + const b = tf.tensor2d([5, 3, 4, 5, 2, -3], [2, 3], 'int32'); + // const expected = [1, -8, 81, 0, 49, 1]; + const result = tf.pow(a, b); + const data = await result.data(); + console.log(Array.from(data)); + }); }); diff --git a/tfjs-core/src/ops/binary_ops.ts b/tfjs-core/src/ops/binary_ops.ts index 3e34249e008..52fcebbc638 100644 --- a/tfjs-core/src/ops/binary_ops.ts +++ b/tfjs-core/src/ops/binary_ops.ts @@ -276,13 +276,17 @@ function pow_(base: T|TensorLike, exp: Tensor|TensorLike): T { } return res.reshape($exp.shape); }; - return {base: derBase, exp: derExp}; + return {a: derBase, b: derExp}; }; + + const attrs = {}; + const inputsToSave = [$base, $exp]; + const outputsToSave = [true]; return ENGINE.runKernelFunc((backend, save) => { const y = backend.pow($base, $exp); save([$base, $exp, y]); return y; - }, {base: $base, exp: $exp}, grad, 'Pow') as T; + }, {a: $base, b: $exp}, grad, 'Pow', attrs, inputsToSave, outputsToSave) as T; } /** From d7aaf12ea12a806e067d57647f2a2cfbcfc425a5 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 25 Feb 2020 09:27:00 -0500 Subject: [PATCH 03/35] update --- tfjs-backend-wasm/src/cc/kernels/Pow.cc | 8 ++++---- tfjs-backend-wasm/src/index_test.ts | 1 + tfjs-core/src/ops/binary_ops.ts | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc index d6080d1ca56..646fe355fa1 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Pow.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -24,7 +24,7 @@ namespace { template -inline T pow(T a, T b) { +inline T power(T a, T b) { return pow(a, b); } } // namespace @@ -42,13 +42,13 @@ void Pow(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const DType dtype, const size_t out_id) { switch (dtype) { case DType::float32: - binary_f32(a_id, b_id, out_id, pow); + binary_f32(a_id, b_id, out_id, power); break; case DType::int32: - binary_i32(a_id, b_id, out_id, pow); + binary_i32(a_id, b_id, out_id, power); break; case DType::boolean: - binary_bool(a_id, b_id, out_id, pow); + binary_bool(a_id, b_id, out_id, power); break; default: util::warn("Pow for tensor ids %d and %d failed. Unknown dtype %d", a_id, diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index 3c97cf9457a..b7d00105195 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -97,6 +97,7 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { const a = tf.tensor2d([1, -2, -3, 0, 7, 1], [2, 3]); const b = tf.tensor2d([5, 3, 4, 5, 2, -3], [2, 3], 'int32'); // const expected = [1, -8, 81, 0, 49, 1]; + // const result = tf.pow(a, b); const result = tf.pow(a, b); const data = await result.data(); console.log(Array.from(data)); diff --git a/tfjs-core/src/ops/binary_ops.ts b/tfjs-core/src/ops/binary_ops.ts index 52fcebbc638..ce070e4d9d1 100644 --- a/tfjs-core/src/ops/binary_ops.ts +++ b/tfjs-core/src/ops/binary_ops.ts @@ -20,7 +20,7 @@ import {Tensor} from '../tensor'; import {NamedTensorMap} from '../tensor_types'; import {makeTypesMatch} from '../tensor_util'; import {convertToTensor} from '../tensor_util_env'; -import {TensorLike, upcastType} from '../types'; +import {TensorLike} from '../types'; import * as util from '../util'; import * as broadcast_util from './broadcast_util'; import {where} from './logical_ops'; @@ -247,14 +247,14 @@ function subStrict_(a: T|TensorLike, b: T|TensorLike): T { * @param exp The exponent `tf.Tensor` to pow element-wise. */ /** @doc {heading: 'Operations', subheading: 'Arithmetic'} */ -function pow_(base: T|TensorLike, exp: Tensor|TensorLike): T { - const $base = convertToTensor(base, 'base', 'pow'); - const $exp = convertToTensor(exp, 'exp', 'pow'); +function pow_( + base: Tensor|TensorLike, exp: Tensor|TensorLike): T { + let $base = convertToTensor(base, 'base', 'pow'); + let $exp = convertToTensor(exp, 'exp', 'pow'); + [$base, $exp] = makeTypesMatch($base, $exp); const outShape = broadcast_util.assertAndGetBroadcastShape($base.shape, $exp.shape); - base = $base.cast(upcastType($base.dtype, $exp.dtype)); - exp = $exp.cast(upcastType($base.dtype, $exp.dtype)); const grad = (dy: Tensor, saved: Tensor[]) => { const [$base, $exp, y] = saved; const derBase = () => { From 2f99398f9df4588d7dd9d90cba10798b057ddab4 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 25 Feb 2020 09:35:17 -0500 Subject: [PATCH 04/35] add pow --- tfjs-backend-wasm/src/index_test.ts | 4 ++-- tfjs-backend-wasm/src/setup_test.ts | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index b7d00105195..ab38cfad48d 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -93,12 +93,12 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { .toThrowError(/The WASM backend was already initialized. Make sure/); }); - fit('pow', async () => { + it('pow', async () => { const a = tf.tensor2d([1, -2, -3, 0, 7, 1], [2, 3]); const b = tf.tensor2d([5, 3, 4, 5, 2, -3], [2, 3], 'int32'); // const expected = [1, -8, 81, 0, 49, 1]; - // const result = tf.pow(a, b); const result = tf.pow(a, b); + // const result = tf.div(a, b); const data = await result.data(); console.log(Array.from(data)); }); diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 212b63cea95..4d942718f47 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -35,7 +35,15 @@ const TEST_FILTERS: TestFilter[] = [ ] }, {include: 'softmax'}, - {include: 'pow'}, + { + include: 'pow', + excludes: [ + 'gradient', // zerosLike not defined yet. + 'broadcasting same rank Tensors different shape', // Broadcasting along + // inner dims not + // supported yet. + ] + }, { include: 'add ', excludes: [ From f1021be9b313a728c06e7977c7ffcc7f343df637 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 07:23:43 -0500 Subject: [PATCH 05/35] fusedbmm running --- tfjs-backend-wasm/src/cc/BUILD | 25 ++++ tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 19 +++ tfjs-backend-wasm/src/cc/batchMatMul_impl.h | 28 ++++ .../src/cc/kernels/FusedBatchMatMul.cc | 35 +++++ .../src/cc/kernels/FusedBatchMatMul.h | 26 ++++ tfjs-backend-wasm/src/cc/kernels/Pow.cc | 2 +- tfjs-backend-wasm/src/index_test.ts | 9 ++ .../src/kernels/FusedBatchMatMul.ts | 128 ++++++++++++++++++ tfjs-core/src/ops/fused_ops.ts | 64 +++++---- 9 files changed, 305 insertions(+), 31 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/batchMatMul_impl.cc create mode 100644 tfjs-backend-wasm/src/cc/batchMatMul_impl.h create mode 100644 tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc create mode 100644 tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h create mode 100644 tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index b32b78b2f76..b6a34763ce8 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -94,6 +94,18 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "batchMatMul_impl", + srcs = ["batchMatMul_impl.cc"], + hdrs = ["batchMatMul_impl.h"], + deps = [ + ":backend", + ":prelu_impl", + ":transpose_impl", + ":util", + ], +) + tfjs_cc_library( name = "interpolate_bilinear_impl", srcs = ["interpolate_bilinear_impl.cc"], @@ -155,6 +167,7 @@ tfjs_cc_library( ":ArgMax", ":AvgPool", ":BatchMatMul", + ":FusedBatchMatMul", ":ClipByValue", ":Conv2D", ":CropAndResize", @@ -258,6 +271,18 @@ tfjs_cc_library( deps = [ ":backend", ":util", + ":batchMatMul_impl", + ], +) + +tfjs_cc_library( + name = "FusedBatchMatMul", + srcs = ["kernels/FusedBatchMatMul.cc"], + hdrs = ["kernels/FusedBatchMatMul.h"], + deps = [ + ":backend", + ":util", + ":batchMatMul_impl", ], ) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc new file mode 100644 index 00000000000..e7a13a0a4d6 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -0,0 +1,19 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +namespace tfjs { +namespace wasm { +void batchMatMul() {} +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h new file mode 100644 index 00000000000..0942ebe5690 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h @@ -0,0 +1,28 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifndef BATCHMATMUL_IMPL_H_ +#define BATCHMATMUL_IMPL_H_ + +#include + +namespace tfjs { +namespace wasm { + +void batchMatMul(); + +} +} // namespace tfjs + +#endif // BATCHMATMUL_IMPL_H_ diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc new file mode 100644 index 00000000000..b0a5154187d --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc @@ -0,0 +1,35 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include "src/cc/kernels/FusedBatchMatMul.h" + +#include "src/cc/batchMatMul_impl.h" + +namespace tfjs { +namespace wasm { + +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif + +void FusedBatchMatMul() {} +} +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h new file mode 100644 index 00000000000..9fb70193516 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h @@ -0,0 +1,26 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifndef KERNELS_FUSEDBATCHMATMUL_H_ +#define KERNELS_FUSEDBATCHMATMUL_H_ + +namespace tfjs { +namespace wasm { +extern "C" { +void FusedBatchMatMul(); +} +} // namespace wasm +} // namespace tfjs + +#endif // KERNELS_FUSEDBATCHMATMUL_H_ diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc index 646fe355fa1..b9c5bed7388 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Pow.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -25,7 +25,7 @@ namespace { template inline T power(T a, T b) { - return pow(a, b); + return std::pow(a, b); } } // namespace diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index ab38cfad48d..baa71b8ef2f 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -102,4 +102,13 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { const data = await result.data(); console.log(Array.from(data)); }); + + fit('fused batch mm', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + + const c = tf.fused.matMul({a, b}); + const data = await c.data(); + console.log(data); // 0, 8, -3, 20 + }); }); diff --git a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts new file mode 100644 index 00000000000..cceacbc0363 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts @@ -0,0 +1,128 @@ +/** + * @license + * Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {NamedAttrMap, NamedTensorInfoMap, registerKernel, TensorInfo} from '@tensorflow/tfjs-core'; + +import {BackendWasm} from '../backend_wasm'; + +interface FusedBatchMatMulInputs extends NamedTensorInfoMap { + a: TensorInfo; + b: TensorInfo; + bias?: TensorInfo; + preluActivationWeights?: TensorInfo; +} + +interface FusedBatchMatMulAttrs extends NamedAttrMap { + transposeA: boolean; + transposeB: boolean; + activation: FusableActivation; +} + +// Must match enum in batchMatMul_impl.h. +enum FusableActivation { + linear = 0, + relu = 1, + relu6 = 2, + prelu = 3 +} + +let wasmFusedBatchMatMul: ( + aId: number, aShape: Uint8Array, aShapeSize: number, bId: number, + bShape: Uint8Array, bShapeSize: number, transposeA: boolean, + transposeB: boolean, activation: number, biasId: number, + preluActivationWeightsId: number, outId: number) => void; + +function setup(backend: BackendWasm) { + wasmFusedBatchMatMul = + backend.wasm.cwrap('FusedBatchMatMul', null /* void */, [ + 'number', // a_id + 'array', // a_shape + 'number', // a_shape.length + 'number', // b_id + 'array', // b_shape + 'number', // b_shape.length + 'number', // transpose_a + 'number', // transpose_b + 'number', // activation + 'number', // biasId + 'number', // preluActivationWeightsId + 'number' // out_id + ]); +} + +function fusedBatchMatMul(args: { + inputs: FusedBatchMatMulInputs, + backend: BackendWasm, + attrs: FusedBatchMatMulAttrs +}) { + const {inputs, backend, attrs} = args; + const {a, b, bias, preluActivationWeights} = inputs; + + if (a.dtype !== 'float32' || b.dtype !== 'float32') { + throw new Error( + `FusedBatchMatMul for non non-float32 tensors not yet supported.`); + } + + const {transposeA, transposeB, activation} = attrs; + const aId = backend.dataIdMap.get(a.dataId).id; + const bId = backend.dataIdMap.get(b.dataId).id; + + let biasId = 0; + if (bias != null) { + const biasData = backend.dataIdMap.get(bias.dataId); + if (biasData.shape.length !== 1) { + throw new Error( + `FusedBatchMatMul only supports rank-1 bias but got ` + + `rank ${biasData.shape.length}.`); + } + biasId = biasData.id; + } + const preluActivationWeightsId = preluActivationWeights == null ? + 0 : + backend.dataIdMap.get(preluActivationWeights.dataId).id; + const fusedActivation = + FusableActivation[activation as {} as keyof typeof FusableActivation]; + if (fusedActivation == null) { + throw new Error( + `${activation} activation not yet supported for FusedConv2D ` + + `in the wasm backend.`); + } + + const leftDim = transposeA ? a.shape[2] : a.shape[1]; + const rightDim = transposeB ? b.shape[1] : b.shape[2]; + const batchDim = a.shape[0]; + + const out = backend.makeOutput([batchDim, leftDim, rightDim], a.dtype); + const outId = backend.dataIdMap.get(out.dataId).id; + + const aShapeBytes = new Uint8Array(new Int32Array(a.shape).buffer); + const bShapeBytes = new Uint8Array(new Int32Array(b.shape).buffer); + + wasmFusedBatchMatMul( + aId, aShapeBytes, a.shape.length, bId, bShapeBytes, b.shape.length, + transposeA, transposeB, activation, biasId, preluActivationWeightsId, + outId); + + return out; +} + +registerKernel({ + kernelName: 'FusedBatchMatMul', + backendName: 'wasm', + setupFunc: setup, + kernelFunc: fusedBatchMatMul +}); diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index 821b35406f5..dce09ebbf42 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -186,66 +186,70 @@ function fusedMatMul_({ let biasGradient = {}; if (bias != null) { - biasGradient = {$bias: () => getFusedBiasGradient($bias, dyActivation)}; + biasGradient = {bias: () => getFusedBiasGradient($bias, dyActivation)}; } if (!transposeA && !transposeB) { return Object.assign( { - $a: () => dyActivation.matMul(b3D as Tensor3D, false, true), - $b: () => a3D.matMul(dyActivation, true, false) + a: () => dyActivation.matMul(b3D as Tensor3D, false, true), + b: () => a3D.matMul(dyActivation, true, false) }, biasGradient); } else if (!transposeA && transposeB) { return Object.assign( { - $a: () => dyActivation.matMul(b3D as Tensor3D, false, false), - $b: () => dyActivation.matMul(a3D as Tensor3D, true, false) + a: () => dyActivation.matMul(b3D as Tensor3D, false, false), + b: () => dyActivation.matMul(a3D as Tensor3D, true, false) }, biasGradient); } else if (transposeA && !transposeB) { return Object.assign( { - $a: () => b3D.matMul(dyActivation, false, true), - $b: () => a3D.matMul(dyActivation, false, false) + a: () => b3D.matMul(dyActivation, false, true), + b: () => a3D.matMul(dyActivation, false, false) }, biasGradient); } else { return Object.assign( { - $a: () => b3D.matMul(dyActivation, true, true), - $b: () => dyActivation.matMul(a3D as Tensor3D, true, true) + a: () => b3D.matMul(dyActivation, true, true), + b: () => dyActivation.matMul(a3D as Tensor3D, true, true) }, biasGradient); } }; - const inputs: { - $a: Tensor, - $b: Tensor, - $bias?: Tensor, - $preluActivationWeights?: Tensor - } = {$a: a3D, $b: b3D}; + const inputs: + {a: Tensor, b: Tensor, + bias?: Tensor, + preluActivationWeights?: Tensor} = {a: a3D, b: b3D}; if (bias != null) { - inputs.$bias = $bias; + inputs.bias = $bias; } if (preluActivationWeights != null) { - inputs.$preluActivationWeights = $preluActivationWeights; + inputs.preluActivationWeights = $preluActivationWeights; } - const res = ENGINE.runKernelFunc((backend, save) => { - const y = backend.fusedBatchMatMul({ - a: a3D, - b: b3D, - transposeA, - transposeB, - bias: $bias, - activation, - preluActivationWeights: $preluActivationWeights - }); - save([a3D, b3D, y]); - return y; - }, inputs, grad); + const inputsToSave = [a3D, b3D]; + const outputsToSave = [true]; + + const res = ENGINE.runKernelFunc( + (backend, save) => { + const y = backend.fusedBatchMatMul({ + a: a3D, + b: b3D, + transposeA, + transposeB, + bias: $bias, + activation, + preluActivationWeights: $preluActivationWeights + }); + save([a3D, b3D, y]); + return y; + }, + inputs, grad, 'FusedBatchMatMul', {transposeA, transposeB, activation}, + inputsToSave, outputsToSave); return res.reshape(outShape) as T; } From ad3778ab74f6d49798a76ae705a766843f2fc02a Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 07:48:07 -0500 Subject: [PATCH 06/35] basic --- tfjs-backend-wasm/src/cc/batchMatMul_impl.h | 4 +- .../src/cc/kernels/FusedBatchMatMul.cc | 192 +++++++++++++++++- .../src/cc/kernels/FusedBatchMatMul.h | 19 +- 3 files changed, 206 insertions(+), 9 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h index 0942ebe5690..3d315f237cb 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h @@ -20,9 +20,11 @@ namespace tfjs { namespace wasm { +enum FusableActivation { LINEAR = 0, RELU = 1, RELU6 = 2, PRELU = 3 }; + void batchMatMul(); -} +} // namespace wasm } // namespace tfjs #endif // BATCHMATMUL_IMPL_H_ diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc index b0a5154187d..bc58f9580aa 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc @@ -16,20 +16,204 @@ #include #endif -#include "src/cc/kernels/FusedBatchMatMul.h" +#include +#include +#include +#include +#include +#include +#include + +#include "src/cc/backend.h" +#include "src/cc/util.h" #include "src/cc/batchMatMul_impl.h" +#include "src/cc/kernels/FusedBatchMatMul.h" + +const size_t kBlockSize = 48; + +namespace { +// We use std::tuple as the cache key as it implements the compare operator +// needed for std::map. +typedef std::tuple OperatorCacheKey; + +// The operator cache maps the weights id to the xnn_operator_t instantiated for +// this set of weights. +std::map operator_cache; + +void delete_xnn_operator(const size_t weights_id) { + xnn_operator_t fully_connected_op = operator_cache.at(weights_id); + xnn_delete_operator(fully_connected_op); + tfjs::backend::xnn_operator_count--; + + operator_cache.erase(weights_id); +} + +void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const size_t out_id) { + auto& a_info = tfjs::backend::get_tensor_info(a_id); + auto& b_info = tfjs::backend::get_tensor_info(b_id); + auto& out_info = tfjs::backend::get_tensor_info_out(out_id); + + const float* a_buf = a_info.f32(); + const float* b_buf = b_info.f32(); + float* out_buf = out_info.f32_write(); + + xnn_operator_t fully_connected_op = nullptr; + + OperatorCacheKey cache_key = {b_id}; + + // We assume b is the weights and cache the xnn operator on it. + auto operator_cache_idx = operator_cache.find(cache_key); + if (operator_cache_idx == operator_cache.end()) { + const size_t input_channels = b_shape_ptr[1]; + const size_t output_channels = b_shape_ptr[2]; + const size_t input_stride = input_channels; + const size_t output_stride = output_channels; + const float* bias = nullptr; + + const float output_min = -std::numeric_limits::infinity(); + const float output_max = std::numeric_limits::infinity(); + + // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this + // automatically so we have to tell XNNPack to do the transposing. + const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; + xnn_status status = xnn_create_fully_connected_nc_f32( + input_channels, output_channels, input_stride, output_stride, b_buf, + bias, output_min, output_max, flags, &fully_connected_op); + if (status != xnn_status_success) { + tfjs::util::warn( + "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " + "Got status %d. Use -c dbg to see XNN logs.", + status); + return; + } + + operator_cache.insert({cache_key, fully_connected_op}); + + tfjs::backend::register_disposal_callback(b_id, *delete_xnn_operator); + + tfjs::backend::xnn_operator_count++; + } else { + fully_connected_op = operator_cache_idx->second; + } + + const size_t batch_size = a_shape_ptr[1]; + xnn_status status = + xnn_setup_fully_connected_nc_f32(fully_connected_op, batch_size, a_buf, + out_buf, nullptr /* thread pool */); + if (status != xnn_status_success) { + tfjs::util::warn( + "XNN status for xnn_setup_fully_connected_nc_f32 is not successful. " + "Got status %d. Use -c dbg to see XNN logs.", + status); + return; + } + + xnn_run_operator(fully_connected_op, nullptr /* thread pool */); +} + +void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const size_t out_id) { + const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; + const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; + const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; + const size_t batch_dim = a_shape_ptr[0]; + + std::vector a_shape(a_shape_ptr, a_shape_ptr + a_shape_len); + std::vector b_shape(b_shape_ptr, b_shape_ptr + b_shape_len); + const std::vector a_strides = tfjs::util::compute_strides(a_shape); + const std::vector b_strides = tfjs::util::compute_strides(b_shape); + + size_t a_batch = a_strides[0]; + size_t a_outer_step, a_inner_step; + if (transpose_a) { + a_outer_step = 1; + a_inner_step = a_strides[1]; + } else { + a_outer_step = a_strides[1]; + a_inner_step = 1; + } + size_t b_batch = b_strides[0]; + size_t b_outer_step, b_inner_step; + if (transpose_b) { + b_outer_step = b_strides[1]; + b_inner_step = 1; + } else { + b_outer_step = 1; + b_inner_step = b_strides[1]; + } + + auto& a_info = tfjs::backend::get_tensor_info(a_id); + auto& b_info = tfjs::backend::get_tensor_info(b_id); + auto& out_info = tfjs::backend::get_tensor_info_out(out_id); + + const float* a_buf = a_info.f32(); + const float* b_buf = b_info.f32(); + float* out_buf = out_info.f32_write(); + + const size_t size = left_dim * right_dim; + + // Zero out the output buffer because it might have been used before. + std::fill(out_buf, out_buf + batch_dim * size, 0); + + for (size_t b = 0; b < batch_dim; ++b) { + for (size_t i0 = 0; i0 < left_dim; i0 += kBlockSize) { + for (size_t j0 = 0; j0 < right_dim; j0 += kBlockSize) { + for (size_t k0 = 0; k0 < shared_dim; k0 += kBlockSize) { + // for when kBlockSize doesn't evenly divide the input + const size_t i_block = std::min(i0 + kBlockSize, left_dim); + const size_t j_block = std::min(j0 + kBlockSize, right_dim); + const size_t k_block = std::min(k0 + kBlockSize, shared_dim); + + for (size_t i = i0; i < i_block; ++i) { + for (size_t j = j0; j < j_block; ++j) { + float sum = 0.0; + + for (size_t k = k0; k < k_block; ++k) { + sum += + a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * + b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; + } + out_buf[b * size + (i * right_dim + j)] += sum; + } + } + } + } + } + } +} +} // namespace namespace tfjs { namespace wasm { - +// We use C-style API to interface with Javascript. extern "C" { #ifdef __EMSCRIPTEN__ EMSCRIPTEN_KEEPALIVE #endif - -void FusedBatchMatMul() {} +void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id) { + if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && + b_shape_ptr[0] == 1) { + xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + out_id); + } else { + slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, + b_shape_len, transpose_a, transpose_b, out_id); + } } + +} // extern "C" } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h index 9fb70193516..6af656c47b3 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h @@ -12,15 +12,26 @@ * limitations under the License. * ===========================================================================*/ -#ifndef KERNELS_FUSEDBATCHMATMUL_H_ -#define KERNELS_FUSEDBATCHMATMUL_H_ +#ifndef KERNELS_BATCHMATMUL_H_ +#define KERNELS_BATCHMATMUL_H_ + +#include + +#include "src/cc/batchMatMul_impl.h" namespace tfjs { namespace wasm { extern "C" { -void FusedBatchMatMul(); + +void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id); } + } // namespace wasm } // namespace tfjs -#endif // KERNELS_FUSEDBATCHMATMUL_H_ +#endif // KERNELS_BATCHMATMUL_H_ From 0819120eb5ea451bc9524c9f2d8ffcb03cd4f499 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 07:57:43 -0500 Subject: [PATCH 07/35] yup --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 193 +++++++++++++++++- tfjs-backend-wasm/src/cc/batchMatMul_impl.h | 7 +- .../src/cc/kernels/FusedBatchMatMul.cc | 180 +--------------- 3 files changed, 201 insertions(+), 179 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index e7a13a0a4d6..359669012df 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -12,8 +12,199 @@ * limitations under the License. * ===========================================================================*/ +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "src/cc/backend.h" +#include "src/cc/util.h" + +#include "src/cc/batchMatMul_impl.h" + +const size_t kBlockSize = 48; + +namespace { +// We use std::tuple as the cache key as it implements the compare operator +// needed for std::map. +typedef std::tuple OperatorCacheKey; + +// The operator cache maps the weights id to the xnn_operator_t instantiated for +// this set of weights. +std::map operator_cache; + +void delete_xnn_operator(const size_t weights_id) { + xnn_operator_t fully_connected_op = operator_cache.at(weights_id); + xnn_delete_operator(fully_connected_op); + tfjs::backend::xnn_operator_count--; + + operator_cache.erase(weights_id); +} + +void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const size_t out_id) { + auto& a_info = tfjs::backend::get_tensor_info(a_id); + auto& b_info = tfjs::backend::get_tensor_info(b_id); + auto& out_info = tfjs::backend::get_tensor_info_out(out_id); + + const float* a_buf = a_info.f32(); + const float* b_buf = b_info.f32(); + float* out_buf = out_info.f32_write(); + + xnn_operator_t fully_connected_op = nullptr; + + OperatorCacheKey cache_key = {b_id}; + + // We assume b is the weights and cache the xnn operator on it. + auto operator_cache_idx = operator_cache.find(cache_key); + if (operator_cache_idx == operator_cache.end()) { + const size_t input_channels = b_shape_ptr[1]; + const size_t output_channels = b_shape_ptr[2]; + const size_t input_stride = input_channels; + const size_t output_stride = output_channels; + const float* bias = nullptr; + + const float output_min = -std::numeric_limits::infinity(); + const float output_max = std::numeric_limits::infinity(); + + // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this + // automatically so we have to tell XNNPack to do the transposing. + const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; + xnn_status status = xnn_create_fully_connected_nc_f32( + input_channels, output_channels, input_stride, output_stride, b_buf, + bias, output_min, output_max, flags, &fully_connected_op); + if (status != xnn_status_success) { + tfjs::util::warn( + "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " + "Got status %d. Use -c dbg to see XNN logs.", + status); + return; + } + + operator_cache.insert({cache_key, fully_connected_op}); + + tfjs::backend::register_disposal_callback(b_id, *delete_xnn_operator); + + tfjs::backend::xnn_operator_count++; + } else { + fully_connected_op = operator_cache_idx->second; + } + + const size_t batch_size = a_shape_ptr[1]; + xnn_status status = + xnn_setup_fully_connected_nc_f32(fully_connected_op, batch_size, a_buf, + out_buf, nullptr /* thread pool */); + if (status != xnn_status_success) { + tfjs::util::warn( + "XNN status for xnn_setup_fully_connected_nc_f32 is not successful. " + "Got status %d. Use -c dbg to see XNN logs.", + status); + return; + } + + xnn_run_operator(fully_connected_op, nullptr /* thread pool */); +} + +void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const size_t out_id) { + const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; + const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; + const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; + const size_t batch_dim = a_shape_ptr[0]; + + std::vector a_shape(a_shape_ptr, a_shape_ptr + a_shape_len); + std::vector b_shape(b_shape_ptr, b_shape_ptr + b_shape_len); + const std::vector a_strides = tfjs::util::compute_strides(a_shape); + const std::vector b_strides = tfjs::util::compute_strides(b_shape); + + size_t a_batch = a_strides[0]; + size_t a_outer_step, a_inner_step; + if (transpose_a) { + a_outer_step = 1; + a_inner_step = a_strides[1]; + } else { + a_outer_step = a_strides[1]; + a_inner_step = 1; + } + size_t b_batch = b_strides[0]; + size_t b_outer_step, b_inner_step; + if (transpose_b) { + b_outer_step = b_strides[1]; + b_inner_step = 1; + } else { + b_outer_step = 1; + b_inner_step = b_strides[1]; + } + + auto& a_info = tfjs::backend::get_tensor_info(a_id); + auto& b_info = tfjs::backend::get_tensor_info(b_id); + auto& out_info = tfjs::backend::get_tensor_info_out(out_id); + + const float* a_buf = a_info.f32(); + const float* b_buf = b_info.f32(); + float* out_buf = out_info.f32_write(); + + const size_t size = left_dim * right_dim; + + // Zero out the output buffer because it might have been used before. + std::fill(out_buf, out_buf + batch_dim * size, 0); + + for (size_t b = 0; b < batch_dim; ++b) { + for (size_t i0 = 0; i0 < left_dim; i0 += kBlockSize) { + for (size_t j0 = 0; j0 < right_dim; j0 += kBlockSize) { + for (size_t k0 = 0; k0 < shared_dim; k0 += kBlockSize) { + // for when kBlockSize doesn't evenly divide the input + const size_t i_block = std::min(i0 + kBlockSize, left_dim); + const size_t j_block = std::min(j0 + kBlockSize, right_dim); + const size_t k_block = std::min(k0 + kBlockSize, shared_dim); + + for (size_t i = i0; i < i_block; ++i) { + for (size_t j = j0; j < j_block; ++j) { + float sum = 0.0; + + for (size_t k = k0; k < k_block; ++k) { + sum += + a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * + b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; + } + out_buf[b * size + (i * right_dim + j)] += sum; + } + } + } + } + } + } +} +} // namespace + namespace tfjs { namespace wasm { -void batchMatMul() {} +void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id) { + if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && + b_shape_ptr[0] == 1) { + xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + out_id); + } else { + slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, + b_shape_len, transpose_a, transpose_b, out_id); + } +} } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h index 3d315f237cb..c69c204113a 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h @@ -22,7 +22,12 @@ namespace wasm { enum FusableActivation { LINEAR = 0, RELU = 1, RELU6 = 2, PRELU = 3 }; -void batchMatMul(); +void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id); } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc index bc58f9580aa..fa5783df4cb 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc @@ -16,180 +16,11 @@ #include #endif -#include -#include #include -#include -#include -#include -#include - -#include "src/cc/backend.h" -#include "src/cc/util.h" #include "src/cc/batchMatMul_impl.h" #include "src/cc/kernels/FusedBatchMatMul.h" -const size_t kBlockSize = 48; - -namespace { -// We use std::tuple as the cache key as it implements the compare operator -// needed for std::map. -typedef std::tuple OperatorCacheKey; - -// The operator cache maps the weights id to the xnn_operator_t instantiated for -// this set of weights. -std::map operator_cache; - -void delete_xnn_operator(const size_t weights_id) { - xnn_operator_t fully_connected_op = operator_cache.at(weights_id); - xnn_delete_operator(fully_connected_op); - tfjs::backend::xnn_operator_count--; - - operator_cache.erase(weights_id); -} - -void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const size_t out_id) { - auto& a_info = tfjs::backend::get_tensor_info(a_id); - auto& b_info = tfjs::backend::get_tensor_info(b_id); - auto& out_info = tfjs::backend::get_tensor_info_out(out_id); - - const float* a_buf = a_info.f32(); - const float* b_buf = b_info.f32(); - float* out_buf = out_info.f32_write(); - - xnn_operator_t fully_connected_op = nullptr; - - OperatorCacheKey cache_key = {b_id}; - - // We assume b is the weights and cache the xnn operator on it. - auto operator_cache_idx = operator_cache.find(cache_key); - if (operator_cache_idx == operator_cache.end()) { - const size_t input_channels = b_shape_ptr[1]; - const size_t output_channels = b_shape_ptr[2]; - const size_t input_stride = input_channels; - const size_t output_stride = output_channels; - const float* bias = nullptr; - - const float output_min = -std::numeric_limits::infinity(); - const float output_max = std::numeric_limits::infinity(); - - // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this - // automatically so we have to tell XNNPack to do the transposing. - const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; - xnn_status status = xnn_create_fully_connected_nc_f32( - input_channels, output_channels, input_stride, output_stride, b_buf, - bias, output_min, output_max, flags, &fully_connected_op); - if (status != xnn_status_success) { - tfjs::util::warn( - "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " - "Got status %d. Use -c dbg to see XNN logs.", - status); - return; - } - - operator_cache.insert({cache_key, fully_connected_op}); - - tfjs::backend::register_disposal_callback(b_id, *delete_xnn_operator); - - tfjs::backend::xnn_operator_count++; - } else { - fully_connected_op = operator_cache_idx->second; - } - - const size_t batch_size = a_shape_ptr[1]; - xnn_status status = - xnn_setup_fully_connected_nc_f32(fully_connected_op, batch_size, a_buf, - out_buf, nullptr /* thread pool */); - if (status != xnn_status_success) { - tfjs::util::warn( - "XNN status for xnn_setup_fully_connected_nc_f32 is not successful. " - "Got status %d. Use -c dbg to see XNN logs.", - status); - return; - } - - xnn_run_operator(fully_connected_op, nullptr /* thread pool */); -} - -void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const size_t out_id) { - const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; - const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; - const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; - const size_t batch_dim = a_shape_ptr[0]; - - std::vector a_shape(a_shape_ptr, a_shape_ptr + a_shape_len); - std::vector b_shape(b_shape_ptr, b_shape_ptr + b_shape_len); - const std::vector a_strides = tfjs::util::compute_strides(a_shape); - const std::vector b_strides = tfjs::util::compute_strides(b_shape); - - size_t a_batch = a_strides[0]; - size_t a_outer_step, a_inner_step; - if (transpose_a) { - a_outer_step = 1; - a_inner_step = a_strides[1]; - } else { - a_outer_step = a_strides[1]; - a_inner_step = 1; - } - size_t b_batch = b_strides[0]; - size_t b_outer_step, b_inner_step; - if (transpose_b) { - b_outer_step = b_strides[1]; - b_inner_step = 1; - } else { - b_outer_step = 1; - b_inner_step = b_strides[1]; - } - - auto& a_info = tfjs::backend::get_tensor_info(a_id); - auto& b_info = tfjs::backend::get_tensor_info(b_id); - auto& out_info = tfjs::backend::get_tensor_info_out(out_id); - - const float* a_buf = a_info.f32(); - const float* b_buf = b_info.f32(); - float* out_buf = out_info.f32_write(); - - const size_t size = left_dim * right_dim; - - // Zero out the output buffer because it might have been used before. - std::fill(out_buf, out_buf + batch_dim * size, 0); - - for (size_t b = 0; b < batch_dim; ++b) { - for (size_t i0 = 0; i0 < left_dim; i0 += kBlockSize) { - for (size_t j0 = 0; j0 < right_dim; j0 += kBlockSize) { - for (size_t k0 = 0; k0 < shared_dim; k0 += kBlockSize) { - // for when kBlockSize doesn't evenly divide the input - const size_t i_block = std::min(i0 + kBlockSize, left_dim); - const size_t j_block = std::min(j0 + kBlockSize, right_dim); - const size_t k_block = std::min(k0 + kBlockSize, shared_dim); - - for (size_t i = i0; i < i_block; ++i) { - for (size_t j = j0; j < j_block; ++j) { - float sum = 0.0; - - for (size_t k = k0; k < k_block; ++k) { - sum += - a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * - b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; - } - out_buf[b * size + (i * right_dim + j)] += sum; - } - } - } - } - } - } -} -} // namespace - namespace tfjs { namespace wasm { // We use C-style API to interface with Javascript. @@ -204,14 +35,9 @@ void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, const bool transpose_a, const bool transpose_b, const FusableActivation activation, const size_t bias_id, const size_t prelu_weights_id, const size_t out_id) { - if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && - b_shape_ptr[0] == 1) { - xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, - out_id); - } else { - slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, out_id); - } + tfjs::wasm::batchMatMul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, + b_shape_len, transpose_a, transpose_b, activation, + bias_id, prelu_weights_id, out_id); } } // extern "C" From e2b152c57abc3c682a71ff2a1bffe058d8353320 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 08:03:13 -0500 Subject: [PATCH 08/35] create shared impl --- .../src/cc/kernels/BatchMatMul.cc | 184 +----------------- tfjs-backend-wasm/src/index_test.ts | 3 +- 2 files changed, 9 insertions(+), 178 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc index e0417dbacaf..5318dbeb3aa 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc @@ -16,179 +16,11 @@ #include #endif -#include -#include #include -#include -#include -#include -#include - -#include "src/cc/backend.h" -#include "src/cc/util.h" +#include "src/cc/batchMatMul_impl.h" #include "src/cc/kernels/BatchMatMul.h" -const size_t kBlockSize = 48; - -namespace { -// We use std::tuple as the cache key as it implements the compare operator -// needed for std::map. -typedef std::tuple OperatorCacheKey; - -// The operator cache maps the weights id to the xnn_operator_t instantiated for -// this set of weights. -std::map operator_cache; - -void delete_xnn_operator(const size_t weights_id) { - xnn_operator_t fully_connected_op = operator_cache.at(weights_id); - xnn_delete_operator(fully_connected_op); - tfjs::backend::xnn_operator_count--; - - operator_cache.erase(weights_id); -} - -void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const size_t out_id) { - auto& a_info = tfjs::backend::get_tensor_info(a_id); - auto& b_info = tfjs::backend::get_tensor_info(b_id); - auto& out_info = tfjs::backend::get_tensor_info_out(out_id); - - const float* a_buf = a_info.f32(); - const float* b_buf = b_info.f32(); - float* out_buf = out_info.f32_write(); - - xnn_operator_t fully_connected_op = nullptr; - - OperatorCacheKey cache_key = {b_id}; - - // We assume b is the weights and cache the xnn operator on it. - auto operator_cache_idx = operator_cache.find(cache_key); - if (operator_cache_idx == operator_cache.end()) { - const size_t input_channels = b_shape_ptr[1]; - const size_t output_channels = b_shape_ptr[2]; - const size_t input_stride = input_channels; - const size_t output_stride = output_channels; - const float* bias = nullptr; - - const float output_min = -std::numeric_limits::infinity(); - const float output_max = std::numeric_limits::infinity(); - - // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this - // automatically so we have to tell XNNPack to do the transposing. - const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; - xnn_status status = xnn_create_fully_connected_nc_f32( - input_channels, output_channels, input_stride, output_stride, b_buf, - bias, output_min, output_max, flags, &fully_connected_op); - if (status != xnn_status_success) { - tfjs::util::warn( - "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " - "Got status %d. Use -c dbg to see XNN logs.", - status); - return; - } - - operator_cache.insert({cache_key, fully_connected_op}); - - tfjs::backend::register_disposal_callback(b_id, *delete_xnn_operator); - - tfjs::backend::xnn_operator_count++; - } else { - fully_connected_op = operator_cache_idx->second; - } - - const size_t batch_size = a_shape_ptr[1]; - xnn_status status = - xnn_setup_fully_connected_nc_f32(fully_connected_op, batch_size, a_buf, - out_buf, nullptr /* thread pool */); - if (status != xnn_status_success) { - tfjs::util::warn( - "XNN status for xnn_setup_fully_connected_nc_f32 is not successful. " - "Got status %d. Use -c dbg to see XNN logs.", - status); - return; - } - - xnn_run_operator(fully_connected_op, nullptr /* thread pool */); -} - -void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const size_t out_id) { - const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; - const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; - const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; - const size_t batch_dim = a_shape_ptr[0]; - - std::vector a_shape(a_shape_ptr, a_shape_ptr + a_shape_len); - std::vector b_shape(b_shape_ptr, b_shape_ptr + b_shape_len); - const std::vector a_strides = tfjs::util::compute_strides(a_shape); - const std::vector b_strides = tfjs::util::compute_strides(b_shape); - - size_t a_batch = a_strides[0]; - size_t a_outer_step, a_inner_step; - if (transpose_a) { - a_outer_step = 1; - a_inner_step = a_strides[1]; - } else { - a_outer_step = a_strides[1]; - a_inner_step = 1; - } - size_t b_batch = b_strides[0]; - size_t b_outer_step, b_inner_step; - if (transpose_b) { - b_outer_step = b_strides[1]; - b_inner_step = 1; - } else { - b_outer_step = 1; - b_inner_step = b_strides[1]; - } - - auto& a_info = tfjs::backend::get_tensor_info(a_id); - auto& b_info = tfjs::backend::get_tensor_info(b_id); - auto& out_info = tfjs::backend::get_tensor_info_out(out_id); - - const float* a_buf = a_info.f32(); - const float* b_buf = b_info.f32(); - float* out_buf = out_info.f32_write(); - - const size_t size = left_dim * right_dim; - - // Zero out the output buffer because it might have been used before. - std::fill(out_buf, out_buf + batch_dim * size, 0); - - for (size_t b = 0; b < batch_dim; ++b) { - for (size_t i0 = 0; i0 < left_dim; i0 += kBlockSize) { - for (size_t j0 = 0; j0 < right_dim; j0 += kBlockSize) { - for (size_t k0 = 0; k0 < shared_dim; k0 += kBlockSize) { - // for when kBlockSize doesn't evenly divide the input - const size_t i_block = std::min(i0 + kBlockSize, left_dim); - const size_t j_block = std::min(j0 + kBlockSize, right_dim); - const size_t k_block = std::min(k0 + kBlockSize, shared_dim); - - for (size_t i = i0; i < i_block; ++i) { - for (size_t j = j0; j < j_block; ++j) { - float sum = 0.0; - - for (size_t k = k0; k < k_block; ++k) { - sum += - a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * - b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; - } - out_buf[b * size + (i * right_dim + j)] += sum; - } - } - } - } - } - } -} -} // namespace - namespace tfjs { namespace wasm { // We use C-style API to interface with Javascript. @@ -202,14 +34,12 @@ void BatchMatMul(const size_t a_id, const size_t* a_shape_ptr, const size_t* b_shape_ptr, const size_t b_shape_len, const bool transpose_a, const bool transpose_b, const size_t out_id) { - if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && - b_shape_ptr[0] == 1) { - xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, - out_id); - } else { - slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, out_id); - } + const size_t bias_id = 0; + const size_t prelu_weights_id = 0; + const FusableActivation activation = FusableActivation::LINEAR; + tfjs::wasm::batchMatMul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, + b_shape_len, transpose_a, transpose_b, activation, + bias_id, prelu_weights_id, out_id); } } // extern "C" diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index baa71b8ef2f..5b075a5fa93 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -107,7 +107,8 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.fused.matMul({a, b}); + // const c = tf.fused.matMul({a, b}); + const c = tf.matMul(a, b); const data = await c.data(); console.log(data); // 0, 8, -3, 20 }); From d9caed2ecc1e2d0f6db63ff7cd6b91d43f942fbb Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 08:43:11 -0500 Subject: [PATCH 09/35] add activ --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 34 ++++++++++++++----- tfjs-backend-wasm/src/index_test.ts | 4 +-- .../src/kernels/FusedBatchMatMul.ts | 2 +- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index 359669012df..8002f52a2d6 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -51,7 +51,8 @@ void delete_xnn_operator(const size_t weights_id) { void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, - const size_t out_id) { + const size_t out_id, const float output_min, + const float output_max) { auto& a_info = tfjs::backend::get_tensor_info(a_id); auto& b_info = tfjs::backend::get_tensor_info(b_id); auto& out_info = tfjs::backend::get_tensor_info_out(out_id); @@ -73,9 +74,6 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t output_stride = output_channels; const float* bias = nullptr; - const float output_min = -std::numeric_limits::infinity(); - const float output_max = std::numeric_limits::infinity(); - // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this // automatically so we have to tell XNNPack to do the transposing. const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; @@ -118,7 +116,8 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, const bool transpose_a, const bool transpose_b, - const size_t out_id) { + const size_t out_id, const float output_min, + const float output_max) { const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; @@ -179,7 +178,10 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; } - out_buf[b * size + (i * right_dim + j)] += sum; + size_t out_buf_index = b * size + (i * right_dim + j); + float current = out_buf[out_buf_index]; + out_buf[out_buf_index] = + std::max(std::min(current + sum, output_max), output_min); } } } @@ -197,13 +199,29 @@ void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, const bool transpose_a, const bool transpose_b, const FusableActivation activation, const size_t bias_id, const size_t prelu_weights_id, const size_t out_id) { + FusableActivation clamp_method = activation; + if (activation == FusableActivation::PRELU) { + clamp_method = FusableActivation::LINEAR; + } + + float output_min = -std::numeric_limits::infinity(); + float output_max = std::numeric_limits::infinity(); + + if (activation == FusableActivation::RELU) { + output_min = 0; + } else if (activation == FusableActivation::RELU6) { + output_min = 0; + output_max = 6; + } + if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && b_shape_ptr[0] == 1) { xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, - out_id); + out_id, output_min, output_max); } else { slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, out_id); + b_shape_len, transpose_a, transpose_b, out_id, output_min, + output_max); } } } // namespace wasm diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index 5b075a5fa93..dc511c43ffd 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -107,8 +107,8 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - // const c = tf.fused.matMul({a, b}); - const c = tf.matMul(a, b); + const c = tf.fused.matMul({a, b, activation: 'relu'}); + // const c = tf.matMul(a, b); const data = await c.data(); console.log(data); // 0, 8, -3, 20 }); diff --git a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts index cceacbc0363..01b59a49fc9 100644 --- a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts @@ -114,7 +114,7 @@ function fusedBatchMatMul(args: { wasmFusedBatchMatMul( aId, aShapeBytes, a.shape.length, bId, bShapeBytes, b.shape.length, - transposeA, transposeB, activation, biasId, preluActivationWeightsId, + transposeA, transposeB, fusedActivation, biasId, preluActivationWeightsId, outId); return out; From a97619de154761ee73a26969a4bcca84c9b8d416 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 09:57:45 -0500 Subject: [PATCH 10/35] add clamp to cache key --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 60 ++++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index 8002f52a2d6..7805937834a 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -18,10 +18,14 @@ #include #include +#include #include #include #include +#include #include +#include +#include #include #include "src/cc/backend.h" @@ -34,25 +38,61 @@ const size_t kBlockSize = 48; namespace { // We use std::tuple as the cache key as it implements the compare operator // needed for std::map. -typedef std::tuple OperatorCacheKey; +typedef std::tuple OperatorCacheKey; // The operator cache maps the weights id to the xnn_operator_t instantiated for // this set of weights. std::map operator_cache; -void delete_xnn_operator(const size_t weights_id) { - xnn_operator_t fully_connected_op = operator_cache.at(weights_id); - xnn_delete_operator(fully_connected_op); - tfjs::backend::xnn_operator_count--; +std::unordered_map> + b_operator_cache_key_map; + +void erase_from_cache(const size_t tensor_id, + std::unordered_map>& + operator_cache_key_map) { + auto operator_cache_keys_idx = operator_cache_key_map.find(tensor_id); + if (operator_cache_keys_idx != operator_cache_key_map.end()) { + std::vector& operator_cache_keys = + operator_cache_keys_idx->second; + for (auto& operator_cache_key : operator_cache_keys) { + auto operator_cache_key_idx = operator_cache.find(operator_cache_key); + if (operator_cache_key_idx != operator_cache.end()) { + auto& cached_op = operator_cache_key_idx->second; + xnn_delete_operator(cached_op); + tfjs::backend::xnn_operator_count--; + + operator_cache.erase(operator_cache_key); + } + } + operator_cache_key_map.erase(tensor_id); + } +} - operator_cache.erase(weights_id); +void delete_xnn_operators(const size_t weights_id) { + erase_from_cache(weights_id, b_operator_cache_key_map); +} + +void associate_tensor_with_key( + const size_t tensor_id, const OperatorCacheKey& cache_key, + std::unordered_map>& + operator_cache_key_map) { + auto cache_keys_idx = operator_cache_key_map.find(tensor_id); + if (cache_keys_idx == operator_cache_key_map.end()) { + std::vector cache_keys = {cache_key}; + operator_cache_key_map.emplace(tensor_id, std::move(cache_keys)); + tfjs::backend::register_disposal_callback(tensor_id, *delete_xnn_operators); + + } else { + auto& cache_keys = operator_cache_key_map.at(tensor_id); + cache_keys.emplace_back(cache_key); + } } void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, const size_t out_id, const float output_min, - const float output_max) { + const float output_max, const size_t clamp_method) { auto& a_info = tfjs::backend::get_tensor_info(a_id); auto& b_info = tfjs::backend::get_tensor_info(b_id); auto& out_info = tfjs::backend::get_tensor_info_out(out_id); @@ -63,7 +103,7 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, xnn_operator_t fully_connected_op = nullptr; - OperatorCacheKey cache_key = {b_id}; + OperatorCacheKey cache_key = {b_id, clamp_method}; // We assume b is the weights and cache the xnn operator on it. auto operator_cache_idx = operator_cache.find(cache_key); @@ -90,7 +130,7 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, operator_cache.insert({cache_key, fully_connected_op}); - tfjs::backend::register_disposal_callback(b_id, *delete_xnn_operator); + associate_tensor_with_key(b_id, cache_key, b_operator_cache_key_map); tfjs::backend::xnn_operator_count++; } else { @@ -217,7 +257,7 @@ void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && b_shape_ptr[0] == 1) { xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, - out_id, output_min, output_max); + out_id, output_min, output_max, clamp_method); } else { slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, transpose_a, transpose_b, out_id, output_min, From 649403c65429fe55b33180faf3d31eec7b1fa317 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 10:03:41 -0500 Subject: [PATCH 11/35] prelu --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 7 +++++++ tfjs-backend-wasm/src/index_test.ts | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index 7805937834a..ce64d2e61e3 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -29,6 +29,7 @@ #include #include "src/cc/backend.h" +#include "src/cc/prelu_impl.h" #include "src/cc/util.h" #include "src/cc/batchMatMul_impl.h" @@ -263,6 +264,12 @@ void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, b_shape_len, transpose_a, transpose_b, out_id, output_min, output_max); } + + auto& out_info = backend::get_tensor_info_out(out_id); + float* out_buf = out_info.f32_write(); + if (activation == FusableActivation::PRELU) { + prelu(out_buf, out_info.size, prelu_weights_id, out_id); + } } } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index dc511c43ffd..9a8d8163c12 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -106,8 +106,10 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { fit('fused batch mm', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); - const c = tf.fused.matMul({a, b, activation: 'relu'}); + const c = tf.fused.matMul( + {a, b, activation: 'prelu', preluActivationWeights: alpha}); // const c = tf.matMul(a, b); const data = await c.data(); console.log(data); // 0, 8, -3, 20 From 698a93c8ddac61e8607de24537159546f02184c6 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 10:22:30 -0500 Subject: [PATCH 12/35] xnn bias --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 39 ++++++++++++++------ tfjs-backend-wasm/src/index_test.ts | 16 ++++++++ 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index ce64d2e61e3..dd1d97f4d73 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -39,7 +39,7 @@ const size_t kBlockSize = 48; namespace { // We use std::tuple as the cache key as it implements the compare operator // needed for std::map. -typedef std::tuple OperatorCacheKey; +typedef std::tuple OperatorCacheKey; // The operator cache maps the weights id to the xnn_operator_t instantiated for // this set of weights. @@ -48,6 +48,9 @@ std::map operator_cache; std::unordered_map> b_operator_cache_key_map; +std::unordered_map> + bias_operator_cache_key_map; + void erase_from_cache(const size_t tensor_id, std::unordered_map>& operator_cache_key_map) { @@ -92,8 +95,9 @@ void associate_tensor_with_key( void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, - const size_t out_id, const float output_min, - const float output_max, const size_t clamp_method) { + const size_t out_id, const size_t bias_id, + const float output_min, const float output_max, + const size_t clamp_method) { auto& a_info = tfjs::backend::get_tensor_info(a_id); auto& b_info = tfjs::backend::get_tensor_info(b_id); auto& out_info = tfjs::backend::get_tensor_info_out(out_id); @@ -102,9 +106,14 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const float* b_buf = b_info.f32(); float* out_buf = out_info.f32_write(); + const float* bias_buf = nullptr; + if (bias_id != 0) { + bias_buf = tfjs::backend::get_tensor_info_out(bias_id).f32(); + } + xnn_operator_t fully_connected_op = nullptr; - OperatorCacheKey cache_key = {b_id, clamp_method}; + OperatorCacheKey cache_key = {b_id, bias_id, clamp_method}; // We assume b is the weights and cache the xnn operator on it. auto operator_cache_idx = operator_cache.find(cache_key); @@ -113,14 +122,13 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t output_channels = b_shape_ptr[2]; const size_t input_stride = input_channels; const size_t output_stride = output_channels; - const float* bias = nullptr; // XNNPack expects b to already be transposed. TensorFlow.js doesn't do this // automatically so we have to tell XNNPack to do the transposing. const uint32_t flags = XNN_FLAG_TRANSPOSE_WEIGHTS; xnn_status status = xnn_create_fully_connected_nc_f32( input_channels, output_channels, input_stride, output_stride, b_buf, - bias, output_min, output_max, flags, &fully_connected_op); + bias_buf, output_min, output_max, flags, &fully_connected_op); if (status != xnn_status_success) { tfjs::util::warn( "XNN status for xnn_create_fully_connected_nc_f32 is not successful. " @@ -132,6 +140,10 @@ void xnn_matmul(const size_t a_id, const size_t* a_shape_ptr, operator_cache.insert({cache_key, fully_connected_op}); associate_tensor_with_key(b_id, cache_key, b_operator_cache_key_map); + if (bias_id != 0) { + associate_tensor_with_key(bias_id, cache_key, + bias_operator_cache_key_map); + } tfjs::backend::xnn_operator_count++; } else { @@ -157,8 +169,8 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, const bool transpose_a, const bool transpose_b, - const size_t out_id, const float output_min, - const float output_max) { + const size_t out_id, const size_t bias_id, + const float output_min, const float output_max) { const size_t shared_dim = transpose_a ? a_shape_ptr[1] : a_shape_ptr[2]; const size_t left_dim = transpose_a ? a_shape_ptr[2] : a_shape_ptr[1]; const size_t right_dim = transpose_b ? b_shape_ptr[1] : b_shape_ptr[2]; @@ -196,6 +208,11 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, const float* b_buf = b_info.f32(); float* out_buf = out_info.f32_write(); + const float* bias_buf = nullptr; + if (bias_id != 0) { + bias_buf = tfjs::backend::get_tensor_info_out(bias_id).f32(); + } + const size_t size = left_dim * right_dim; // Zero out the output buffer because it might have been used before. @@ -258,11 +275,11 @@ void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, if (!transpose_a && !transpose_b && a_shape_ptr[0] == 1 && b_shape_ptr[0] == 1) { xnn_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, - out_id, output_min, output_max, clamp_method); + out_id, bias_id, output_min, output_max, clamp_method); } else { slow_batch_matmul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, out_id, output_min, - output_max); + b_shape_len, transpose_a, transpose_b, out_id, bias_id, + output_min, output_max); } auto& out_info = backend::get_tensor_info_out(out_id); diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index 9a8d8163c12..894c2002c91 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -114,4 +114,20 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { const data = await c.data(); console.log(data); // 0, 8, -3, 20 }); + + fit('fused batch mm with bias', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); + const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); + const c = tf.tensor1d([1, 1]); + const transposeA = false; + const transposeB = false; + + const d = tf.fused.matMul( + {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); + + expect(d.shape).toEqual([2, 2]); + const data = await d.data(); + console.log(data); + // expectArraysClose(await d.data(), [1, 9, 0, 21]); + }); }); From 1d259362dc56627feace36601ce3ffad5b0d8c88 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 10:50:03 -0500 Subject: [PATCH 13/35] add bias to slow matmul --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index dd1d97f4d73..53f8365eb15 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -236,10 +236,12 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, a_buf[b * a_batch + i * a_outer_step + k * a_inner_step] * b_buf[k * b_inner_step + j * b_outer_step + b * b_batch]; } - size_t out_buf_index = b * size + (i * right_dim + j); + size_t innermost_dim = i * right_dim + j; + size_t out_buf_index = b * size + innermost_dim; float current = out_buf[out_buf_index]; - out_buf[out_buf_index] = - std::max(std::min(current + sum, output_max), output_min); + out_buf[out_buf_index] = std::max( + std::min(current + sum + bias_buf[innermost_dim], output_max), + output_min); } } } From 08932125e9cd7498517baf71ea59dcccfc274f46 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 13:05:32 -0500 Subject: [PATCH 14/35] broadcaset --- tfjs-backend-wasm/src/cc/batchMatMul_impl.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc index 53f8365eb15..e98f5f71c29 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc @@ -209,8 +209,9 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, float* out_buf = out_info.f32_write(); const float* bias_buf = nullptr; + auto& bias_info = tfjs::backend::get_tensor_info_out(bias_id); if (bias_id != 0) { - bias_buf = tfjs::backend::get_tensor_info_out(bias_id).f32(); + bias_buf = bias_info.f32(); } const size_t size = left_dim * right_dim; @@ -239,8 +240,12 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, size_t innermost_dim = i * right_dim + j; size_t out_buf_index = b * size + innermost_dim; float current = out_buf[out_buf_index]; + + // Handles 1D broadcasting. + size_t bias_index = std::min(innermost_dim, bias_info.size - 1); + out_buf[out_buf_index] = std::max( - std::min(current + sum + bias_buf[innermost_dim], output_max), + std::min(current + sum + bias_buf[bias_index], output_max), output_min); } } From 31bb2ace4648679d56b4e3345845fc030bfacfc0 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 13:32:17 -0500 Subject: [PATCH 15/35] localize type --- tfjs-backend-wasm/src/cc/BUILD | 3 +++ tfjs-backend-wasm/src/cc/backend.h | 3 +++ tfjs-backend-wasm/src/cc/batchMatMul_impl.h | 2 -- tfjs-backend-wasm/src/cc/conv2d_impl.h | 5 ++--- tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc | 1 + tfjs-backend-wasm/src/cc/kernels/Conv2D.cc | 2 +- .../src/cc/kernels/DepthwiseConv2dNative.cc | 1 + tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc | 1 + .../src/cc/kernels/FusedDepthwiseConv2D.cc | 1 + .../src/cc/kernels/FusedDepthwiseConv2D.h | 2 +- tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts | 10 ++-------- tfjs-backend-wasm/src/kernels/FusedConv2D.ts | 10 ++-------- tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts | 9 +-------- tfjs-backend-wasm/src/kernels/types.ts | 8 ++++++++ 14 files changed, 27 insertions(+), 31 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index b6a34763ce8..eaa4f2d9c3c 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -317,6 +317,7 @@ tfjs_cc_library( srcs = ["kernels/Conv2D.cc"], hdrs = ["kernels/Conv2D.h"], deps = [ + ":backend", ":conv2d_impl", ], ) @@ -344,6 +345,7 @@ tfjs_cc_library( srcs = ["kernels/DepthwiseConv2dNative.cc"], hdrs = ["kernels/DepthwiseConv2dNative.h"], deps = [ + ":backend", ":conv2d_impl", ], ) @@ -416,6 +418,7 @@ tfjs_cc_library( srcs = ["kernels/FusedDepthwiseConv2D.cc"], hdrs = ["kernels/FusedDepthwiseConv2D.h"], deps = [ + ":backend", ":conv2d_impl", ], ) diff --git a/tfjs-backend-wasm/src/cc/backend.h b/tfjs-backend-wasm/src/cc/backend.h index f730bd120ea..581066ec8ef 100644 --- a/tfjs-backend-wasm/src/cc/backend.h +++ b/tfjs-backend-wasm/src/cc/backend.h @@ -27,6 +27,9 @@ enum DType { complex64 = 4, }; +// Must match enum in kernels/types.ts. +enum FusableActivation { LINEAR = 0, RELU = 1, RELU6 = 2, PRELU = 3 }; + // Holds the memory offset and the size of a tensor. struct TensorInfo { // Pointer to the bytes where the data is allocated. diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h index c69c204113a..73a16b95a65 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h +++ b/tfjs-backend-wasm/src/cc/batchMatMul_impl.h @@ -20,8 +20,6 @@ namespace tfjs { namespace wasm { -enum FusableActivation { LINEAR = 0, RELU = 1, RELU6 = 2, PRELU = 3 }; - void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const size_t b_id, const size_t* b_shape_ptr, const size_t b_shape_len, diff --git a/tfjs-backend-wasm/src/cc/conv2d_impl.h b/tfjs-backend-wasm/src/cc/conv2d_impl.h index 1836362b5aa..9d796f8fd94 100644 --- a/tfjs-backend-wasm/src/cc/conv2d_impl.h +++ b/tfjs-backend-wasm/src/cc/conv2d_impl.h @@ -17,12 +17,11 @@ #include +#include "src/cc/backend.h" + namespace tfjs { namespace wasm { -// Must match enum in FusedConv2D.ts. -enum FusableActivation { LINEAR = 0, RELU = 1, RELU6 = 2, PRELU = 3 }; - void conv2d(const size_t x_id, const size_t batch_size, const size_t input_height, const size_t input_width, const size_t filter_id, const size_t filter_height, diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc index 5318dbeb3aa..aa95cfc680e 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc @@ -18,6 +18,7 @@ #include +#include "src/cc/backend.h" #include "src/cc/batchMatMul_impl.h" #include "src/cc/kernels/BatchMatMul.h" diff --git a/tfjs-backend-wasm/src/cc/kernels/Conv2D.cc b/tfjs-backend-wasm/src/cc/kernels/Conv2D.cc index 552c9e93521..a3b9a973cd7 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Conv2D.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Conv2D.cc @@ -20,7 +20,7 @@ #include -#include "src/cc/conv2d_impl.h" +#include "src/cc/backend.h" namespace tfjs { namespace wasm { diff --git a/tfjs-backend-wasm/src/cc/kernels/DepthwiseConv2dNative.cc b/tfjs-backend-wasm/src/cc/kernels/DepthwiseConv2dNative.cc index 621690197d4..a9f3cc9adb6 100644 --- a/tfjs-backend-wasm/src/cc/kernels/DepthwiseConv2dNative.cc +++ b/tfjs-backend-wasm/src/cc/kernels/DepthwiseConv2dNative.cc @@ -18,6 +18,7 @@ #include +#include "src/cc/backend.h" #include "src/cc/conv2d_impl.h" #include "src/cc/kernels/DepthwiseConv2dNative.h" diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc index fa5783df4cb..31c6dbbc6d2 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc @@ -18,6 +18,7 @@ #include +#include "src/cc/backend.h" #include "src/cc/batchMatMul_impl.h" #include "src/cc/kernels/FusedBatchMatMul.h" diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.cc b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.cc index 5d8b55453a6..e796e9b8043 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.cc @@ -20,6 +20,7 @@ #include "src/cc/kernels/FusedDepthwiseConv2D.h" +#include "src/cc/backend.h" #include "src/cc/conv2d_impl.h" namespace tfjs { diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.h b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.h index 34bacbbe4e2..4a876d349da 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.h +++ b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D.h @@ -17,7 +17,7 @@ #include -#include "src/cc/conv2d_impl.h" +#include "src/cc/backend.h" namespace tfjs { diff --git a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts index 01b59a49fc9..0b0253df2b1 100644 --- a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts @@ -19,6 +19,8 @@ import {NamedAttrMap, NamedTensorInfoMap, registerKernel, TensorInfo} from '@ten import {BackendWasm} from '../backend_wasm'; +import {FusableActivation} from './types'; + interface FusedBatchMatMulInputs extends NamedTensorInfoMap { a: TensorInfo; b: TensorInfo; @@ -32,14 +34,6 @@ interface FusedBatchMatMulAttrs extends NamedAttrMap { activation: FusableActivation; } -// Must match enum in batchMatMul_impl.h. -enum FusableActivation { - linear = 0, - relu = 1, - relu6 = 2, - prelu = 3 -} - let wasmFusedBatchMatMul: ( aId: number, aShape: Uint8Array, aShapeSize: number, bId: number, bShape: Uint8Array, bShapeSize: number, transposeA: boolean, diff --git a/tfjs-backend-wasm/src/kernels/FusedConv2D.ts b/tfjs-backend-wasm/src/kernels/FusedConv2D.ts index cf3c70e9fe3..7fd7054c2fd 100644 --- a/tfjs-backend-wasm/src/kernels/FusedConv2D.ts +++ b/tfjs-backend-wasm/src/kernels/FusedConv2D.ts @@ -19,6 +19,8 @@ import {backend_util, KernelFunc, NamedTensorInfoMap, registerKernel, TensorInfo import {BackendWasm} from '../backend_wasm'; +import {FusableActivation} from './types'; + interface FusedConv2DInputs extends NamedTensorInfoMap { x: TensorInfo; filter: TensorInfo; @@ -61,14 +63,6 @@ function setup(backend: BackendWasm) { ]); } -// Must match enum in conv2d_impl.h. -enum FusableActivation { - linear = 0, - relu = 1, - relu6 = 2, - prelu = 3 -} - function fusedConv2d(args: { inputs: FusedConv2DInputs, backend: BackendWasm, diff --git a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts index ba482015887..03d28d6e172 100644 --- a/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts +++ b/tfjs-backend-wasm/src/kernels/FusedDepthwiseConv2D.ts @@ -18,6 +18,7 @@ import {backend_util, KernelFunc, NamedTensorInfoMap, registerKernel, TensorInfo} from '@tensorflow/tfjs-core'; import {BackendWasm} from '../backend_wasm'; +import {FusableActivation} from './types'; interface FusedDepthwiseConv2DInputs extends NamedTensorInfoMap { x: TensorInfo; @@ -62,14 +63,6 @@ function setup(backend: BackendWasm) { ]); } -// Must match enum in conv2d_impl.h. -enum FusableActivation { - linear = 0, - relu = 1, - relu6 = 2, - prelu = 3 -} - function fusedDepthwiseConv2d(args: { inputs: FusedDepthwiseConv2DInputs, backend: BackendWasm, diff --git a/tfjs-backend-wasm/src/kernels/types.ts b/tfjs-backend-wasm/src/kernels/types.ts index 2a778e4aa30..d13b2e0b871 100644 --- a/tfjs-backend-wasm/src/kernels/types.ts +++ b/tfjs-backend-wasm/src/kernels/types.ts @@ -23,3 +23,11 @@ export enum CppDType { string = 3, complex64 = 4 } + +// Must match enum in cc/fusable_activations.h. +export enum FusableActivation { + linear = 0, + relu = 1, + relu6 = 2, + prelu = 3 +} From 0d528e45c4b8c666d97bd851b8e8edfc273446ed Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 14:04:20 -0500 Subject: [PATCH 16/35] rename --- tfjs-backend-wasm/src/cc/BUILD | 10 +++++----- .../cc/{batchMatMul_impl.cc => batch_mat_mul_impl.cc} | 2 +- .../cc/{batchMatMul_impl.h => batch_mat_mul_impl.h} | 0 tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc | 2 +- tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc | 2 +- tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h | 2 -- 6 files changed, 8 insertions(+), 10 deletions(-) rename tfjs-backend-wasm/src/cc/{batchMatMul_impl.cc => batch_mat_mul_impl.cc} (99%) rename tfjs-backend-wasm/src/cc/{batchMatMul_impl.h => batch_mat_mul_impl.h} (100%) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index eaa4f2d9c3c..8e6f8646597 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -95,9 +95,9 @@ tfjs_cc_library( ) tfjs_cc_library( - name = "batchMatMul_impl", - srcs = ["batchMatMul_impl.cc"], - hdrs = ["batchMatMul_impl.h"], + name = "batch_mat_mul_impl", + srcs = ["batch_mat_mul_impl.cc"], + hdrs = ["batch_mat_mul_impl.h"], deps = [ ":backend", ":prelu_impl", @@ -271,7 +271,7 @@ tfjs_cc_library( deps = [ ":backend", ":util", - ":batchMatMul_impl", + ":batch_mat_mul_impl", ], ) @@ -282,7 +282,7 @@ tfjs_cc_library( deps = [ ":backend", ":util", - ":batchMatMul_impl", + ":batch_mat_mul_impl", ], ) diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc similarity index 99% rename from tfjs-backend-wasm/src/cc/batchMatMul_impl.cc rename to tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index e98f5f71c29..b80d60294a1 100644 --- a/tfjs-backend-wasm/src/cc/batchMatMul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -32,7 +32,7 @@ #include "src/cc/prelu_impl.h" #include "src/cc/util.h" -#include "src/cc/batchMatMul_impl.h" +#include "src/cc/batch_mat_mul_impl.h" const size_t kBlockSize = 48; diff --git a/tfjs-backend-wasm/src/cc/batchMatMul_impl.h b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h similarity index 100% rename from tfjs-backend-wasm/src/cc/batchMatMul_impl.h rename to tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc index aa95cfc680e..93f1e3c0507 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc @@ -19,7 +19,7 @@ #include #include "src/cc/backend.h" -#include "src/cc/batchMatMul_impl.h" +#include "src/cc/batch_mat_mul_impl.h" #include "src/cc/kernels/BatchMatMul.h" namespace tfjs { diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc index 31c6dbbc6d2..6ac640f76df 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc @@ -19,7 +19,7 @@ #include #include "src/cc/backend.h" -#include "src/cc/batchMatMul_impl.h" +#include "src/cc/batch_mat_mul_impl.h" #include "src/cc/kernels/FusedBatchMatMul.h" namespace tfjs { diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h index 6af656c47b3..bc88ea75493 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h @@ -17,8 +17,6 @@ #include -#include "src/cc/batchMatMul_impl.h" - namespace tfjs { namespace wasm { extern "C" { From 01be763496971a9fb19e232bd6b9a96225bd664c Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Thu, 5 Mar 2020 14:20:53 -0500 Subject: [PATCH 17/35] add tests --- tfjs-backend-wasm/src/cc/BUILD | 8 ++ .../src/cc/kernels/FusedBatchMatMul_test.cc | 93 +++++++++++++++++++ .../src/cc/kernels/FusedConv2D_test.cc | 7 +- .../cc/kernels/FusedDepthwiseConv2D_test.cc | 9 +- 4 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 8e6f8646597..040fd9a51cf 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -294,6 +294,14 @@ tfjs_unit_test( ], ) +tfjs_unit_test( + name = "FusedBatchMatMul_test", + srcs = ["kernels/FusedBatchMatMul_test.cc"], + deps = [ + ":FusedBatchMatMul", + ], +) + tfjs_cc_library( name = "ClipByValue", srcs = ["kernels/ClipByValue.cc"], diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc new file mode 100644 index 00000000000..f912c5f0450 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc @@ -0,0 +1,93 @@ +/* Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#include + +#include +#include + +#include "src/cc/backend.h" +#include "src/cc/kernels/FusedBatchMatMul.h" + +TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { + tfjs::wasm::init(); + + ASSERT_EQ(0, tfjs::backend::num_tensors()); + + size_t a0_id = 1; + size_t a1_id = 2; + size_t size = 2; + float a_values[2] = {1, 2}; + std::vector a_shape = {1, 2, 1}; + size_t* a_shape_ptr = a_shape.data(); + + size_t b0_id = 3; + size_t b1_id = 4; + float b_values[2] = {1, 2}; + std::vector b_shape = {1, 1, 2}; + size_t* b_shape_ptr = b_shape.data(); + + size_t out_id = 5; + float out_values[2] = {0, 0}; + + tfjs::wasm::register_tensor(a0_id, size, a_values); + tfjs::wasm::register_tensor(a1_id, size, a_values); + tfjs::wasm::register_tensor(b0_id, size, b_values); + tfjs::wasm::register_tensor(b1_id, size, b_values); + tfjs::wasm::register_tensor(out_id, size, out_values); + + ASSERT_EQ(5, tfjs::backend::num_tensors()); + ASSERT_EQ(0, tfjs::backend::xnn_operator_count); + + // One new xnn_operator should be created for the first call to BatchMatMul. + tfjs::wasm::FusedBatchMatMul( + a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, out_id); + ASSERT_EQ(1, tfjs::backend::xnn_operator_count); + + // No new xnn_operators should be created for the second call to BatchMatMul + // with the same b's. + tfjs::wasm::FusedBatchMatMul( + a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, out_id); + ASSERT_EQ(1, tfjs::backend::xnn_operator_count); + + // One new xnn_operator should be created for another call to BatchMatMul with + // new b's. + tfjs::wasm::FusedBatchMatMul( + a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, out_id); + ASSERT_EQ(2, tfjs::backend::xnn_operator_count); + + // No new xnn_operators should be created for the next call to BatchMatMul + // with the same b's. + tfjs::wasm::FusedBatchMatMul( + a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, out_id); + ASSERT_EQ(2, tfjs::backend::xnn_operator_count); + + // Disposing a's should not remove xnn operators. + tfjs::wasm::dispose_data(a0_id); + tfjs::wasm::dispose_data(a1_id); + ASSERT_EQ(2, tfjs::backend::xnn_operator_count); + + // Disposing b's should remove xnn operators. + tfjs::wasm::dispose_data(b0_id); + ASSERT_EQ(1, tfjs::backend::xnn_operator_count); + + tfjs::wasm::dispose_data(b1_id); + ASSERT_EQ(0, tfjs::backend::xnn_operator_count); + + tfjs::wasm::dispose(); +} diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedConv2D_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedConv2D_test.cc index 6a75526979d..1b73d35d514 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedConv2D_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedConv2D_test.cc @@ -18,7 +18,6 @@ #include #include "src/cc/backend.h" -#include "src/cc/conv2d_impl.h" #include "src/cc/kernels/FusedConv2D.h" #include "src/cc/util.h" @@ -76,8 +75,7 @@ TEST(FUSEDCONV2D, xnn_operator_lifetime) { const size_t input_channels = 1; const size_t output_channels = 1; - const tfjs::wasm::FusableActivation activation = - tfjs::wasm::FusableActivation::LINEAR; + const FusableActivation activation = FusableActivation::LINEAR; const size_t prelu_weights_id = 0; tfjs::wasm::FusedConv2D( @@ -173,8 +171,7 @@ TEST(FUSEDCONV2D, xnn_operator_lifetime) { // One new XNN operator should be created for the next call to conv2d with a // different activation. - const tfjs::wasm::FusableActivation activation2 = - tfjs::wasm::FusableActivation::RELU6; + const FusableActivation activation2 = FusableActivation::RELU6; tfjs::wasm::FusedConv2D( x1_id, batch_size, input_height, input_width, weights1_id, filter_height, filter_width, bias1_id, pad_top1, pad_right, pad_bottom1, pad_left, diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D_test.cc index 5f362c9126b..37afa3c7b12 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedDepthwiseConv2D_test.cc @@ -76,8 +76,7 @@ TEST(FUSEDDEPTHWISECONV2D, xnn_operator_lifetime) { const size_t input_channels = 1; const size_t output_channels = 1; - const tfjs::wasm::FusableActivation activation = - tfjs::wasm::FusableActivation::LINEAR; + const FusableActivation activation = FusableActivation::LINEAR; tfjs::wasm::FusedDepthwiseConv2D( x0_id, batch_size, input_height, input_width, weights0_id, filter_height, @@ -89,8 +88,7 @@ TEST(FUSEDDEPTHWISECONV2D, xnn_operator_lifetime) { // One new xnn operator should be created for second call to conv2d with no // bias and prelu activation. - const tfjs::wasm::FusableActivation prelu_activation = - tfjs::wasm::FusableActivation::PRELU; + const FusableActivation prelu_activation = FusableActivation::PRELU; const size_t prelu_weights_id = 8; const size_t prelu_size = 8; @@ -190,8 +188,7 @@ TEST(FUSEDDEPTHWISECONV2D, xnn_operator_lifetime) { // One new XNN operator should be created for the next call to conv2d with a // different activation. - const tfjs::wasm::FusableActivation activation2 = - tfjs::wasm::FusableActivation::RELU6; + const FusableActivation activation2 = FusableActivation::RELU6; tfjs::wasm::FusedDepthwiseConv2D( x1_id, batch_size, input_height, input_width, weights1_id, filter_height, filter_width, bias1_id, pad_top1, pad_right, pad_bottom1, pad_left, From c14ffef60f2e6e96a690a2fd7815c8dad60f71b5 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 09:44:37 -0400 Subject: [PATCH 18/35] got test to pass --- .../src/cc/kernels/FusedBatchMatMul_test.cc | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc index f912c5f0450..9086c799ba5 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc @@ -50,31 +50,39 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { ASSERT_EQ(5, tfjs::backend::num_tensors()); ASSERT_EQ(0, tfjs::backend::xnn_operator_count); + const FusableActivation activation = FusableActivation::LINEAR; + size_t bias_id = 0; + size_t prelu_weights_id = 0; + // One new xnn_operator should be created for the first call to BatchMatMul. - tfjs::wasm::FusedBatchMatMul( - a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, out_id); + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the second call to BatchMatMul // with the same b's. - tfjs::wasm::FusedBatchMatMul( - a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, out_id); + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); // One new xnn_operator should be created for another call to BatchMatMul with // new b's. - tfjs::wasm::FusedBatchMatMul( - a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, out_id); + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the next call to BatchMatMul // with the same b's. - tfjs::wasm::FusedBatchMatMul( - a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, out_id); + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); // Disposing a's should not remove xnn operators. From 5836bc20e9bb09b2aef076f60a16dcdad1704982 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 10:09:47 -0400 Subject: [PATCH 19/35] add test --- .../src/cc/kernels/FusedBatchMatMul_test.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc index 9086c799ba5..95bf8530655 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc @@ -54,7 +54,8 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { size_t bias_id = 0; size_t prelu_weights_id = 0; - // One new xnn_operator should be created for the first call to BatchMatMul. + // One new xnn_operator should be created for the first call to BatchMatMul + // with no bias. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, @@ -62,15 +63,23 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { ASSERT_EQ(1, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the second call to BatchMatMul - // with the same b's. + // with the same arguments. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // One new xnn_operator should be created for another call to BatchMatMul with - // new b's. + // No new xnn_operators should be created for calling BatchMatMul + // with a new a. + tfjs::wasm::FusedBatchMatMul(a1_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias_id, prelu_weights_id, out_id); + ASSERT_EQ(1, tfjs::backend::xnn_operator_count); + + // One new xnn_operator should be created for calling BatchMatMul + // with a new b. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, @@ -78,7 +87,7 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { ASSERT_EQ(2, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the next call to BatchMatMul - // with the same b's. + // with the same b. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, From 6dc84aa25db104de05d07582fbae98035aafa6db Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 10:27:10 -0400 Subject: [PATCH 20/35] update test --- .../src/cc/batch_mat_mul_impl.cc | 5 +-- .../src/cc/kernels/FusedBatchMatMul_test.cc | 33 ++++++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index b80d60294a1..2cdb51ad712 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -72,8 +72,9 @@ void erase_from_cache(const size_t tensor_id, } } -void delete_xnn_operators(const size_t weights_id) { - erase_from_cache(weights_id, b_operator_cache_key_map); +void delete_xnn_operators(const size_t tensor_id) { + erase_from_cache(tensor_id, b_operator_cache_key_map); + erase_from_cache(tensor_id, bias_operator_cache_key_map); } void associate_tensor_with_key( diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc index 95bf8530655..ce96bf5ccfc 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc @@ -54,23 +54,23 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { size_t bias_id = 0; size_t prelu_weights_id = 0; - // One new xnn_operator should be created for the first call to BatchMatMul - // with no bias. + // One new xnn_operator should be created for the first call to + // FusedBatchMatMul with no bias. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // No new xnn_operators should be created for the second call to BatchMatMul - // with the same arguments. + // No new xnn_operators should be created for the second call to + // FusedBatchMatMul with the same arguments. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // No new xnn_operators should be created for calling BatchMatMul + // No new xnn_operators should be created for calling FusedBatchMatMul // with a new a. tfjs::wasm::FusedBatchMatMul(a1_id, a_shape_ptr, a_shape.size(), b0_id, b_shape_ptr, b_shape.size(), @@ -78,7 +78,7 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // One new xnn_operator should be created for calling BatchMatMul + // One new xnn_operator should be created for calling FusedBatchMatMul // with a new b. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), @@ -86,17 +86,34 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); - // No new xnn_operators should be created for the next call to BatchMatMul - // with the same b. + // No new xnn_operators should be created for the next call to + // FusedBatchMatMul with the same b. tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, b_shape_ptr, b_shape.size(), false /* transpose_a */, false /* transpose_b */, activation, bias_id, prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); + const size_t bias1_id = 6; + const size_t bias_size = 1; + float bias_values[bias_size] = {1}; + tfjs::wasm::register_tensor(bias1_id, bias_size, bias_values); + // One new xnn_operator should be created for calling FusedBatchMatMul with a + // new bias. + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation, bias1_id, prelu_weights_id, out_id); + ASSERT_EQ(3, tfjs::backend::xnn_operator_count); + // Disposing a's should not remove xnn operators. tfjs::wasm::dispose_data(a0_id); tfjs::wasm::dispose_data(a1_id); + ASSERT_EQ(3, tfjs::backend::xnn_operator_count); + + // Disposing the second bias should remove the xnn_operator it's associated + // with. + tfjs::wasm::dispose_data(bias1_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); // Disposing b's should remove xnn operators. From 83265b1f2cea5015bd33054f2f0e633613bc59a4 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 10:34:02 -0400 Subject: [PATCH 21/35] add activ test --- .../src/cc/kernels/FusedBatchMatMul_test.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc index ce96bf5ccfc..de0c41ca6b0 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc @@ -106,12 +106,21 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { activation, bias1_id, prelu_weights_id, out_id); ASSERT_EQ(3, tfjs::backend::xnn_operator_count); + // One new xnn_operator should be created for calling FusedBatchMatMul with a + // different activation. + const FusableActivation activation2 = FusableActivation::RELU; + tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), + false /* transpose_a */, false /* transpose_b */, + activation2, bias1_id, prelu_weights_id, out_id); + ASSERT_EQ(4, tfjs::backend::xnn_operator_count); + // Disposing a's should not remove xnn operators. tfjs::wasm::dispose_data(a0_id); tfjs::wasm::dispose_data(a1_id); - ASSERT_EQ(3, tfjs::backend::xnn_operator_count); + ASSERT_EQ(4, tfjs::backend::xnn_operator_count); - // Disposing the second bias should remove the xnn_operator it's associated + // Disposing the second bias should remove the xnn_operators it's associated // with. tfjs::wasm::dispose_data(bias1_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); From 1e2830733b3198bd97648851ced181b9558a216b Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 10:57:04 -0400 Subject: [PATCH 22/35] tests --- tfjs-backend-wasm/src/cc/kernels/Pow.cc | 2 +- tfjs-backend-wasm/src/index_test.ts | 4 ++-- tfjs-backend-wasm/src/kernels/all_kernels.ts | 1 + tfjs-backend-wasm/src/setup_test.ts | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc index b9c5bed7388..646fe355fa1 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Pow.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -25,7 +25,7 @@ namespace { template inline T power(T a, T b) { - return std::pow(a, b); + return pow(a, b); } } // namespace diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index 894c2002c91..b25d937f1b3 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -103,7 +103,7 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { console.log(Array.from(data)); }); - fit('fused batch mm', async () => { + it('fused batch mm', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); @@ -115,7 +115,7 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { console.log(data); // 0, 8, -3, 20 }); - fit('fused batch mm with bias', async () => { + it('fused batch mm with bias', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const c = tf.tensor1d([1, 1]); diff --git a/tfjs-backend-wasm/src/kernels/all_kernels.ts b/tfjs-backend-wasm/src/kernels/all_kernels.ts index be3f297fdb2..602eb6f91e1 100644 --- a/tfjs-backend-wasm/src/kernels/all_kernels.ts +++ b/tfjs-backend-wasm/src/kernels/all_kernels.ts @@ -37,6 +37,7 @@ import './FloorDiv'; import './FusedBatchNorm'; import './FusedConv2D'; import './FusedDepthwiseConv2D'; +import './FusedBatchMatMul'; import './Gather'; import './GatherNd'; import './Greater'; diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 4d942718f47..10493424f3e 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -44,6 +44,7 @@ const TEST_FILTERS: TestFilter[] = [ // supported yet. ] }, + // {include: 'fused matmul'}, { include: 'add ', excludes: [ From 572bc152a9788b84386a7c8e43f943ba7422c58f Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 12:06:06 -0400 Subject: [PATCH 23/35] add tests --- tfjs-backend-wasm/src/setup_test.ts | 27 +++++++++++++----------- tfjs-core/src/ops/fused_test.ts | 32 ++++++++++++++--------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 10493424f3e..01d7c8bbc81 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -44,7 +44,6 @@ const TEST_FILTERS: TestFilter[] = [ // supported yet. ] }, - // {include: 'fused matmul'}, { include: 'add ', excludes: [ @@ -63,11 +62,12 @@ const TEST_FILTERS: TestFilter[] = [ { include: 'relu', excludes: [ - 'derivative', // Not yet implemented. - 'gradient', // Not yet implemented. - 'valueAndGradients', // Not yet implemented. - 'fused matmul', // Not yet implemented. - 'broadcasted bias', // Not yet implemented. + 'derivative', // Not yet implemented. + 'gradient', // Not yet implemented. + 'valueAndGradients', // Not yet implemented. + 'broadcasted bias', // Not yet implemented. + 'fused A x B with 2d bias' // Fused matMul with 2D bias not yet + // supported. ] }, { @@ -89,12 +89,15 @@ const TEST_FILTERS: TestFilter[] = [ { include: 'matmul ', excludes: [ - 'valueAndGradients', // Gradients not defined yet - 'gradient', // Gradients not defined yet - 'fused matmul', // Fused kernels aren't ready yet - 'zero in its shape', // Zero in shapes aren't supported yet - 'matmul followed by mul', // mul not supported yet - 'upcasts', // Upcasting not supported yet. + 'valueAndGradients', // Gradients not defined yet + 'gradient', // Gradients not defined yet + 'zero in its shape', // Zero in shapes aren't supported yet + 'matmul followed by mul', // mul not supported yet + 'upcasts', // Upcasting not supported yet. + 'fused A x B with elu', // Fused matMul with elu activation not yet + // supported. + 'fused A x B with 2d bias', // Fused matMul with 2D bias not yet + // supported. ] }, { diff --git a/tfjs-core/src/ops/fused_test.ts b/tfjs-core/src/ops/fused_test.ts index e8c6f9c91dc..bdffb90d6f2 100644 --- a/tfjs-core/src/ops/fused_test.ts +++ b/tfjs-core/src/ops/fused_test.ts @@ -20,7 +20,7 @@ import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; import {expectArraysClose} from '../test_util'; describeWithFlags('fused matmul', ALL_ENVS, () => { - it('A x B', async () => { + it('fused A x B', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); @@ -30,7 +30,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 8, -3, 20]); }); - it('A x B with relu', async () => { + it('fused A x B with relu', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const transposeA = false; @@ -43,7 +43,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 8, 0, 20]); }); - it('A x B with elu', async () => { + it('fused A x B with elu', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const transposeA = false; @@ -56,7 +56,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 8, -0.9502, 20]); }); - it('A x B with relu6', async () => { + it('fused A x B with relu6', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const transposeA = false; @@ -69,7 +69,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 6, 0, 6]); }); - it('A x B with prelu', async () => { + it('fused A x B with prelu', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); @@ -90,7 +90,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 8, -1.5, 20]); }); - it('A x B with relu transpose', async () => { + it('fused A x B with relu transpose', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [2, 3]); const transposeA = false; @@ -103,7 +103,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await c.data(), [0, 9, 0, 24]); }); - it('A x B with relu and bias', async () => { + it('fused A x B with 2d bias and relu', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); @@ -117,7 +117,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [1, 9, 0, 21]); }); - it('A x B with relu and broadcasted bias', async () => { + it('fused A x B with relu and broadcasted bias', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const c = tf.tensor1d([1, 1]); @@ -132,7 +132,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [1, 9, 0, 21]); }); - it('A x B with elu and broadcasted bias', async () => { + it('fused A x B with elu and broadcasted bias', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const c = tf.tensor1d([1, 1]); @@ -147,7 +147,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [1, 9, -0.8647, 21]); }); - it('A x B with relu and broadcasted bias different rank', async () => { + it('fused A x B with relu and broadcasted bias different rank', async () => { const a = tf.tensor3d([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [2, 2, 3]); const b = tf.tensor3d([0, 1, -3, 2, 2, 1, 0, 1, -3, 2, 2, 1], [2, 3, 2]); const c = tf.tensor2d([1, 2], [1, 2]); @@ -162,7 +162,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [2, 6, 0, 18, 0, 30, 0, 42]); }); - it('A x B with bias only', async () => { + it('fused A x B with 2d bias only', async () => { const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); @@ -176,7 +176,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await d.data(), [1, 9, -2, 21]); }); - it('A x B with relu gradient', async () => { + it('fused A x B with relu gradient', async () => { const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); @@ -224,7 +224,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expect(fusedDb.shape).toEqual(b.shape); }); - it('A x B with relu bias gradient', async () => { + it('fused A x B with relu bias gradient', async () => { const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); @@ -252,7 +252,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await dc.array(), await fusedDc.array()); }); - it('A x B with relu bias gradient transpose', async () => { + it('fused A x B with relu bias gradient transpose', async () => { const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [3, 2]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const c = tf.tensor2d([1, 1, 1, 1], [2, 2]); @@ -280,7 +280,7 @@ describeWithFlags('fused matmul', ALL_ENVS, () => { expectArraysClose(await dc.array(), await fusedDc.array()); }); - it('A x B with relu and broadcasted bias gradient', async () => { + it('fused A x B with relu and broadcasted bias gradient', async () => { const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const c = tf.tensor2d([[1]]); @@ -1021,7 +1021,7 @@ describeWithFlags('fused conv2d', ALL_ENVS, () => { expectArraysClose(await dbiasFused.array(), await dbias.array()); }); - it('fused matmul with relu6', async () => { + it('fused matmul with relu6 and gradients', async () => { const a = tf.tensor2d([1, 2, 3, 10, 20, -30], [2, 3]); const b = tf.tensor2d([2, 3, 4, -1, 2, 3], [3, 2]); const dy = tf.tensor2d([1, 10, 20, 30], [2, 2]); From a82b6101079ffb422393ab31008433f2cf30b207 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 12:12:40 -0400 Subject: [PATCH 24/35] fix setup test --- tfjs-backend-wasm/src/cc/BUILD | 1 - tfjs-backend-wasm/src/index_test.ts | 42 ++--------------------------- 2 files changed, 2 insertions(+), 41 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 040fd9a51cf..2e5afd7d6b1 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -281,7 +281,6 @@ tfjs_cc_library( hdrs = ["kernels/FusedBatchMatMul.h"], deps = [ ":backend", - ":util", ":batch_mat_mul_impl", ], ) diff --git a/tfjs-backend-wasm/src/index_test.ts b/tfjs-backend-wasm/src/index_test.ts index b25d937f1b3..370615f2cf3 100644 --- a/tfjs-backend-wasm/src/index_test.ts +++ b/tfjs-backend-wasm/src/index_test.ts @@ -58,8 +58,8 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { }, 100); // Silences backend registration warnings. - // spyOn(console, 'warn'); - // spyOn(console, 'log'); + spyOn(console, 'warn'); + spyOn(console, 'log'); }); afterEach(() => { @@ -92,42 +92,4 @@ describeWithFlags('wasm init', BROWSER_ENVS, () => { expect(() => setWasmPath('too/late')) .toThrowError(/The WASM backend was already initialized. Make sure/); }); - - it('pow', async () => { - const a = tf.tensor2d([1, -2, -3, 0, 7, 1], [2, 3]); - const b = tf.tensor2d([5, 3, 4, 5, 2, -3], [2, 3], 'int32'); - // const expected = [1, -8, 81, 0, 49, 1]; - const result = tf.pow(a, b); - // const result = tf.div(a, b); - const data = await result.data(); - console.log(Array.from(data)); - }); - - it('fused batch mm', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const alpha = tf.tensor2d([0.5, 0.5], [1, 2]); - - const c = tf.fused.matMul( - {a, b, activation: 'prelu', preluActivationWeights: alpha}); - // const c = tf.matMul(a, b); - const data = await c.data(); - console.log(data); // 0, 8, -3, 20 - }); - - it('fused batch mm with bias', async () => { - const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [2, 3]); - const b = tf.tensor2d([0, 1, -3, 2, 2, 1], [3, 2]); - const c = tf.tensor1d([1, 1]); - const transposeA = false; - const transposeB = false; - - const d = tf.fused.matMul( - {a, b, transposeA, transposeB, bias: c, activation: 'relu'}); - - expect(d.shape).toEqual([2, 2]); - const data = await d.data(); - console.log(data); - // expectArraysClose(await d.data(), [1, 9, 0, 21]); - }); }); From ee53a7269daf82cab2a6ea86e6dc7ff6b8295a97 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 12:57:01 -0400 Subject: [PATCH 25/35] lint --- tfjs-backend-wasm/src/cc/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 2e5afd7d6b1..82c69b5f141 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -167,7 +167,6 @@ tfjs_cc_library( ":ArgMax", ":AvgPool", ":BatchMatMul", - ":FusedBatchMatMul", ":ClipByValue", ":Conv2D", ":CropAndResize", @@ -175,6 +174,7 @@ tfjs_cc_library( ":Div", ":Exp", ":FloorDiv", + ":FusedBatchMatMul", ":FusedBatchNorm", ":FusedConv2D", ":FusedDepthwiseConv2D", @@ -270,8 +270,8 @@ tfjs_cc_library( hdrs = ["kernels/BatchMatMul.h"], deps = [ ":backend", - ":util", ":batch_mat_mul_impl", + ":util", ], ) From 72a5bbf2b498c9347754fe5a7f9decd92069b8e4 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 12:57:31 -0400 Subject: [PATCH 26/35] lint --- tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h index 73a16b95a65..b022d7badf5 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h @@ -12,8 +12,8 @@ * limitations under the License. * ===========================================================================*/ -#ifndef BATCHMATMUL_IMPL_H_ -#define BATCHMATMUL_IMPL_H_ +#ifndef BATCH_MAT_MUL_IMPL_H_ +#define BATCH_MAT_MUL_IMPL_H_ #include @@ -30,4 +30,4 @@ void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, } // namespace wasm } // namespace tfjs -#endif // BATCHMATMUL_IMPL_H_ +#endif // BATCH_MAT_MUL_IMPL_H_ From fa3c9a797f181e513c4b749886f3365a63e81a51 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 12:57:59 -0400 Subject: [PATCH 27/35] linrt --- tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h index bc88ea75493..2a0dcaf3594 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h @@ -12,8 +12,8 @@ * limitations under the License. * ===========================================================================*/ -#ifndef KERNELS_BATCHMATMUL_H_ -#define KERNELS_BATCHMATMUL_H_ +#ifndef KERNELS_FUSEDBATCHMATMUL_H_ +#define KERNELS_FUSEDBATCHMATMUL_H_ #include @@ -32,4 +32,4 @@ void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, } // namespace wasm } // namespace tfjs -#endif // KERNELS_BATCHMATMUL_H_ +#endif // KERNELS_FUSEDBATCHMATMUL_H_ From 0038b72f1bf1c6dc025998a92304e192760772de Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 16:19:17 -0400 Subject: [PATCH 28/35] rename --- tfjs-backend-wasm/src/cc/BUILD | 14 ++-- .../src/cc/batch_mat_mul_impl.cc | 2 +- tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h | 2 +- .../src/cc/kernels/BatchMatMul.cc | 2 +- .../src/cc/kernels/BatchMatMul.h | 2 +- .../src/cc/kernels/BatchMatMul_test.cc | 2 +- .../{FusedBatchMatMul.cc => _FusedMatMul.cc} | 14 ++-- .../{FusedBatchMatMul.h => _FusedMatMul.h} | 18 ++--- ...tchMatMul_test.cc => _FusedMatMul_test.cc} | 74 +++++++++---------- .../{FusedBatchMatMul.ts => _FusedMatMul.ts} | 51 +++++++------ tfjs-backend-wasm/src/kernels/all_kernels.ts | 2 +- tfjs-core/src/ops/fused_ops.ts | 2 +- 12 files changed, 92 insertions(+), 93 deletions(-) rename tfjs-backend-wasm/src/cc/kernels/{FusedBatchMatMul.cc => _FusedMatMul.cc} (72%) rename tfjs-backend-wasm/src/cc/kernels/{FusedBatchMatMul.h => _FusedMatMul.h} (59%) rename tfjs-backend-wasm/src/cc/kernels/{FusedBatchMatMul_test.cc => _FusedMatMul_test.cc} (55%) rename tfjs-backend-wasm/src/kernels/{FusedBatchMatMul.ts => _FusedMatMul.ts} (75%) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 82c69b5f141..eca4b8f81ce 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -161,6 +161,7 @@ tfjs_cc_library( tfjs_cc_library( name = "all_kernels", deps = [ + ":_FusedMatMul", ":Abs", ":Add", ":AddN", @@ -174,7 +175,6 @@ tfjs_cc_library( ":Div", ":Exp", ":FloorDiv", - ":FusedBatchMatMul", ":FusedBatchNorm", ":FusedConv2D", ":FusedDepthwiseConv2D", @@ -276,9 +276,9 @@ tfjs_cc_library( ) tfjs_cc_library( - name = "FusedBatchMatMul", - srcs = ["kernels/FusedBatchMatMul.cc"], - hdrs = ["kernels/FusedBatchMatMul.h"], + name = "_FusedMatMul", + srcs = ["kernels/_FusedMatMul.cc"], + hdrs = ["kernels/_FusedMatMul.h"], deps = [ ":backend", ":batch_mat_mul_impl", @@ -294,10 +294,10 @@ tfjs_unit_test( ) tfjs_unit_test( - name = "FusedBatchMatMul_test", - srcs = ["kernels/FusedBatchMatMul_test.cc"], + name = "_FusedMatMul_test", + srcs = ["kernels/_FusedMatMul_test.cc"], deps = [ - ":FusedBatchMatMul", + ":_FusedMatMul", ], ) diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index 2cdb51ad712..29759a17d0c 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 Google Inc. All Rights Reserved. +/* Copyright 2020 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h index b022d7badf5..8df58ada339 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h @@ -1,4 +1,4 @@ -/* Copyright 2019 Google Inc. All Rights Reserved. +/* Copyright 2020 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc index 93f1e3c0507..9aa757a1e3c 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 Google Inc. All Rights Reserved. +/* Copyright 2020 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.h index 0b9570f3839..bc5b5728c4b 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.h @@ -1,4 +1,4 @@ -/* Copyright 2019 Google Inc. All Rights Reserved. +/* Copyright 2020 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul_test.cc index a5bb986dd3e..1ac04d3d4a9 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 Google Inc. All Rights Reserved. +/* Copyright 2020 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc similarity index 72% rename from tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc rename to tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc index 6ac640f76df..1e3b43368f5 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc @@ -20,7 +20,7 @@ #include "src/cc/backend.h" #include "src/cc/batch_mat_mul_impl.h" -#include "src/cc/kernels/FusedBatchMatMul.h" +#include "src/cc/kernels/_FusedMatMul.h" namespace tfjs { namespace wasm { @@ -30,12 +30,12 @@ extern "C" { #ifdef __EMSCRIPTEN__ EMSCRIPTEN_KEEPALIVE #endif -void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const FusableActivation activation, const size_t bias_id, - const size_t prelu_weights_id, const size_t out_id) { +void _FusedMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id) { tfjs::wasm::batchMatMul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, transpose_a, transpose_b, activation, bias_id, prelu_weights_id, out_id); diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h similarity index 59% rename from tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h rename to tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h index 2a0dcaf3594..f300781ff3a 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h @@ -12,8 +12,8 @@ * limitations under the License. * ===========================================================================*/ -#ifndef KERNELS_FUSEDBATCHMATMUL_H_ -#define KERNELS_FUSEDBATCHMATMUL_H_ +#ifndef KERNELS_FUSEDMATMUL_H_ +#define KERNELS_FUSEDMATMUL_H_ #include @@ -21,15 +21,15 @@ namespace tfjs { namespace wasm { extern "C" { -void FusedBatchMatMul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const FusableActivation activation, const size_t bias_id, - const size_t prelu_weights_id, const size_t out_id); +void _FusedMatMul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, const size_t bias_id, + const size_t prelu_weights_id, const size_t out_id); } } // namespace wasm } // namespace tfjs -#endif // KERNELS_FUSEDBATCHMATMUL_H_ +#endif // KERNELS_FUSEDMATMUL_H_ diff --git a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul_test.cc similarity index 55% rename from tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc rename to tfjs-backend-wasm/src/cc/kernels/_FusedMatMul_test.cc index de0c41ca6b0..96ec6f90181 100644 --- a/tfjs-backend-wasm/src/cc/kernels/FusedBatchMatMul_test.cc +++ b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul_test.cc @@ -18,9 +18,9 @@ #include #include "src/cc/backend.h" -#include "src/cc/kernels/FusedBatchMatMul.h" +#include "src/cc/kernels/_FusedMatMul.h" -TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { +TEST(_FUSED_MATMUL, xnn_operator_lfietime) { tfjs::wasm::init(); ASSERT_EQ(0, tfjs::backend::num_tensors()); @@ -55,64 +55,64 @@ TEST(FUSEDBATCH_MATMUL, xnn_operator_lfietime) { size_t prelu_weights_id = 0; // One new xnn_operator should be created for the first call to - // FusedBatchMatMul with no bias. - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias_id, prelu_weights_id, out_id); + // _FusedMatMul with no bias. + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias_id, + prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the second call to - // FusedBatchMatMul with the same arguments. - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias_id, prelu_weights_id, out_id); + // _FusedMatMul with the same arguments. + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias_id, + prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // No new xnn_operators should be created for calling FusedBatchMatMul + // No new xnn_operators should be created for calling _FusedMatMul // with a new a. - tfjs::wasm::FusedBatchMatMul(a1_id, a_shape_ptr, a_shape.size(), b0_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias_id, prelu_weights_id, out_id); + tfjs::wasm::_FusedMatMul(a1_id, a_shape_ptr, a_shape.size(), b0_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias_id, + prelu_weights_id, out_id); ASSERT_EQ(1, tfjs::backend::xnn_operator_count); - // One new xnn_operator should be created for calling FusedBatchMatMul + // One new xnn_operator should be created for calling _FusedMatMul // with a new b. - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias_id, prelu_weights_id, out_id); + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias_id, + prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); // No new xnn_operators should be created for the next call to - // FusedBatchMatMul with the same b. - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias_id, prelu_weights_id, out_id); + // _FusedMatMul with the same b. + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias_id, + prelu_weights_id, out_id); ASSERT_EQ(2, tfjs::backend::xnn_operator_count); const size_t bias1_id = 6; const size_t bias_size = 1; float bias_values[bias_size] = {1}; tfjs::wasm::register_tensor(bias1_id, bias_size, bias_values); - // One new xnn_operator should be created for calling FusedBatchMatMul with a + // One new xnn_operator should be created for calling _FusedMatMul with a // new bias. - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation, bias1_id, prelu_weights_id, out_id); + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation, bias1_id, + prelu_weights_id, out_id); ASSERT_EQ(3, tfjs::backend::xnn_operator_count); - // One new xnn_operator should be created for calling FusedBatchMatMul with a + // One new xnn_operator should be created for calling _FusedMatMul with a // different activation. const FusableActivation activation2 = FusableActivation::RELU; - tfjs::wasm::FusedBatchMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, - b_shape_ptr, b_shape.size(), - false /* transpose_a */, false /* transpose_b */, - activation2, bias1_id, prelu_weights_id, out_id); + tfjs::wasm::_FusedMatMul(a0_id, a_shape_ptr, a_shape.size(), b1_id, + b_shape_ptr, b_shape.size(), false /* transpose_a */, + false /* transpose_b */, activation2, bias1_id, + prelu_weights_id, out_id); ASSERT_EQ(4, tfjs::backend::xnn_operator_count); // Disposing a's should not remove xnn operators. diff --git a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts similarity index 75% rename from tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts rename to tfjs-backend-wasm/src/kernels/_FusedMatMul.ts index 0b0253df2b1..51d4111c3af 100644 --- a/tfjs-backend-wasm/src/kernels/FusedBatchMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts @@ -21,54 +21,53 @@ import {BackendWasm} from '../backend_wasm'; import {FusableActivation} from './types'; -interface FusedBatchMatMulInputs extends NamedTensorInfoMap { +interface FusedMatMulInputs extends NamedTensorInfoMap { a: TensorInfo; b: TensorInfo; bias?: TensorInfo; preluActivationWeights?: TensorInfo; } -interface FusedBatchMatMulAttrs extends NamedAttrMap { +interface FusedMatMulAttrs extends NamedAttrMap { transposeA: boolean; transposeB: boolean; activation: FusableActivation; } -let wasmFusedBatchMatMul: ( +let wasmFusedMatMul: ( aId: number, aShape: Uint8Array, aShapeSize: number, bId: number, bShape: Uint8Array, bShapeSize: number, transposeA: boolean, transposeB: boolean, activation: number, biasId: number, preluActivationWeightsId: number, outId: number) => void; function setup(backend: BackendWasm) { - wasmFusedBatchMatMul = - backend.wasm.cwrap('FusedBatchMatMul', null /* void */, [ - 'number', // a_id - 'array', // a_shape - 'number', // a_shape.length - 'number', // b_id - 'array', // b_shape - 'number', // b_shape.length - 'number', // transpose_a - 'number', // transpose_b - 'number', // activation - 'number', // biasId - 'number', // preluActivationWeightsId - 'number' // out_id - ]); + wasmFusedMatMul = backend.wasm.cwrap('_FusedMatMul', null /* void */, [ + 'number', // a_id + 'array', // a_shape + 'number', // a_shape.length + 'number', // b_id + 'array', // b_shape + 'number', // b_shape.length + 'number', // transpose_a + 'number', // transpose_b + 'number', // activation + 'number', // biasId + 'number', // preluActivationWeightsId + 'number' // out_id + ]); } -function fusedBatchMatMul(args: { - inputs: FusedBatchMatMulInputs, +function fusedMatMul(args: { + inputs: FusedMatMulInputs, backend: BackendWasm, - attrs: FusedBatchMatMulAttrs + attrs: FusedMatMulAttrs }) { const {inputs, backend, attrs} = args; const {a, b, bias, preluActivationWeights} = inputs; if (a.dtype !== 'float32' || b.dtype !== 'float32') { throw new Error( - `FusedBatchMatMul for non non-float32 tensors not yet supported.`); + `_FusedMatMul for non non-float32 tensors not yet supported.`); } const {transposeA, transposeB, activation} = attrs; @@ -80,7 +79,7 @@ function fusedBatchMatMul(args: { const biasData = backend.dataIdMap.get(bias.dataId); if (biasData.shape.length !== 1) { throw new Error( - `FusedBatchMatMul only supports rank-1 bias but got ` + + `_FusedMatMul only supports rank-1 bias but got ` + `rank ${biasData.shape.length}.`); } biasId = biasData.id; @@ -106,7 +105,7 @@ function fusedBatchMatMul(args: { const aShapeBytes = new Uint8Array(new Int32Array(a.shape).buffer); const bShapeBytes = new Uint8Array(new Int32Array(b.shape).buffer); - wasmFusedBatchMatMul( + wasmFusedMatMul( aId, aShapeBytes, a.shape.length, bId, bShapeBytes, b.shape.length, transposeA, transposeB, fusedActivation, biasId, preluActivationWeightsId, outId); @@ -115,8 +114,8 @@ function fusedBatchMatMul(args: { } registerKernel({ - kernelName: 'FusedBatchMatMul', + kernelName: '_FusedMatMul', backendName: 'wasm', setupFunc: setup, - kernelFunc: fusedBatchMatMul + kernelFunc: fusedMatMul }); diff --git a/tfjs-backend-wasm/src/kernels/all_kernels.ts b/tfjs-backend-wasm/src/kernels/all_kernels.ts index 602eb6f91e1..56cec9e79f0 100644 --- a/tfjs-backend-wasm/src/kernels/all_kernels.ts +++ b/tfjs-backend-wasm/src/kernels/all_kernels.ts @@ -18,6 +18,7 @@ // We explicitly import the modular kernels so they get registered in the // global registry when we compile the library. A modular build would replace // the contents of this file and import only the kernels that are needed. +import './_FusedMatMul'; import './Abs'; import './Add'; import './AddN'; @@ -37,7 +38,6 @@ import './FloorDiv'; import './FusedBatchNorm'; import './FusedConv2D'; import './FusedDepthwiseConv2D'; -import './FusedBatchMatMul'; import './Gather'; import './GatherNd'; import './Greater'; diff --git a/tfjs-core/src/ops/fused_ops.ts b/tfjs-core/src/ops/fused_ops.ts index dce09ebbf42..0977cbae2d4 100644 --- a/tfjs-core/src/ops/fused_ops.ts +++ b/tfjs-core/src/ops/fused_ops.ts @@ -248,7 +248,7 @@ function fusedMatMul_({ save([a3D, b3D, y]); return y; }, - inputs, grad, 'FusedBatchMatMul', {transposeA, transposeB, activation}, + inputs, grad, '_FusedMatMul', {transposeA, transposeB, activation}, inputsToSave, outputsToSave); return res.reshape(outShape) as T; } From 241efac1dbfdae9eb08b3165ef05a9f0d516643f Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 16:46:01 -0400 Subject: [PATCH 29/35] rename --- tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc | 13 +++++++------ tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h | 13 +++++++------ tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc | 6 +++--- tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc | 6 +++--- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index 29759a17d0c..2475b50f81e 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -259,12 +259,13 @@ void slow_batch_matmul(const size_t a_id, const size_t* a_shape_ptr, namespace tfjs { namespace wasm { -void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const FusableActivation activation, const size_t bias_id, - const size_t prelu_weights_id, const size_t out_id) { +void fused_batch_mat_mul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, + const size_t bias_id, const size_t prelu_weights_id, + const size_t out_id) { FusableActivation clamp_method = activation; if (activation == FusableActivation::PRELU) { clamp_method = FusableActivation::LINEAR; diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h index 8df58ada339..476eeb5d088 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.h @@ -20,12 +20,13 @@ namespace tfjs { namespace wasm { -void batchMatMul(const size_t a_id, const size_t* a_shape_ptr, - const size_t a_shape_len, const size_t b_id, - const size_t* b_shape_ptr, const size_t b_shape_len, - const bool transpose_a, const bool transpose_b, - const FusableActivation activation, const size_t bias_id, - const size_t prelu_weights_id, const size_t out_id); +void fused_batch_mat_mul(const size_t a_id, const size_t* a_shape_ptr, + const size_t a_shape_len, const size_t b_id, + const size_t* b_shape_ptr, const size_t b_shape_len, + const bool transpose_a, const bool transpose_b, + const FusableActivation activation, + const size_t bias_id, const size_t prelu_weights_id, + const size_t out_id); } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc index 9aa757a1e3c..32e05d3e54b 100644 --- a/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/BatchMatMul.cc @@ -38,9 +38,9 @@ void BatchMatMul(const size_t a_id, const size_t* a_shape_ptr, const size_t bias_id = 0; const size_t prelu_weights_id = 0; const FusableActivation activation = FusableActivation::LINEAR; - tfjs::wasm::batchMatMul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, activation, - bias_id, prelu_weights_id, out_id); + tfjs::wasm::fused_batch_mat_mul( + a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + transpose_a, transpose_b, activation, bias_id, prelu_weights_id, out_id); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc index 1e3b43368f5..5eef8ff02c1 100644 --- a/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc +++ b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.cc @@ -36,9 +36,9 @@ void _FusedMatMul(const size_t a_id, const size_t* a_shape_ptr, const bool transpose_a, const bool transpose_b, const FusableActivation activation, const size_t bias_id, const size_t prelu_weights_id, const size_t out_id) { - tfjs::wasm::batchMatMul(a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, - b_shape_len, transpose_a, transpose_b, activation, - bias_id, prelu_weights_id, out_id); + tfjs::wasm::fused_batch_mat_mul( + a_id, a_shape_ptr, a_shape_len, b_id, b_shape_ptr, b_shape_len, + transpose_a, transpose_b, activation, bias_id, prelu_weights_id, out_id); } } // extern "C" From 98ae17fc1f84d4a95ea7b85f89110eda363fd35b Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 16:49:56 -0400 Subject: [PATCH 30/35] rm --- tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc index 2475b50f81e..3cf5b15e979 100644 --- a/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc +++ b/tfjs-backend-wasm/src/cc/batch_mat_mul_impl.cc @@ -12,10 +12,6 @@ * limitations under the License. * ===========================================================================*/ -#ifdef __EMSCRIPTEN__ -#include -#endif - #include #include #include From 416fdf69e3748e10b4511cc13a5b7056e8512ea9 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 17:12:17 -0400 Subject: [PATCH 31/35] use inline --- tfjs-backend-wasm/src/cc/kernels/Pow.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc index 646fe355fa1..ccb57c116aa 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Pow.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -42,7 +42,7 @@ void Pow(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const DType dtype, const size_t out_id) { switch (dtype) { case DType::float32: - binary_f32(a_id, b_id, out_id, power); + binary_f32(a_id, b_id, out_id, pow); break; case DType::int32: binary_i32(a_id, b_id, out_id, power); From 5b2c51d94497f3b7d96b0be249b19da3f0d62c01 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 17:13:55 -0400 Subject: [PATCH 32/35] lint --- tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h index f300781ff3a..98501aabd55 100644 --- a/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h +++ b/tfjs-backend-wasm/src/cc/kernels/_FusedMatMul.h @@ -12,8 +12,8 @@ * limitations under the License. * ===========================================================================*/ -#ifndef KERNELS_FUSEDMATMUL_H_ -#define KERNELS_FUSEDMATMUL_H_ +#ifndef KERNELS__FUSEDMATMUL_H_ +#define KERNELS__FUSEDMATMUL_H_ #include @@ -32,4 +32,4 @@ void _FusedMatMul(const size_t a_id, const size_t* a_shape_ptr, } // namespace wasm } // namespace tfjs -#endif // KERNELS_FUSEDMATMUL_H_ +#endif // KERNELS__FUSEDMATMUL_H_ From 30e1278bdfce36f00e701e3d0072db6016dfa220 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Mon, 9 Mar 2020 17:14:55 -0400 Subject: [PATCH 33/35] lint --- tfjs-backend-wasm/src/cc/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index eca4b8f81ce..e3951df8944 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -161,7 +161,6 @@ tfjs_cc_library( tfjs_cc_library( name = "all_kernels", deps = [ - ":_FusedMatMul", ":Abs", ":Add", ":AddN", @@ -206,6 +205,7 @@ tfjs_cc_library( ":Sub", ":Tile", ":Transpose", + ":_FusedMatMul", ], ) From a030effc97489856fa7ff8a7a6af32430214d519 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 10 Mar 2020 07:56:14 -0400 Subject: [PATCH 34/35] hbn --- tfjs-backend-wasm/src/cc/kernels/Pow.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/Pow.cc b/tfjs-backend-wasm/src/cc/kernels/Pow.cc index ccb57c116aa..646fe355fa1 100644 --- a/tfjs-backend-wasm/src/cc/kernels/Pow.cc +++ b/tfjs-backend-wasm/src/cc/kernels/Pow.cc @@ -42,7 +42,7 @@ void Pow(const size_t a_id, const size_t* a_shape_ptr, const size_t a_shape_len, const DType dtype, const size_t out_id) { switch (dtype) { case DType::float32: - binary_f32(a_id, b_id, out_id, pow); + binary_f32(a_id, b_id, out_id, power); break; case DType::int32: binary_i32(a_id, b_id, out_id, power); From 7c899d93fe0f458d2c0df2633545bc8cc76acbf0 Mon Sep 17 00:00:00 2001 From: Ann Yuan Date: Tue, 10 Mar 2020 08:08:09 -0400 Subject: [PATCH 35/35] rename kernel func --- tfjs-backend-wasm/src/kernels/_FusedMatMul.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts index 51d4111c3af..8d5c4130751 100644 --- a/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts +++ b/tfjs-backend-wasm/src/kernels/_FusedMatMul.ts @@ -57,7 +57,7 @@ function setup(backend: BackendWasm) { ]); } -function fusedMatMul(args: { +function fusedBatchMatMul(args: { inputs: FusedMatMulInputs, backend: BackendWasm, attrs: FusedMatMulAttrs @@ -117,5 +117,5 @@ registerKernel({ kernelName: '_FusedMatMul', backendName: 'wasm', setupFunc: setup, - kernelFunc: fusedMatMul + kernelFunc: fusedBatchMatMul });