From 15e3e9417b70a743d2cd410431814390f28d62f7 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 10:50:42 -0800 Subject: [PATCH 1/9] benchmark --- .../preconfigured_browser.json | 56 +------- tfjs-backend-webgl/src/flags_webgl.ts | 4 + tfjs-backend-webgl/src/kernels/ScatterNd.ts | 16 ++- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 121 ++++++++++++++++++ 4 files changed, 138 insertions(+), 59 deletions(-) create mode 100644 tfjs-backend-webgl/src/scatter_packed_gpu.ts diff --git a/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json b/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json index 865c1c23e66..ea328151bcd 100644 --- a/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json +++ b/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json @@ -1,64 +1,10 @@ { "benchmark": { - "model": ["MobileNetV3", "MobileNetV2", "HandPoseDetector", - "HandPoseLandmark", "MoveNet-SinglePose", "MoveNet-MultiPose", - "BlazePoseDetector", "BlazePoseLandmark", "Coco-SSD", "DeepLabV3", - "FaceDetection", "FaceLandmarkDetection", "ArPortraitDepth", - "SelfieSegmentation-General", "SelfieSegmentation-Landscape", - "AutoML Image", "AutoML Object", "USE - batchsize 30", "USE - batchsize 1", - "TextToxicity", "MobileBert", "posenet", "bodypix", "speech-commands"], + "model": ["USE - batchsize 30"], "numRuns": 10, "backend": ["webgl"] }, "browsers": { - "Windows_11_1": { - "base": "BrowserStack", - "browser": "chrome", - "browser_version": "103.0", - "os": "Windows", - "os_version": "11", - "device": null - }, - "Windows_11_2": { - "base": "BrowserStack", - "browser": "edge", - "browser_version": "103.0", - "os": "Windows", - "os_version": "11", - "device": null - }, - "Windows_11_3": { - "base": "BrowserStack", - "browser": "firefox", - "browser_version": "103.0", - "os": "Windows", - "os_version": "11", - "device": null - }, - "Windows_7_1": { - "base": "BrowserStack", - "browser": "firefox", - "browser_version": "103.0", - "os": "Windows", - "os_version": "7", - "device": null - }, - "OS_X_Monterey_1": { - "base": "BrowserStack", - "browser": "safari", - "browser_version": "15.3", - "os": "OS X", - "os_version": "Monterey", - "device": null - }, - "OS_X_Monterey_2": { - "base": "BrowserStack", - "os": "OS X", - "os_version": "Monterey", - "browser": "chrome", - "device": null, - "browser_version": "103.0" - }, "iPhone_13_Pro_Max_1": { "base": "BrowserStack", "os": "ios", diff --git a/tfjs-backend-webgl/src/flags_webgl.ts b/tfjs-backend-webgl/src/flags_webgl.ts index a855af24504..c1075bdb26e 100644 --- a/tfjs-backend-webgl/src/flags_webgl.ts +++ b/tfjs-backend-webgl/src/flags_webgl.ts @@ -277,3 +277,7 @@ ENV.registerFlag('WEBGL2_ISNAN_CUSTOM', () => false); /** Experimental flag, whether enter compile only phase. */ ENV.registerFlag('ENGINE_COMPILE_ONLY', () => false); + +ENV.registerFlag('PACK_SCATTER', () => false); + +ENV.registerFlag('IMP_BRANCH', () => false); diff --git a/tfjs-backend-webgl/src/kernels/ScatterNd.ts b/tfjs-backend-webgl/src/kernels/ScatterNd.ts index 82120264f7a..dacda492f16 100644 --- a/tfjs-backend-webgl/src/kernels/ScatterNd.ts +++ b/tfjs-backend-webgl/src/kernels/ScatterNd.ts @@ -15,10 +15,11 @@ * ============================================================================= */ -import {backend_util, KernelConfig, KernelFunc, ScatterNd, ScatterNdAttrs, ScatterNdInputs, TensorInfo} from '@tensorflow/tfjs-core'; +import {backend_util, env, KernelConfig, KernelFunc, ScatterNd, ScatterNdAttrs, ScatterNdInputs, TensorInfo} from '@tensorflow/tfjs-core'; import {MathBackendWebGL} from '../backend_webgl'; import {ScatterProgram} from '../scatter_gpu'; +import {ScatterPackedProgram} from '../scatter_packed_gpu'; import {reshape} from './Reshape'; export function scatterNd(args: { @@ -46,9 +47,16 @@ export function scatterNd(args: { const defaultValue = backend.makeTensorInfo( [], 'float32', new Float32Array([0])); // scalar(0) - const program = new ScatterProgram( - numUpdates, sliceRank, flattenIndices.shape.length, flattenX.shape.length, - strides, flattenShape); + let program; + if (env().getBool('WEBGL_PACK') && env().getBool('PACK_SCATTER')) { + program = new ScatterPackedProgram( + numUpdates, sliceRank, flattenIndices.shape.length, + flattenX.shape.length, strides, flattenShape); + } else { + program = new ScatterProgram( + numUpdates, sliceRank, flattenIndices.shape.length, + flattenX.shape.length, strides, flattenShape); + } const res = backend.runWebGLProgram( program, [flattenX, flattenIndices, defaultValue], flattenX.dtype); diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts new file mode 100644 index 00000000000..cd0a2f6fff2 --- /dev/null +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -0,0 +1,121 @@ +/** + * @license + * Copyright 2018 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {env} from '@tensorflow/tfjs-core'; +import {GPGPUProgram} from './gpgpu_math'; +import {getCoordsDataType} from './shader_compiler'; + +export class ScatterPackedProgram implements GPGPUProgram { + variableNames = ['updates', 'indices', 'defaultValue']; + outputShape: number[]; + packedInputs = true; + packedOutput = true; + userCode: string; + + constructor( + updateSize: number, sliceDim: number, indicesRank: number, + updatesRank: number, strides: number[], shape: number[], + summingDupeIndex = true, defaultIsTensor = false) { + this.outputShape = shape; + const stridesType = getCoordsDataType(strides.length); + const dtype = getCoordsDataType(shape.length); + let indicesString = ''; + if (indicesRank === 1) { + indicesString = 'i'; + } else if (indicesRank === 2) { + indicesString = 'i, j'; + } + const indicesSnippet = `getIndices(${indicesString})`; + + let updatesString = ''; + if (updatesRank === 1) { + updatesString = 'i'; + } else if (updatesRank === 2) { + updatesString = 'i, coords[1]'; + } + const updatesSnippet = `getUpdates(${updatesString})`; + + let defaultValuesString = ''; + if (defaultIsTensor) { + defaultValuesString = 'coords[0], coords[1]'; + } + const defaultValueSnippet = `getDefaultValue(${defaultValuesString})`; + + const strideString = sliceDim > 1 ? 'strides[j]' : 'strides'; + const strideString2 = sliceDim > 1 ? 'strides[j + 1]' : 'strides'; + + let branch; + if (env().getBool('IMP_BRANCH')) { + branch = ` + vec4 status = 1. - vec4(bvec4(flattenedIndex[0] - coords[0], + flattenedIndex[1] - coords[0], + flattenedIndex[0] - coords[0] - 1, + flattenedIndex[1] - coords[0] - 1)); + if (dot(status, vec4(1.)) > 0.) { + vec4 updVals = ${updatesSnippet}; + found += status.xxzz + status.yyww; + sum += updVals.xyxy * status.xxzz + updVals.zwzw * status.yyww; + } + `; + } else { + branch = ` + if (flattenedIndex[0] == coords[0] || flattenedIndex[1] == coords[0] || + flattenedIndex[0] == coords[0] + 1 || flattenedIndex[1] == coords[0] + 1) { + vec4 updVals = ${updatesSnippet}; + if (flattenedIndex[0] == coords[0]) { + sum.xy += updVals.xy; + found.xy = vec2(1.); + } + if (flattenedIndex[1] == coords[0]) { + sum.xy += updVals.zw; + found.xy = vec2(1.); + } + if (flattenedIndex[0] == coords[0] + 1) { + sum.zw += updVals.xy; + found.zw = vec2(1.); + } + if (flattenedIndex[1] == coords[0] + 1) { + sum.zw += updVals.zw; + found.zw = vec2(1.); + } + } + `; + } + + this.userCode = ` + ${stridesType} strides = ${stridesType}(${strides}); + + void main() { + ${dtype} coords = getOutputCoords(); + vec4 sum = vec4(0.); + vec4 found = vec4(0.); + for (int i = 0; i < ${updateSize}; i+=2) { + ivec2 flattenedIndex = ivec2(0); + for (int j = 0; j < ${sliceDim}; j+=2) { + ivec4 index = round(${indicesSnippet}); + flattenedIndex += index.xz * ${strideString}; + if (j + 1 < ${sliceDim}) { + flattenedIndex += index.yw * ${strideString2}; + } + } + ${branch} + } + setOutput(mix(${defaultValueSnippet}, sum, vec4(bvec4(found)))); + } + `; + } +} From 3027cedd787bf846282c1d1b70f3d393b51e5892 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 10:55:04 -0800 Subject: [PATCH 2/9] Update flags_webgl.ts --- tfjs-backend-webgl/src/flags_webgl.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-webgl/src/flags_webgl.ts b/tfjs-backend-webgl/src/flags_webgl.ts index c1075bdb26e..4b143a659e1 100644 --- a/tfjs-backend-webgl/src/flags_webgl.ts +++ b/tfjs-backend-webgl/src/flags_webgl.ts @@ -278,6 +278,6 @@ ENV.registerFlag('WEBGL2_ISNAN_CUSTOM', () => false); /** Experimental flag, whether enter compile only phase. */ ENV.registerFlag('ENGINE_COMPILE_ONLY', () => false); -ENV.registerFlag('PACK_SCATTER', () => false); +ENV.registerFlag('PACK_SCATTER', () => true); -ENV.registerFlag('IMP_BRANCH', () => false); +ENV.registerFlag('IMP_BRANCH', () => true); From caf22a55e4a662f44bb985d448fbc66e383b464e Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 11:17:35 -0800 Subject: [PATCH 3/9] benchmark if-branch --- tfjs-backend-webgl/src/flags_webgl.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-webgl/src/flags_webgl.ts b/tfjs-backend-webgl/src/flags_webgl.ts index 4b143a659e1..0992a0af3db 100644 --- a/tfjs-backend-webgl/src/flags_webgl.ts +++ b/tfjs-backend-webgl/src/flags_webgl.ts @@ -280,4 +280,4 @@ ENV.registerFlag('ENGINE_COMPILE_ONLY', () => false); ENV.registerFlag('PACK_SCATTER', () => true); -ENV.registerFlag('IMP_BRANCH', () => true); +ENV.registerFlag('IMP_BRANCH', () => false); From 568751b028478199bc18ac9ea59a9d5cc381f91e Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 11:36:07 -0800 Subject: [PATCH 4/9] roll out vectorized optimization --- .../preconfigured_browser.json | 56 +++++++++++++++++- tfjs-backend-webgl/src/flags_webgl.ts | 4 -- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 59 +++++++------------ 3 files changed, 75 insertions(+), 44 deletions(-) diff --git a/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json b/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json index ea328151bcd..865c1c23e66 100644 --- a/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json +++ b/e2e/benchmarks/browserstack-benchmark/preconfigured_browser.json @@ -1,10 +1,64 @@ { "benchmark": { - "model": ["USE - batchsize 30"], + "model": ["MobileNetV3", "MobileNetV2", "HandPoseDetector", + "HandPoseLandmark", "MoveNet-SinglePose", "MoveNet-MultiPose", + "BlazePoseDetector", "BlazePoseLandmark", "Coco-SSD", "DeepLabV3", + "FaceDetection", "FaceLandmarkDetection", "ArPortraitDepth", + "SelfieSegmentation-General", "SelfieSegmentation-Landscape", + "AutoML Image", "AutoML Object", "USE - batchsize 30", "USE - batchsize 1", + "TextToxicity", "MobileBert", "posenet", "bodypix", "speech-commands"], "numRuns": 10, "backend": ["webgl"] }, "browsers": { + "Windows_11_1": { + "base": "BrowserStack", + "browser": "chrome", + "browser_version": "103.0", + "os": "Windows", + "os_version": "11", + "device": null + }, + "Windows_11_2": { + "base": "BrowserStack", + "browser": "edge", + "browser_version": "103.0", + "os": "Windows", + "os_version": "11", + "device": null + }, + "Windows_11_3": { + "base": "BrowserStack", + "browser": "firefox", + "browser_version": "103.0", + "os": "Windows", + "os_version": "11", + "device": null + }, + "Windows_7_1": { + "base": "BrowserStack", + "browser": "firefox", + "browser_version": "103.0", + "os": "Windows", + "os_version": "7", + "device": null + }, + "OS_X_Monterey_1": { + "base": "BrowserStack", + "browser": "safari", + "browser_version": "15.3", + "os": "OS X", + "os_version": "Monterey", + "device": null + }, + "OS_X_Monterey_2": { + "base": "BrowserStack", + "os": "OS X", + "os_version": "Monterey", + "browser": "chrome", + "device": null, + "browser_version": "103.0" + }, "iPhone_13_Pro_Max_1": { "base": "BrowserStack", "os": "ios", diff --git a/tfjs-backend-webgl/src/flags_webgl.ts b/tfjs-backend-webgl/src/flags_webgl.ts index 0992a0af3db..a855af24504 100644 --- a/tfjs-backend-webgl/src/flags_webgl.ts +++ b/tfjs-backend-webgl/src/flags_webgl.ts @@ -277,7 +277,3 @@ ENV.registerFlag('WEBGL2_ISNAN_CUSTOM', () => false); /** Experimental flag, whether enter compile only phase. */ ENV.registerFlag('ENGINE_COMPILE_ONLY', () => false); - -ENV.registerFlag('PACK_SCATTER', () => true); - -ENV.registerFlag('IMP_BRANCH', () => false); diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts index cd0a2f6fff2..d88aef8d58f 100644 --- a/tfjs-backend-webgl/src/scatter_packed_gpu.ts +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -58,44 +58,6 @@ export class ScatterPackedProgram implements GPGPUProgram { const strideString = sliceDim > 1 ? 'strides[j]' : 'strides'; const strideString2 = sliceDim > 1 ? 'strides[j + 1]' : 'strides'; - let branch; - if (env().getBool('IMP_BRANCH')) { - branch = ` - vec4 status = 1. - vec4(bvec4(flattenedIndex[0] - coords[0], - flattenedIndex[1] - coords[0], - flattenedIndex[0] - coords[0] - 1, - flattenedIndex[1] - coords[0] - 1)); - if (dot(status, vec4(1.)) > 0.) { - vec4 updVals = ${updatesSnippet}; - found += status.xxzz + status.yyww; - sum += updVals.xyxy * status.xxzz + updVals.zwzw * status.yyww; - } - `; - } else { - branch = ` - if (flattenedIndex[0] == coords[0] || flattenedIndex[1] == coords[0] || - flattenedIndex[0] == coords[0] + 1 || flattenedIndex[1] == coords[0] + 1) { - vec4 updVals = ${updatesSnippet}; - if (flattenedIndex[0] == coords[0]) { - sum.xy += updVals.xy; - found.xy = vec2(1.); - } - if (flattenedIndex[1] == coords[0]) { - sum.xy += updVals.zw; - found.xy = vec2(1.); - } - if (flattenedIndex[0] == coords[0] + 1) { - sum.zw += updVals.xy; - found.zw = vec2(1.); - } - if (flattenedIndex[1] == coords[0] + 1) { - sum.zw += updVals.zw; - found.zw = vec2(1.); - } - } - `; - } - this.userCode = ` ${stridesType} strides = ${stridesType}(${strides}); @@ -112,7 +74,26 @@ export class ScatterPackedProgram implements GPGPUProgram { flattenedIndex += index.yw * ${strideString2}; } } - ${branch} + if (flattenedIndex[0] == coords[0] || flattenedIndex[1] == coords[0] || + flattenedIndex[0] == coords[0] + 1 || flattenedIndex[1] == coords[0] + 1) { + vec4 updVals = ${updatesSnippet}; + if (flattenedIndex[0] == coords[0]) { + sum.xy += updVals.xy; + found.xy = vec2(1.); + } + if (flattenedIndex[1] == coords[0]) { + sum.xy += updVals.zw; + found.xy = vec2(1.); + } + if (flattenedIndex[0] == coords[0] + 1) { + sum.zw += updVals.xy; + found.zw = vec2(1.); + } + if (flattenedIndex[1] == coords[0] + 1) { + sum.zw += updVals.zw; + found.zw = vec2(1.); + } + } } setOutput(mix(${defaultValueSnippet}, sum, vec4(bvec4(found)))); } From 180da158848f2e05e190c7736e0f270f320ace40 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 11:36:49 -0800 Subject: [PATCH 5/9] Update ScatterNd.ts --- tfjs-backend-webgl/src/kernels/ScatterNd.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-webgl/src/kernels/ScatterNd.ts b/tfjs-backend-webgl/src/kernels/ScatterNd.ts index dacda492f16..87642887a5a 100644 --- a/tfjs-backend-webgl/src/kernels/ScatterNd.ts +++ b/tfjs-backend-webgl/src/kernels/ScatterNd.ts @@ -48,7 +48,7 @@ export function scatterNd(args: { const defaultValue = backend.makeTensorInfo( [], 'float32', new Float32Array([0])); // scalar(0) let program; - if (env().getBool('WEBGL_PACK') && env().getBool('PACK_SCATTER')) { + if (env().getBool('WEBGL_PACK')) { program = new ScatterPackedProgram( numUpdates, sliceRank, flattenIndices.shape.length, flattenX.shape.length, strides, flattenShape); From 76d92cf7d6c926dba6ec422b869df816e95703f6 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 11:37:25 -0800 Subject: [PATCH 6/9] date --- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts index d88aef8d58f..6f4e1761e03 100644 --- a/tfjs-backend-webgl/src/scatter_packed_gpu.ts +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2018 Google LLC. All Rights Reserved. + * Copyright 2023 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at From 910a1503f8cd085cdd48ce3e874666aa1dae6ce1 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 20 Jan 2023 11:48:19 -0800 Subject: [PATCH 7/9] Update scatter_packed_gpu.ts --- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts index 6f4e1761e03..1c8358f3b2e 100644 --- a/tfjs-backend-webgl/src/scatter_packed_gpu.ts +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -15,7 +15,6 @@ * ============================================================================= */ -import {env} from '@tensorflow/tfjs-core'; import {GPGPUProgram} from './gpgpu_math'; import {getCoordsDataType} from './shader_compiler'; From 3e1fd72dc312c025006ff6515bc77b47594cbdd5 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 23 Jan 2023 11:08:30 -0800 Subject: [PATCH 8/9] reduce conversion --- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts index 1c8358f3b2e..12417b075d7 100644 --- a/tfjs-backend-webgl/src/scatter_packed_gpu.ts +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -94,7 +94,7 @@ export class ScatterPackedProgram implements GPGPUProgram { } } } - setOutput(mix(${defaultValueSnippet}, sum, vec4(bvec4(found)))); + setOutput(mix(${defaultValueSnippet}, sum, found)); } `; } From 0bfb4e3dcc366c4a1ce915cfe989d4e043f57fa8 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 23 Jan 2023 14:54:01 -0800 Subject: [PATCH 9/9] Update scatter_packed_gpu.ts --- tfjs-backend-webgl/src/scatter_packed_gpu.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tfjs-backend-webgl/src/scatter_packed_gpu.ts b/tfjs-backend-webgl/src/scatter_packed_gpu.ts index 12417b075d7..959cd1c52e6 100644 --- a/tfjs-backend-webgl/src/scatter_packed_gpu.ts +++ b/tfjs-backend-webgl/src/scatter_packed_gpu.ts @@ -79,16 +79,14 @@ export class ScatterPackedProgram implements GPGPUProgram { if (flattenedIndex[0] == coords[0]) { sum.xy += updVals.xy; found.xy = vec2(1.); + } else if (flattenedIndex[0] == coords[0] + 1) { + sum.zw += updVals.xy; + found.zw = vec2(1.); } if (flattenedIndex[1] == coords[0]) { sum.xy += updVals.zw; found.xy = vec2(1.); - } - if (flattenedIndex[0] == coords[0] + 1) { - sum.zw += updVals.xy; - found.zw = vec2(1.); - } - if (flattenedIndex[1] == coords[0] + 1) { + } else if (flattenedIndex[1] == coords[0] + 1) { sum.zw += updVals.zw; found.zw = vec2(1.); }