From 29d34e518be0677a4e26eb4ee20485e0468c68ec Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 3 Feb 2023 16:09:56 -0800 Subject: [PATCH 1/7] pack --- .../src/conv_backprop_packed_gpu.ts | 102 ++++++++++++++++++ .../src/kernels/Conv2DBackpropInput.ts | 17 ++- 2 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts new file mode 100644 index 00000000000..f691f599776 --- /dev/null +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -0,0 +1,102 @@ +/** + * @license + * Copyright 2023 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {backend_util} from '@tensorflow/tfjs-core'; +import {GPGPUProgram, useShapeUniforms} from './gpgpu_math'; + +export class Conv2DDerInputPackedProgram implements GPGPUProgram { + variableNames = ['dy', 'W']; + packedInputs = true; + packedOutput = true; + outputShape: number[]; + userCode: string; + enableShapeUniforms: boolean; + customUniforms = [ + {name: 'strides', type: 'vec2' as const }, + ]; + + constructor(convInfo: backend_util.Conv2DInfo) { + this.outputShape = convInfo.inShape; + this.enableShapeUniforms = useShapeUniforms(this.outputShape.length); + + const filterHeight = convInfo.filterHeight; + const filterWidth = convInfo.filterWidth; + + const padTop = filterHeight - 1 - convInfo.padInfo.top; + const padLeft = filterWidth - 1 - convInfo.padInfo.left; + + this.userCode = ` + const ivec2 pads = ivec2(${padTop}, ${padLeft}); + + void main() { + ivec4 coords = getOutputCoords(); + int batch = coords[0]; + int d1 = coords[3]; + + ivec2 dyCorner = ivec2(coords[1], coords[2]) - pads; + int dyRCorner = dyCorner.x; + int dyCCorner = dyCorner.y; + + //intialize dotProd with a small epsilon seems to reduce GPU accuracy loss. + vec4 dotProd = vec4(0.000000000000001); + + for (int wR = 0; wR < ${filterHeight}; wR++) { + int wRPerm = ${filterHeight} - 1 - wR; + float dyR = float(dyRCorner + wR) / strides[0]; + + if (dyR < 0.0 || dyR >= ${convInfo.outHeight}.0 || fract(dyR) > 0.0) { + continue; + } + int idyR = int(dyR); + + for (int wC = 0; wC < ${filterWidth}; wC++) { + int wCPerm = ${filterWidth} - 1 - wC; + + float dyC = float(dyCCorner + wC) / strides[1]; + float idyCVal = dyC < 0.0 ? 0. : + dyC >= ${convInfo.outWidth}.0 ? 0. : + fract(dyC) > 0.0 ? 0. : 1.; + int idyC = int(dyC); + + float dyC2 = float(dyCCorner + wC + 1) / strides[1]; + float idyCVal2 = dyC2 < 0.0 ? 0. : + dyC2 >= ${convInfo.outWidth}.0 ? 0. : + fract(dyC2) > 0.0 ? 0. : 1.; + int idyC2 = int(dyC2); + + if (idyCVal + idyCVal2 == 0.) { + continue; + } + + for (int d2 = 0; d2 < ${convInfo.outChannels}; d2 += 2) { + vec4 dySample = getDy(batch, idyR, idyC, d2); + vec2 dyValue = mod(float(idyC), 2.) == 0. ? dySample.xy : dySample.zw; + vec4 wValue = getW(wRPerm, wCPerm, d1, d2); + dotProd.xy += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal; + + dySample = getDy(batch, idyR, idyC2, d2); + dyValue = mod(float(idyC2), 2.) == 0. ? dySample.xy : dySample.zw; + dotProd.zw += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal2; + } + } + } + vec4 result = dotProd - vec4(0.000000000000001); + setOutput(result); + } + `; + } +} diff --git a/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts b/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts index 6de51c3ca0b..4c3df8243dc 100644 --- a/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts +++ b/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts @@ -15,10 +15,11 @@ * ============================================================================= */ -import {backend_util, Conv2DBackpropInput, Conv2DBackpropInputAttrs, Conv2DBackpropInputInputs, KernelConfig, KernelFunc} from '@tensorflow/tfjs-core'; +import {backend_util, Conv2DBackpropInput, Conv2DBackpropInputAttrs, Conv2DBackpropInputInputs, env, KernelConfig, KernelFunc} from '@tensorflow/tfjs-core'; import {MathBackendWebGL} from '../backend_webgl'; import {Conv2DDerInputProgram} from '../conv_backprop_gpu'; +import {Conv2DDerInputPackedProgram} from '../conv_backprop_packed_gpu'; export function conv2DBackpropInput(args: { inputs: Conv2DBackpropInputInputs, @@ -34,8 +35,18 @@ export function conv2DBackpropInput(args: { inputShape, filter.shape as [number, number, number, number], strides, 1 /* dilations */, pad, dimRoundingMode, false, $dataFormat); - const program = new Conv2DDerInputProgram(convInfo); - return backend.runWebGLProgram(program, [dy, filter], 'float32'); + if (env().getBool('WEBGL_PACK') && $dataFormat === 'channelsLast') { + console.log('tested'); + const customValues = [ + [convInfo.strideHeight, convInfo.strideWidth], + ]; + const program = new Conv2DDerInputPackedProgram(convInfo); + return backend.runWebGLProgram( + program, [dy, filter], 'float32', customValues); + } else { + const program = new Conv2DDerInputProgram(convInfo); + return backend.runWebGLProgram(program, [dy, filter], 'float32'); + } } export const conv2DBackpropInputConfig: KernelConfig = { From a8717d39243e447e2b8ff06d3de40852685fe1e6 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 3 Feb 2023 16:16:29 -0800 Subject: [PATCH 2/7] sss --- tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts b/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts index 4c3df8243dc..cfc853db5f1 100644 --- a/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts +++ b/tfjs-backend-webgl/src/kernels/Conv2DBackpropInput.ts @@ -36,7 +36,6 @@ export function conv2DBackpropInput(args: { 1 /* dilations */, pad, dimRoundingMode, false, $dataFormat); if (env().getBool('WEBGL_PACK') && $dataFormat === 'channelsLast') { - console.log('tested'); const customValues = [ [convInfo.strideHeight, convInfo.strideWidth], ]; From 36a98968f7535524e9acc34e9117653186331773 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Fri, 3 Feb 2023 16:21:29 -0800 Subject: [PATCH 3/7] format --- tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts index f691f599776..1756d02d51d 100644 --- a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -55,13 +55,12 @@ export class Conv2DDerInputPackedProgram implements GPGPUProgram { vec4 dotProd = vec4(0.000000000000001); for (int wR = 0; wR < ${filterHeight}; wR++) { - int wRPerm = ${filterHeight} - 1 - wR; float dyR = float(dyRCorner + wR) / strides[0]; - if (dyR < 0.0 || dyR >= ${convInfo.outHeight}.0 || fract(dyR) > 0.0) { continue; } int idyR = int(dyR); + int wRPerm = ${filterHeight} - 1 - wR; for (int wC = 0; wC < ${filterWidth}; wC++) { int wCPerm = ${filterWidth} - 1 - wC; From 4d24ae43757dd622702ad8952edba1f23a65c77e Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 6 Feb 2023 10:24:26 -0800 Subject: [PATCH 4/7] Update conv_backprop_packed_gpu.ts --- tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts index 1756d02d51d..7e5ca8545cb 100644 --- a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2023 Google LLC. All Rights Reserved. + * Copyright 2023 Google LLC. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at From db381491b7b307800a22e3df1444c224bec031f0 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 6 Feb 2023 13:47:03 -0800 Subject: [PATCH 5/7] tune shader --- .../src/conv_backprop_packed_gpu.ts | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts index 7e5ca8545cb..3188f868fcf 100644 --- a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -82,14 +82,20 @@ export class Conv2DDerInputPackedProgram implements GPGPUProgram { } for (int d2 = 0; d2 < ${convInfo.outChannels}; d2 += 2) { - vec4 dySample = getDy(batch, idyR, idyC, d2); - vec2 dyValue = mod(float(idyC), 2.) == 0. ? dySample.xy : dySample.zw; vec4 wValue = getW(wRPerm, wCPerm, d1, d2); - dotProd.xy += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal; - - dySample = getDy(batch, idyR, idyC2, d2); - dyValue = mod(float(idyC2), 2.) == 0. ? dySample.xy : dySample.zw; - dotProd.zw += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal2; + vec4 dySample = getDy(batch, idyR, idyC, d2); + vec4 dySample2 = (idyC / 2 == idyC2 / 2) ? + dySample : getDy(batch, idyR, idyC2, d2); + + vec2 dyValue = mod(float(idyC), 2.) == 0. ? + dySample.xy : dySample.zw; + dotProd.xy += vec2(dot(dyValue, wValue.xy), + dot(dyValue, wValue.zw)) * idyCVal; + + dyValue = mod(float(idyC2), 2.) == 0. ? + dySample2.xy : dySample2.zw; + dotProd.zw += vec2(dot(dyValue, wValue.xy), + dot(dyValue, wValue.zw)) * idyCVal2; } } } From 6500652cbf3b5a04b259e90609bdfe22b50fc725 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 6 Feb 2023 14:03:24 -0800 Subject: [PATCH 6/7] remove ini value for dotProd --- tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts index 3188f868fcf..10d3d1bc801 100644 --- a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -51,9 +51,7 @@ export class Conv2DDerInputPackedProgram implements GPGPUProgram { int dyRCorner = dyCorner.x; int dyCCorner = dyCorner.y; - //intialize dotProd with a small epsilon seems to reduce GPU accuracy loss. - vec4 dotProd = vec4(0.000000000000001); - + vec4 result = vec4(0.); for (int wR = 0; wR < ${filterHeight}; wR++) { float dyR = float(dyRCorner + wR) / strides[0]; if (dyR < 0.0 || dyR >= ${convInfo.outHeight}.0 || fract(dyR) > 0.0) { @@ -99,7 +97,6 @@ export class Conv2DDerInputPackedProgram implements GPGPUProgram { } } } - vec4 result = dotProd - vec4(0.000000000000001); setOutput(result); } `; From c4f5928c40c6f2b755b39a4caa7eba74437a1457 Mon Sep 17 00:00:00 2001 From: Linchenn Date: Mon, 6 Feb 2023 14:05:03 -0800 Subject: [PATCH 7/7] remove dotProd --- tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts index 10d3d1bc801..ccdae6a70cf 100644 --- a/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts +++ b/tfjs-backend-webgl/src/conv_backprop_packed_gpu.ts @@ -87,12 +87,12 @@ export class Conv2DDerInputPackedProgram implements GPGPUProgram { vec2 dyValue = mod(float(idyC), 2.) == 0. ? dySample.xy : dySample.zw; - dotProd.xy += vec2(dot(dyValue, wValue.xy), + result.xy += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal; dyValue = mod(float(idyC2), 2.) == 0. ? dySample2.xy : dySample2.zw; - dotProd.zw += vec2(dot(dyValue, wValue.xy), + result.zw += vec2(dot(dyValue, wValue.xy), dot(dyValue, wValue.zw)) * idyCVal2; } }