From 2cba4dbdf2c6d4627aca9d1bb9f3e0432798aee1 Mon Sep 17 00:00:00 2001 From: Xinghua Cao Date: Mon, 7 Nov 2022 15:11:10 +0800 Subject: [PATCH 1/3] webgpu: add a naive implementation of conv3d --- .../src/conv3d_naive_webgpu.ts | 130 ++++++++++++++++++ tfjs-backend-webgpu/src/kernels/Conv3D.ts | 61 ++++++++ .../src/register_all_kernels.ts | 2 + tfjs-backend-webgpu/src/setup_test.ts | 7 +- tfjs-backend-webgpu/src/shader_util.ts | 7 +- 5 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts create mode 100644 tfjs-backend-webgpu/src/kernels/Conv3D.ts diff --git a/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts new file mode 100644 index 00000000000..2e1346f1b8a --- /dev/null +++ b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts @@ -0,0 +1,130 @@ +/** + * @license + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ + +import {backend_util} from '@tensorflow/tfjs-core'; + +import {getMainHeaderString as main, WebGPUProgram} from './webgpu_program'; +import {computeDispatch} from './webgpu_util'; + +export class Conv3DNaiveProgram implements WebGPUProgram { + outputShape: number[]; + shaderKey: string; + dispatchLayout: {x: number[], y: number[], z: number[]}; + dispatch: [number, number, number]; + variableNames = ['x', 'W']; + uniforms = + 'filterDims: vec3, pad: vec3, strides: vec3, dilations: vec3,'; + workgroupSize: [number, number, number] = [4, 4, 8]; + + constructor(convInfo: backend_util.Conv3DInfo) { + this.outputShape = convInfo.outShape; + this.dispatchLayout = {x: [3], y: [1, 2], z: [0, 4]}; + this.dispatch = computeDispatch( + this.dispatchLayout, this.outputShape, this.workgroupSize); + + this.shaderKey = `conv3dnaive`; + } + + getUserCode(): string { + const userCode = ` + ${main()} { + let coords = getOutputCoords(); + let batch = coords.x; + let d2 = coords.u; + + let xFRCCorner = vec3(coords.y, coords.z, coords.w) * uniforms.strides - uniforms.pad; + let xFCorner = xFRCCorner.x; + let xRCorner = xFRCCorner.y; + let xCCorner = xFRCCorner.z; + + let inputDepthNearestVec4 = (uniforms.xShape.u / 4) * 4; + let inputDepthVec4Remainder = uniforms.xShape.u % 4; + + var dotProd = 0.0; + for (var wF = 0; wF < uniforms.filterDims[0]; wF = wF + 1) { + let xF = xFCorner + wF * uniforms.dilations[0]; + if (xF < 0 || xF >= uniforms.xShape.y) { + continue; + } + + for (var wR = 0; wR < uniforms.filterDims[1]; wR = wR + 1) { + let xR = xRCorner + wR * uniforms.dilations[1]; + if (xR < 0 || xR >= uniforms.xShape.z) { + continue; + } + + for (var wC = 0; wC < uniforms.filterDims[2]; wC = wC + 1) { + let xC = xCCorner + wC * uniforms.dilations[2]; + if (xC < 0 || xC >= uniforms.xShape.w) { + continue; + } + + for (var d1 = 0; d1 < inputDepthNearestVec4; d1 = d1 + 4) { + let xValues = vec4( + 
getX(batch, xF, xR, xC, d1), + getX(batch, xF, xR, xC, d1 + 1), + getX(batch, xF, xR, xC, d1 + 2), + getX(batch, xF, xR, xC, d1 + 3) + ); + let wValues = vec4( + getW(wF, wR, wC, d1, d2), + getW(wF, wR, wC, d1 + 1, d2), + getW(wF, wR, wC, d1 + 2, d2), + getW(wF, wR, wC, d1 + 3, d2) + ); + + dotProd = dotProd + dot(xValues, wValues); + } + + if (inputDepthVec4Remainder == 1) { + dotProd = dotProd + + getX(batch, xF, xR, xC, inputDepthNearestVec4) * + getW(wF, wR, wC, inputDepthNearestVec4, d2); + } else if (inputDepthVec4Remainder == 2) { + let xValues = vec2( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1) + ); + let wValues = vec2( + getW(wF, wR, wC, inputDepthNearestVec4, d2), + getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2) + ); + dotProd = dotProd + dot(xValues, wValues); + } else if (inputDepthVec4Remainder == 3) { + let xValues = vec3( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 2) + ); + let wValues = vec3( + getW(wF, wR, wC, inputDepthNearestVec4, d2), + getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2), + getW(wF, wR, wC, inputDepthNearestVec4 + 2, d2) + ); + dotProd = dotProd + dot(xValues, wValues); + } + } + } + } + if (coords.x < uniforms.outShape.x && coords.y < uniforms.outShape.y && coords.z < uniforms.outShape.z + && coords.w < uniforms.outShape.w && coords.u < uniforms.outShape.u) { + setOutputAtCoords(coords.x, coords.y, coords.z, coords.w, coords.u, dotProd); + } + }`; + return userCode; + } +} diff --git a/tfjs-backend-webgpu/src/kernels/Conv3D.ts b/tfjs-backend-webgpu/src/kernels/Conv3D.ts new file mode 100644 index 00000000000..283b62b3c19 --- /dev/null +++ b/tfjs-backend-webgpu/src/kernels/Conv3D.ts @@ -0,0 +1,61 @@ +/** + * @license + * Copyright 2022 Google LLC. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {backend_util, Conv3D, Conv3DAttrs, Conv3DInputs, KernelConfig, KernelFunc, upcastType} from '@tensorflow/tfjs-core'; + +import {WebGPUBackend} from '../backend_webgpu'; +import {Conv3DNaiveProgram} from '../conv3d_naive_webgpu'; + +export function conv3D( + args: {inputs: Conv3DInputs, attrs: Conv3DAttrs, backend: WebGPUBackend}) { + const {inputs, backend, attrs} = args; + const {x, filter} = inputs; + const {strides, pad, dilations} = attrs; + + const convInfo = backend_util.computeConv3DInfo( + x.shape as [number, number, number, number, number], + filter.shape as [number, number, number, number, number], strides, + dilations, pad); + + const padInfo = + [convInfo.padInfo.front, convInfo.padInfo.top, convInfo.padInfo.left]; + const dimensions = [ + { + type: 'int32', + data: [convInfo.filterDepth, convInfo.filterHeight, convInfo.filterWidth] + }, + {type: 'int32', data: [...padInfo]}, { + type: 'int32', + data: [convInfo.strideDepth, convInfo.strideHeight, convInfo.strideWidth] + }, + { + type: 'int32', + data: [ + convInfo.dilationDepth, convInfo.dilationHeight, convInfo.dilationWidth + ] + } + ]; + const program = new Conv3DNaiveProgram(convInfo); + const dtype = upcastType(x.dtype, filter.dtype); + return backend.runWebGPUProgram(program, [x, filter], dtype, dimensions); +} + +export const conv3DConfig: 
KernelConfig = { + kernelName: Conv3D, + backendName: 'webgpu', + kernelFunc: conv3D as {} as KernelFunc, +}; diff --git a/tfjs-backend-webgpu/src/register_all_kernels.ts b/tfjs-backend-webgpu/src/register_all_kernels.ts index 9f22374f4d6..7071f0c997f 100644 --- a/tfjs-backend-webgpu/src/register_all_kernels.ts +++ b/tfjs-backend-webgpu/src/register_all_kernels.ts @@ -44,6 +44,7 @@ import {concatConfig} from './kernels/Concat'; import {conv2DConfig} from './kernels/Conv2D'; import {conv2DBackpropFilterConfig} from './kernels/Conv2DBackpropFilter'; import {conv2DBackpropInputConfig} from './kernels/Conv2DBackpropInput'; +import {conv3DConfig} from './kernels/Conv3D'; import {cosConfig} from './kernels/Cos'; import {coshConfig} from './kernels/Cosh'; import {cropAndResizeConfig} from './kernels/CropAndResize'; @@ -189,6 +190,7 @@ const kernelConfigs: KernelConfig[] = [ conv2DConfig, conv2DBackpropFilterConfig, conv2DBackpropInputConfig, + conv3DConfig, cosConfig, coshConfig, cropAndResizeConfig, diff --git a/tfjs-backend-webgpu/src/setup_test.ts b/tfjs-backend-webgpu/src/setup_test.ts index 9de00f91cd5..55b6a7386a8 100644 --- a/tfjs-backend-webgpu/src/setup_test.ts +++ b/tfjs-backend-webgpu/src/setup_test.ts @@ -57,6 +57,12 @@ const TEST_FILTERS: TestFilter[] = [ 'gradient', // gradient function not found. ] }, + { + startsWith: 'conv3d ', + excludes: [ + 'gradient', // Not yet implemented. 
+ ] + }, { startsWith: 'cumprod ', excludes: [ @@ -238,7 +244,6 @@ const TEST_FILTERS: TestFilter[] = [ 'conv2DBackpropFilter ', 'gradient with clones, input=2x2x1,d2=1,f=1,s=1,d=1,p=same', // Conv2DBackpropFilter 'conv1d gradients', // Conv2DBackpropFilter - 'conv3d ', 'conv3dTranspose ', 'maxPool3d ', 'maxPool3dBackprop ', diff --git a/tfjs-backend-webgpu/src/shader_util.ts b/tfjs-backend-webgpu/src/shader_util.ts index 30668e78edc..bf6e80c8cb5 100644 --- a/tfjs-backend-webgpu/src/shader_util.ts +++ b/tfjs-backend-webgpu/src/shader_util.ts @@ -18,12 +18,13 @@ // Generates WGSL that computes strides. export function symbolicallyComputeStrides( indicesArr: number[], variableName: string): string[] { - if (Math.max(...indicesArr) > 3) { - throw new Error('Cannot symbolically compute strides for rank > 4 tensor.'); + if (Math.max(...indicesArr) > 5) { + throw new Error('Cannot symbolically compute strides for rank > 6 tensor.'); } const numCoords = indicesArr.length; - const shape = indicesArr.map(d => `${variableName}[${d}]`); + const indicesStr = ['.x', '.y', '.z', '.w', '.u', '.v']; + const shape = indicesArr.map(d => `${variableName}${indicesStr[d]}`); const strides = new Array(numCoords - 1); strides[numCoords - 2] = shape[numCoords - 1]; for (let i = numCoords - 3; i >= 0; --i) { From 7ca16233f3a9e1c9b73d0aa93eac02b99c00d8ae Mon Sep 17 00:00:00 2001 From: Xinghua Cao Date: Wed, 11 Jan 2023 13:13:14 +0800 Subject: [PATCH 2/3] Address Yang's comments --- tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts | 17 ++++++++--------- tfjs-backend-webgpu/src/shader_util.ts | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts index 2e1346f1b8a..e4ac69492e3 100644 --- a/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts +++ b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts @@ -55,25 +55,25 @@ export class Conv3DNaiveProgram implements WebGPUProgram { let 
inputDepthVec4Remainder = uniforms.xShape.u % 4; var dotProd = 0.0; - for (var wF = 0; wF < uniforms.filterDims[0]; wF = wF + 1) { + for (var wF = 0; wF < uniforms.filterDims[0]; wF++) { let xF = xFCorner + wF * uniforms.dilations[0]; if (xF < 0 || xF >= uniforms.xShape.y) { continue; } - for (var wR = 0; wR < uniforms.filterDims[1]; wR = wR + 1) { + for (var wR = 0; wR < uniforms.filterDims[1]; wR++) { let xR = xRCorner + wR * uniforms.dilations[1]; if (xR < 0 || xR >= uniforms.xShape.z) { continue; } - for (var wC = 0; wC < uniforms.filterDims[2]; wC = wC + 1) { + for (var wC = 0; wC < uniforms.filterDims[2]; wC++) { let xC = xCCorner + wC * uniforms.dilations[2]; if (xC < 0 || xC >= uniforms.xShape.w) { continue; } - for (var d1 = 0; d1 < inputDepthNearestVec4; d1 = d1 + 4) { + for (var d1 = 0; d1 < inputDepthNearestVec4; d1 += 4) { let xValues = vec4( getX(batch, xF, xR, xC, d1), getX(batch, xF, xR, xC, d1 + 1), @@ -87,12 +87,11 @@ export class Conv3DNaiveProgram implements WebGPUProgram { getW(wF, wR, wC, d1 + 3, d2) ); - dotProd = dotProd + dot(xValues, wValues); + dotProd += dot(xValues, wValues); } if (inputDepthVec4Remainder == 1) { - dotProd = dotProd + - getX(batch, xF, xR, xC, inputDepthNearestVec4) * + dotProd += getX(batch, xF, xR, xC, inputDepthNearestVec4) * getW(wF, wR, wC, inputDepthNearestVec4, d2); } else if (inputDepthVec4Remainder == 2) { let xValues = vec2( @@ -103,7 +102,7 @@ export class Conv3DNaiveProgram implements WebGPUProgram { getW(wF, wR, wC, inputDepthNearestVec4, d2), getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2) ); - dotProd = dotProd + dot(xValues, wValues); + dotProd += dot(xValues, wValues); } else if (inputDepthVec4Remainder == 3) { let xValues = vec3( getX(batch, xF, xR, xC, inputDepthNearestVec4), @@ -115,7 +114,7 @@ export class Conv3DNaiveProgram implements WebGPUProgram { getW(wF, wR, wC, inputDepthNearestVec4 + 1, d2), getW(wF, wR, wC, inputDepthNearestVec4 + 2, d2) ); - dotProd = dotProd + dot(xValues, wValues); + 
dotProd += dot(xValues, wValues); } } } diff --git a/tfjs-backend-webgpu/src/shader_util.ts b/tfjs-backend-webgpu/src/shader_util.ts index bf6e80c8cb5..98a5782e301 100644 --- a/tfjs-backend-webgpu/src/shader_util.ts +++ b/tfjs-backend-webgpu/src/shader_util.ts @@ -23,8 +23,8 @@ export function symbolicallyComputeStrides( } const numCoords = indicesArr.length; - const indicesStr = ['.x', '.y', '.z', '.w', '.u', '.v']; - const shape = indicesArr.map(d => `${variableName}${indicesStr[d]}`); + const indicesStr = 'xyzwuv'; + const shape = indicesArr.map(d => `${variableName}.${indicesStr[d]}`); const strides = new Array(numCoords - 1); strides[numCoords - 2] = shape[numCoords - 1]; for (let i = numCoords - 3; i >= 0; --i) { From 604fac11dc720673d988360d27e07769c21bcfe6 Mon Sep 17 00:00:00 2001 From: Xinghua Cao Date: Wed, 11 Jan 2023 15:55:51 +0800 Subject: [PATCH 3/3] Address Jiajia's comments --- .../src/conv3d_naive_webgpu.ts | 26 +++++++++---------- tfjs-backend-webgpu/src/kernels/Conv3D.ts | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts index e4ac69492e3..cb86c15a60b 100644 --- a/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts +++ b/tfjs-backend-webgpu/src/conv3d_naive_webgpu.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2022 Google LLC. + * Copyright 2023 Google LLC. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -18,21 +18,22 @@ import {backend_util} from '@tensorflow/tfjs-core'; import {getMainHeaderString as main, WebGPUProgram} from './webgpu_program'; -import {computeDispatch} from './webgpu_util'; +import {computeDispatch, flatDispatchLayout} from './webgpu_util'; export class Conv3DNaiveProgram implements WebGPUProgram { outputShape: number[]; shaderKey: string; - dispatchLayout: {x: number[], y: number[], z: number[]}; + dispatchLayout: {x: number[]}; dispatch: [number, number, number]; variableNames = ['x', 'W']; uniforms = - 'filterDims: vec3, pad: vec3, strides: vec3, dilations: vec3,'; - workgroupSize: [number, number, number] = [4, 4, 8]; + 'filterDims: vec3, pads: vec3, strides: vec3, dilations: vec3,'; + workgroupSize: [number, number, number] = [64, 1, 1]; + size = true; constructor(convInfo: backend_util.Conv3DInfo) { this.outputShape = convInfo.outShape; - this.dispatchLayout = {x: [3], y: [1, 2], z: [0, 4]}; + this.dispatchLayout = flatDispatchLayout(this.outputShape); this.dispatch = computeDispatch( this.dispatchLayout, this.outputShape, this.workgroupSize); @@ -41,12 +42,13 @@ export class Conv3DNaiveProgram implements WebGPUProgram { getUserCode(): string { const userCode = ` - ${main()} { + ${main('index')} { + if (index < uniforms.size) { let coords = getOutputCoords(); let batch = coords.x; let d2 = coords.u; - let xFRCCorner = vec3(coords.y, coords.z, coords.w) * uniforms.strides - uniforms.pad; + let xFRCCorner = vec3(coords.y, coords.z, coords.w) * uniforms.strides - uniforms.pads; let xFCorner = xFRCCorner.x; let xRCorner = xFRCCorner.y; let xCCorner = xFRCCorner.z; @@ -119,11 +121,9 @@ export class Conv3DNaiveProgram implements WebGPUProgram { } } } - if (coords.x < uniforms.outShape.x && coords.y < uniforms.outShape.y && coords.z < uniforms.outShape.z - && coords.w < uniforms.outShape.w && coords.u < uniforms.outShape.u) { - setOutputAtCoords(coords.x, coords.y, coords.z, coords.w, coords.u, 
dotProd); - } - }`; + setOutputAtIndex(index, dotProd); + } + }`; return userCode; } } diff --git a/tfjs-backend-webgpu/src/kernels/Conv3D.ts b/tfjs-backend-webgpu/src/kernels/Conv3D.ts index 283b62b3c19..a8bc7a6daf5 100644 --- a/tfjs-backend-webgpu/src/kernels/Conv3D.ts +++ b/tfjs-backend-webgpu/src/kernels/Conv3D.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2022 Google LLC. + * Copyright 2023 Google LLC. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at