From 017ea941d7d266912bdae1d0c92539ce2900edf7 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Wed, 16 Aug 2017 17:14:25 -0400 Subject: [PATCH 01/10] migrate addScaledMat and conv2d to logical sampling and improve shader compiler --- .vscode/settings.json | 1 + src/math/math_gpu.ts | 40 +---- src/math/webgl/addscaledmat_gpu.ts | 90 +++--------- src/math/webgl/addscaledmat_gpu_test.ts | 73 ++++++--- src/math/webgl/binaryop_gpu.ts | 3 +- src/math/webgl/conv_gpu.ts | 152 +++++++------------ src/math/webgl/gpgpu_math.ts | 11 +- src/math/webgl/shader_compiler.ts | 187 ++++++++++++++++-------- tsconfig.json | 7 +- tslint.json | 1 + 10 files changed, 267 insertions(+), 298 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index b1cd35c836..67a611b80a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,7 @@ }, "tslint.enable": true, "tslint.run": "onType", + "tslint.configFile": "tslint.json", "editor.tabSize": 2, "editor.insertSpaces": true, "files.insertFinalNewline": true, diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index eaf418dadd..8818f148f1 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -20,7 +20,7 @@ import * as conv_util from './conv_util'; import {MatrixOrientation, NDArrayMath} from './math'; import * as ndarray from './ndarray'; import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray'; -import * as addscaledmat_gpu from './webgl/addscaledmat_gpu'; +import {AddScaledMatProgram} from './webgl/addscaledmat_gpu'; import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu'; import {ArgMinMaxProgram} from './webgl/argminmax_gpu'; import * as avg_pool_gpu from './webgl/avg_pool_gpu'; @@ -53,9 +53,6 @@ const BATCHNORM_PROG = 'batchnorm'; const COPY_PROG = 'copy'; const CONCAT_PROG = 'concat'; -// Matrix algebra. -const ADD_SCALED_MAT_PROG = 'addscaledmat'; - // Element-wise ops. 
const RESHAPE_PROG = 'reshape'; @@ -238,27 +235,8 @@ export class NDArrayMathGPU extends NDArrayMath { protected scaledArrayAddInternal( c1: Scalar, a: T, c2: Scalar, b: T) { - let cleanupB = false; - if (!this.doGPUShapesMatch(a, b)) { - b = this.reshapeTexture(b, a.getTextureShapeRC()); - cleanupB = true; - } - - const program = this.getAndSaveProgram( - ADD_SCALED_MAT_PROG, () => addscaledmat_gpu.getFragmentShaderSource()); - - const textureShapeRC = a.getTextureShapeRC(); - const resultTexture = this.textureManager.acquireTexture(textureShapeRC); - - addscaledmat_gpu.addScaledMatrices( - this.gpgpu, program, a.getTexture(), b.getTexture(), textureShapeRC[0], - textureShapeRC[1], c1.getTexture(), c2.getTexture(), resultTexture); - - if (cleanupB) { - b.dispose(); - } - // Bring the result back to the original shape. - return NDArray.make(a.shape, {texture: resultTexture, textureShapeRC}); + const program = new AddScaledMatProgram(a.shape, b.shape); + return this.compileAndRun(program, [a, b, c1, c2]); } protected negInternal(a: T): T { @@ -963,18 +941,6 @@ export class NDArrayMathGPU extends NDArrayMath { return this.programCache[programKey]; } - private doGPUShapesMatch(a: NDArray, b: NDArray): boolean { - util.assertShapesMatch(a.shape, b.shape); - if (a.inGPU()) { - // Prefer B to have the shape of A. - b.getTextureShapeRC(a.getTextureShapeRC()); - } else if (b.inGPU()) { - // Prefer A to have the shape of B. - a.getTextureShapeRC(b.getTextureShapeRC()); - } - return util.arraysEqual(a.getTextureShapeRC(), b.getTextureShapeRC()); - } - getTextureManager(): TextureManager { return this.textureManager; } diff --git a/src/math/webgl/addscaledmat_gpu.ts b/src/math/webgl/addscaledmat_gpu.ts index 57dee24ad6..c1b2475d88 100644 --- a/src/math/webgl/addscaledmat_gpu.ts +++ b/src/math/webgl/addscaledmat_gpu.ts @@ -13,72 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource(): string { - return ` - precision highp float; - uniform sampler2D matrixA; - uniform sampler2D matrixB; - uniform sampler2D matrixAScalar; - uniform sampler2D matrixBScalar; - varying vec2 resultUV; - - const vec2 halfTexel = vec2(0.5, 0.5); - - void main() { - float a = texture2D(matrixA, resultUV).r; - float b = texture2D(matrixB, resultUV).r; - float aScalar = texture2D(matrixAScalar, halfTexel).r; - float bScalar = texture2D(matrixBScalar, halfTexel).r; - vec2 abScaled = vec2(a, b) * vec2(aScalar, bScalar); - gl_FragColor = vec4(abScaled.x + abScaled.y, 0, 0, 0); - }`; -} - -export function addScaledMatrices( - gpgpu: GPGPUContext, addScaledMatricesProgram: WebGLProgram, - a: WebGLTexture, b: WebGLTexture, rows: number, columns: number, - aScalar: WebGLTexture, bScalar: WebGLTexture, result: WebGLTexture) { - gpgpu.setOutputMatrixTexture(result, rows, columns); - gpgpu.setProgram(addScaledMatricesProgram); - gpgpu.setInputMatrixTexture(a, 'matrixA', 0); - gpgpu.setInputMatrixTexture(b, 'matrixB', 1); - gpgpu.setInputMatrixTexture(aScalar, 'matrixAScalar', 2); - gpgpu.setInputMatrixTexture(bScalar, 'matrixBScalar', 3); - gpgpu.executeProgram(); -} - -export function uploadAddScaledMatricesDownload( - a: Float32Array, b: Float32Array, rows: number, columns: number, - aScalar: number, bScalar: number): Float32Array { - const gpgpu = new GPGPUContext(); - const program: WebGLProgram = gpgpu.createProgram(getFragmentShaderSource()); - - const aTex = gpgpu.createMatrixTexture(rows, columns); - const bTex = gpgpu.createMatrixTexture(rows, columns); - const aScalarTex = gpgpu.createMatrixTexture(1, 1); - const bScalarTex = gpgpu.createMatrixTexture(1, 1); - const resultTex = gpgpu.createMatrixTexture(rows, columns); - - gpgpu.uploadMatrixToTexture(aTex, rows, columns, a); - 
gpgpu.uploadMatrixToTexture(bTex, rows, columns, b); - gpgpu.uploadMatrixToTexture(aScalarTex, 1, 1, new Float32Array([aScalar])); - gpgpu.uploadMatrixToTexture(bScalarTex, 1, 1, new Float32Array([bScalar])); - - addScaledMatrices( - gpgpu, program, aTex, bTex, rows, columns, aScalarTex, bScalarTex, - resultTex); - - const result = gpgpu.downloadMatrixFromTexture(resultTex, rows, columns); - - gpgpu.deleteMatrixTexture(aTex); - gpgpu.deleteMatrixTexture(bTex); - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(aScalarTex); - gpgpu.deleteMatrixTexture(bScalarTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - - return result; +import {GPGPUProgram} from './gpgpu_math'; +import * as util from '../../util'; + +export class AddScaledMatProgram implements GPGPUProgram { + variableNames = ['A', 'B', 'c1', 'c2']; + params: Array<{}> = []; + outputShape: number[]; + userCode: string; + supportsBroadcasting = true; + + constructor(aShape: number[], bShape: number[]) { + this.outputShape = util.assertAndGetBroadcastedShape(aShape, bShape); + this.userCode = ` + void main() { + float a = getAAtOutCoords(); + float b = getBAtOutCoords(); + float c1 = getC1(); + float c2 = getC2(); + setOutput(dot(vec2(c1, c2), vec2(a, b))); + } + `; + } } diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts index 617bb17383..cf63e1a668 100644 --- a/src/math/webgl/addscaledmat_gpu_test.ts +++ b/src/math/webgl/addscaledmat_gpu_test.ts @@ -14,7 +14,12 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; -import * as addscaledmat_gpu from './addscaledmat_gpu'; +import {AddScaledMatProgram} from './addscaledmat_gpu'; +import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray'; +import * as util from '../../util'; +import {TextureManager} from './texture_manager'; function cpuAddScaledMatrices( a: Float32Array, aScalar: number, b: Float32Array, @@ -28,48 +33,68 @@ function cpuAddScaledMatrices( describe('addscaledmat_gpu', () => { it('returns a matrix with the same shape as the input matrices', () => { - const a = new Float32Array(9 * 14); - const b = new Float32Array(a.length); - const result = - addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 9, 14, 0, 0); + const a = Array2D.zeros([9, 14]); + const b = Array2D.zerosLike(a); + const result = uploadAddScaledMatDownload(a, b, 0, 0); expect(result.length).toEqual(9 * 14); }); it('returns A + B when scalars are 1', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const b = new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]); - const result = - addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 1, 1); + const a = Array1D.new([1, 2, 3, 4, 5, 6]); + const b = Array1D.new([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]); + const result = uploadAddScaledMatDownload(a, b, 1, 1); test_util.expectArraysClose( result, new Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001); }); it('returns A * aScalar when B and bScalar are 0', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const b = new Float32Array(a.length); - const result = - addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 1.1, 0); + const a = Array1D.new([1, 2, 3, 4, 5, 6]); + const b = Array1D.zerosLike(a); + const result = uploadAddScaledMatDownload(a, b, 1.1, 0); test_util.expectArraysClose( result, new 
Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001); }); it('returns B * bScalar when A and aScalar are 0', () => { - const b = new Float32Array([1, 2, 3, 4, 5, 6]); - const a = new Float32Array(b.length); - const result = - addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 0, 1.1); + const b = Array1D.new([1, 2, 3, 4, 5, 6]); + const a = Array1D.zerosLike(b); + const result = uploadAddScaledMatDownload(a, b, 0, 1.1); test_util.expectArraysClose( result, new Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001); }); it('returns (A * aScalar) + (B * bScalar)', () => { - const a = test_util.randomArrayInRange(12 * 12, -2, 2); - const b = test_util.randomArrayInRange(a.length, -10, 10); - const aScalar = 0.5; - const bScalar = 0.25; - const result = addscaledmat_gpu.uploadAddScaledMatricesDownload( - a, b, 12, 12, aScalar, bScalar); + const a = Array2D.randUniform([12, 12], -2, 2); + const aVals = a.getValues(); + const b = Array2D.randUniform([12, 12], -10, 10); + const bVals = b.getValues(); + + const c1 = 0.5; + const c2 = 0.25; + const result = uploadAddScaledMatDownload(a, b, c1, c2); test_util.expectArraysClose( - result, cpuAddScaledMatrices(a, aScalar, b, bScalar), 0.001); + result, cpuAddScaledMatrices(aVals, c1, bVals, c2), 0.001); }); }); + +export function uploadAddScaledMatDownload( + a: NDArray, b: NDArray, c1Val: number, c2Val: number): Float32Array { + const c1 = Scalar.new(c1Val); + const c2 = Scalar.new(c2Val); + const gpgpu = new GPGPUContext(); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape); + const res = NDArray.zeros(outShape); + const program = new AddScaledMatProgram(a.shape, b.shape); + const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b, c1, c2], res); + gpgpu_math.runProgram(binary, [a, b, c1, c2], res); + + const resValues = res.getValues(); + textureManager.dispose(); + 
gpgpu.deleteProgram(binary.webGLProgram); + gpgpu.dispose(); + + return resValues; +} diff --git a/src/math/webgl/binaryop_gpu.ts b/src/math/webgl/binaryop_gpu.ts index 63298b83e6..33bf96b863 100644 --- a/src/math/webgl/binaryop_gpu.ts +++ b/src/math/webgl/binaryop_gpu.ts @@ -21,10 +21,9 @@ export class BinaryOpProgram implements GPGPUProgram { params: Array<{}>; outputShape: number[]; userCode: string; - supportsBroadcasting: boolean; + supportsBroadcasting = true; constructor(op: '+' | '-' | '*' | '/', aShape: number[], bShape: number[]) { - this.supportsBroadcasting = true; this.params = [op]; this.outputShape = util.assertAndGetBroadcastedShape(aShape, bShape); this.userCode = ` diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index 53638c8ddd..5d7b222194 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -14,7 +14,60 @@ limitations under the License. ==============================================================================*/ import * as conv_util from '../conv_util'; -import {GPGPUContext} from './gpgpu_context'; +import {GPGPUProgram} from './gpgpu_math'; + +export class Conv2DProgram implements GPGPUProgram { + variableNames = ['x', 'W', 'bias']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor(xShape: [number, number, number], fieldSize: number, + outputDepth: number, stride: number, pad: number, hasBias: boolean) { + this.outputShape = conv_util.computeOutputShape3D(xShape, + fieldSize, outputDepth, stride, pad); + const inputDepth = xShape[2]; + this.params = [inputDepth, fieldSize, stride, pad, hasBias]; + + this.userCode = ` + void main() { + vec3 output = getOutputCoords(); + float yR = output.x; + float yC = output.y; + float d2 = output.z; + + vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) - + vec2(${pad}.0, ${pad}.0); + float xRCorner = xRCCorner.x; + float xCCorner = xRCCorner.y; + + // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2). 
+ // ? = to be determined. : = across all values in that axis. + float dotProd = 0.0; + for (int wR = 0; wR < ${fieldSize}; wR++) { + float wR_float = float(wR); + float xR = xRCorner + wR_float; + + for (int wC = 0; wC < ${fieldSize}; wC++) { + float wC_float = float(wC); + float xC = xCCorner + wC_float; + + for (int d1 = 0; d1 < ${inputDepth}; d1++) { + float d1_float = float(d1); + float xValue = getXOrZeroPad(xR, xC, d1_float); + float wValue = getW(wR_float, wC_float, d1_float, d2); + dotProd += xValue * wValue; + } + } + } + if (${hasBias}) { + dotProd += getBias(d2); + } + setOutput(dotProd); + } + `; + } +} export function getFragmentShaderPrologueSource(): string { return ` @@ -38,69 +91,6 @@ export function getFragmentShaderGetMatrixValueOrZeroPadSource(): string { }`; } -export function getFragmentShaderConvolveSource( - xShapeRCD: [number, number, number], fSize: number, outputDepth: number, - stride: number, pad: number, hasBias: boolean) { - const inputDepth = xShapeRCD[2]; - const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD); - const wTexShapeRC = - conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize); - - return ` - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]}); - const vec2 wShapeCR = vec2(${wTexShapeRC[1]}, ${wTexShapeRC[0]}); - - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2). - float yR = yTexCR.y; - float yC = floor(yTexCR.x / ${outputDepth}.0); - float d2 = mod(yTexCR.x, ${outputDepth}.0); - float wTexC = d2; - - vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) - - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; - - // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2). - // ? = to be determined. : = across all values in that axis. 
- float dotProd = 0.0; - for (int wR = 0; wR < ${fSize}; wR++) { - float wR_float = float(wR); - float xR = xRCorner + wR_float; - float xTexR = xR; - - for (int wC = 0; wC < ${fSize}; wC++) { - float wC_float = float(wC); - float xC = xCCorner + wC_float; - - for (int d1 = 0; d1 < ${inputDepth}; d1++) { - float d1_float = float(d1); - float xTexC = xC * ${inputDepth}.0 + d1_float; - float wTexR = wR_float * ${fSize * inputDepth}.0 + - wC_float * ${inputDepth}.0 + d1_float; - - float xValue = - getMatrixValueOrZeroPad(x, xShapeCR, vec2(xTexC, xTexR)); - - // Read w(wR, wC, d1, d2). - vec2 wUV = (vec2(wTexC, wTexR) + halfCR) / wShapeCR; - float wValue = texture2D(weights, wUV).r; - - dotProd += xValue * wValue; - } - } - } - if (${hasBias}) { - dotProd += getBiasValue(biases, d2); - } - gl_FragColor = vec4(dotProd, 0, 0, 0); - }`; -} - export function getFragmentShaderGetBiasValueSource(outputDepth: number): string { return ` @@ -111,37 +101,3 @@ export function getFragmentShaderGetBiasValueSource(outputDepth: number): return texture2D(bias, biasUV).r; }`; } - -export function getFragmentShaderSource( - aShapeRowColDepth: [number, number, number], resultDepth: number, - fieldSize: number, stride: number, zeroPad: number, - hasBias: boolean): string { - const prologue = getFragmentShaderPrologueSource(); - const getMatrixValueOrZeroPad = - getFragmentShaderGetMatrixValueOrZeroPadSource(); - const convolve = getFragmentShaderConvolveSource( - aShapeRowColDepth, fieldSize, resultDepth, stride, zeroPad, hasBias); - const getBiasValue = getFragmentShaderGetBiasValueSource(resultDepth); - - return [ - prologue, - getMatrixValueOrZeroPad, - getBiasValue, - convolve, - ].join('\n'); -} - -export function convolve( - gpgpu: GPGPUContext, program: WebGLProgram, a: WebGLTexture, - weights: WebGLTexture, biases: WebGLTexture|null, result: WebGLTexture, - resultShapeRowCol: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultShapeRowCol[0], resultShapeRowCol[1]); 
- gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(a, 'x', 0); - gpgpu.setInputMatrixTexture(weights, 'weights', 1); - if (biases != null) { - gpgpu.setInputMatrixTexture(biases, 'biases', 2); - } - gpgpu.executeProgram(); -} diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts index b112f4b9a9..b5b2fe9d5a 100644 --- a/src/math/webgl/gpgpu_math.ts +++ b/src/math/webgl/gpgpu_math.ts @@ -66,12 +66,17 @@ export function compileProgram( }; } -function validateBinaryAndProgram(shapeInfos: ShapeInfo[], bArrays: NDArray[]) { +function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) { + if (shapeInfos.length !== inputs.length) { + throw Error(`Binary was compiled with ${shapeInfos.length} inputs, but ` + + `was executed with ${inputs.length} inputs`); + } + shapeInfos.forEach((s, i) => { const shapeA = s.logicalShape; const texShapeA = s.texShape; - const shapeB = bArrays[i].shape; - const texShapeB = bArrays[i].getTextureShapeRC(); + const shapeB = inputs[i].shape; + const texShapeB = inputs[i].getTextureShapeRC(); if (!util.arraysEqual(shapeA, shapeB)) { throw Error(`Binary was compiled with different shapes than ` + diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts index 89e2eae914..109efa6c3c 100644 --- a/src/math/webgl/shader_compiler.ts +++ b/src/math/webgl/shader_compiler.ts @@ -36,8 +36,7 @@ export function makeShader(inputsInfo: InputInfo[], outputShape: ShapeInfo, const outputSamplingSnippet = getOutputSamplingSnippet(outputShape.logicalShape, outTexShape); const source = [ - SHADER_PREFIX, inputPrefixSnippet, SAMPLE_1D_SNIPPET, SAMPLE_2D_SNIPPET, - SAMPLE_3D_SNIPPET, SAMPLE_4D_SNIPPET, inputSamplingSnippet, + SHADER_PREFIX, inputPrefixSnippet, inputSamplingSnippet, outputSamplingSnippet, userCode ].join('\n'); return source; @@ -107,62 +106,74 @@ function getOutputSamplingSnippet( } } +const SAMPLE_1D_SNIPPET = ` +vec2 UVfrom1D(float texNumR, float texNumC, float index) { + float 
texR = floor(index / texNumC); + float texC = mod(index, texNumC); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); +} +`; + +const SAMPLE_2D_SNIPPET = ` +vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row, + float col) { + float index = dot(vec2(row, col), vec2(numC, 1.0)); + float texR = floor(index / texNumC); + float texC = mod(index, texNumC); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); +} +`; + +const SAMPLE_3D_SNIPPET = ` +vec2 UVfrom3D(float texNumR, float texNumC, float stride0, + float stride1, float row, float col, float depth) { + float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0)); + float texR = floor(index / texNumC); + float texC = mod(index, texNumC); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); +} +`; + +const SAMPLE_4D_SNIPPET = ` +vec2 UVfrom4D(float texNumR, float texNumC, float stride0, + float stride1, float stride2, float row, float col, float depth, + float depth2) { + float index = dot(vec4(row, col, depth, depth2), + vec4(stride0, stride1, stride2, 1.0)); + float texR = floor(index / texNumC); + float texC = mod(index, texNumC); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); +} +`; + const SHADER_PREFIX = ` precision highp float; varying vec2 resultUV; const vec2 halfCR = vec2(0.5, 0.5); - void setOutput(float val) { - gl_FragColor = vec4(val, 0, 0, 0); - } - - bool isNaN(float val) { - return val == val ? 
false : true; - } -`; - -const SAMPLE_1D_SNIPPET = ` - float sample1D(sampler2D texture, float texNumR, float texNumC, float index) { - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); - vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); + float sample(sampler2D texture, vec2 uv) { return texture2D(texture, uv).r; } -`; -const SAMPLE_2D_SNIPPET = ` - float sample2D(sampler2D texture, float texNumR, float texNumC, float numC, - float row, float col) { - float index = dot(vec2(row, col), vec2(numC, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); - vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); - return texture2D(texture, uv).r; + float sampleOrZeroPad(sampler2D texture, vec2 uv) { + bool lessThanZero = any(lessThan(uv, vec2(0, 0))); + bool greaterThanOne = any(greaterThan(uv, vec2(1, 1))); + bool outside = lessThanZero || greaterThanOne; + float value = sample(texture, uv); + return mix(value, 0.0, float(outside)); } -`; -const SAMPLE_3D_SNIPPET = ` - float sample3D(sampler2D texture, float texNumR, float texNumC, float stride0, - float stride1, float row, float col, float depth) { - float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); - vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); - return texture2D(texture, uv).r; + void setOutput(float val) { + gl_FragColor = vec4(val, 0, 0, 0); } -`; -const SAMPLE_4D_SNIPPET = ` - float sample4D(sampler2D texture, float texNumR, float texNumC, float stride0, - float stride1, float stride2, float row, float col, float depth, - float depth2) { - float index = dot(vec4(row, col, depth, depth2), - vec4(stride0, stride1, stride2, 1.0)); - float texR = floor(index / texNumC); - float texC = mod(index, texNumC); - vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); - return texture2D(texture, uv).r; + bool isNaN(float val) { + 
return val == val ? false : true; } + ${SAMPLE_1D_SNIPPET} + ${SAMPLE_2D_SNIPPET} + ${SAMPLE_3D_SNIPPET} + ${SAMPLE_4D_SNIPPET} `; function getOutput1DCoords( @@ -254,7 +265,7 @@ function getSamplerScalar(texName: string): string { const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1); return ` float ${funcName}() { - return texture2D(${texName}, halfCR).r; + return sample(${texName}, halfCR); } `; } @@ -267,7 +278,11 @@ function getSampler1D( if (texShape[0] === 1 && texShape[1] === 1) { return ` float ${funcName}(float index) { - return texture2D(${texName}, halfCR).r; + return sample(${texName}, halfCR); + } + + float ${funcName}OrZeroPad(float index) { + return sampleOrZeroPad(${texName}, halfCR); } `; } @@ -275,7 +290,12 @@ function getSampler1D( return ` float ${funcName}(float index) { vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float index) { + vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); + return sampleOrZeroPad(${texName}, uv); } `; } @@ -283,13 +303,24 @@ function getSampler1D( return ` float ${funcName}(float index) { vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float index) { + vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); + return sampleOrZeroPad(${texName}, uv); } `; } return ` float ${funcName}(float index) { - return sample1D(${texName}, ${tR}.0, ${tC}.0, index); + vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index); + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float index) { + vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index); + return sampleOrZeroPad(${texName}, uv); } `; } @@ -304,8 +335,15 @@ function getSampler3D( const stride1 = shape[2]; return ` float ${funcName}(float row, float col, float depth) { - return sample3D(${texName}, ${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, - row, col, depth); + 
vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, + col, depth); + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float row, float col, float depth) { + vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, + col, depth); + return sampleOrZeroPad(${texName}, uv); } `; } @@ -321,11 +359,19 @@ function getSampler4D( const stride0 = shape[1] * stride1; return ` - float ${funcName}(float row, float col, float depth, float depth2) { - return sample4D(${texName}, ${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, - ${stride2}.0, row, col, depth, depth2); - } -`; + float ${funcName}(float row, float col, float depth, float depth2) { + vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, + ${stride2}.0, row, col, depth, depth2); + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float row, float col, float depth, + float depth2) { + vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, + ${stride2}.0, row, col, depth, depth2); + return sampleOrZeroPad(${texName}, uv); + } + `; } function getSampler2D( @@ -338,13 +384,24 @@ function getSampler2D( return ` float ${funcName}(float row, float col) { vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float row, float col) { + vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); + return sampleOrZeroPad(${texName}, uv); } `; } return ` float ${funcName}(float row, float col) { - return sample2D(${texName}, ${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); + vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); + return sample(${texName}, uv); + } + + float ${funcName}OrZeroPad(float row, float col) { + vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); + return sampleOrZeroPad(${texName}, uv); } `; } @@ -357,7 +414,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { if 
(tNumC === 1 && tNumR === 1) { return ` float ${funcName}(float index) { - return texture2D(${texName}, halfCR).r; + return sample(${texName}, halfCR); } `; } @@ -365,7 +422,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { return ` float ${funcName}(float index) { vec2 uv = vec2(0.5, (index + 0.5) / ${tNumR}.0); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); } `; } @@ -373,7 +430,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { return ` float ${funcName}(float index) { vec2 uv = vec2((index + 0.5) / ${tNumC}.0, 0.5); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); } `; } @@ -382,7 +439,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { float texR = floor(index / ${tNumC}.0); float texC = mod(index, ${tNumC}.0); vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); } `; } @@ -394,7 +451,7 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number], if (util.arraysEqual(inTexShape, outTexShape)) { return ` float ${funcName}() { - return texture2D(${texName}, resultUV).r; + return sample(${texName}, resultUV); } `; } @@ -410,7 +467,7 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number], float texC = mod(index, ${inTexShape[1]}.0); vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); - return texture2D(${texName}, uv).r; + return sample(${texName}, uv); } `; } diff --git a/tsconfig.json b/tsconfig.json index 93bc6fb9e4..3f59e958de 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,6 +10,11 @@ "lib": ["es2015", "dom"], "outDir": "./dist", "noUnusedLocals": true, - "noImplicitReturns": true + "noImplicitReturns": true, + "noImplicitThis": true, + "noUnusedParameters": false, + "pretty": true, + "noFallthroughCasesInSwitch": true, + 
"allowUnreachableCode": false } } diff --git a/tslint.json b/tslint.json index 6b67c36556..546ec936f5 100644 --- a/tslint.json +++ b/tslint.json @@ -21,6 +21,7 @@ "class-name": true, "interface-name": [true, "never-prefix"], "jsdoc-format": true, + "forin": false, "label-position": true, "max-line-length": [true, 80], "new-parens": true, From bef3392c5d42da264ddf535f52936f07ed6b8627 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Wed, 16 Aug 2017 21:39:49 -0400 Subject: [PATCH 02/10] fix conv2d zero padding and make the project build --- demos/benchmarks/conv_gpu_benchmark.ts | 85 ++++++---------- demos/benchmarks/logsumexp_gpu_benchmark.ts | 2 +- src/math/math_gpu.ts | 69 ++----------- src/math/webgl/addscaledmat_gpu_test.ts | 4 +- src/math/webgl/conv_gpu.ts | 14 ++- src/math/webgl/conv_gpu_test.ts | 105 ++++++-------------- src/math/webgl/gpgpu_math.ts | 8 +- src/math/webgl/shader_compiler.ts | 58 ++--------- 8 files changed, 88 insertions(+), 257 deletions(-) diff --git a/demos/benchmarks/conv_gpu_benchmark.ts b/demos/benchmarks/conv_gpu_benchmark.ts index fffd644e2c..8d583bed2f 100644 --- a/demos/benchmarks/conv_gpu_benchmark.ts +++ b/demos/benchmarks/conv_gpu_benchmark.ts @@ -14,76 +14,49 @@ limitations under the License. 
==============================================================================*/ import * as conv_util from '../../src/math/conv_util'; -import * as conv_gpu from '../../src/math/webgl/conv_gpu'; +import {Conv2DProgram} from '../../src/math/webgl/conv_gpu'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; -import * as test_util from '../../src/test_util'; - +import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; +import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray'; +import {TextureManager} from '../../src/math/webgl/texture_manager'; import {BenchmarkTest} from './benchmark'; const OP_RUNS = 40; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { - const inputShapeRCD: [number, number, number] = [size, size, 1]; + const inputDepth = 1; + const inputShape: [number, number, number] = [size, size, inputDepth]; const outputDepth = 1; const fieldSize = 11; const stride = 1; - const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride); - const outputShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - inputShapeRCD, fieldSize, outputDepth, stride, zeroPad); - - const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD); - const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD); - const weightsTexShapeRC = conv_util.computeWeightsTexShape( - inputShapeRCD[2], outputDepth, fieldSize); - const biasesTexShapeRC = conv_util.computeBiasesTexShape(outputDepth); - - const hasBias = true; + const zeroPad = conv_util.computeDefaultPad(inputShape, fieldSize, stride); const gpgpu = new GPGPUContext(); - const program = gpgpu.createProgram(conv_gpu.getFragmentShaderSource( - inputShapeRCD, outputDepth, fieldSize, stride, zeroPad, hasBias)); - - const inputTexture = - gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]); - const weightsTexture = - gpgpu.createMatrixTexture(weightsTexShapeRC[0], weightsTexShapeRC[1]); - const biasesTexture = 
- gpgpu.createMatrixTexture(biasesTexShapeRC[0], biasesTexShapeRC[1]); - const outputTexture = - gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]); - - const inputData = test_util.randomArrayInRange( - inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1); - const weightsData = test_util.randomArrayInRange( - weightsTexShapeRC[0] * weightsTexShapeRC[1], -1, 1); - const biasesData = test_util.randomArrayInRange( - biasesTexShapeRC[0] * biasesTexShapeRC[1], -1, 1); - - gpgpu.uploadMatrixToTexture( - inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData); - gpgpu.uploadMatrixToTexture( - weightsTexture, weightsTexShapeRC[0], weightsTexShapeRC[1], weightsData); - gpgpu.uploadMatrixToTexture( - biasesTexture, biasesTexShapeRC[0], biasesTexShapeRC[1], biasesData); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); + const program = new Conv2DProgram( + inputShape, fieldSize, outputDepth, stride, zeroPad, true); + const outputShape = program.outputShape as [number, number, number]; + const out = Array3D.zeros(outputShape); + const x = Array3D.randUniform(inputShape, -1, 1); + const wShape = conv_util.computeWeightsShape4D(1, outputDepth, fieldSize); + const W = Array4D.randUniform(wShape, -1, 1); + const b = Array1D.randUniform([outputDepth], -1, 1); + const inputs = [x, W, b]; + const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out); const start = performance.now(); for (let i = 0; i < OP_RUNS; i++) { - conv_gpu.convolve( - gpgpu, program, inputTexture, weightsTexture, biasesTexture, - outputTexture, outputTexShapeRC); + gpgpu_math.runProgram(binary, inputs, out); } - - gpgpu.downloadMatrixFromTexture( - outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]); - const end = performance.now(); - - const avgTime = (end - start) / OP_RUNS; - - gpgpu.deleteMatrixTexture(inputTexture); - gpgpu.deleteMatrixTexture(weightsTexture); - gpgpu.deleteMatrixTexture(biasesTexture); - 
gpgpu.deleteMatrixTexture(outputTexture); - gpgpu.deleteProgram(program); + out.getValues(); + const avgTime = (performance.now() - start) / OP_RUNS; + + x.dispose(); + W.dispose(); + b.dispose(); + out.dispose(); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); return avgTime; diff --git a/demos/benchmarks/logsumexp_gpu_benchmark.ts b/demos/benchmarks/logsumexp_gpu_benchmark.ts index 007a6228c1..6afc05f4d0 100644 --- a/demos/benchmarks/logsumexp_gpu_benchmark.ts +++ b/demos/benchmarks/logsumexp_gpu_benchmark.ts @@ -35,7 +35,7 @@ export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { for (let i = 0; i < OP_RUNS; i++) { gpgpu_math.runProgram(binary, [a], out); } - + out.getValues(); const avgTime = (performance.now() - start) / OP_RUNS; a.dispose(); out.dispose(); diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index 8818f148f1..a311acf435 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -27,7 +27,7 @@ import * as avg_pool_gpu from './webgl/avg_pool_gpu'; import * as batchnorm_gpu from './webgl/batchnorm_gpu'; import * as concat3d_gpu from './webgl/concat3d_gpu'; import * as conv_backprop_gpu from './webgl/conv_backprop_gpu'; -import * as conv_gpu from './webgl/conv_gpu'; +import {Conv2DProgram} from './webgl/conv_gpu'; import * as copy_gpu from './webgl/copy_gpu'; import {GPGPUContext} from './webgl/gpgpu_context'; import {BinaryOpProgram} from './webgl/binaryop_gpu'; @@ -57,7 +57,6 @@ const CONCAT_PROG = 'concat'; const RESHAPE_PROG = 'reshape'; // Convolution. 
-const CONV2D_PROG = 'conv'; const CONV2D_TRANSPOSE_PROG = 'conv_transpose'; const CONV2D_DERW_PROG = 'conv_derw'; const CONV2D_DERB_PROG = 'conv_derb'; @@ -477,70 +476,14 @@ export class NDArrayMathGPU extends NDArrayMath { } protected conv2dInternal( - x: Array3D, weights: Array4D, biases: Array1D|null, stride: number, + x: Array3D, weights: Array4D, bias: Array1D|null, stride: number, zeroPad: number): Array3D { const fieldSize = weights.shape[0]; - const inputDepth = weights.shape[2]; const outputDepth = weights.shape[3]; - const progKey = [ - CONV2D_PROG, x.shape, outputDepth, fieldSize, stride, biases != null - ].join('_'); - const program = this.getAndSaveProgram(progKey, () => { - return conv_gpu.getFragmentShaderSource( - x.shape, outputDepth, fieldSize, stride, zeroPad, biases != null); - }); - - const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - const wTexShape = - conv_util.computeWeightsTexShape(inputDepth, outputDepth, fieldSize); - const biasTexShape = conv_util.computeBiasesTexShape(outputDepth); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. 
- const actualXTexShape = x.getTextureShapeRC(xTexShape); - let cleanupX = false; - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - let cleanupW = false; - const actualWTexShape = weights.getTextureShapeRC(wTexShape); - if (!util.arraysEqual(actualWTexShape, wTexShape)) { - weights = this.reshapeTexture(weights, wTexShape); - cleanupW = true; - } - - let cleanupB = false; - if (biases != null) { - const actualBTexShape = biases.getTextureShapeRC(biasTexShape); - if (!util.arraysEqual(actualBTexShape, biasTexShape)) { - biases = this.reshapeTexture(biases, biasTexShape); - cleanupB = true; - } - } - - const resultShape = conv_util.computeOutputShape3D( - x.shape, fieldSize, outputDepth, stride, zeroPad); - const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - conv_gpu.convolve( - this.gpgpu, program, x.getTexture(), weights.getTexture(), - biases != null ? biases.getTexture() : null, resultTex, resultTexShape); - - if (cleanupX) { - x.dispose(); - } - if (cleanupW) { - weights.dispose(); - } - if (cleanupB && biases != null) { - biases.dispose(); - } - - return NDArray.make( - resultShape, {texture: resultTex, textureShapeRC: resultTexShape}); + const program = new Conv2DProgram( + x.shape, fieldSize, outputDepth, stride, zeroPad, bias != null); + const inputs = bias != null ? 
[x, weights, bias] : [x, weights]; + return this.compileAndRun(program, inputs); } protected conv2dBackPropInternal( diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts index cf63e1a668..2196f9347d 100644 --- a/src/math/webgl/addscaledmat_gpu_test.ts +++ b/src/math/webgl/addscaledmat_gpu_test.ts @@ -18,7 +18,6 @@ import {AddScaledMatProgram} from './addscaledmat_gpu'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray'; -import * as util from '../../util'; import {TextureManager} from './texture_manager'; function cpuAddScaledMatrices( @@ -85,9 +84,8 @@ export function uploadAddScaledMatDownload( const textureManager = new TextureManager(gpgpu); initializeGPU(gpgpu, textureManager); - const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape); - const res = NDArray.zeros(outShape); const program = new AddScaledMatProgram(a.shape, b.shape); + const res = NDArray.zeros(program.outputShape); const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b, c1, c2], res); gpgpu_math.runProgram(binary, [a, b, c1, c2], res); diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index 5d7b222194..e05bc1b048 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -31,12 +31,12 @@ export class Conv2DProgram implements GPGPUProgram { this.userCode = ` void main() { - vec3 output = getOutputCoords(); - float yR = output.x; - float yC = output.y; - float d2 = output.z; + vec3 coords = getOutputCoords(); + float yR = coords.x; + float yC = coords.y; + float d2 = coords.z; - vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) - + vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) - vec2(${pad}.0, ${pad}.0); float xRCorner = xRCCorner.x; float xCCorner = xRCCorner.y; @@ -60,9 +60,7 @@ export class Conv2DProgram implements GPGPUProgram { } } } - if (${hasBias}) { - 
dotProd += getBias(d2); - } + ${hasBias ? 'dotProd += getBias(d2);' : ''} setOutput(dotProd); } `; diff --git a/src/math/webgl/conv_gpu_test.ts b/src/math/webgl/conv_gpu_test.ts index ac41c6a4ea..3d5a4c4542 100644 --- a/src/math/webgl/conv_gpu_test.ts +++ b/src/math/webgl/conv_gpu_test.ts @@ -16,78 +16,49 @@ limitations under the License. import * as test_util from '../../test_util'; import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array1D, Array3D, Array4D, NDArray} from '../ndarray'; -import * as conv_gpu from './conv_gpu'; +import {Conv2DProgram} from './conv_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {NDArray, Array1D, Array3D, Array4D, initializeGPU} from '../ndarray'; +import {TextureManager} from './texture_manager'; describe('conv_gpu', () => { function uploadConvolveDownload( - x: Float32Array, aShapeRowColDepth: [number, number, number], - weights: Float32Array, biases: Float32Array|null, resultDepth: number, + xVals: Float32Array, xShapeRCD: [number, number, number], + weights: Float32Array, biasVals: Float32Array|null, resultDepth: number, fieldSize: number, stride: number, zeroPad?: number): Float32Array { zeroPad = zeroPad != null ? 
zeroPad : - conv_util.computeDefaultPad(aShapeRowColDepth, fieldSize, stride); + conv_util.computeDefaultPad(xShapeRCD, fieldSize, stride); - const xTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(aShapeRowColDepth); + const x = Array3D.new(xShapeRCD, xVals); + const wShape = + conv_util.computeWeightsShape4D(xShapeRCD[2], resultDepth, fieldSize); + const W = Array4D.new(wShape, weights); - const resultShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - aShapeRowColDepth, fieldSize, resultDepth, stride, zeroPad); - - const weightsTexShapeRC: [number, number] = - conv_util.computeWeightsTexShape( - aShapeRowColDepth[2], resultDepth, fieldSize); - - const biasesTexShapeRC: [number, number] = [1, resultDepth]; - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); + const b = biasVals != null ? Array1D.new(biasVals) : null; const gpgpu = new GPGPUContext(); gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = conv_gpu.getFragmentShaderSource( - aShapeRowColDepth, resultDepth, fieldSize, stride, zeroPad, - biases != null); - const program = gpgpu.createProgram(shaderSource); - - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - const weightsTex = - gpgpu.createMatrixTexture(weightsTexShapeRC[0], weightsTexShapeRC[1]); - const biasesTex = biases != null ? 
- gpgpu.createMatrixTexture(biasesTexShapeRC[0], biasesTexShapeRC[1]) : - null; - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], x); - gpgpu.uploadMatrixToTexture( - weightsTex, weightsTexShapeRC[0], weightsTexShapeRC[1], weights); - - if (biases != null) { - gpgpu.uploadMatrixToTexture( - biasesTex!, biasesTexShapeRC[0], biasesTexShapeRC[1], biases); - } - - conv_gpu.convolve( - gpgpu, program, xTex, weightsTex, biasesTex, resultTex, - resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - if (biasesTex != null) { - gpgpu.deleteMatrixTexture(biasesTex); - } - gpgpu.deleteMatrixTexture(weightsTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteProgram(program); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const program = new Conv2DProgram( + xShapeRCD, fieldSize, resultDepth, stride, zeroPad, + biasVals != null); + const res = NDArray.zeros(program.outputShape); + const inputs = biasVals != null ? 
[x, W, b] : [x, W]; + const binary = + gpgpu_math.compileProgram(gpgpu, program, inputs, res); + gpgpu_math.runProgram(binary, inputs, res); + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - return result; + return resValues; } function compareToCPU( @@ -251,7 +222,7 @@ describe('conv_gpu', () => { expect(result[5]).toBeCloseTo(a[2] * weights[2] + a[3] * weights[5]); }); - it('2x2x1 in, 1d out, 2x2 filter, 1 stride', () => { + it('2x2x1 in, 1d out, 2x2 filter, s=2, bias=0, p=1', () => { const x = new Float32Array([1, 2, 3, 4]); const w = new Float32Array([3, 1, 5, 0]); const bias = new Float32Array([0]); @@ -263,7 +234,7 @@ describe('conv_gpu', () => { expect(result[3]).toBe(12); }); - it('2x2x1 in, 1d out, 2x2 filter, 1 stride', () => { + it('2x2x1 in, 1d out, 2x2 filter, 1 stride, bias=-1', () => { const x = new Float32Array([1, 2, 3, 4]); const w = new Float32Array([3, 1, 5, 0]); const bias = new Float32Array([-1]); @@ -272,7 +243,7 @@ describe('conv_gpu', () => { expect(result[0]).toBe(19); }); - it('2x2x1 in, 1d out, 2x2 filter, 1 stride, null bias', () => { + it('2x2x1 in, 1d out, 2x2 filter, 1 stride, no bias', () => { const x = new Float32Array([1, 2, 3, 4]); const w = new Float32Array([3, 1, 5, 0]); const bias: Float32Array|null = null; @@ -281,19 +252,7 @@ describe('conv_gpu', () => { expect(result[0]).toBe(20); }); - it('2x2x1 in, 1d out, 2x2 filter, 1 stride, zeropad = 1', () => { - const x = new Float32Array([1, 2, 3, 4]); - const w = new Float32Array([3, 1, 5, 0]); - const bias = new Float32Array([0]); - const result = uploadConvolveDownload(x, [2, 2, 1], w, bias, 1, 2, 2, 1); - expect(result.length).toEqual(4); - expect(result[0]).toBe(0); - expect(result[1]).toBe(10); - expect(result[2]).toBe(3); - expect(result[3]).toBe(12); - }); - - it('5x5x3 in, 2d out, 3x3 filter, 2 stride', () => { + it('5x5x3 in, 2d out, 3x3 filter, s=2, p=1', () => { /* weights: input: [ 1, -1, 
[1, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts index b5b2fe9d5a..d6b069eafa 100644 --- a/src/math/webgl/gpgpu_math.ts +++ b/src/math/webgl/gpgpu_math.ts @@ -41,12 +41,12 @@ export function compileProgram( gpgpu: GPGPUContext, program: GPGPUProgram, inputs: T[], output: K): GPGPUBinary { const userCode = program.userCode; - const inputInfos = program.variableNames.map((x, i) => { + const inputInfos = inputs.map((input, i) => { const shapeInfo = { - logicalShape: inputs[i].shape, - texShape: inputs[i].getTextureShapeRC() + logicalShape: input.shape, + texShape: input.getTextureShapeRC() }; - return {name: x, shapeInfo}; + return {name: program.variableNames[i], shapeInfo}; }); const inShapeInfos = inputInfos.map(x => x.shapeInfo); const outShapeInfo = { diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts index 109efa6c3c..19d91e2ea3 100644 --- a/src/math/webgl/shader_compiler.ts +++ b/src/math/webgl/shader_compiler.ts @@ -54,7 +54,7 @@ function getInputSamplingSnippet( res += getSamplerScalar(inInfo.name); break; case 1: - res += getSampler1D(inInfo.name, texShape); + res += getSampler1D(inInfo.name, texShape, shape[0]); break; case 2: res += getSampler2D(inInfo.name, shape as [number, number], texShape); @@ -155,14 +155,6 @@ const SHADER_PREFIX = ` return texture2D(texture, uv).r; } - float sampleOrZeroPad(sampler2D texture, vec2 uv) { - bool lessThanZero = any(lessThan(uv, vec2(0, 0))); - bool greaterThanOne = any(greaterThan(uv, vec2(1, 1))); - bool outside = lessThanZero || greaterThanOne; - float value = sample(texture, uv); - return mix(value, 0.0, float(outside)); - } - void setOutput(float val) { gl_FragColor = vec4(val, 0, 0, 0); } @@ -271,7 +263,7 @@ function getSamplerScalar(texName: string): string { } function getSampler1D( - texName: string, texShape: [number, number]): string { + texName: string, texShape: [number, number], size: number): string { 
const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1); const tR = texShape[0]; const tC = texShape[1]; @@ -280,10 +272,6 @@ function getSampler1D( float ${funcName}(float index) { return sample(${texName}, halfCR); } - - float ${funcName}OrZeroPad(float index) { - return sampleOrZeroPad(${texName}, halfCR); - } `; } if (texShape[1] === 1) { @@ -292,11 +280,6 @@ function getSampler1D( vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float index) { - vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); - return sampleOrZeroPad(${texName}, uv); - } `; } if (texShape[0] === 1) { @@ -305,11 +288,6 @@ function getSampler1D( vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float index) { - vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); - return sampleOrZeroPad(${texName}, uv); - } `; } return ` @@ -317,11 +295,6 @@ function getSampler1D( vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float index) { - vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index); - return sampleOrZeroPad(${texName}, uv); - } `; } @@ -333,6 +306,7 @@ function getSampler3D( const tC = texShape[1]; const stride0 = shape[1] * shape[2]; const stride1 = shape[2]; + const [numRows, numCols, numDepths] = shape; return ` float ${funcName}(float row, float col, float depth) { vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, @@ -341,9 +315,12 @@ function getSampler3D( } float ${funcName}OrZeroPad(float row, float col, float depth) { - vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, - col, depth); - return sampleOrZeroPad(${texName}, uv); + vec3 coords = vec3(row, col, depth); + bool lessThanZero = any(lessThan(coords, vec3(0.0, 0.0, 0.0))); + bool greaterThanSize = any(greaterThan(coords, + vec3(${numRows}.0 - 0.5, ${numCols}.0 - 0.5, ${numDepths}.0 - 0.5))); + bool outside = 
lessThanZero || greaterThanSize; + return mix(${funcName}(row, col, depth), 0.0, float(outside)); } `; } @@ -364,13 +341,6 @@ function getSampler4D( ${stride2}.0, row, col, depth, depth2); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float row, float col, float depth, - float depth2) { - vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, - ${stride2}.0, row, col, depth, depth2); - return sampleOrZeroPad(${texName}, uv); - } `; } @@ -386,11 +356,6 @@ function getSampler2D( vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float row, float col) { - vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); - return sampleOrZeroPad(${texName}, uv); - } `; } return ` @@ -398,11 +363,6 @@ function getSampler2D( vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float row, float col) { - vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col); - return sampleOrZeroPad(${texName}, uv); - } `; } From 85c4bcef3eefbd0f9f6a3ea9cceea208f4757d2b Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Thu, 17 Aug 2017 01:10:45 -0400 Subject: [PATCH 03/10] migrate rest of conv shaders to logical --- .../conv_transpose_gpu_benchmark.ts | 67 ++-- src/math/math_gpu.ts | 170 +-------- src/math/webgl/conv_backprop_gpu.ts | 347 +++++++----------- .../webgl/conv_backprop_gpu_derbias_test.ts | 38 +- .../conv_backprop_gpu_derweights_test.ts | 48 +-- .../webgl/conv_backprop_transpose_gpu_test.ts | 74 +--- src/math/webgl/conv_gpu.ts | 50 +-- src/math/webgl/conv_gpu_getbiasvalue_test.ts | 85 ----- .../conv_gpu_getmatrixvalueorzeropad_test.ts | 139 ------- 9 files changed, 225 insertions(+), 793 deletions(-) delete mode 100644 src/math/webgl/conv_gpu_getbiasvalue_test.ts delete mode 100644 src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts diff --git a/demos/benchmarks/conv_transpose_gpu_benchmark.ts 
b/demos/benchmarks/conv_transpose_gpu_benchmark.ts index a68cb068ba..86074a08fc 100644 --- a/demos/benchmarks/conv_transpose_gpu_benchmark.ts +++ b/demos/benchmarks/conv_transpose_gpu_benchmark.ts @@ -14,71 +14,48 @@ limitations under the License. ==============================================================================*/ import * as conv_util from '../../src/math/conv_util'; -import * as conv_backprop_gpu from '../../src/math/webgl/conv_backprop_gpu'; +import {Array3D, Array4D, initializeGPU} from '../../src/math/ndarray'; +import {Conv2DTransposeProgram} from '../../src/math/webgl/conv_backprop_gpu'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; -import * as test_util from '../../src/test_util'; - +import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; +import {TextureManager} from '../../src/math/webgl/texture_manager'; import {BenchmarkTest} from './benchmark'; const OP_RUNS = 100; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { - const xShapeRCD: [number, number, number] = [size, size, 1]; + const origInputDepth = 1; const origOutputDepth = 2; + const xShape: [number, number, number] = [size, size, origOutputDepth]; const fieldSize = 11; const origStride = 1; const origPad = 1; const gpgpu = new GPGPUContext(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); gpgpu.enableAutomaticDebugValidation(true); - const origInputDepth = xShapeRCD[2]; - const src = conv_backprop_gpu.getFragmentShaderConvTransposeSource( - xShapeRCD, fieldSize, origInputDepth, origStride, origPad, false); - const program = gpgpu.createProgram(src); - // Upload x. - const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD); - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - const xData = - test_util.randomArrayInRange(xTexShapeRC[0] * xTexShapeRC[1], -1, 1); - gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], xData); - // Upload weights. 
- const wTexShapeRC = conv_util.computeWeightsTexShape( + const program = new Conv2DTransposeProgram( + xShape, fieldSize, origInputDepth, origStride, origPad, false); + const outputShape = program.outputShape as [number, number, number]; + const out = Array3D.zeros(outputShape); + const x = Array3D.randUniform(xShape, -1, 1); + const wShape = conv_util.computeWeightsShape4D( origInputDepth, origOutputDepth, fieldSize); - const wData = - test_util.randomArrayInRange(wTexShapeRC[0] * wTexShapeRC[1], -1, 1); - const wTex = gpgpu.createMatrixTexture(wTexShapeRC[0], wTexShapeRC[1]); - gpgpu.uploadMatrixToTexture(wTex, wTexShapeRC[0], wTexShapeRC[1], wData); - - // Figure out the output shape by dilating the input. - const dilatedRC = - conv_util.computeDilatedRC([xShapeRCD[0], xShapeRCD[1]], origStride); - const pad = fieldSize - 1 - origPad; - const resultShapeRCD = conv_util.computeOutputShape3D( - [dilatedRC[0], dilatedRC[1], origOutputDepth], fieldSize, origInputDepth, - 1, pad); - - const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD); - const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]); - + const W = Array4D.randUniform(wShape, -1, 1); + const inputs = [x, W]; + const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out); const start = performance.now(); for (let i = 0; i < OP_RUNS; i++) { - conv_backprop_gpu.convTranspose( - gpgpu, program, xTex, wTex, null, resultTex, resultTexRC); + gpgpu_math.runProgram(binary, inputs, out); } + out.getValues(); + const avgTime = (performance.now() - start) / OP_RUNS; - gpgpu.downloadMatrixFromTexture(resultTex, resultTexRC[0], resultTexRC[1]); - - const end = performance.now(); - - const avgTime = (end - start) / OP_RUNS; - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteMatrixTexture(wTex); - gpgpu.deleteProgram(program); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - return avgTime; }; diff 
--git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index a311acf435..808bf1954a 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -25,14 +25,15 @@ import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu'; import {ArgMinMaxProgram} from './webgl/argminmax_gpu'; import * as avg_pool_gpu from './webgl/avg_pool_gpu'; import * as batchnorm_gpu from './webgl/batchnorm_gpu'; +import {BinaryOpProgram} from './webgl/binaryop_gpu'; import * as concat3d_gpu from './webgl/concat3d_gpu'; -import * as conv_backprop_gpu from './webgl/conv_backprop_gpu'; +// tslint:disable-next-line:max-line-length +import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu'; import {Conv2DProgram} from './webgl/conv_gpu'; import * as copy_gpu from './webgl/copy_gpu'; import {GPGPUContext} from './webgl/gpgpu_context'; -import {BinaryOpProgram} from './webgl/binaryop_gpu'; -import {GPGPUProgram, GPGPUBinary} from './webgl/gpgpu_math'; import * as gpgpu_math from './webgl/gpgpu_math'; +import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math'; import * as gpgpu_util from './webgl/gpgpu_util'; import {LogSumExpProgram} from './webgl/logsumexp_gpu'; import * as max_pool_backprop_gpu from './webgl/max_pool_backprop_gpu'; @@ -45,8 +46,8 @@ import {ReduceSumProgram} from './webgl/reducesum_gpu'; import * as reshape_gpu from './webgl/reshape_gpu'; import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu'; import {TextureManager} from './webgl/texture_manager'; +import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu'; import * as webgl_util from './webgl/webgl_util'; -import {UnaryOpProgram, UnaryOp} from './webgl/unaryop_gpu'; const BATCHNORM_PROG = 'batchnorm'; @@ -57,9 +58,6 @@ const CONCAT_PROG = 'concat'; const RESHAPE_PROG = 'reshape'; // Convolution. 
-const CONV2D_TRANSPOSE_PROG = 'conv_transpose'; -const CONV2D_DERW_PROG = 'conv_derw'; -const CONV2D_DERB_PROG = 'conv_derb'; const MAX_POOL_PROG = 'maxpool'; const MAX_POOL_POSITIONS_PROG = 'maxpool_posn'; const MAX_POOL_BACKPROP_PROG = 'maxpool_backprop'; @@ -281,8 +279,8 @@ export class NDArrayMathGPU extends NDArrayMath { protected matMulInternal( a: Array2D, b: Array2D, aOrientation: MatrixOrientation, bOrientation: MatrixOrientation): Array2D { - const program = new MatMulProgram(a.shape, b.shape, aOrientation, - bOrientation); + const program = + new MatMulProgram(a.shape, b.shape, aOrientation, bOrientation); return this.compileAndRun(program, [a, b]); } @@ -538,164 +536,28 @@ export class NDArrayMathGPU extends NDArrayMath { } protected conv2dTransposeInternal( - x: Array3D, weights: Array4D, biases: Array1D|null, origStride: number, + x: Array3D, weights: Array4D, bias: Array1D|null, origStride: number, origPad: number): Array3D { const origInputDepth = weights.shape[2]; - const origOutputDepth = weights.shape[3]; const fieldSize = weights.shape[0]; - - const progKey = [ - CONV2D_TRANSPOSE_PROG, x.shape, fieldSize, origInputDepth, origStride, - origPad, biases != null - ].join('_'); - const program = this.getAndSaveProgram(progKey, () => { - return conv_backprop_gpu.getFragmentShaderConvTransposeSource( - x.shape, fieldSize, origInputDepth, origStride, origPad, - biases != null); - }); - - const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - const wTexShape = conv_util.computeWeightsTexShape( - origInputDepth, origOutputDepth, fieldSize); - const biasTexShape = conv_util.computeBiasesTexShape(origInputDepth); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. 
- const actualXTexShape = x.getTextureShapeRC(xTexShape); - let cleanupX = false; - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - let cleanupW = false; - const actualWTexShape = weights.getTextureShapeRC(wTexShape); - if (!util.arraysEqual(actualWTexShape, wTexShape)) { - weights = this.reshapeTexture(weights, wTexShape); - cleanupW = true; - } - - let cleanupB = false; - if (biases != null) { - const actualBiasTexShape = biases.getTextureShapeRC(biasTexShape); - if (!util.arraysEqual(actualBiasTexShape, biasTexShape)) { - biases = this.reshapeTexture(biases, biasTexShape); - cleanupB = true; - } - } - - // Figure out the output shape by dilating the input. - const dilatedRC = - conv_util.computeDilatedRC([x.shape[0], x.shape[1]], origStride); - const pad = fieldSize - 1 - origPad; - const resultShape = conv_util.computeOutputShape3D( - [dilatedRC[0], dilatedRC[1], origOutputDepth], fieldSize, - origInputDepth, 1, pad); - const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - conv_backprop_gpu.convTranspose( - this.gpgpu, program, x.getTexture(), weights.getTexture(), - biases != null ? biases.getTexture() : null, resultTex, resultTexShape); - - if (cleanupX) { - x.dispose(); - } - if (cleanupW) { - weights.dispose(); - } - if (cleanupB) { - biases!.dispose(); - } - - return NDArray.make( - resultShape, {texture: resultTex, textureShapeRC: resultTexShape}); + const program = new Conv2DTransposeProgram( + x.shape, fieldSize, origInputDepth, origStride, origPad, bias != null); + const inputs = bias != null ? 
[x, weights, bias] : [x, weights]; + return this.compileAndRun(program, inputs); } conv2dDerWeights( x: Array3D, dY: Array3D, fSize: number, stride: number, zeroPad: number): Array4D { - const inputDepth = x.shape[2]; const outputDepth = dY.shape[2]; - const progKey = [ - CONV2D_DERW_PROG, x.shape, fSize, outputDepth, stride, zeroPad - ].join('_'); - const program = this.getAndSaveProgram(progKey, () => { - return conv_backprop_gpu.getFragmentShaderDerWeightsSource( - x.shape, fSize, outputDepth, stride, zeroPad); - }); - - const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - const yShape = conv_util.computeOutputShape3D( + const program = new Conv2DDerWeightsProgram( x.shape, fSize, outputDepth, stride, zeroPad); - const yTexShape = conv_util.computeTexShapeFrom3D(yShape); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. - const actualXTexShape = x.getTextureShapeRC(xTexShape); - let cleanupX = false; - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - let cleanupY = false; - const actualYTexShape = dY.getTextureShapeRC(yTexShape); - if (!util.arraysEqual(actualYTexShape, yTexShape)) { - dY = this.reshapeTexture(dY, yTexShape); - cleanupY = true; - } - - const resultTexShape = - conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - conv_backprop_gpu.derWeights( - this.gpgpu, program, x.getTexture(), dY.getTexture(), resultTex, - resultTexShape); - - if (cleanupX) { - x.dispose(); - } - if (cleanupY) { - dY.dispose(); - } - - const weightsShape = - conv_util.computeWeightsShape4D(inputDepth, outputDepth, fSize); - return NDArray.make( - weightsShape, {texture: resultTex, textureShapeRC: resultTexShape}); + return this.compileAndRun(program, [x, dY]); } conv2dDerBias(dY: Array3D): Array1D { - const outputDepth = dY.shape[2]; - 
const progKey = [CONV2D_DERB_PROG, dY.shape].join('_'); - const program = this.getAndSaveProgram(progKey, () => { - return conv_backprop_gpu.getFragmentShaderDerBiasSource(dY.shape); - }); - const yTexShape = conv_util.computeTexShapeFrom3D(dY.shape); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. - let cleanupY = false; - const actualYTexShape = dY.getTextureShapeRC(yTexShape); - if (!util.arraysEqual(actualYTexShape, yTexShape)) { - dY = this.reshapeTexture(dY, yTexShape); - cleanupY = true; - } - - const resultTexShape = conv_util.computeBiasesTexShape(outputDepth); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - conv_backprop_gpu.derBias( - this.gpgpu, program, dY.getTexture(), resultTex, resultTexShape); - - if (cleanupY) { - dY.dispose(); - } - - return NDArray.make( - [outputDepth], {texture: resultTex, textureShapeRC: resultTexShape}); + const program = new Conv2DDerBiasProgram(dY.shape); + return this.compileAndRun(program, [dY]); } private pool( diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts index c707f98aba..b3df023d3c 100644 --- a/src/math/webgl/conv_backprop_gpu.ts +++ b/src/math/webgl/conv_backprop_gpu.ts @@ -14,244 +14,147 @@ limitations under the License. 
==============================================================================*/ import * as conv_util from '../conv_util'; - -import * as conv_gpu from './conv_gpu'; -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderDerWeightsSource( - xShapeRowColDepth: [number, number, number], fSize: number, - outputDepth: number, stride: number, zeroPad: number) { - const getMatrixValueOrZeroPad = - conv_gpu.getFragmentShaderGetMatrixValueOrZeroPadSource(); - const inputDepth = xShapeRowColDepth[2]; - - const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRowColDepth); - - const yShape = conv_util.computeOutputShape3D( - xShapeRowColDepth, fSize, outputDepth, stride, zeroPad); - const yNumRows = yShape[0]; - const yNumCols = yShape[1]; - const yTexShapeRC = conv_util.computeTexShapeFrom3D(yShape); - - const fSizeTimesInputDepth = fSize * inputDepth; - - const prologue = ` - precision highp float; - uniform sampler2D x; - uniform sampler2D dy; - `; - - return prologue + '\n' + getMatrixValueOrZeroPad + '\n' + - ` - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]}); - const vec2 dyShapeCR = vec2(${yTexShapeRC[1]}, ${yTexShapeRC[0]}); - - void main() { - vec2 wTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (wTexR, wTexC) to 4D (wR, wC, d1, d2). - float wR = floor(wTexCR.y / ${fSizeTimesInputDepth}.0); - float wTexRLeftover = wTexCR.y - wR * ${fSizeTimesInputDepth}.0; - float wC = floor(wTexRLeftover / ${inputDepth}.0); - float d1 = mod(wTexRLeftover, ${inputDepth}.0); - float d2 = wTexCR.x; - - // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2). - // ? = to be determined. : = across all values in that axis. 
- float dotProd = 0.0; - for (int yR = 0; yR < ${yNumRows}; yR++) { - float yTexR = float(yR); - float xR = wR + yTexR * ${stride}.0 - ${zeroPad}.0; - float xTexR = xR; - - for (int yC = 0; yC < ${yNumCols}; yC++) { - float yC_float = float(yC); - float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0; - - // Map from 3D (xR, xC, d1) to 2D (xTexR, xTexC). - // Map from 3D (yR, yC, d2) to 2D (yTexR, yTexC). - vec2 xyTexC = - vec2(xC, yC_float) * vec2(${inputDepth}.0, ${outputDepth}.0) + - vec2(d1, d2); - float xTexC = xyTexC.x; - float yTexC = xyTexC.y; - - // Read dy(yR, yC, d2). - vec2 dyUV = (vec2(yTexC, yTexR) + halfCR) / dyShapeCR; - float dyValue = texture2D(dy, dyUV).r; - - // Read x(xR, xC, d1) (potentially zero-padded). - float xValue = - getMatrixValueOrZeroPad(x, xShapeCR, vec2(xTexC, xTexR)); - - dotProd += (xValue * dyValue); +import {GPGPUProgram} from './gpgpu_math'; + +export class Conv2DDerWeightsProgram implements GPGPUProgram { + variableNames = ['x', 'dy']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor( + xShape: [number, number, number], fSize: number, outputDepth: number, + stride: number, zeroPad: number) { + const yShape = conv_util.computeOutputShape3D( + xShape, fSize, outputDepth, stride, zeroPad); + const yNumRows = yShape[0]; + const yNumCols = yShape[1]; + this.outputShape = + conv_util.computeWeightsShape4D(xShape[2], outputDepth, fSize); + this.params = [stride, zeroPad]; + this.userCode = ` + void main() { + vec4 coords = getOutputCoords(); + float wR = coords.x; + float wC = coords.y; + float d1 = coords.z; + float d2 = coords.w; + + // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2). + // ? = to be determined. : = across all values in that axis. 
+ float dotProd = 0.0; + for (int yR = 0; yR < ${yNumRows}; yR++) { + float yR_float = float(yR); + float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0; + + for (int yC = 0; yC < ${yNumCols}; yC++) { + float yC_float = float(yC); + float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0; + + float dyValue = getDy(yR_float, yC_float, d2); + float xValue = getXOrZeroPad(xR, xC, d1); + dotProd += (xValue * dyValue); + } } + setOutput(dotProd); } - gl_FragColor = vec4(dotProd, 0, 0, 0); - }`; -} - -export function getFragmentShaderConvTransposeSource( - xShapeRCD: [number, number, number], fSize: number, origInputDepth: number, - origStride: number, origPad: number, hasBias: boolean) { - const pad = fSize - 1 - origPad; - const [xRows, xCols, origOutputDepth] = xShapeRCD; - - const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD); - const wTexShapeRC = - conv_util.computeWeightsTexShape(origInputDepth, origOutputDepth, fSize); - - const getBiasValue = hasBias ? - conv_gpu.getFragmentShaderGetBiasValueSource(origInputDepth) : - ''; - const biasPrologue = hasBias ? 'uniform sampler2D biases;' : ''; - const biasOperation = hasBias ? 'dotProd += getBiasValue(biases, d2);' : ''; - - const prologue = ` - precision highp float; - uniform sampler2D x; - uniform sampler2D weights; - ${biasPrologue} `; + } +} - return prologue + '\n' + getBiasValue + '\n' + - ` - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]}); - const vec2 wShapeCR = vec2(${wTexShapeRC[1]}, ${wTexShapeRC[0]}); - - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2). - float yR = yTexCR.y; - float yC = floor(yTexCR.x / ${origInputDepth}.0); - float d2 = mod(yTexCR.x, ${origInputDepth}.0); - - vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; - - // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2). - // ? 
= to be determined. : = across all values in that axis. - float dotProd = 0.0; - for (int wR = 0; wR < ${fSize}; wR++) { - float wR_float = float(wR); - float xR = (xRCorner + wR_float) / ${origStride}.0; - // TODO(smilkov): Splice this with another version where you call - // getMatrixValueOrZeroPad(). Here and below. - if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) { - continue; - } - - float wRPerm = ${fSize}.0 - 1.0 - wR_float; - float xTexR = xR; - - for (int wC = 0; wC < ${fSize}; wC++) { - float wC_float = float(wC); - float xC = (xCCorner + wC_float) / ${origStride}.0; - if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) { +export class Conv2DTransposeProgram implements GPGPUProgram { + variableNames = ['x', 'W', 'bias']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor( + xShape: [number, number, number], fSize: number, origInputDepth: number, + origStride: number, origPad: number, hasBias: boolean) { + const [xRows, xCols, origOutputDepth] = xShape; + const biasSnippet = hasBias ? 'dotProd += getBias(d2);' : ''; + + // Figure out the output shape by dilating the input. + const xRowsDilated = (xRows - 1) * origStride + 1; + const xColsDilated = (xCols - 1) * origStride + 1; + const pad = fSize - 1 - origPad; + this.outputShape = conv_util.computeOutputShape3D( + [xRowsDilated, xColsDilated, origOutputDepth], fSize, origInputDepth, 1, + pad); + this.params = [pad, fSize, origStride, hasBias]; + + this.userCode = ` + void main() { + vec3 coords = getOutputCoords(); + float yR = coords.x; + float yC = coords.y; + float d2 = coords.z; + + vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0); + float xRCorner = xRCCorner.x; + float xCCorner = xRCCorner.y; + + // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2). + // ? = to be determined. : = across all values in that axis. 
+ float dotProd = 0.0; + for (int wR = 0; wR < ${fSize}; wR++) { + float wR_float = float(wR); + float xR = (xRCorner + wR_float) / ${origStride}.0; + // TODO(smilkov): Splice this with another version where you call + // getMatrixValueOrZeroPad(). Here and below. + if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) { continue; } - float wCPerm = ${fSize}.0 - 1.0 - wC_float; - float wTexR = wRPerm * ${fSize}.0 * ${origInputDepth}.0 + - wCPerm * ${origInputDepth}.0 + d2; - - for (int d1 = 0; d1 < ${origOutputDepth}; d1++) { - float d1_float = float(d1); - float xTexC = xC * ${origOutputDepth}.0 + d1_float; - float wTexC = d1_float; + float wRPerm = ${fSize}.0 - 1.0 - wR_float; - // Read x(xR, xC, d1). - vec2 xUV = (vec2(xTexC, xTexR) + halfCR) / xShapeCR; - float xValue = texture2D(x, xUV).r; + for (int wC = 0; wC < ${fSize}; wC++) { + float wC_float = float(wC); + float xC = (xCCorner + wC_float) / ${origStride}.0; + if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) { + continue; + } - // Read w(wRPerm, wCPerm, d2, d1). 
- vec2 wUV = (vec2(wTexC, wTexR) + halfCR) / wShapeCR; - float wValue = texture2D(weights, wUV).r; + float wCPerm = ${fSize}.0 - 1.0 - wC_float; - dotProd += xValue * wValue; + for (int d1 = 0; d1 < ${origOutputDepth}; d1++) { + float d1_float = float(d1); + float xValue = getX(xR, xC, d1_float); + float wValue = getW(wRPerm, wCPerm, d2, d1_float); + dotProd += xValue * wValue; + } } } + ${biasSnippet} + setOutput(dotProd); } - ${biasOperation} - gl_FragColor = vec4(dotProd, 0, 0, 0); - }`; + `; + } } -export function getFragmentShaderDerBiasSource( - dyShapeRCD: [number, number, number]) { - const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD); - const [yNumRows, yNumCols, outputDepth] = dyShapeRCD; - - return ` - precision highp float; - uniform sampler2D dy; - - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 dyShapeCR = vec2(${dyTexShapeRC[1]}, ${dyTexShapeRC[0]}); - - void main() { - vec2 biasTexCR = floor(gl_FragCoord.xy); - - // The bias texture RC shape is [1, d2]. - float d2 = biasTexCR.x; - - float derBias = 0.0; - for (int yR = 0; yR < ${yNumRows}; yR++) { - float yTexR = float(yR); - - for (int yC = 0; yC < ${yNumCols}; yC++) { - float yC_float = float(yC); - // Map from 3D (yR, yC, d2) to 2D (yTexR, yTexC). - float yTexC = yC_float * ${outputDepth}.0 + d2; - - // Read dy(yR, yC, d2). 
- vec2 dyUV = (vec2(yTexC, yTexR) + halfCR) / dyShapeCR; - float dyValue = texture2D(dy, dyUV).r; - - derBias += dyValue; +export class Conv2DDerBiasProgram implements GPGPUProgram { + variableNames = ['dy']; + params: Array<{}> = []; + outputShape: number[]; + userCode: string; + + constructor(yShape: [number, number, number]) { + const [yNumRows, yNumCols, outputDepth] = yShape; + this.outputShape = [outputDepth]; + this.userCode = ` + void main() { + float d2 = getOutputCoords(); + + float derBias = 0.0; + for (int iyR = 0; iyR < ${yNumRows}; iyR++) { + float yR = float(iyR); + for (int iyC = 0; iyC < ${yNumCols}; iyC++) { + float yC = float(iyC); + derBias += getDy(yR, yC, d2); + } } + setOutput(derBias); } - gl_FragColor = vec4(derBias, 0, 0, 0); - }`; -} - -export function derBias( - gpgpu: GPGPUContext, program: WebGLProgram, dyTex: WebGLTexture, - result: WebGLTexture, resultTexShapeRC: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultTexShapeRC[0], resultTexShapeRC[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(dyTex, 'dy', 0); - gpgpu.executeProgram(); -} - -export function derWeights( - gpgpu: GPGPUContext, program: WebGLProgram, xTex: WebGLTexture, - dyTex: WebGLTexture, result: WebGLTexture, - resultTexShapeRC: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultTexShapeRC[0], resultTexShapeRC[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(xTex, 'x', 0); - gpgpu.setInputMatrixTexture(dyTex, 'dy', 1); - gpgpu.executeProgram(); -} - -export function convTranspose( - gpgpu: GPGPUContext, program: WebGLProgram, xTex: WebGLTexture, - weightsTex: WebGLTexture, biasesTex: WebGLTexture|null, - resultTex: WebGLTexture, resultTexShapeRC: [number, number]) { - gpgpu.setOutputMatrixTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(xTex, 'x', 0); - gpgpu.setInputMatrixTexture(weightsTex, 'weights', 1); - if (biasesTex != 
null) { - gpgpu.setInputMatrixTexture(biasesTex, 'biases', 2); + `; } - gpgpu.executeProgram(); } diff --git a/src/math/webgl/conv_backprop_gpu_derbias_test.ts b/src/math/webgl/conv_backprop_gpu_derbias_test.ts index efd9c0cb92..75e6f1a690 100644 --- a/src/math/webgl/conv_backprop_gpu_derbias_test.ts +++ b/src/math/webgl/conv_backprop_gpu_derbias_test.ts @@ -14,40 +14,32 @@ limitations under the License. ==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array3D, NDArray} from '../ndarray'; +import {Array1D, Array3D, initializeGPU, NDArray} from '../ndarray'; -import * as conv_backprop_gpu from './conv_backprop_gpu'; +import {Conv2DDerBiasProgram} from './conv_backprop_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; describe('conv_gpu derBias', () => { function uploadDerBiasDownload(dy: Array3D): Float32Array { const gpgpu = new GPGPUContext(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); gpgpu.enableAutomaticDebugValidation(true); - const src = conv_backprop_gpu.getFragmentShaderDerBiasSource(dy.shape); - const program = gpgpu.createProgram(src); - - // Upload dy. 
- const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape); - const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues()); - - const outputDepth = dy.shape[2]; - const resultTexRC = conv_util.computeBiasesTexShape(outputDepth); - const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]); - conv_backprop_gpu.derBias(gpgpu, program, dyTex, resultTex, resultTexRC); - const db = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexRC[0], resultTexRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(dyTex); - gpgpu.deleteProgram(program); + const program = new Conv2DDerBiasProgram(dy.shape); + const out = Array1D.zeros([dy.shape[2]]); + const binary = gpgpu_math.compileProgram(gpgpu, program, [dy], out); + gpgpu_math.runProgram(binary, [dy], out); + const result = out.getValues(); + + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - return db; + return result; } function compareToCPU(dyShapeRCD: [number, number, number]) { diff --git a/src/math/webgl/conv_backprop_gpu_derweights_test.ts b/src/math/webgl/conv_backprop_gpu_derweights_test.ts index 03129d77ac..33e93cf9bb 100644 --- a/src/math/webgl/conv_backprop_gpu_derweights_test.ts +++ b/src/math/webgl/conv_backprop_gpu_derweights_test.ts @@ -16,10 +16,12 @@ limitations under the License. 
import * as test_util from '../../test_util'; import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array3D, NDArray} from '../ndarray'; +import {Array3D, Array4D, initializeGPU, NDArray} from '../ndarray'; -import * as conv_backprop_gpu from './conv_backprop_gpu'; +import {Conv2DDerWeightsProgram} from './conv_backprop_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; describe('conv_gpu derWeights', () => { @@ -27,40 +29,24 @@ describe('conv_gpu derWeights', () => { x: Array3D, dy: Array3D, fSize: number, stride: number, zeroPad: number): Float32Array { const gpgpu = new GPGPUContext(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); gpgpu.enableAutomaticDebugValidation(true); const outputDepth = dy.shape[2]; - const src = conv_backprop_gpu.getFragmentShaderDerWeightsSource( + const inDepth = x.shape[2]; + const program = new Conv2DDerWeightsProgram( x.shape, fSize, outputDepth, stride, zeroPad); - const program = gpgpu.createProgram(src); - const inputDepth = x.shape[2]; - - // Upload x. - const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape); - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues()); - - // Upload dy. 
- const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape); - const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues()); - - const resultTexRC = - conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize); - const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]); - conv_backprop_gpu.derWeights( - gpgpu, program, xTex, dyTex, resultTex, resultTexRC); - const dw = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexRC[0], resultTexRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteMatrixTexture(dyTex); - gpgpu.deleteProgram(program); + const out = Array4D.zeros( + conv_util.computeWeightsShape4D(inDepth, outputDepth, fSize)); + const binary = gpgpu_math.compileProgram(gpgpu, program, [x, dy], out); + gpgpu_math.runProgram(binary, [x, dy], out); + const result = out.getValues(); + + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - return dw; + return result; } function compareToCPU( diff --git a/src/math/webgl/conv_backprop_transpose_gpu_test.ts b/src/math/webgl/conv_backprop_transpose_gpu_test.ts index 9cf4ba3c9a..0227f80ab4 100644 --- a/src/math/webgl/conv_backprop_transpose_gpu_test.ts +++ b/src/math/webgl/conv_backprop_transpose_gpu_test.ts @@ -14,72 +14,36 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array1D, Array3D, Array4D, NDArray} from '../ndarray'; +import {Array1D, Array3D, Array4D, initializeGPU, NDArray} from '../ndarray'; -import * as conv_backprop_gpu from './conv_backprop_gpu'; +import {Conv2DTransposeProgram} from './conv_backprop_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; describe('conv_gpu transpose', () => { function uploadConvTransposeDownload( - x: Array3D, weights: Array4D, biases: Array1D|null, fSize: number, + x: Array3D, W: Array4D, bias: Array1D|null, fSize: number, origStride: number, origPad: number): Float32Array { const gpgpu = new GPGPUContext(); gpgpu.enableAutomaticDebugValidation(true); - const origInputDepth = weights.shape[2]; - const origOutputDepth = weights.shape[3]; - const src = conv_backprop_gpu.getFragmentShaderConvTransposeSource( - x.shape, fSize, origInputDepth, origStride, origPad, biases != null); - const program = gpgpu.createProgram(src); - - // Upload x. - const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape); - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues()); - - // Upload weights. - const wTexShapeRC = conv_util.computeWeightsTexShape( - origInputDepth, origOutputDepth, fSize); - const wTex = gpgpu.createMatrixTexture(wTexShapeRC[0], wTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - wTex, wTexShapeRC[0], wTexShapeRC[1], weights.getValues()); - - const biasTexShapeRC = conv_util.computeBiasesTexShape(origInputDepth); - const biasTex = biases != null ? 
- gpgpu.createMatrixTexture(biasTexShapeRC[0], biasTexShapeRC[1]) : - null; - if (biasTex != null) { - gpgpu.uploadMatrixToTexture( - biasTex, biasTexShapeRC[0], biasTexShapeRC[1], biases!.getValues()); - } - - // Figure out the output shape by dilating the input. - const xRowsDilated = (x.shape[0] - 1) * origStride + 1; - const xColsDilated = (x.shape[1] - 1) * origStride + 1; - const pad = fSize - 1 - origPad; - const resultShapeRCD = conv_util.computeOutputShape3D( - [xRowsDilated, xColsDilated, origOutputDepth], fSize, origInputDepth, 1, - pad); - const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD); - const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]); - conv_backprop_gpu.convTranspose( - gpgpu, program, xTex, wTex, biasTex, resultTex, resultTexRC); - const y = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexRC[0], resultTexRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteMatrixTexture(wTex); - if (biasTex != null) { - gpgpu.deleteMatrixTexture(biasTex); - } - gpgpu.deleteProgram(program); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + const origInputDepth = W.shape[2]; + const program = new Conv2DTransposeProgram( + x.shape, fSize, origInputDepth, origStride, origPad, bias != null); + const res = NDArray.zeros(program.outputShape); + const inputs = bias != null ? 
[x, W, bias] : [x, W]; + const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res); + gpgpu_math.runProgram(binary, inputs, res); + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - - return y; + return resValues; } function compareToCPU( diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index e05bc1b048..6345fc50fe 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -22,12 +22,13 @@ export class Conv2DProgram implements GPGPUProgram { outputShape: number[]; userCode: string; - constructor(xShape: [number, number, number], fieldSize: number, - outputDepth: number, stride: number, pad: number, hasBias: boolean) { - this.outputShape = conv_util.computeOutputShape3D(xShape, - fieldSize, outputDepth, stride, pad); + constructor( + xShape: [number, number, number], fieldSize: number, outputDepth: number, + stride: number, pad: number, hasBias: boolean) { + this.outputShape = conv_util.computeOutputShape3D( + xShape, fieldSize, outputDepth, stride, pad); const inputDepth = xShape[2]; - this.params = [inputDepth, fieldSize, stride, pad, hasBias]; + this.params = [fieldSize, stride, pad, hasBias]; this.userCode = ` void main() { @@ -60,42 +61,13 @@ export class Conv2DProgram implements GPGPUProgram { } } } - ${hasBias ? 'dotProd += getBias(d2);' : ''} + ${hasBias ? 
+ 'dotProd += getBias(d2);' : + '' + } + setOutput(dotProd); } `; } } - -export function getFragmentShaderPrologueSource(): string { - return ` - precision highp float; - uniform sampler2D x; - uniform sampler2D weights; - uniform sampler2D biases; - varying vec2 resultUV;`; -} - -export function getFragmentShaderGetMatrixValueOrZeroPadSource(): string { - return ` - float getMatrixValueOrZeroPad(in sampler2D matrix, vec2 matrixShapeCR, - vec2 requestedCR) { - vec2 uv = (requestedCR + vec2(0.5, 0.5)) / matrixShapeCR; - float value = texture2D(matrix, uv).r; - bool lessThanZero = any(lessThan(uv, vec2(0, 0))); - bool greaterThanOne = any(greaterThan(uv, vec2(1, 1))); - bool outside = lessThanZero || greaterThanOne; - return mix(value, 0.0, float(outside)); - }`; -} - -export function getFragmentShaderGetBiasValueSource(outputDepth: number): - string { - return ` - float getBiasValue(in sampler2D bias, float biasC) { - const vec2 biasShapeCR = vec2(${outputDepth}, 1); - vec2 biasCR = vec2(mod(biasC, ${outputDepth}.0), 0); - vec2 biasUV = (biasCR + vec2(0.5, 0.5)) / biasShapeCR; - return texture2D(bias, biasUV).r; - }`; -} diff --git a/src/math/webgl/conv_gpu_getbiasvalue_test.ts b/src/math/webgl/conv_gpu_getbiasvalue_test.ts deleted file mode 100644 index 62046c36c7..0000000000 --- a/src/math/webgl/conv_gpu_getbiasvalue_test.ts +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import * as conv_gpu from './conv_gpu'; -import {GPGPUContext} from './gpgpu_context'; - -describe('conv_gpu getBiasValue', () => { - function createGetBiasValueProgram( - gpgpu: GPGPUContext, outputDepth: number): WebGLProgram { - const prologue = conv_gpu.getFragmentShaderPrologueSource(); - const uniforms = 'uniform float biasC;'; - const getBiasValue = - conv_gpu.getFragmentShaderGetBiasValueSource(outputDepth); - const main = ` - void main() { - gl_FragColor = vec4(getBiasValue(biases, biasC), 0, 0, 0); - }`; - - const src = [prologue, uniforms, getBiasValue, main].join('\n'); - return gpgpu.createProgram(src); - } - - function uploadGetBiasValueDownload( - biases: Float32Array, biasCol: number, outputDepth: number): number { - const gpgpu = new GPGPUContext(); - const program = createGetBiasValueProgram(gpgpu, outputDepth); - const biasesTex = gpgpu.createMatrixTexture(1, outputDepth); - const resultTex = gpgpu.createMatrixTexture(1, 1); - gpgpu.uploadMatrixToTexture(biasesTex, 1, outputDepth, biases); - gpgpu.setOutputMatrixTexture(resultTex, 1, 1); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(biasesTex, 'biases', 2); - gpgpu.gl.uniform1f(gpgpu.getUniformLocation('biasC'), biasCol); - gpgpu.executeProgram(); - const result = gpgpu.downloadMatrixFromTexture(resultTex, 1, 1)[0]; - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(biasesTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; - } - - it('returns the only bias value if output depth is 1', () => { - const biases = new Float32Array([4]); - const result = uploadGetBiasValueDownload(biases, 0, 1); - expect(result).toEqual(4); - }); - - it('returns the requested column if < output depth', () => { - const biases = new Float32Array([1, 2, 3, 4, 5]); - const result = - uploadGetBiasValueDownload(biases, biases.length - 1, biases.length); - expect(result).toEqual(5); - }); - 
- it('wraps around to column 0 if column == output depth', () => { - const biases = new Float32Array([6, 0, 0]); - const result = uploadGetBiasValueDownload(biases, 3, 3); - expect(result).toEqual(6); - }); - - it('wraps around twice if column == 2*output depth', () => { - const biases = new Float32Array([7, 0, 0]); - const result = uploadGetBiasValueDownload(biases, 6, 3); - expect(result).toEqual(7); - }); - - it('selects value from column mod(biasC, outputDepth)', () => { - const biases = new Float32Array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]); - const result = uploadGetBiasValueDownload(biases, 2017, biases.length); - expect(result).toEqual(biases[2017 % biases.length]); - }); -}); diff --git a/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts b/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts deleted file mode 100644 index 48d8c3d687..0000000000 --- a/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import * as conv_gpu from './conv_gpu'; -import {GPGPUContext} from './gpgpu_context'; - -describe('conv_gpu getMatrixValueOrZeroPad', () => { - function createGetMatrixValueOrZeroPadProgram( - gpgpu: GPGPUContext, shapeRowCol: [number, number]): WebGLProgram { - const prologue = conv_gpu.getFragmentShaderPrologueSource(); - - const uniformColRow = 'uniform vec2 colRow;'; - - const getMatrixValueOrZeroPad = - conv_gpu.getFragmentShaderGetMatrixValueOrZeroPadSource(); - - const main = ` - void main() { - const vec2 aShapeCR = vec2(${shapeRowCol[1]}, ${shapeRowCol[0]}); - float value = getMatrixValueOrZeroPad(x, aShapeCR, colRow); - gl_FragColor = vec4(value, 0, 0, 0); - }`; - - const src = - [prologue, uniformColRow, getMatrixValueOrZeroPad, main].join('\n'); - return gpgpu.createProgram(src); - } - - function uploadGetMatrixValueOrZeroPadDownload( - matrix: Float32Array, shapeRowCol: [number, number], - paramRowCol: [number, number]): number { - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const program: WebGLProgram = - createGetMatrixValueOrZeroPadProgram(gpgpu, shapeRowCol); - - const matrixTexture = - gpgpu.createMatrixTexture(shapeRowCol[0], shapeRowCol[1]); - const resultTexture = gpgpu.createMatrixTexture(1, 1); - - gpgpu.uploadMatrixToTexture( - matrixTexture, shapeRowCol[0], shapeRowCol[1], matrix); - - gpgpu.setOutputMatrixTexture(resultTexture, 1, 1); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(matrixTexture, 'x', 0); - const loc = gpgpu.getUniformLocation('colRow'); - gpgpu.gl.uniform2f(loc, paramRowCol[1], paramRowCol[0]); - gpgpu.executeProgram(); - const result = gpgpu.downloadMatrixFromTexture(resultTexture, 1, 1); - gpgpu.deleteMatrixTexture(resultTexture); - gpgpu.deleteMatrixTexture(matrixTexture); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result[0]; - } - - it('returns only value of a 1x1 
matrix when row and column are 0', () => { - const a = new Float32Array([1.23]); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [1, 1], [0, 0]); - expect(result).toBeCloseTo(a[0]); - }); - - it('returns value of matrix cell at specified row and column', () => { - const a = new Float32Array(32 * 64); - a[5 + (30 * 64)] = Math.PI; - const result = uploadGetMatrixValueOrZeroPadDownload(a, [32, 64], [30, 5]); - expect(result).toBeCloseTo(Math.PI); - }); - - it('returns zero if sampling out-of-bounds left', () => { - const a = new Float32Array(4 * 4); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [4, 4], [0, -1]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds right', () => { - const a = new Float32Array(4 * 4); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [4, 4], [0, 15]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds top', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, 0]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds bottom', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, 0]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds upper-left', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, -1]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds upper-right', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, 36]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds lower-left', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = 
uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, -1]); - expect(result).toEqual(0); - }); - - it('returns zero if sampling out-of-bounds lower-right', () => { - const a = new Float32Array(19 * 35); - a.fill(1); - const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, 36]); - expect(result).toEqual(0); - }); -}); From d0007df908fdb0a4304fd5d03c45a3251c2b64d3 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Thu, 17 Aug 2017 09:24:35 -0400 Subject: [PATCH 04/10] replace zero pad with if --- .../conv_transpose_gpu_benchmark.ts | 4 +- demos/benchmarks/logsumexp_gpu_benchmark.ts | 7 +- src/math/ndarray_test.ts | 4 +- src/math/webgl/addscaledmat_gpu_test.ts | 3 +- src/math/webgl/conv_backprop_gpu.ts | 14 ++- src/math/webgl/conv_gpu.ts | 20 +-- src/math/webgl/gpgpu_math.ts | 21 ++-- src/math/webgl/shader_compiler.ts | 114 +++++++++++------- src/math/webgl/webgl_util.ts | 36 +++--- 9 files changed, 132 insertions(+), 91 deletions(-) diff --git a/demos/benchmarks/conv_transpose_gpu_benchmark.ts b/demos/benchmarks/conv_transpose_gpu_benchmark.ts index 86074a08fc..bd34e097cf 100644 --- a/demos/benchmarks/conv_transpose_gpu_benchmark.ts +++ b/demos/benchmarks/conv_transpose_gpu_benchmark.ts @@ -21,12 +21,12 @@ import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; import {TextureManager} from '../../src/math/webgl/texture_manager'; import {BenchmarkTest} from './benchmark'; -const OP_RUNS = 100; +const OP_RUNS = 40; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { const origInputDepth = 1; const origOutputDepth = 2; - const xShape: [number, number, number] = [size, size, origOutputDepth]; + const xShape: [number, number, number] = [size, size, 1]; const fieldSize = 11; const origStride = 1; const origPad = 1; diff --git a/demos/benchmarks/logsumexp_gpu_benchmark.ts b/demos/benchmarks/logsumexp_gpu_benchmark.ts index 6afc05f4d0..651d992c78 100644 --- a/demos/benchmarks/logsumexp_gpu_benchmark.ts +++ 
b/demos/benchmarks/logsumexp_gpu_benchmark.ts @@ -13,14 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +import {Array2D, initializeGPU, Scalar} from '../../src/math/ndarray'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; -import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu'; import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; -import {Scalar, Array2D, initializeGPU} from '../../src/math/ndarray'; +import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu'; import {TextureManager} from '../../src/math/webgl/texture_manager'; + import {BenchmarkTest} from './benchmark'; -const OP_RUNS = 100; +const OP_RUNS = 2; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { const gpgpu = new GPGPUContext(); diff --git a/src/math/ndarray_test.ts b/src/math/ndarray_test.ts index 143c51f063..48e966dd04 100644 --- a/src/math/ndarray_test.ts +++ b/src/math/ndarray_test.ts @@ -285,9 +285,9 @@ describe('NDArray', () => { expect(t.getTextureShapeRC()).toEqual([2, 4]); }); - it('preferred texture shape, Array4D is squareish', () => { + it('preferred texture shape, Array4D d1 and d2 strided along columns', () => { const t = Array4D.zeros([8, 2, 4, 4]); - expect(t.getTextureShapeRC()).toEqual([16, 16]); + expect(t.getTextureShapeRC()).toEqual([8, 2 * 4 * 4]); }); }); // Close describe. diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts index 2196f9347d..13f886e830 100644 --- a/src/math/webgl/addscaledmat_gpu_test.ts +++ b/src/math/webgl/addscaledmat_gpu_test.ts @@ -14,10 +14,11 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; +import {Array1D, Array2D, initializeGPU, NDArray, Scalar} from '../ndarray'; + import {AddScaledMatProgram} from './addscaledmat_gpu'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; -import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray'; import {TextureManager} from './texture_manager'; function cpuAddScaledMatrices( diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts index b3df023d3c..568ff131e0 100644 --- a/src/math/webgl/conv_backprop_gpu.ts +++ b/src/math/webgl/conv_backprop_gpu.ts @@ -29,6 +29,8 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram { xShape, fSize, outputDepth, stride, zeroPad); const yNumRows = yShape[0]; const yNumCols = yShape[1]; + const xRowsLimit = xShape[0] - 0.5; + const xColsLimit = xShape[1] - 0.5; this.outputShape = conv_util.computeWeightsShape4D(xShape[2], outputDepth, fSize); this.params = [stride, zeroPad]; @@ -46,13 +48,17 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram { for (int yR = 0; yR < ${yNumRows}; yR++) { float yR_float = float(yR); float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0; - + if (xR < 0.0 || xR > ${xRowsLimit}) { + continue; + } for (int yC = 0; yC < ${yNumCols}; yC++) { float yC_float = float(yC); float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0; - + if (xC < 0.0 || xC > ${xColsLimit}) { + continue; + } float dyValue = getDy(yR_float, yC_float, d2); - float xValue = getXOrZeroPad(xR, xC, d1); + float xValue = getX(xR, xC, d1); dotProd += (xValue * dyValue); } } @@ -100,8 +106,6 @@ export class Conv2DTransposeProgram implements GPGPUProgram { for (int wR = 0; wR < ${fSize}; wR++) { float wR_float = float(wR); float xR = (xRCorner + wR_float) / ${origStride}.0; - // TODO(smilkov): Splice this with another version where you call - // 
getMatrixValueOrZeroPad(). Here and below. if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) { continue; } diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index 6345fc50fe..acf92d10bd 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -29,7 +29,9 @@ export class Conv2DProgram implements GPGPUProgram { xShape, fieldSize, outputDepth, stride, pad); const inputDepth = xShape[2]; this.params = [fieldSize, stride, pad, hasBias]; - + const biasSnippet = hasBias ? 'dotProd += getBias(d2);' : ''; + const xRowsLimit = xShape[0] - 0.5; + const xColsLimit = xShape[1] - 0.5; this.userCode = ` void main() { vec3 coords = getOutputCoords(); @@ -48,24 +50,24 @@ export class Conv2DProgram implements GPGPUProgram { for (int wR = 0; wR < ${fieldSize}; wR++) { float wR_float = float(wR); float xR = xRCorner + wR_float; - + if (xR < 0.0 || xR > ${xRowsLimit}) { + continue; + } for (int wC = 0; wC < ${fieldSize}; wC++) { float wC_float = float(wC); float xC = xCCorner + wC_float; - + if (xC < 0.0 || xC > ${xColsLimit}) { + continue; + } for (int d1 = 0; d1 < ${inputDepth}; d1++) { float d1_float = float(d1); - float xValue = getXOrZeroPad(xR, xC, d1_float); + float xValue = getX(xR, xC, d1_float); float wValue = getW(wR_float, wC_float, d1_float, d2); dotProd += xValue * wValue; } } } - ${hasBias ? - 'dotProd += getBias(d2);' : - '' - } - + ${biasSnippet} setOutput(dotProd); } `; diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts index d6b069eafa..56ac04f08e 100644 --- a/src/math/webgl/gpgpu_math.ts +++ b/src/math/webgl/gpgpu_math.ts @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +import * as util from '../../util'; import {NDArray} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; import * as shader_compiler from './shader_compiler'; import {ShapeInfo} from './shader_compiler'; -import * as util from '../../util'; export interface GPGPUProgram { variableNames: string[]; @@ -59,17 +59,15 @@ export function compileProgram( return { program, source, - webGLProgram: gpgpu.createProgram(source), - gpgpu, - inShapeInfos, - outShapeInfo + webGLProgram: gpgpu.createProgram(source), gpgpu, inShapeInfos, outShapeInfo }; } function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) { if (shapeInfos.length !== inputs.length) { - throw Error(`Binary was compiled with ${shapeInfos.length} inputs, but ` + - `was executed with ${inputs.length} inputs`); + throw Error( + `Binary was compiled with ${shapeInfos.length} inputs, but ` + + `was executed with ${inputs.length} inputs`); } shapeInfos.forEach((s, i) => { @@ -79,11 +77,13 @@ function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) { const texShapeB = inputs[i].getTextureShapeRC(); if (!util.arraysEqual(shapeA, shapeB)) { - throw Error(`Binary was compiled with different shapes than ` + + throw Error( + `Binary was compiled with different shapes than ` + `the current args. Shapes ${shapeA} and ${shapeB} must match`); } if (!util.arraysEqual(texShapeA, texShapeB)) { - throw Error(`Binary was compiled with different texture shapes than the` + + throw Error( + `Binary was compiled with different texture shapes than the` + ` current args. 
Shape ${texShapeA} and ${texShapeB} must match`); } }); @@ -107,8 +107,7 @@ export function runProgram( } export function makeShaderKey( - program: GPGPUProgram, inputs: NDArray[], - output: NDArray): string { + program: GPGPUProgram, inputs: NDArray[], output: NDArray): string { const params = program.params; const keyStart = inputs.concat(output).map(x => x.shape + '_' + x.getTextureShapeRC()); diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts index 19d91e2ea3..a3af0f741a 100644 --- a/src/math/webgl/shader_compiler.ts +++ b/src/math/webgl/shader_compiler.ts @@ -16,8 +16,7 @@ limitations under the License. import * as util from '../../util'; export type ShapeInfo = { - logicalShape: number[], - texShape: [number, number]; + logicalShape: number[]; texShape: [number, number]; }; export type InputInfo = { @@ -25,8 +24,9 @@ export type InputInfo = { shapeInfo: ShapeInfo }; -export function makeShader(inputsInfo: InputInfo[], outputShape: ShapeInfo, - userCode: string, broadcast: boolean): string { +export function makeShader( + inputsInfo: InputInfo[], outputShape: ShapeInfo, userCode: string, + broadcast: boolean): string { const inputPrefixSnippet = inputsInfo.map(x => `uniform sampler2D ${x.name};`).join('\n'); const inputSamplingSnippet = @@ -54,7 +54,7 @@ function getInputSamplingSnippet( res += getSamplerScalar(inInfo.name); break; case 1: - res += getSampler1D(inInfo.name, texShape, shape[0]); + res += getSampler1D(inInfo.name, texShape); break; case 2: res += getSampler2D(inInfo.name, shape as [number, number], texShape); @@ -75,7 +75,8 @@ function getInputSamplingSnippet( // If input and output have matching logical shapes, add // getTexNameAtOutCoord() method that samples the input texture using the // output coordinates. 
- if (broadcast || util.arraysEqual( + if (broadcast || + util.arraysEqual( inInfo.shapeInfo.logicalShape, outShapeInfo.logicalShape)) { res += getSamplerAtOutputCoords(inInfo.name, texShape, outTexShape, broadcast); @@ -95,11 +96,11 @@ function getOutputSamplingSnippet( case 2: return getOutput2DCoords(outShape as [number, number], outTexShape); case 3: - return getOutput3DCoords(outShape as [number, number, number], - outTexShape); + return getOutput3DCoords( + outShape as [number, number, number], outTexShape); case 4: - return getOutput4DCoords(outShape as [number, number, number, number], - outTexShape); + return getOutput4DCoords( + outShape as [number, number, number, number], outTexShape); default: throw new Error( `${outShape.length}-D output sampling is not yet supported`); @@ -110,7 +111,7 @@ const SAMPLE_1D_SNIPPET = ` vec2 UVfrom1D(float texNumR, float texNumC, float index) { float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); + return vec2(texC, texR) / vec2(texNumC, texNumR); } `; @@ -120,7 +121,7 @@ vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row, float index = dot(vec2(row, col), vec2(numC, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); + return vec2(texC, texR) / vec2(texNumC, texNumR); } `; @@ -130,7 +131,7 @@ vec2 UVfrom3D(float texNumR, float texNumC, float stride0, float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); + return vec2(texC, texR) / vec2(texNumC, texNumR); } `; @@ -142,14 +143,13 @@ vec2 UVfrom4D(float texNumR, float texNumC, float stride0, vec4(stride0, stride1, stride2, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return (vec2(texC, texR) + halfCR) / 
vec2(texNumC, texNumR); + return vec2(texC, texR) / vec2(texNumC, texNumR); } `; const SHADER_PREFIX = ` precision highp float; varying vec2 resultUV; - const vec2 halfCR = vec2(0.5, 0.5); float sample(sampler2D texture, vec2 uv) { return texture2D(texture, uv).r; @@ -192,8 +192,8 @@ function getOutput1DCoords( `; } -function getOutput3DCoords(shape: [number, number, number], - texShape: [number, number]): string { +function getOutput3DCoords( + shape: [number, number, number], texShape: [number, number]): string { const stride0 = shape[1] * shape[2]; const stride1 = shape[2]; return ` @@ -209,8 +209,9 @@ function getOutput3DCoords(shape: [number, number, number], `; } -function getOutput4DCoords(shape: [number, number, number, number], - texShape: [number, number]): string { +function getOutput4DCoords( + shape: [number, number, number, number], + texShape: [number, number]): string { const stride2 = shape[3]; const stride1 = shape[2] * stride2; const stride0 = shape[1] * stride1; @@ -257,20 +258,19 @@ function getSamplerScalar(texName: string): string { const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1); return ` float ${funcName}() { - return sample(${texName}, halfCR); + return sample(${texName}, vec2(0.0, 0.0)); } `; } -function getSampler1D( - texName: string, texShape: [number, number], size: number): string { +function getSampler1D(texName: string, texShape: [number, number]): string { const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1); const tR = texShape[0]; const tC = texShape[1]; if (texShape[0] === 1 && texShape[1] === 1) { return ` float ${funcName}(float index) { - return sample(${texName}, halfCR); + return sample(${texName}, vec2(0.0, 0.0)); } `; } @@ -306,22 +306,22 @@ function getSampler3D( const tC = texShape[1]; const stride0 = shape[1] * shape[2]; const stride1 = shape[2]; - const [numRows, numCols, numDepths] = shape; + if (tC === stride0) { + return ` + float ${funcName}(float row, float col, 
float depth) { + float texR = row; + float texC = dot(vec2(col, depth), vec2(${stride1}, 1.0)); + vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0); + return sample(${texName}, uv); + } + `; + } return ` float ${funcName}(float row, float col, float depth) { vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row, col, depth); return sample(${texName}, uv); } - - float ${funcName}OrZeroPad(float row, float col, float depth) { - vec3 coords = vec3(row, col, depth); - bool lessThanZero = any(lessThan(coords, vec3(0.0, 0.0, 0.0))); - bool greaterThanSize = any(greaterThan(coords, - vec3(${numRows}.0 - 0.5, ${numCols}.0 - 0.5, ${numDepths}.0 - 0.5))); - bool outside = lessThanZero || greaterThanSize; - return mix(${funcName}(row, col, depth), 0.0, float(outside)); - } `; } @@ -335,6 +335,17 @@ function getSampler4D( const stride1 = shape[2] * stride2; const stride0 = shape[1] * stride1; + if (tC === stride0) { + return ` + float ${funcName}(float row, float col, float depth, float depth2) { + float texR = row; + float texC = dot(vec3(col, depth, depth2), + vec3(${stride1}.0, ${stride2}.0, 1.0)); + vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0); + return sample(${texName}, uv); + } + `; + } return ` float ${funcName}(float row, float col, float depth, float depth2) { vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, @@ -353,7 +364,25 @@ function getSampler2D( if (util.arraysEqual(shape, texShape)) { return ` float ${funcName}(float row, float col) { - vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); + vec2 uv = vec2(col, row) / vec2(${tC}.0, ${tR}.0); + return sample(${texName}, uv); + } + `; + } + if (tC === 1) { + return ` + float ${funcName}(float row, float col) { + float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0)); + vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0); + return sample(${texName}, uv); + } + `; + } + if (tR === 1) { + return ` + float ${funcName}(float row, float col) { + float index = dot(vec2(row, 
col), vec2(${shape[1]}.0, 1.0)); + vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5); return sample(${texName}, uv); } `; @@ -367,14 +396,14 @@ function getSampler2D( } function getSamplerFlat(texName: string, texShape: [number, number]): string { - const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1) + - 'Flat'; + const funcName = + 'get' + texName.charAt(0).toUpperCase() + texName.slice(1) + 'Flat'; const tNumR = texShape[0]; const tNumC = texShape[1]; if (tNumC === 1 && tNumR === 1) { return ` float ${funcName}(float index) { - return sample(${texName}, halfCR); + return sample(${texName}, vec2(0.0, 0.0)); } `; } @@ -398,16 +427,17 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { float ${funcName}(float index) { float texR = floor(index / ${tNumC}.0); float texC = mod(index, ${tNumC}.0); - vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0); + vec2 uv = vec2(texC, texR) / vec2(${tNumC}.0, ${tNumR}.0); return sample(${texName}, uv); } `; } -function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number], +function getSamplerAtOutputCoords( + texName: string, inTexShape: [number, number], outTexShape: [number, number], broadcast: boolean) { const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1) + - 'AtOutCoords'; + 'AtOutCoords'; if (util.arraysEqual(inTexShape, outTexShape)) { return ` float ${funcName}() { @@ -425,8 +455,8 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number], ${broadcastSnippet} float texR = floor(index / ${inTexShape[1]}.0); float texC = mod(index, ${inTexShape[1]}.0); - vec2 uv = (vec2(texC, texR) + halfCR) / - vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); + vec2 uv = vec2(texC, texR) / + vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); return sample(${texName}, uv); } `; diff --git a/src/math/webgl/webgl_util.ts b/src/math/webgl/webgl_util.ts index 2f2eac7d9c..39afc20ea7 100644 --- a/src/math/webgl/webgl_util.ts 
+++ b/src/math/webgl/webgl_util.ts @@ -67,10 +67,9 @@ export function isWebGL2Enabled() { if (gl != null) { WEBGL2_ENABLED = true; - const loseContextExtension = - getExtensionOrThrow( - gl as WebGLRenderingContext, 'WEBGL_lose_context') as - WebGLLoseContextExtension; + const loseContextExtension = getExtensionOrThrow( + gl as WebGLRenderingContext, + 'WEBGL_lose_context') as WebGLLoseContextExtension; loseContextExtension.loseContext(); } else { WEBGL2_ENABLED = false; @@ -86,9 +85,10 @@ export function createWebGLRenderingContextFromCanvas( if (isWebGL2Enabled()) { gl = canvas.getContext('webgl2', attributes) as WebGLRenderingContext; } else { - gl = (canvas.getContext('webgl', attributes) || - canvas.getContext('experimental-webgl', attributes)) as - WebGLRenderingContext; + gl = + (canvas.getContext('webgl', attributes) || + canvas.getContext( + 'experimental-webgl', attributes)) as WebGLRenderingContext; } if (gl == null) { @@ -379,10 +379,10 @@ function validateTextureUnit(gl: WebGLRenderingContext, textureUnit: number) { } export function getTextureShapeFromLogicalShape( - gl: WebGLRenderingContext, logicalShape: number[], + gl: WebGLRenderingContext, logShape: number[], preferredTexShape?: [number, number]): [number, number] { const maxTexSize = queryMaxTextureSize(gl); - const size = util.sizeFromShape(logicalShape); + const size = util.sizeFromShape(logShape); if (preferredTexShape != null) { const sizePreferred = util.sizeFromShape(preferredTexShape); util.assert( @@ -395,16 +395,20 @@ export function getTextureShapeFromLogicalShape( } } - if (logicalShape.length <= 1 && size <= maxTexSize) { + if (logShape.length <= 1 && size <= maxTexSize) { return [size, 1]; } else if ( - logicalShape.length === 2 && logicalShape[0] <= maxTexSize && - logicalShape[1] <= maxTexSize) { - return logicalShape as [number, number]; + logShape.length === 2 && logShape[0] <= maxTexSize && + logShape[1] <= maxTexSize) { + return logShape as [number, number]; } else if ( - 
logicalShape.length === 3 && logicalShape[0] <= maxTexSize && - logicalShape[1] * logicalShape[2] <= maxTexSize) { - return [logicalShape[0], logicalShape[1] * logicalShape[2]]; + logShape.length === 3 && logShape[0] <= maxTexSize && + logShape[1] * logShape[2] <= maxTexSize) { + return [logShape[0], logShape[1] * logShape[2]]; + } else if ( + logShape.length === 4 && logShape[0] <= maxTexSize && + logShape[1] * logShape[2] * logShape[3] <= maxTexSize) { + return [logShape[0], logShape[1] * logShape[2] * logShape[3]]; } else { return util.sizeToSquarishShape(size); } From ad3e6c24770fa0dfe9273c0e4266cf70fcf9afd9 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Thu, 17 Aug 2017 13:57:19 -0400 Subject: [PATCH 05/10] migrate pool ops --- demos/benchmarks/conv_gpu_benchmark.ts | 11 +- .../max_pool_backprop_gpu_benchmark.ts | 80 +++---- demos/benchmarks/max_pool_gpu_benchmark.ts | 109 +++------- src/math/conv_util.ts | 10 - src/math/math_gpu.ts | 199 ++---------------- src/math/webgl/argminmax_gpu.ts | 12 +- src/math/webgl/avg_pool_gpu.ts | 30 --- src/math/webgl/avg_pool_gpu_test.ts | 46 +--- src/math/webgl/conv_backprop_gpu.ts | 44 ++-- src/math/webgl/conv_gpu.ts | 24 ++- src/math/webgl/conv_gpu_test.ts | 9 +- src/math/webgl/max_pool_backprop_gpu.ts | 133 ++++++------ src/math/webgl/max_pool_backprop_gpu_test.ts | 84 +++----- src/math/webgl/max_pool_gpu.ts | 44 ---- src/math/webgl/max_pool_gpu_test.ts | 46 +--- src/math/webgl/max_pool_positions_gpu_test.ts | 54 ++--- src/math/webgl/min_pool_gpu.ts | 30 --- src/math/webgl/min_pool_gpu_test.ts | 47 +---- src/math/webgl/mulmat_gpu.ts | 18 +- src/math/webgl/mulmat_packed_gpu.ts | 6 +- src/math/webgl/pool_gpu.ts | 175 +++++++-------- src/math/webgl/pool_gpu_test_util.ts | 42 ++++ src/math/webgl/shader_compiler.ts | 27 +-- 23 files changed, 398 insertions(+), 882 deletions(-) delete mode 100644 src/math/webgl/avg_pool_gpu.ts delete mode 100644 src/math/webgl/max_pool_gpu.ts delete mode 100644 
src/math/webgl/min_pool_gpu.ts create mode 100644 src/math/webgl/pool_gpu_test_util.ts diff --git a/demos/benchmarks/conv_gpu_benchmark.ts b/demos/benchmarks/conv_gpu_benchmark.ts index 8d583bed2f..736563e768 100644 --- a/demos/benchmarks/conv_gpu_benchmark.ts +++ b/demos/benchmarks/conv_gpu_benchmark.ts @@ -14,25 +14,28 @@ limitations under the License. ==============================================================================*/ import * as conv_util from '../../src/math/conv_util'; +import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray'; import {Conv2DProgram} from '../../src/math/webgl/conv_gpu'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; -import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray'; import {TextureManager} from '../../src/math/webgl/texture_manager'; + import {BenchmarkTest} from './benchmark'; const OP_RUNS = 40; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { + const gpgpu = new GPGPUContext(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); + const inputDepth = 1; const inputShape: [number, number, number] = [size, size, inputDepth]; const outputDepth = 1; const fieldSize = 11; const stride = 1; const zeroPad = conv_util.computeDefaultPad(inputShape, fieldSize, stride); - const gpgpu = new GPGPUContext(); - const texManager = new TextureManager(gpgpu); - initializeGPU(gpgpu, texManager); + const program = new Conv2DProgram( inputShape, fieldSize, outputDepth, stride, zeroPad, true); const outputShape = program.outputShape as [number, number, number]; diff --git a/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts b/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts index 82076e06dd..96ef41ce27 100644 --- a/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts +++ b/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts @@ -14,69 +14,49 @@ limitations under the 
License. ==============================================================================*/ import * as conv_util from '../../src/math/conv_util'; +import {Array3D, initializeGPU, NDArray} from '../../src/math/ndarray'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; +import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; // tslint:disable-next-line:max-line-length -import * as max_pool_backprop_gpu from '../../src/math/webgl/max_pool_backprop_gpu'; -import * as test_util from '../../src/test_util'; -import * as util from '../../src/util'; - +import {MaxPool2DBackpropProgram} from '../../src/math/webgl/max_pool_backprop_gpu'; +import {TextureManager} from '../../src/math/webgl/texture_manager'; import {BenchmarkTest} from './benchmark'; -const OP_RUNS = 100; +const OP_RUNS = 40; export const BENCHMARK_TEST: BenchmarkTest = (size: number) => { - const dyShapeRCD: [number, number, number] = [size, size, 1]; - const outputDepth = 1; - const fieldSize = 11; - const stride = 1; - const zeroPad = conv_util.computeDefaultPad(dyShapeRCD, fieldSize, stride); - const outputShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - dyShapeRCD, fieldSize, outputDepth, stride, zeroPad); - - const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD); - const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD); - const gpgpu = new GPGPUContext(); - const program = gpgpu.createProgram( - max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop( - dyShapeRCD, fieldSize, stride, zeroPad)); - - const dyTexture = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]); - const maxPositionsTexture = - gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]); - const outputTexture = - gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); - const dyData = - test_util.randomArrayInRange(dyTexShapeRC[0] * dyTexShapeRC[1], -1, 1); 
- const maxPositionsData = new Float32Array(util.sizeFromShape(dyShapeRCD)); - for (let i = 0; i < maxPositionsData.length; i++) { - maxPositionsData[i] = Math.floor(Math.random() * fieldSize * fieldSize); + const outputDepth = 1; + const dyShape: [number, number, number] = [size, size, outputDepth]; + const fSize = 11; + const stride = 1; + const zeroPad = conv_util.computeDefaultPad(dyShape, fSize, stride); + const program = new MaxPool2DBackpropProgram(dyShape, fSize, stride, zeroPad); + const res = NDArray.zeros(program.outputShape); + const dy = Array3D.randUniform(dyShape, -1, 1); + const positionsData = new Float32Array(dy.size); + for (let i = 0; i < positionsData.length; i++) { + positionsData[i] = Math.floor(Math.random() * fSize * fSize); } - - gpgpu.uploadMatrixToTexture( - dyTexture, dyTexShapeRC[0], dyTexShapeRC[1], dyData); - gpgpu.uploadMatrixToTexture( - maxPositionsTexture, dyTexShapeRC[0], dyTexShapeRC[1], maxPositionsData); + const positions = Array3D.new(dyShape, positionsData); + const binary = + gpgpu_math.compileProgram(gpgpu, program, [dy, positions], res); const start = performance.now(); for (let i = 0; i < OP_RUNS; i++) { - max_pool_backprop_gpu.maxPoolBackprop( - gpgpu, program, dyTexture, maxPositionsTexture, outputTexture, - outputTexShapeRC); + gpgpu_math.runProgram(binary, [dy, positions], res); } - - gpgpu.downloadMatrixFromTexture( - outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]); - const end = performance.now(); - - const avgTime = (end - start) / OP_RUNS; - - gpgpu.deleteMatrixTexture(dyTexture); - gpgpu.deleteMatrixTexture(maxPositionsTexture); - gpgpu.deleteMatrixTexture(outputTexture); - gpgpu.deleteProgram(program); + res.getValues(); + const avgTime = (performance.now() - start) / OP_RUNS; + + dy.dispose(); + positions.dispose(); + res.dispose(); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); return avgTime; diff --git a/demos/benchmarks/max_pool_gpu_benchmark.ts 
b/demos/benchmarks/max_pool_gpu_benchmark.ts index bb1e6a6e24..ce1b683bfd 100644 --- a/demos/benchmarks/max_pool_gpu_benchmark.ts +++ b/demos/benchmarks/max_pool_gpu_benchmark.ts @@ -14,108 +14,53 @@ limitations under the License. ==============================================================================*/ import * as conv_util from '../../src/math/conv_util'; +import {Array3D, initializeGPU, NDArray} from '../../src/math/ndarray'; import {GPGPUContext} from '../../src/math/webgl/gpgpu_context'; -import * as max_pool_gpu from '../../src/math/webgl/max_pool_gpu'; -import * as test_util from '../../src/test_util'; +import * as gpgpu_math from '../../src/math/webgl/gpgpu_math'; +import {Pool2DProgram} from '../../src/math/webgl/pool_gpu'; +import {TextureManager} from '../../src/math/webgl/texture_manager'; import {BenchmarkTest} from './benchmark'; const OP_RUNS = 40; export const MAX_POOL_BENCHMARK_TEST: BenchmarkTest = (size: number) => { - const inputShapeRCD: [number, number, number] = [size, size, 1]; - const outputDepth = 1; - const fieldSize = 11; - const stride = 1; - const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride); - const outputShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - inputShapeRCD, fieldSize, outputDepth, stride, zeroPad); + return testMaxPool(size, false); +}; - const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD); - const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD); +export const MAX_POOL_POSNS_BENCHMARK_TEST: BenchmarkTest = (size: number) => { + return testMaxPool(size, true); +}; +function testMaxPool(size: number, positions: boolean): number { const gpgpu = new GPGPUContext(); - const program = - gpgpu.createProgram(max_pool_gpu.getFragmentShaderMaxPoolSource( - inputShapeRCD, fieldSize, stride, zeroPad)); - - const inputTexture = - gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]); - const outputTexture = - 
gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]); - - const inputData = test_util.randomArrayInRange( - inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1); - - gpgpu.uploadMatrixToTexture( - inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData); - - const start = performance.now(); - for (let i = 0; i < OP_RUNS; i++) { - max_pool_gpu.maxPoolCommon( - gpgpu, program, inputTexture, outputTexture, outputTexShapeRC); - } - - gpgpu.downloadMatrixFromTexture( - outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]); - const end = performance.now(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); - const avgTime = (end - start) / OP_RUNS; - - gpgpu.deleteMatrixTexture(inputTexture); - gpgpu.deleteMatrixTexture(outputTexture); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - - return avgTime; -}; - -export const MAX_POOL_POSNS_BENCHMARK_TEST: BenchmarkTest = (size: number) => { - const inputShapeRCD: [number, number, number] = [size, size, 1]; const outputDepth = 1; + const xShape: [number, number, number] = [size, size, outputDepth]; const fieldSize = 11; const stride = 1; - const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride); - const outputShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - inputShapeRCD, fieldSize, outputDepth, stride, zeroPad); - - const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD); - const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD); + const zeroPad = conv_util.computeDefaultPad(xShape, fieldSize, stride); - const gpgpu = new GPGPUContext(); - const program: WebGLProgram = - gpgpu.createProgram(max_pool_gpu.getFragmentShaderMaxPoolPositionsSource( - inputShapeRCD, fieldSize, stride, zeroPad)); - - const inputTexture = - gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]); - const outputTexture = - gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]); - - const 
inputData = test_util.randomArrayInRange( - inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1); - - gpgpu.uploadMatrixToTexture( - inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData); + const program = + new Pool2DProgram(xShape, fieldSize, stride, zeroPad, 'max', positions); + const res = NDArray.zeros(program.outputShape); + const x = Array3D.randUniform(xShape, -1, 1); + const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res); const start = performance.now(); for (let i = 0; i < OP_RUNS; i++) { - max_pool_gpu.maxPoolCommon( - gpgpu, program, inputTexture, outputTexture, outputTexShapeRC); + gpgpu_math.runProgram(binary, [x], res); } + res.getValues(); + const avgTime = (performance.now() - start) / OP_RUNS; - gpgpu.downloadMatrixFromTexture( - outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]); - const end = performance.now(); - - const avgTime = (end - start) / OP_RUNS; - - gpgpu.deleteMatrixTexture(inputTexture); - gpgpu.deleteMatrixTexture(outputTexture); - gpgpu.deleteProgram(program); + x.dispose(); + res.dispose(); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); return avgTime; -}; \ No newline at end of file +} diff --git a/src/math/conv_util.ts b/src/math/conv_util.ts index 36d8455cda..15f4ecba3d 100644 --- a/src/math/conv_util.ts +++ b/src/math/conv_util.ts @@ -55,16 +55,6 @@ export function computeWeightsShape4D( return [fSize, fSize, inputDepth, outputDepth]; } -export function computeWeightsTexShape( - inputDepth: number, outputDepth: number, - fieldSize: number): [number, number] { - return [fieldSize * fieldSize * inputDepth, outputDepth]; -} - -export function computeBiasesTexShape(outputDepth: number): [number, number] { - return [1, outputDepth]; -} - export function computeDilatedRC( rc: [number, number], origStride: number): [number, number] { const rowsDilated = (rc[0] - 1) * origStride + 1; diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index 808bf1954a..cd8319d91c 
100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -23,7 +23,6 @@ import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray'; import {AddScaledMatProgram} from './webgl/addscaledmat_gpu'; import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu'; import {ArgMinMaxProgram} from './webgl/argminmax_gpu'; -import * as avg_pool_gpu from './webgl/avg_pool_gpu'; import * as batchnorm_gpu from './webgl/batchnorm_gpu'; import {BinaryOpProgram} from './webgl/binaryop_gpu'; import * as concat3d_gpu from './webgl/concat3d_gpu'; @@ -36,12 +35,10 @@ import * as gpgpu_math from './webgl/gpgpu_math'; import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math'; import * as gpgpu_util from './webgl/gpgpu_util'; import {LogSumExpProgram} from './webgl/logsumexp_gpu'; -import * as max_pool_backprop_gpu from './webgl/max_pool_backprop_gpu'; -import * as max_pool_gpu from './webgl/max_pool_gpu'; -import * as min_pool_gpu from './webgl/min_pool_gpu'; +import {MaxPool2DBackpropProgram} from './webgl/max_pool_backprop_gpu'; import {MinMaxProgram} from './webgl/minmax_gpu'; import {MatMulProgram} from './webgl/mulmat_gpu'; -import * as pool_gpu from './webgl/pool_gpu'; +import {Pool2DProgram} from './webgl/pool_gpu'; import {ReduceSumProgram} from './webgl/reducesum_gpu'; import * as reshape_gpu from './webgl/reshape_gpu'; import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu'; @@ -50,20 +47,9 @@ import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu'; import * as webgl_util from './webgl/webgl_util'; const BATCHNORM_PROG = 'batchnorm'; - const COPY_PROG = 'copy'; const CONCAT_PROG = 'concat'; - -// Element-wise ops. const RESHAPE_PROG = 'reshape'; - -// Convolution. 
-const MAX_POOL_PROG = 'maxpool'; -const MAX_POOL_POSITIONS_PROG = 'maxpool_posn'; -const MAX_POOL_BACKPROP_PROG = 'maxpool_backprop'; -const MIN_POOL_PROG = 'minpool'; -const AVG_POOL_PROG = 'avgpool'; - const RESIZE_BILINEAR_PROG = 'resizebilin'; function makeCopyProgramName( @@ -488,50 +474,10 @@ export class NDArrayMathGPU extends NDArrayMath { x: Array3D, dy: Array3D, weights: Array4D, stride: number, pad: number): {dx: Array3D, dw: Array4D, db: Array1D} { const fSize = weights.shape[0]; - const inputDepth = weights.shape[2]; - const outputDepth = weights.shape[3]; - const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - const wTexShape = - conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize); - const yTexShape = conv_util.computeTexShapeFrom3D(dy.shape); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. - let cleanupX = false; - const actualXTexShape = x.getTextureShapeRC(xTexShape); - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - let cleanupW = false; - const actualWTexShape = weights.getTextureShapeRC(wTexShape); - if (!util.arraysEqual(actualWTexShape, wTexShape)) { - weights = this.reshapeTexture(weights, wTexShape); - cleanupW = true; - } - - let cleanupY = false; - const actualYTexShape = dy.getTextureShapeRC(yTexShape); - if (!util.arraysEqual(actualYTexShape, yTexShape)) { - dy = this.reshapeTexture(dy, yTexShape); - cleanupY = true; - } - const dw = this.conv2dDerWeights(x, dy, fSize, stride, pad); const db = this.conv2dDerBias(dy); const dx = this.conv2dTransposeInternal( dy, weights, null /** biases */, stride, pad); - - if (cleanupX) { - x.dispose(); - } - if (cleanupW) { - weights.dispose(); - } - if (cleanupY) { - dy.dispose(); - } return {dx, db, dw}; } @@ -560,149 +506,38 @@ export class NDArrayMathGPU extends NDArrayMath { return this.compileAndRun(program, [dY]); } - private 
pool( - program: WebGLProgram, x: Array3D, fSize: number, stride: number, - pad: number): Array3D { - const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. - const actualXTexShape = x.getTextureShapeRC(xTexShape); - let cleanupX = false; - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - const resultShape = - conv_util.computeOutputShape3D(x.shape, fSize, x.shape[2], stride, pad); - const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape); - const poolResultTex = this.textureManager.acquireTexture(resultTexShape); - - pool_gpu.poolCommon( - this.gpgpu, program, x.getTexture(), poolResultTex, resultTexShape); - - if (cleanupX) { - x.dispose(); - } - - return NDArray.make( - resultShape, {texture: poolResultTex, textureShapeRC: resultTexShape}); - } - protected maxPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D { - const maxPoolProgKey = - [MAX_POOL_PROG, x.shape, fSize, stride, pad].join('_'); - const maxPoolProgram = this.getAndSaveProgram(maxPoolProgKey, () => { - return max_pool_gpu.getFragmentShaderMaxPoolSource( - x.shape, fSize, stride, pad); - }); - - return this.pool(maxPoolProgram, x, fSize, stride, pad); + const program = + new Pool2DProgram(x.shape, fSize, stride, pad, 'max', false); + return this.compileAndRun(program, [x]); } protected minPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D { - const minPoolProgKey = - [MIN_POOL_PROG, x.shape, fSize, stride, pad].join('_'); - const minPoolProgram = this.getAndSaveProgram(minPoolProgKey, () => { - return min_pool_gpu.getFragmentShaderMinPoolSource( - x.shape, fSize, stride, pad); - }); - - return this.pool(minPoolProgram, x, fSize, stride, pad); + const program = + new Pool2DProgram(x.shape, fSize, stride, pad, 'min', false); + return 
this.compileAndRun(program, [x]); } protected avgPoolInternal( x: Array3D, fSize: number, stride: number, pad: number): Array3D { - const avgPoolProgKey = - [AVG_POOL_PROG, x.shape, fSize, stride, pad].join('_'); - const avgPoolProgram = this.getAndSaveProgram(avgPoolProgKey, () => { - return avg_pool_gpu.getFragmentShaderAvgPoolSource( - x.shape, fSize, stride, pad); - }); - - return this.pool(avgPoolProgram, x, fSize, stride, pad); + const program = + new Pool2DProgram(x.shape, fSize, stride, pad, 'avg', false); + return this.compileAndRun(program, [x]); } protected maxPoolBackpropInternal( dy: Array3D, x: Array3D, fSize: number, origStride: number, origPad: number): Array3D { - const maxPoolPositionsProgKey = [ - MAX_POOL_POSITIONS_PROG, x.shape, fSize, origStride, origPad - ].join('_'); const maxPoolPositionsProgram = - this.getAndSaveProgram(maxPoolPositionsProgKey, () => { - return max_pool_gpu.getFragmentShaderMaxPoolPositionsSource( - x.shape, fSize, origStride, origPad); - }); - - const maxPoolResultShape = conv_util.computeOutputShape3D( - x.shape, fSize, x.shape[2], origStride, origPad); - const maxPoolResultTexShape = - conv_util.computeTexShapeFrom3D(maxPoolResultShape); - const maxPoolPositionsResultTex = - this.textureManager.acquireTexture(maxPoolResultTexShape); - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. 
- const xTexShape = conv_util.computeTexShapeFrom3D(x.shape); - const actualXTexShape = x.getTextureShapeRC(xTexShape); - let cleanupX = false; - if (!util.arraysEqual(actualXTexShape, xTexShape)) { - x = this.reshapeTexture(x, xTexShape); - cleanupX = true; - } - - max_pool_gpu.maxPoolCommon( - this.gpgpu, maxPoolPositionsProgram, x.getTexture(), - maxPoolPositionsResultTex, maxPoolResultTexShape); - - const maxPoolBackpropProgKey = [ - MAX_POOL_BACKPROP_PROG, dy.shape, fSize, origStride, origPad - ].join('_'); - const program = this.getAndSaveProgram(maxPoolBackpropProgKey, () => { - return max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop( - dy.shape, fSize, origStride, origPad); - }); - - const dyTexShape = conv_util.computeTexShapeFrom3D(dy.shape); + new Pool2DProgram(x.shape, fSize, origStride, origPad, 'max', true); + const maxPoolPositions: Array3D = + this.compileAndRun(maxPoolPositionsProgram, [x]); - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. 
- const actualDyTexShape = dy.getTextureShapeRC(dyTexShape); - let cleanupDy = false; - if (!util.arraysEqual(actualDyTexShape, dyTexShape)) { - dy = this.reshapeTexture(dy, dyTexShape); - cleanupDy = true; - } - - const dilatedDyRC = - conv_util.computeDilatedRC([dy.shape[0], dy.shape[1]], origStride); - const pad = fSize - 1 - origPad; - const resultShapeRCD = conv_util.computeOutputShape3D( - [dilatedDyRC[0], dilatedDyRC[1], dy.shape[2]], fSize, dy.shape[2], 1, - pad); - const resultTexShape = conv_util.computeTexShapeFrom3D(resultShapeRCD); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - max_pool_backprop_gpu.maxPoolBackprop( - this.gpgpu, program, dy.getTexture(), maxPoolPositionsResultTex, - resultTex, resultTexShape); - - if (cleanupDy) { - dy.dispose(); - } - - if (cleanupX) { - x.dispose(); - } - - this.textureManager.releaseTexture( - maxPoolPositionsResultTex, maxPoolResultTexShape); - - return NDArray.make( - resultShapeRCD, {texture: resultTex, textureShapeRC: resultTexShape}); + const maxPoolBackPropProgram = + new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad); + return this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]); } protected resizeBilinear3DInternal( diff --git a/src/math/webgl/argminmax_gpu.ts b/src/math/webgl/argminmax_gpu.ts index f55e34f01c..7f50b08ab4 100644 --- a/src/math/webgl/argminmax_gpu.ts +++ b/src/math/webgl/argminmax_gpu.ts @@ -15,23 +15,23 @@ limitations under the License. import {GPGPUProgram} from './gpgpu_math'; -export function getArgMinMaxSnippet(op: 'min'|'max', texName: string, - size: number): string { +export function getArgMinMaxSnippet( + op: 'min'|'max', texName: string, size: number): string { const compOp = (op === 'min') ? 
'<' : '>'; return ` float getArgMinMax${texName}() { float bestIndex = 0.0; float bestValue = get${texName}Flat(0.0); - for (int i = 0; i < ${size}; i++) { - float i_float = float(i); - float candidate = get${texName}Flat(i_float); + for (int ii = 0; ii < ${size}; ii++) { + float i = float(ii); + float candidate = get${texName}Flat(i); if (isNaN(candidate)) { return candidate; } if (candidate ${compOp} bestValue) { bestValue = candidate; - bestIndex = i_float; + bestIndex = i; } } return bestIndex; diff --git a/src/math/webgl/avg_pool_gpu.ts b/src/math/webgl/avg_pool_gpu.ts deleted file mode 100644 index eb59aaf4e9..0000000000 --- a/src/math/webgl/avg_pool_gpu.ts +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; -import * as pool_gpu from './pool_gpu'; - -export function getFragmentShaderAvgPoolSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number) { - return pool_gpu.getFragmentShaderPoolCommonSource( - xShapeRCD, fSize, stride, pad, 'avg', false); -} - -export function avgPool( - gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture, - result: WebGLTexture, resultShapeRowCol: [number, number]) { - pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol); -} \ No newline at end of file diff --git a/src/math/webgl/avg_pool_gpu_test.ts b/src/math/webgl/avg_pool_gpu_test.ts index d74c98a38e..db9083cc21 100644 --- a/src/math/webgl/avg_pool_gpu_test.ts +++ b/src/math/webgl/avg_pool_gpu_test.ts @@ -14,51 +14,17 @@ limitations under the License. ==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; import {Array3D, NDArray} from '../ndarray'; +import * as pool_gpu_test_util from './pool_gpu_test_util'; -import * as avg_pool_gpu from './avg_pool_gpu'; -import {GPGPUContext} from './gpgpu_context'; describe('avg_pool_gpu', () => { function uploadAvgPoolDownload( - a: Float32Array, aShapeRowColDepth: [number, number, number], - fieldSize: number, stride: number, zeroPad: number): Float32Array { - const aTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(aShapeRowColDepth); - - const resultShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride, - zeroPad); - - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); - - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const 
shaderSource = avg_pool_gpu.getFragmentShaderAvgPoolSource( - aShapeRowColDepth, fieldSize, stride, zeroPad); - const program = gpgpu.createProgram(shaderSource); - - const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a); - - avg_pool_gpu.avgPool(gpgpu, program, aTex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(aTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; + a: Float32Array, xShape: [number, number, number], fieldSize: number, + stride: number, zeroPad: number): Float32Array { + return pool_gpu_test_util.uploadPoolDownload( + a, xShape, fieldSize, stride, zeroPad, 'avg'); } function compareToCPU( @@ -109,4 +75,4 @@ describe('avg_pool_gpu', () => { const zeroPad = 1; compareToCPU(inputShape, fSize, stride, zeroPad); }); -}); \ No newline at end of file +}); diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts index 568ff131e0..c662d0e848 100644 --- a/src/math/webgl/conv_backprop_gpu.ts +++ b/src/math/webgl/conv_backprop_gpu.ts @@ -45,19 +45,23 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram { // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2). // ? = to be determined. : = across all values in that axis. 
float dotProd = 0.0; - for (int yR = 0; yR < ${yNumRows}; yR++) { - float yR_float = float(yR); - float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0; + for (int iyR = 0; iyR < ${yNumRows}; iyR++) { + float yR = float(iyR); + float xR = wR + yR * ${stride}.0 - ${zeroPad}.0; + if (xR < 0.0 || xR > ${xRowsLimit}) { continue; } - for (int yC = 0; yC < ${yNumCols}; yC++) { - float yC_float = float(yC); - float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0; + + for (int iyC = 0; iyC < ${yNumCols}; iyC++) { + float yC = float(iyC); + float xC = wC + yC * ${stride}.0 - ${zeroPad}.0; + if (xC < 0.0 || xC > ${xColsLimit}) { continue; } - float dyValue = getDy(yR_float, yC_float, d2); + + float dyValue = getDy(yR, yC, d2); float xValue = getX(xR, xC, d1); dotProd += (xValue * dyValue); } @@ -103,28 +107,30 @@ export class Conv2DTransposeProgram implements GPGPUProgram { // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2). // ? = to be determined. : = across all values in that axis. 
float dotProd = 0.0; - for (int wR = 0; wR < ${fSize}; wR++) { - float wR_float = float(wR); - float xR = (xRCorner + wR_float) / ${origStride}.0; + for (int iwR = 0; iwR < ${fSize}; iwR++) { + float wR = float(iwR); + float xR = (xRCorner + wR) / ${origStride}.0; + if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) { continue; } - float wRPerm = ${fSize}.0 - 1.0 - wR_float; + float wRPerm = ${fSize}.0 - 1.0 - wR; + + for (int iwC = 0; iwC < ${fSize}; iwC++) { + float wC = float(iwC); + float xC = (xCCorner + wC) / ${origStride}.0; - for (int wC = 0; wC < ${fSize}; wC++) { - float wC_float = float(wC); - float xC = (xCCorner + wC_float) / ${origStride}.0; if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) { continue; } - float wCPerm = ${fSize}.0 - 1.0 - wC_float; + float wCPerm = ${fSize}.0 - 1.0 - wC; - for (int d1 = 0; d1 < ${origOutputDepth}; d1++) { - float d1_float = float(d1); - float xValue = getX(xR, xC, d1_float); - float wValue = getW(wRPerm, wCPerm, d2, d1_float); + for (int id1 = 0; id1 < ${origOutputDepth}; id1++) { + float d1 = float(id1); + float xValue = getX(xR, xC, d1); + float wValue = getW(wRPerm, wCPerm, d2, d1); dotProd += xValue * wValue; } } diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts index acf92d10bd..5223f9e9ac 100644 --- a/src/math/webgl/conv_gpu.ts +++ b/src/math/webgl/conv_gpu.ts @@ -47,22 +47,26 @@ export class Conv2DProgram implements GPGPUProgram { // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2). // ? = to be determined. : = across all values in that axis. 
float dotProd = 0.0; - for (int wR = 0; wR < ${fieldSize}; wR++) { - float wR_float = float(wR); - float xR = xRCorner + wR_float; + for (int iwR = 0; iwR < ${fieldSize}; iwR++) { + float wR = float(iwR); + float xR = xRCorner + wR; + if (xR < 0.0 || xR > ${xRowsLimit}) { continue; } - for (int wC = 0; wC < ${fieldSize}; wC++) { - float wC_float = float(wC); - float xC = xCCorner + wC_float; + + for (int iwC = 0; iwC < ${fieldSize}; iwC++) { + float wC = float(iwC); + float xC = xCCorner + wC; + if (xC < 0.0 || xC > ${xColsLimit}) { continue; } - for (int d1 = 0; d1 < ${inputDepth}; d1++) { - float d1_float = float(d1); - float xValue = getX(xR, xC, d1_float); - float wValue = getW(wR_float, wC_float, d1_float, d2); + + for (int id1 = 0; id1 < ${inputDepth}; id1++) { + float d1 = float(id1); + float xValue = getX(xR, xC, d1); + float wValue = getW(wR, wC, d1, d2); dotProd += xValue * wValue; } } diff --git a/src/math/webgl/conv_gpu_test.ts b/src/math/webgl/conv_gpu_test.ts index 3d5a4c4542..ef33bca443 100644 --- a/src/math/webgl/conv_gpu_test.ts +++ b/src/math/webgl/conv_gpu_test.ts @@ -16,11 +16,11 @@ limitations under the License. import * as test_util from '../../test_util'; import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; +import {Array1D, Array3D, Array4D, initializeGPU, NDArray} from '../ndarray'; import {Conv2DProgram} from './conv_gpu'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; -import {NDArray, Array1D, Array3D, Array4D, initializeGPU} from '../ndarray'; import {TextureManager} from './texture_manager'; describe('conv_gpu', () => { @@ -37,7 +37,6 @@ describe('conv_gpu', () => { const wShape = conv_util.computeWeightsShape4D(xShapeRCD[2], resultDepth, fieldSize); const W = Array4D.new(wShape, weights); - const b = biasVals != null ? 
Array1D.new(biasVals) : null; const gpgpu = new GPGPUContext(); @@ -46,12 +45,10 @@ describe('conv_gpu', () => { initializeGPU(gpgpu, textureManager); const program = new Conv2DProgram( - xShapeRCD, fieldSize, resultDepth, stride, zeroPad, - biasVals != null); + xShapeRCD, fieldSize, resultDepth, stride, zeroPad, biasVals != null); const res = NDArray.zeros(program.outputShape); const inputs = biasVals != null ? [x, W, b] : [x, W]; - const binary = - gpgpu_math.compileProgram(gpgpu, program, inputs, res); + const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res); gpgpu_math.runProgram(binary, inputs, res); const resValues = res.getValues(); diff --git a/src/math/webgl/max_pool_backprop_gpu.ts b/src/math/webgl/max_pool_backprop_gpu.ts index 2b18d3a007..66a97db9ac 100644 --- a/src/math/webgl/max_pool_backprop_gpu.ts +++ b/src/math/webgl/max_pool_backprop_gpu.ts @@ -14,88 +14,73 @@ limitations under the License. ==============================================================================*/ import * as conv_util from '../conv_util'; -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderMaxPoolBackprop( - dyShapeRCD: [number, number, number], fSize: number, origStride: number, - origPad: number) { - const origInputDepth = dyShapeRCD[2]; - const pad = fSize - 1 - origPad; - const [dyRows, dyCols, depth] = dyShapeRCD; - - const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD); - - return ` - precision highp float; - uniform sampler2D dy; - uniform sampler2D maxPos; - - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 dyShapeCR = vec2(${dyTexShapeRC[1]}, ${dyTexShapeRC[0]}); - - void main() { - vec2 dxTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (dxTexR, dxTexC) to 3D (dxR, dxC, d). 
- float dxR = dxTexCR.y; - float dxC = floor(dxTexCR.x / ${origInputDepth}.0); - float d = mod(dxTexCR.x, ${origInputDepth}.0); - - vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0); - float dyRCorner = dyRCCorner.x; - float dyCCorner = dyRCCorner.y; - - // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(yR, dxC, d). - // ? = to be determined. : = across all values in that axis. - float dotProd = 0.0; - for (int wR = 0; wR < ${fSize}; wR++) { - float wR_float = float(wR); - float dyR = (dyRCorner + wR_float) / ${origStride}.0; - // TODO(nsthorat): Splice this with another version where you call - // getMatrixValueOrZeroPad(). Here and below. - if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) { - continue; - } - - float dyTexR = dyR; - for (int wC = 0; wC < ${fSize}; wC++) { - float wC_float = float(wC); - float dyC = (dyCCorner + wC_float) / ${origStride}.0; - if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) { +import {GPGPUProgram} from './gpgpu_math'; + +export class MaxPool2DBackpropProgram implements GPGPUProgram { + variableNames = ['dy', 'maxPos']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor( + dyShape: [number, number, number], fSize: number, origStride: number, + origPad: number) { + const pad = fSize - 1 - origPad; + const dyRows = dyShape[0]; + const dyCols = dyShape[1]; + this.params = [fSize, origStride, origPad]; + + const dilatedDyRC = + conv_util.computeDilatedRC([dyShape[0], dyShape[1]], origStride); + this.outputShape = conv_util.computeOutputShape3D( + [dilatedDyRC[0], dilatedDyRC[1], dyShape[2]], fSize, dyShape[2], 1, + pad); + + this.userCode = ` + void main() { + vec3 coords = getOutputCoords(); + float dxR = coords.x; + float dxC = coords.y; + float d = coords.z; + + vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0); + float dyRCorner = dyRCCorner.x; + float dyCCorner = dyRCCorner.y; + + // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(yR, dxC, d). + // ? 
= to be determined. : = across all values in that axis. + float dotProd = 0.0; + for (int iwR = 0; iwR < ${fSize}; iwR++) { + float wR = float(iwR); + float dyR = (dyRCorner + wR) / ${origStride}.0; + + if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) { continue; } - float dyTexC = dyC * ${depth}.0 + d; + for (int iwC = 0; iwC < ${fSize}; iwC++) { + float wC = float(iwC); + float dyC = (dyCCorner + wC) / ${origStride}.0; - // Read dy(dyR, dyC, d). - vec2 dyUV = (vec2(dyTexC, dyTexR) + halfCR) / dyShapeCR; - float dyValue = texture2D(dy, dyUV).r; + if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) { + continue; + } - // Read maxPos(dyR, dyC, d). - float maxPosValue = - ${fSize * fSize - 1}.0 - texture2D(maxPos, dyUV).r; + float dyValue = getDy(dyR, dyC, d); + float maxPosValue = + ${fSize * fSize - 1}.0 - getMaxPos(dyR, dyC, d); - // Get the current value, check it against the value from the - // position matrix. - float curPosValue = wR_float * ${fSize}.0 + wC_float; - float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0); + // Get the current value, check it against the value from the + // position matrix. + float curPosValue = wR * ${fSize}.0 + wC; + float mask = float(maxPosValue == curPosValue ? 
1.0 : 0.0); - dotProd += dyValue * mask; + dotProd += dyValue * mask; + } } + setOutput(dotProd); } - gl_FragColor = vec4(dotProd, 0, 0, 0); - }`; -} - -export function maxPoolBackprop( - gpgpu: GPGPUContext, program: WebGLProgram, dyTex: WebGLTexture, - maxPositionsTex: WebGLTexture, resultTex: WebGLTexture, - resultTexShapeRC: [number, number]) { - gpgpu.setOutputMatrixTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(dyTex, 'dy', 0); - gpgpu.setInputMatrixTexture(maxPositionsTex, 'maxPos', 1); - gpgpu.executeProgram(); + `; + } } diff --git a/src/math/webgl/max_pool_backprop_gpu_test.ts b/src/math/webgl/max_pool_backprop_gpu_test.ts index 7eabe43a56..82d73779a4 100644 --- a/src/math/webgl/max_pool_backprop_gpu_test.ts +++ b/src/math/webgl/max_pool_backprop_gpu_test.ts @@ -14,13 +14,14 @@ limitations under the License. ==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array3D, NDArray} from '../ndarray'; +import {Array3D, initializeGPU, NDArray} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; -import * as max_pool_backprop_gpu from './max_pool_backprop_gpu'; -import * as max_pool_gpu from './max_pool_gpu'; +import * as gpgpu_math from './gpgpu_math'; +import {MaxPool2DBackpropProgram} from './max_pool_backprop_gpu'; +import {Pool2DProgram} from './pool_gpu'; +import {TextureManager} from './texture_manager'; describe('max_pool_backprop_gpu', () => { @@ -29,60 +30,29 @@ describe('max_pool_backprop_gpu', () => { origPad: number): Float32Array { const gpgpu = new GPGPUContext(); gpgpu.enableAutomaticDebugValidation(true); - - const depth = dy.shape[2]; - const src = max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop( - dy.shape, fSize, origStride, origPad); - const program = gpgpu.createProgram(src); - 
- // Upload dy. - const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape); - const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues()); - - // Upload x. - const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape); - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - gpgpu.uploadMatrixToTexture( - xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues()); - - // Compute max positions. - const maxPoolResultShape = conv_util.computeOutputShape3D( - x.shape, fSize, x.shape[2], origStride, origPad); - const maxPoolResultTexShape = - conv_util.computeTexShapeFrom3D(maxPoolResultShape); - const maxPoolPositionsResultTex = gpgpu.createMatrixTexture( - maxPoolResultTexShape[0], maxPoolResultTexShape[1]); - const maxPoolPositionsSrc = - max_pool_gpu.getFragmentShaderMaxPoolPositionsSource( - x.shape, fSize, origStride, origPad); - const maxPoolPositionsProgram = gpgpu.createProgram(maxPoolPositionsSrc); - max_pool_gpu.maxPoolCommon( - gpgpu, maxPoolPositionsProgram, xTex, maxPoolPositionsResultTex, - maxPoolResultTexShape); - - // Figure out the output shape by dilating the input. 
- const dyRowsDilated = (dy.shape[0] - 1) * origStride + 1; - const dyColsDilated = (dy.shape[1] - 1) * origStride + 1; - const pad = fSize - 1 - origPad; - const resultShapeRCD = conv_util.computeOutputShape3D( - [dyRowsDilated, dyColsDilated, depth], fSize, depth, 1, pad); - const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD); - const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]); - max_pool_backprop_gpu.maxPoolBackprop( - gpgpu, program, dyTex, maxPoolPositionsResultTex, resultTex, - resultTexRC); - const y = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexRC[0], resultTexRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(dyTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteProgram(program); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const positionsProgram = + new Pool2DProgram(x.shape, fSize, origStride, origPad, 'max', true); + const positionsRes = NDArray.zeros(positionsProgram.outputShape); + const positionsBinary = + gpgpu_math.compileProgram(gpgpu, positionsProgram, [x], positionsRes); + gpgpu_math.runProgram(positionsBinary, [x], positionsRes); + + const program = + new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad); + const res = NDArray.zeros(program.outputShape); + const binary = + gpgpu_math.compileProgram(gpgpu, program, [dy, positionsRes], res); + gpgpu_math.runProgram(binary, [dy, positionsRes], res); + + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - - return y; + return resValues; } function compareToCPU( diff --git a/src/math/webgl/max_pool_gpu.ts b/src/math/webgl/max_pool_gpu.ts deleted file mode 100644 index 7cac3f68fc..0000000000 --- a/src/math/webgl/max_pool_gpu.ts +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; -import * as pool_gpu from './pool_gpu'; - -export function getFragmentShaderMaxPoolPositionsSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number) { - return getFragmentShaderMaxPoolCommonSource( - xShapeRCD, fSize, stride, pad, true); -} - -export function getFragmentShaderMaxPoolSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number) { - return getFragmentShaderMaxPoolCommonSource( - xShapeRCD, fSize, stride, pad, false); -} - -function getFragmentShaderMaxPoolCommonSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number, computeMaxPositions: boolean) { - return pool_gpu.getFragmentShaderPoolCommonSource( - xShapeRCD, fSize, stride, pad, 'max', computeMaxPositions); -} - -export function maxPoolCommon( - gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture, - result: WebGLTexture, resultShapeRowCol: [number, number]) { - pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol); -} \ No newline at end of file diff --git a/src/math/webgl/max_pool_gpu_test.ts b/src/math/webgl/max_pool_gpu_test.ts index 33ae1dfb6e..8f9380ca80 100644 --- a/src/math/webgl/max_pool_gpu_test.ts +++ b/src/math/webgl/max_pool_gpu_test.ts @@ -14,52 +14,16 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; import {Array3D, NDArray} from '../ndarray'; - -import {GPGPUContext} from './gpgpu_context'; -import * as max_pool_gpu from './max_pool_gpu'; +import * as pool_gpu_test_util from './pool_gpu_test_util'; describe('max_pool_gpu', () => { function uploadMaxPoolDownload( - a: Float32Array, aShapeRowColDepth: [number, number, number], - fieldSize: number, stride: number, zeroPad: number): Float32Array { - const aTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(aShapeRowColDepth); - - const resultShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride, - zeroPad); - - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); - - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = max_pool_gpu.getFragmentShaderMaxPoolSource( - aShapeRowColDepth, fieldSize, stride, zeroPad); - const program = gpgpu.createProgram(shaderSource); - - const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a); - - max_pool_gpu.maxPoolCommon( - gpgpu, program, aTex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(aTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; + a: Float32Array, xShape: [number, number, number], fieldSize: number, + stride: number, zeroPad: number): Float32Array { + return pool_gpu_test_util.uploadPoolDownload( + a, xShape, 
fieldSize, stride, zeroPad, 'max'); } function compareToCPU( diff --git a/src/math/webgl/max_pool_positions_gpu_test.ts b/src/math/webgl/max_pool_positions_gpu_test.ts index 86f3a8a390..246bbb3586 100644 --- a/src/math/webgl/max_pool_positions_gpu_test.ts +++ b/src/math/webgl/max_pool_positions_gpu_test.ts @@ -14,50 +14,34 @@ limitations under the License. ==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; -import {Array3D, NDArray} from '../ndarray'; +import {Array3D, initializeGPU, NDArray} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; -import * as max_pool_gpu from './max_pool_gpu'; +import * as gpgpu_math from './gpgpu_math'; +import {Pool2DProgram} from './pool_gpu'; +import {TextureManager} from './texture_manager'; describe('max_pool_position', () => { function uploadMaxPoolPositionDownload( - x: Float32Array, xShapeRowColDepth: [number, number, number], - fieldSize: number, stride: number, pad: number): Float32Array { - const xTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(xShapeRowColDepth); - - const resultShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - xShapeRowColDepth, fieldSize, xShapeRowColDepth[2], stride, pad); - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); - + xVals: Float32Array, xShape: [number, number, number], fieldSize: number, + stride: number, pad: number): Float32Array { const gpgpu = new GPGPUContext(); gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = max_pool_gpu.getFragmentShaderMaxPoolPositionsSource( - xShapeRowColDepth, fieldSize, stride, pad); - const program = gpgpu.createProgram(shaderSource); - - const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], 
resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], x); - - max_pool_gpu.maxPoolCommon( - gpgpu, program, xTex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteProgram(program); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + const program = + new Pool2DProgram(xShape, fieldSize, stride, pad, 'max', true); + const res = NDArray.zeros(program.outputShape); + const x = Array3D.new(xShape, xVals); + const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res); + gpgpu_math.runProgram(binary, [x], res); + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - return result; + return resValues; } function compareToCPU( diff --git a/src/math/webgl/min_pool_gpu.ts b/src/math/webgl/min_pool_gpu.ts deleted file mode 100644 index f3b8888356..0000000000 --- a/src/math/webgl/min_pool_gpu.ts +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; -import * as pool_gpu from './pool_gpu'; - -export function getFragmentShaderMinPoolSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number) { - return pool_gpu.getFragmentShaderPoolCommonSource( - xShapeRCD, fSize, stride, pad, 'min', false); -} - -export function minPool( - gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture, - result: WebGLTexture, resultShapeRowCol: [number, number]) { - pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol); -} \ No newline at end of file diff --git a/src/math/webgl/min_pool_gpu_test.ts b/src/math/webgl/min_pool_gpu_test.ts index 1911c649a4..b275cc1035 100644 --- a/src/math/webgl/min_pool_gpu_test.ts +++ b/src/math/webgl/min_pool_gpu_test.ts @@ -14,51 +14,16 @@ limitations under the License. ==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; import {NDArrayMathCPU} from '../math_cpu'; import {Array3D, NDArray} from '../ndarray'; - -import {GPGPUContext} from './gpgpu_context'; -import * as min_pool_gpu from './min_pool_gpu'; +import * as pool_gpu_test_util from './pool_gpu_test_util'; describe('min_pool_gpu', () => { function uploadMinPoolDownload( - a: Float32Array, aShapeRowColDepth: [number, number, number], - fieldSize: number, stride: number, zeroPad: number): Float32Array { - const aTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(aShapeRowColDepth); - - const resultShapeRCD: [number, number, number] = - conv_util.computeOutputShape3D( - aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride, - zeroPad); - - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); - - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const 
shaderSource = min_pool_gpu.getFragmentShaderMinPoolSource( - aShapeRowColDepth, fieldSize, stride, zeroPad); - const program = gpgpu.createProgram(shaderSource); - - const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a); - - min_pool_gpu.minPool(gpgpu, program, aTex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(aTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; + a: Float32Array, xShape: [number, number, number], fieldSize: number, + stride: number, zeroPad: number): Float32Array { + return pool_gpu_test_util.uploadPoolDownload( + a, xShape, fieldSize, stride, zeroPad, 'min'); } function compareToCPU( @@ -109,4 +74,4 @@ describe('min_pool_gpu', () => { const zeroPad = 1; compareToCPU(inputShape, fSize, stride, zeroPad); }); -}); \ No newline at end of file +}); diff --git a/src/math/webgl/mulmat_gpu.ts b/src/math/webgl/mulmat_gpu.ts index 069f95cf5d..366991a24a 100644 --- a/src/math/webgl/mulmat_gpu.ts +++ b/src/math/webgl/mulmat_gpu.ts @@ -22,7 +22,8 @@ export class MatMulProgram implements GPGPUProgram { outputShape: number[]; userCode: string; - constructor(aShape: [number, number], bShape: [number, number], + constructor( + aShape: [number, number], bShape: [number, number], aOrient = MatrixOrientation.REGULAR, bOrient = MatrixOrientation.REGULAR) { this.params = [aOrient, bOrient]; @@ -34,19 +35,19 @@ export class MatMulProgram implements GPGPUProgram { this.outputShape = [outerShapeA, outerShapeB]; const sharedDim = - (aOrient === MatrixOrientation.REGULAR ? aShape[1] : aShape[0]); - const aSnippet = (aOrient === MatrixOrientation.REGULAR) ? 
- 'aRow, i_float' : 'i_float, aRow'; - const bSnippet = (bOrient === MatrixOrientation.REGULAR) ? - 'i_float, bCol' : 'bCol, i_float'; + (aOrient === MatrixOrientation.REGULAR ? aShape[1] : aShape[0]); + const aSnippet = + (aOrient === MatrixOrientation.REGULAR) ? 'aRow, i' : 'i, aRow'; + const bSnippet = + (bOrient === MatrixOrientation.REGULAR) ? 'i, bCol' : 'bCol, i'; this.userCode = ` const int sharedDim = ${sharedDim}; float dotARowBCol(float aRow, float bCol) { float result = 0.0; - for (int i = 0; i < sharedDim; i++) { - float i_float = float(i); + for (int ii = 0; ii < sharedDim; ii++) { + float i = float(ii); float a = getMatrixA(${aSnippet}); float b = getMatrixB(${bSnippet}); result += (a * b); @@ -61,4 +62,3 @@ export class MatMulProgram implements GPGPUProgram { `; } } - diff --git a/src/math/webgl/mulmat_packed_gpu.ts b/src/math/webgl/mulmat_packed_gpu.ts index e0d29e1dd2..6a7a15fd3d 100644 --- a/src/math/webgl/mulmat_packed_gpu.ts +++ b/src/math/webgl/mulmat_packed_gpu.ts @@ -56,9 +56,9 @@ export function getFragmentShaderSource( vec4 dot2x2ARowBCol() { vec4 result = vec4(0, 0, 0, 0); - for (int i = 0; i < ${sharedDimensionPacked}; i++) { - float i_float = float(i); - float center = (i_float + 0.5) / sharedDimension; + for (int ii = 0; ii < ${sharedDimensionPacked}; ii++) { + float i = float(ii); + float center = (i + 0.5) / sharedDimension; vec4 a = texture2D(matrixA, vec2(${aSample})); vec4 b = texture2D(matrixB, vec2(${bSample})); result += diff --git a/src/math/webgl/pool_gpu.ts b/src/math/webgl/pool_gpu.ts index 7de03a4a47..dbac7f5341 100644 --- a/src/math/webgl/pool_gpu.ts +++ b/src/math/webgl/pool_gpu.ts @@ -14,111 +14,94 @@ limitations under the License. 
==============================================================================*/ import * as conv_util from '../conv_util'; -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderPoolCommonSource( - xShapeRCD: [number, number, number], fSize: number, stride: number, - pad: number, poolType: 'max'|'min'|'avg', computePositions: boolean) { - if (poolType === 'avg' && computePositions) { - throw new Error('Cannot compute positions for average pool.'); - } - - const depth = xShapeRCD[2]; - - const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD); +import {GPGPUProgram} from './gpgpu_math'; + +export class Pool2DProgram implements GPGPUProgram { + variableNames = ['x']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor( + xShape: [number, number, number], fSize: number, stride: number, + pad: number, poolType: 'max'|'min'|'avg', computePositions: boolean) { + if (poolType === 'avg' && computePositions) { + throw new Error('Cannot compute positions for average pool.'); + } - let returnValue = 'minMaxValue'; - if (computePositions) { - returnValue = 'minMaxPosition'; - } else if (poolType === 'avg') { - returnValue = 'avgValue'; - } + let returnValue = 'minMaxValue'; + if (computePositions) { + returnValue = 'minMaxPosition'; + } else if (poolType === 'avg') { + returnValue = `avgValue / ${fSize * fSize}.0`; + } + const xRowsLimit = xShape[0] - 0.5; + const xColsLimit = xShape[1] - 0.5; + this.params = [stride, pad, fSize, computePositions]; + this.outputShape = + conv_util.computeOutputShape3D(xShape, fSize, xShape[2], stride, pad); + + this.userCode = ` + void main() { + vec3 coords = getOutputCoords(); + float yR = coords.x; + float yC = coords.y; + float d = coords.z; + + vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) - + vec2(${pad}.0, ${pad}.0); + float xRCorner = xRCCorner.x; + float xCCorner = xRCCorner.y; + + // max/min x(?, ?, d) to get y(yR, yC, d). + // ? 
= to be determined + float minMaxValue = 0.0; + float minMaxValueFound = 0.0; + float minMaxPosition = 0.0; + float avgValue = 0.0; + + for (int iwR = 0; iwR < ${fSize}; iwR++) { + float wR = float(iwR); + float xR = xRCorner + wR; + + if (xR < 0.0 || xR > ${xRowsLimit}) { + continue; + } - return ` - precision highp float; - uniform sampler2D x; - varying vec2 resultUV; + for (int iwC = 0; iwC < ${fSize}; iwC++) { + float wC = float(iwC); + float xC = xCCorner + wC; - const vec2 halfCR = vec2(0.5, 0.5); - const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]}); + if (xC < 0.0 || xC > ${xColsLimit}) { + continue; + } - bool isNaN(float val) { - return val == val ? false : true; - } + float value = getX(xR, xC, d); - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2). - float yR = yTexCR.y; - float yC = floor(yTexCR.x / ${depth}.0); - float d = mod(yTexCR.x, ${depth}.0); - - vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) - - vec2(${pad}.0, ${pad}.0); - float xRCorner = xRCCorner.x; - float xCCorner = xRCCorner.y; - - // max/min x(?, ?, d) to get y(yR, yC, d). - // ? = to be determined - float minMaxValue = 0.0; - float minMaxValueFound = 0.0; - float minMaxPosition = 0.0; - float avgValue = 0.0; - - for (int wR = 0; wR < ${fSize}; wR++) { - float wR_float = float(wR); - float xR = xRCorner + wR_float; - float xTexR = xR; - - for (int wC = 0; wC < ${fSize}; wC++) { - float wC_float = float(wC); - float xC = xCCorner + wC_float; - float xTexC = xC * ${depth}.0 + d; - - vec2 texCR = vec2(xTexC, xTexR); - - // Check if the requested UV is invalid. 
- vec2 uv = (texCR + halfCR) / xShapeCR; - bool lessThanZero = any(lessThan(uv, vec2(0, 0))); - bool greaterThanOne = any(greaterThan(uv, vec2(1, 1))); - bool outside = lessThanZero || greaterThanOne; - if (outside) { - continue; - } + if (isNaN(value)) { + setOutput(value); + return; + } - float value = texture2D(x, uv).r; - if (isNaN(value)) { - gl_FragColor = vec4(value, 0, 0, 0); - return; - } - if (${poolType === 'avg'}) { - avgValue += value / ${fSize * fSize}.0; - } else { - // If a min / max value has already been found, use it. If not, use - // the current value. - float currentMinMaxValue = mix( - value, minMaxValue, minMaxValueFound); - if (value ${poolType === 'min' ? '<=' : '>='} currentMinMaxValue) { - minMaxValue = value; - minMaxValueFound = 1.0; - if (${computePositions}) { - minMaxPosition = wR_float * ${fSize}.0 + wC_float; + if (${poolType === 'avg'}) { + avgValue += value; + } else { + // If a min / max value has already been found, use it. If not, + // use the current value. + float currMinMaxValue = mix( + value, minMaxValue, minMaxValueFound); + if (value ${poolType === 'min' ? '<=' : '>='} currMinMaxValue) { + minMaxValue = value; + minMaxValueFound = 1.0; + if (${computePositions}) { + minMaxPosition = wR * ${fSize}.0 + wC; + } } } } } + setOutput(${returnValue}); } - gl_FragColor = vec4(${returnValue}, 0, 0, 0); - }`; -} - -export function poolCommon( - gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture, - result: WebGLTexture, resultShapeRowCol: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultShapeRowCol[0], resultShapeRowCol[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(x, 'x', 0); - gpgpu.executeProgram(); + `; + } } diff --git a/src/math/webgl/pool_gpu_test_util.ts b/src/math/webgl/pool_gpu_test_util.ts new file mode 100644 index 0000000000..a06be73b31 --- /dev/null +++ b/src/math/webgl/pool_gpu_test_util.ts @@ -0,0 +1,42 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +import {Array3D, initializeGPU, NDArray} from '../ndarray'; + +import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {Pool2DProgram} from './pool_gpu'; +import {TextureManager} from './texture_manager'; + +export function uploadPoolDownload( + a: Float32Array, xShape: [number, number, number], fieldSize: number, + stride: number, zeroPad: number, op: 'min'|'max'|'avg'): Float32Array { + const gpgpu = new GPGPUContext(); + gpgpu.enableAutomaticDebugValidation(true); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const x = Array3D.new(xShape, a); + const program = + new Pool2DProgram(xShape, fieldSize, stride, zeroPad, op, false); + const res = NDArray.zeros(program.outputShape); + const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res); + gpgpu_math.runProgram(binary, [x], res); + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); + gpgpu.dispose(); + return resValues; +} diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts index a3af0f741a..19cdbad46a 100644 --- a/src/math/webgl/shader_compiler.ts +++ b/src/math/webgl/shader_compiler.ts @@ -111,7 +111,7 @@ const SAMPLE_1D_SNIPPET = ` vec2 UVfrom1D(float texNumR, float texNumC, float index) { float texR = floor(index / 
texNumC); float texC = mod(index, texNumC); - return vec2(texC, texR) / vec2(texNumC, texNumR); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; @@ -121,7 +121,7 @@ vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row, float index = dot(vec2(row, col), vec2(numC, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return vec2(texC, texR) / vec2(texNumC, texNumR); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; @@ -131,7 +131,7 @@ vec2 UVfrom3D(float texNumR, float texNumC, float stride0, float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return vec2(texC, texR) / vec2(texNumC, texNumR); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; @@ -143,13 +143,14 @@ vec2 UVfrom4D(float texNumR, float texNumC, float stride0, vec4(stride0, stride1, stride2, 1.0)); float texR = floor(index / texNumC); float texC = mod(index, texNumC); - return vec2(texC, texR) / vec2(texNumC, texNumR); + return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR); } `; const SHADER_PREFIX = ` precision highp float; varying vec2 resultUV; + const vec2 halfCR = vec2(0.5, 0.5); float sample(sampler2D texture, vec2 uv) { return texture2D(texture, uv).r; @@ -258,7 +259,7 @@ function getSamplerScalar(texName: string): string { const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1); return ` float ${funcName}() { - return sample(${texName}, vec2(0.0, 0.0)); + return sample(${texName}, halfCR); } `; } @@ -270,7 +271,7 @@ function getSampler1D(texName: string, texShape: [number, number]): string { if (texShape[0] === 1 && texShape[1] === 1) { return ` float ${funcName}(float index) { - return sample(${texName}, vec2(0.0, 0.0)); + return sample(${texName}, halfCR); } `; } @@ -311,7 +312,7 @@ function getSampler3D( float ${funcName}(float row, float col, float depth) { float texR = row; float 
texC = dot(vec2(col, depth), vec2(${stride1}, 1.0)); - vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0); + vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; @@ -341,7 +342,7 @@ function getSampler4D( float texR = row; float texC = dot(vec3(col, depth, depth2), vec3(${stride1}.0, ${stride2}.0, 1.0)); - vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0); + vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; @@ -364,7 +365,7 @@ function getSampler2D( if (util.arraysEqual(shape, texShape)) { return ` float ${funcName}(float row, float col) { - vec2 uv = vec2(col, row) / vec2(${tC}.0, ${tR}.0); + vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0); return sample(${texName}, uv); } `; @@ -403,7 +404,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { if (tNumC === 1 && tNumR === 1) { return ` float ${funcName}(float index) { - return sample(${texName}, vec2(0.0, 0.0)); + return sample(${texName}, halfCR); } `; } @@ -427,7 +428,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string { float ${funcName}(float index) { float texR = floor(index / ${tNumC}.0); float texC = mod(index, ${tNumC}.0); - vec2 uv = vec2(texC, texR) / vec2(${tNumC}.0, ${tNumR}.0); + vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0); return sample(${texName}, uv); } `; @@ -455,8 +456,8 @@ function getSamplerAtOutputCoords( ${broadcastSnippet} float texR = floor(index / ${inTexShape[1]}.0); float texC = mod(index, ${inTexShape[1]}.0); - vec2 uv = vec2(texC, texR) / - vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); + vec2 uv = (vec2(texC, texR) + halfCR) / + vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0); return sample(${texName}, uv); } `; From 373b27a07b6c35b306c95c714e6677cfd997edac Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Thu, 17 Aug 2017 18:59:30 -0400 Subject: [PATCH 06/10] removing math.reshape --- 
src/graph_runner.ts | 4 +-- src/math/math.ts | 62 +++++++++++++++++------------------- src/math/math_cpu.ts | 5 --- src/math/math_gpu.ts | 33 ------------------- src/math/webgl/copy2D_gpu.ts | 55 ++++++++++++++++++++++++++++++++ src/math/webgl/gpgpu_math.ts | 11 +++++-- src/ops/reshape.ts | 10 +++--- 7 files changed, 99 insertions(+), 81 deletions(-) create mode 100644 src/math/webgl/copy2D_gpu.ts diff --git a/src/graph_runner.ts b/src/graph_runner.ts index f5404541a5..03cdfbfdc4 100644 --- a/src/graph_runner.ts +++ b/src/graph_runner.ts @@ -211,7 +211,7 @@ export class GraphRunner { } }); - setTimeout(() => this.trainNetwork()); + requestAnimationFrame(() => this.trainNetwork()); } infer( @@ -243,7 +243,7 @@ export class GraphRunner { this.currentInferenceLoopNumPasses = numPasses; if (!this.isInferring) { this.inferencePassesThisRun = 0; - setTimeout(() => this.inferNetwork()); + requestAnimationFrame(() => this.inferNetwork()); } this.isInferring = true; } diff --git a/src/math/math.ts b/src/math/math.ts index 13a02935e7..645f314c71 100644 --- a/src/math/math.ts +++ b/src/math/math.ts @@ -71,9 +71,10 @@ export abstract class NDArrayMath { */ enableDebugMode() { this.debugMode = true; - console.warn('Debugging mode is ON. The output of every math call will ' + - 'be downloaded to CPU and checked for NaNs. ' + - 'This significantly impacts performance.'); + console.warn( + 'Debugging mode is ON. The output of every math call will ' + + 'be downloaded to CPU and checked for NaNs. ' + + 'This significantly impacts performance.'); } /** @@ -97,7 +98,7 @@ export abstract class NDArrayMath { endScope(result: ScopeResult) { let arraysToKeep = this.activeScopeNDArraysToKeep; if (result != null) { - arraysToKeep = arraysToKeep.concat(result as NDArray|NDArray[]); + arraysToKeep = arraysToKeep.concat(result as NDArray | NDArray[]); } // Dispose the current scope. 
for (let i = 0; i < this.activeScope.length; i++) { @@ -321,22 +322,15 @@ export abstract class NDArrayMath { protected abstract cloneInternal(ndarray: T): T; /** - * Reshapes an NDArray to a new shape. The size of the input NDArray must - * match the size of the requested shape. - * @param ndarray The input NDArray. - * @param newShape The new shape to reshape the NDArray to. Must be the same - * size as the NDArray. + * @deprecated Please call reshape() directly on the ndarray object. */ reshape( ndarray: T1, newShape: number[]): T2 { - util.assert( - ndarray.size === util.sizeFromShape(newShape), - `Error in reshape: old size ${ndarray.size} must match new size ` + - `${util.sizeFromShape(newShape)}.`); - return this.track(this.reshapeInternal(ndarray, newShape)); + console.warn( + 'math.reshape() is deprecated. Please call reshape() ' + + 'directly on the ndarray object'); + return ndarray.reshape(newShape); } - protected abstract reshapeInternal( - ndarray: T1, newShape: number[]): T2; /** * Extracts a slice from a matrix. The operation extraces a slice from input @@ -1148,7 +1142,8 @@ export abstract class NDArrayMath { * @param h Array of previous cell outputs. * @return Tuple [nextCellStates, cellOutputs] */ - multiRNNCell(lstmCells: LSTMCell[], data: Array2D, c: Array2D[], + multiRNNCell( + lstmCells: LSTMCell[], data: Array2D, c: Array2D[], h: Array2D[]): [Array2D[], Array2D[]] { util.assert( data.shape[0] === 1, @@ -1187,8 +1182,9 @@ export abstract class NDArrayMath { * @param h Previous cell output. 
* @return Tuple [nextCellState, cellOutput] */ - basicLSTMCell(forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D, - data: Array2D, c: Array2D, h: Array2D): [Array2D, Array2D] { + basicLSTMCell( + forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D, data: Array2D, + c: Array2D, h: Array2D): [Array2D, Array2D] { const res = this.scope(() => { util.assert( data.shape[0] === 1, @@ -1207,25 +1203,25 @@ export abstract class NDArrayMath { // i = input_gate, j = new_input, f = forget_gate, o = output_gate const i = this.slice2D(res, [0, 0], [res.shape[0], res.shape[1] / 4]); - const j = this.slice2D(res, [0, res.shape[1] / 4 * 1], - [res.shape[0], res.shape[1] / 4]); - const f = this.slice2D(res, [0, res.shape[1] / 4 * 2], - [res.shape[0], res.shape[1] / 4]); - const o = this.slice2D(res, [0, res.shape[1] / 4 * 3], - [res.shape[0], res.shape[1] / 4]); - - const newC = this.add( - this.multiplyStrict(c, - this.sigmoid(this.scalarPlusArray(forgetBias, f))), - this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D; - const newH = this.multiplyStrict( - this.tanh(newC), this.sigmoid(o)) as Array2D; + const j = this.slice2D( + res, [0, res.shape[1] / 4 * 1], [res.shape[0], res.shape[1] / 4]); + const f = this.slice2D( + res, [0, res.shape[1] / 4 * 2], [res.shape[0], res.shape[1] / 4]); + const o = this.slice2D( + res, [0, res.shape[1] / 4 * 3], [res.shape[0], res.shape[1] / 4]); + + const newC = + this.add( + this.multiplyStrict( + c, this.sigmoid(this.scalarPlusArray(forgetBias, f))), + this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D; + const newH = + this.multiplyStrict(this.tanh(newC), this.sigmoid(o)) as Array2D; return [newC, newH]; }); return [res[0], res[1]]; } - } export enum MatrixOrientation { diff --git a/src/math/math_cpu.ts b/src/math/math_cpu.ts index 4298b1974b..7231268000 100644 --- a/src/math/math_cpu.ts +++ b/src/math/math_cpu.ts @@ -31,11 +31,6 @@ export class NDArrayMathCPU extends NDArrayMath { ndarray.shape, {values: 
new Float32Array(ndarray.getValues())}); } - protected reshapeInternal( - ndarray: T1, newShape: number[]): T2 { - return this.cloneInternal(ndarray).reshape(newShape); - } - protected slice2DInternal( input: Array2D, beginRowCol: [number, number], sizeRowCol: [number, number]): Array2D { diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index cd8319d91c..ac31e7998d 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -105,39 +105,6 @@ export class NDArrayMathGPU extends NDArrayMath { ndarray.shape, {texture: resultTexture, textureShapeRC}); } - protected reshapeInternal( - ndarray: T1, newShape: number[]): T2 { - let newTexShape: [number, number]; - - switch (newShape.length) { - case 0: - newTexShape = [1, 1]; - break; - case 1: - newTexShape = [newShape[0], 1]; - break; - case 2: - newTexShape = [newShape[0], newShape[1]]; - break; - case 3: - newTexShape = [newShape[0], newShape[1] * newShape[2]]; - break; - default: - throw Error( - `Reshapes into ${newShape.length}-dim ndarray is not yet ` + - `supported on GPU`); - } - - const actualTexShape = ndarray.getTextureShapeRC(newTexShape); - let clonedArray: T1; - if (!util.arraysEqual(actualTexShape, newTexShape)) { - clonedArray = this.reshapeTexture(ndarray, newTexShape); - } else { - clonedArray = this.cloneInternal(ndarray); - } - return clonedArray.reshape(newShape); - } - protected slice2DInternal( input: Array2D, beginRowCol: [number, number], sizeRowCol: [number, number]): Array2D { diff --git a/src/math/webgl/copy2D_gpu.ts b/src/math/webgl/copy2D_gpu.ts new file mode 100644 index 0000000000..af2cbdfc6f --- /dev/null +++ b/src/math/webgl/copy2D_gpu.ts @@ -0,0 +1,55 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +import {GPGPUContext} from './gpgpu_context'; +import {GPGPUProgram} from './gpgpu_math'; + +export class Copy2DProgram implements GPGPUProgram { + variableNames = ['source']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor( + sourceShape: [number, number], sourceSize: [number, number], + destSize: [number, number]) { + this.userCode = ` + uniform vec2 sourceStartCR; + uniform vec2 destStartCR; + + void main() { + vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR; + float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x; + vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x), + floor(destOffsetFlat / sourceSizeCR.x)); + vec2 sourceCR = sourceStartCR + sourceOffsetCR; + setOutput(getSource(sourceCR.y, sourceCR.x)); + } + `; + } +} + +export function getCustomSetupFunc( + sourceStart: [number, number], destStart: [number, number], + destSize: [number, number]) { + return (gpgpu: GPGPUContext) => { + gpgpu.setOutputMatrixWriteRegion( + destStart[0], destSize[0], destStart[1], destSize[1]); + const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR'); + gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[1], sourceStart[0]); + const destStartCRLoc = gpgpu.getUniformLocation('destStartCR'); + gpgpu.gl.uniform2f(destStartCRLoc, destStart[1], destStart[0]); + }; +} diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts index 56ac04f08e..8126f9190b 100644 --- a/src/math/webgl/gpgpu_math.ts +++ 
b/src/math/webgl/gpgpu_math.ts @@ -59,7 +59,10 @@ export function compileProgram( return { program, source, - webGLProgram: gpgpu.createProgram(source), gpgpu, inShapeInfos, outShapeInfo + webGLProgram: gpgpu.createProgram(source), + gpgpu, + inShapeInfos, + outShapeInfo }; } @@ -90,7 +93,8 @@ function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) { } export function runProgram( - binary: GPGPUBinary, inputs: T[], output: K): void { + binary: GPGPUBinary, inputs: T[], output: K, + customSetup?: (gpgpu: GPGPUContext) => void): void { validateBinaryAndProgram(binary.inShapeInfos, inputs); validateBinaryAndProgram([binary.outShapeInfo], [output]); @@ -103,6 +107,9 @@ export function runProgram( const tex = input.getTexture(); gpgpu.setInputMatrixTexture(tex, binary.program.variableNames[i], i); }); + if (customSetup != null) { + customSetup(gpgpu); + } gpgpu.executeProgram(); } diff --git a/src/ops/reshape.ts b/src/ops/reshape.ts index 1a90af7262..2d5204b5e6 100644 --- a/src/ops/reshape.ts +++ b/src/ops/reshape.ts @@ -34,9 +34,8 @@ export class Reshape extends Operation { feedForward(math: NDArrayMath, inferenceArrays: TensorArrayMap) { const x = inferenceArrays.get(this.xTensor) as T1; - math.scope((keep) => { - inferenceArrays.set( - this.yTensor, keep(math.reshape(x, this.yTensor.shape))); + math.scope(keep => { + inferenceArrays.set(this.yTensor, keep(x.reshape(this.yTensor.shape))); }); } @@ -45,9 +44,8 @@ export class Reshape extends Operation { gradientArrays: TensorArrayMap) { const dy = gradientArrays.get(this.yTensor) as T2; - math.scope((keep) => { - gradientArrays.set( - this.xTensor, keep(math.reshape(dy, this.xTensor.shape))); + math.scope(keep => { + gradientArrays.set(this.xTensor, keep(dy.reshape(this.xTensor.shape))); }); } } From 768e0f7ac14292e47cd2d07e616767f00dc7abf7 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Fri, 18 Aug 2017 21:17:02 -0400 Subject: [PATCH 07/10] migrate copy op to logical --- .vscode/settings.json 
| 1 + src/math/math_gpu.ts | 57 ++++++------------ src/math/webgl/copy_gpu.ts | 81 ++++++++++++-------------- src/math/webgl/copy_gpu_test.ts | 97 ++++++++++++++++--------------- src/math/webgl/mulmat_gpu_test.ts | 52 ++++++----------- 5 files changed, 125 insertions(+), 163 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 67a611b80a..e1084bbac4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -14,5 +14,6 @@ "editor.insertSpaces": true, "files.insertFinalNewline": true, "editor.detectIndentation": false, + "editor.wrappingIndent": "none", "typescript.tsdk": "node_modules/typescript/lib" } diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index 4d0735f15e..be6ea4d3a0 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -30,6 +30,7 @@ import * as concat3d_gpu from './webgl/concat3d_gpu'; import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu'; import {Conv2DProgram} from './webgl/conv_gpu'; import * as copy_gpu from './webgl/copy_gpu'; +import {Copy2DProgram} from './webgl/copy_gpu'; import {GPGPUContext} from './webgl/gpgpu_context'; import * as gpgpu_math from './webgl/gpgpu_math'; import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math'; @@ -47,20 +48,10 @@ import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu'; import * as webgl_util from './webgl/webgl_util'; const BATCHNORM_PROG = 'batchnorm'; -const COPY_PROG = 'copy'; const CONCAT_PROG = 'concat'; const RESHAPE_PROG = 'reshape'; const RESIZE_BILINEAR_PROG = 'resizebilin'; -function makeCopyProgramName( - sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number], - destSizeRowCol: [number, number]): string { - const shapeName = `${sourceShapeRowCol[0]}_${sourceShapeRowCol[1]}`; - const srcSizeName = `${sourceSizeRowCol[0]}_${sourceSizeRowCol[1]}`; - const dstSizeName = `${destSizeRowCol[0]}_${destSizeRowCol[1]}`; - return 
`${COPY_PROG}_${shapeName}_${srcSizeName}_${dstSizeName}`; -} - export class NDArrayMathGPU extends NDArrayMath { private gpgpu: GPGPUContext; private textureManager: TextureManager; @@ -89,20 +80,14 @@ export class NDArrayMathGPU extends NDArrayMath { } protected cloneInternal(ndarray: T): T { - const textureShapeRC = ndarray.getTextureShapeRC(); - const program = this.getAndSaveProgram( - makeCopyProgramName(textureShapeRC, textureShapeRC, textureShapeRC), - () => copy_gpu.getFragmentShaderSource( - textureShapeRC, textureShapeRC, textureShapeRC)); - - const resultTexture = this.textureManager.acquireTexture(textureShapeRC); - - copy_gpu.copy( - this.gpgpu, program, ndarray.getTexture(), textureShapeRC, [0, 0], - textureShapeRC, resultTexture, textureShapeRC, [0, 0], textureShapeRC); - - return NDArray.make( - ndarray.shape, {texture: resultTexture, textureShapeRC}); + const texShape = ndarray.getTextureShapeRC(); + // Pretend the source was in logical shape that matches the texture shape. + const source = ndarray.as2D(texShape[0], texShape[1]); + // Do the same for output. + const output = this.makeOutputArray(texShape) as Array2D; + this.copy2D(source, [0, 0], texShape, output, [0, 0], texShape); + // Get back to the original logical shape. 
+ return output.reshape(ndarray.shape); } protected slice2DInternal( @@ -122,17 +107,10 @@ export class NDArrayMathGPU extends NDArrayMath { sourceSizeRowCol: [number, number], dest: Array2D, destBeginRowCol: [number, number], destSizeRowCol: [number, number]): void { - const sourceShapeRC = source.getTextureShapeRC(); - const destShapeRC = dest.getTextureShapeRC(); - const program = this.getAndSaveProgram( - makeCopyProgramName(sourceShapeRC, sourceSizeRowCol, destSizeRowCol), - () => copy_gpu.getFragmentShaderSource( - sourceShapeRC, sourceSizeRowCol, destSizeRowCol)); - - copy_gpu.copy( - this.gpgpu, program, source.getTexture(), sourceShapeRC, - sourceBeginRowCol, sourceSizeRowCol, dest.getTexture(), destShapeRC, - destBeginRowCol, destSizeRowCol); + const program = new Copy2DProgram(sourceSizeRowCol[1], destSizeRowCol[1]); + const customSetup = copy_gpu.getCustomSetupFunc( + sourceBeginRowCol, destBeginRowCol, destSizeRowCol); + this.compileAndRun(program, [source], dest, customSetup); } protected concat3DInternal(x1: Array3D, x2: Array3D, axis: number): Array3D { @@ -202,13 +180,16 @@ export class NDArrayMathGPU extends NDArrayMath { } private compileAndRun( - program: GPGPUProgram, inputs: T[]): K { - const output = this.makeOutputArray(program.outputShape); + program: GPGPUProgram, inputs: T[], output?: K, + customSetup?: (gpgpu: GPGPUContext) => void): K { + if (output == null) { + output = this.makeOutputArray(program.outputShape); + } const key = gpgpu_math.makeShaderKey(program, inputs, output); const binary = this.getAndSaveBinary(key, () => { return gpgpu_math.compileProgram(this.gpgpu, program, inputs, output); }); - gpgpu_math.runProgram(binary, inputs, output); + gpgpu_math.runProgram(binary, inputs, output, customSetup); return output; } diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts index 3210644704..709e034d94 100644 --- a/src/math/webgl/copy_gpu.ts +++ b/src/math/webgl/copy_gpu.ts @@ -14,50 +14,43 @@ limitations under the 
License. ==============================================================================*/ import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource( - sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number], - destSizeRowCol: [number, number]): string { - return ` - precision highp float; - uniform sampler2D source; - uniform vec2 sourceStartCR; - uniform vec2 destStartCR; - - const vec2 sourceShapeCR = - vec2(${sourceShapeRowCol[1]}, ${sourceShapeRowCol[0]}); - const vec2 sourceSizeCR = - vec2(${sourceSizeRowCol[1]}, ${sourceSizeRowCol[0]}); - const vec2 destSizeCR = - vec2(${destSizeRowCol[1]}, ${destSizeRowCol[0]}); - - void main() { - vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR; - float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x; - vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x), - floor(destOffsetFlat / sourceSizeCR.x)); - vec2 sourceCR = sourceStartCR + sourceOffsetCR; - vec2 sourceUV = (sourceCR + vec2(0.5, 0.5)) / sourceShapeCR; - gl_FragColor = texture2D(source, sourceUV); - }`; +import {GPGPUProgram} from './gpgpu_math'; + +export class Copy2DProgram implements GPGPUProgram { + variableNames = ['source']; + params: Array<{}>; + outputShape: number[]; + userCode: string; + + constructor(srcNumCols: number, destNumCols: number) { + this.outputShape = null; + this.params = [srcNumCols, destNumCols]; + this.userCode = ` + uniform vec2 sourceStart; + uniform vec2 destStart; + + void main() { + vec2 destCoords = getOutputCoords() - destStart; + float index = dot(destCoords, vec2(${destNumCols}.0, 1.0)); + vec2 sourceCoords = sourceStart + vec2( + floor(index / ${srcNumCols}.0), + mod(index, ${srcNumCols}.0) + ); + setOutput(getSource(sourceCoords.x, sourceCoords.y)); + } + `; + } } -export function copy( - gpgpu: GPGPUContext, program: WebGLProgram, source: WebGLTexture, - sourceShapeRowCol: [number, number], sourceStartRowCol: [number, number], - sourceSizeRowCol: 
[number, number], dest: WebGLTexture, - destShapeRowCol: [number, number], destStartRowCol: [number, number], - destSizeRowCol: [number, number]) { - gpgpu.setOutputMatrixTexture(dest, destShapeRowCol[0], destShapeRowCol[1]); - gpgpu.setOutputMatrixWriteRegion( - destStartRowCol[0], destSizeRowCol[0], destStartRowCol[1], - destSizeRowCol[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(source, 'source', 0); - const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR'); - gpgpu.gl.uniform2f( - sourceStartCRLoc, sourceStartRowCol[1], sourceStartRowCol[0]); - const destStartCRLoc = gpgpu.getUniformLocation('destStartCR'); - gpgpu.gl.uniform2f(destStartCRLoc, destStartRowCol[1], destStartRowCol[0]); - gpgpu.executeProgram(); +export function getCustomSetupFunc( + sourceStart: [number, number], destStart: [number, number], + destSize: [number, number]) { + return (gpgpu: GPGPUContext) => { + gpgpu.setOutputMatrixWriteRegion( + destStart[0], destSize[0], destStart[1], destSize[1]); + const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart'); + gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]); + const destStartCRLoc = gpgpu.getUniformLocation('destStart'); + gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]); + }; } diff --git a/src/math/webgl/copy_gpu_test.ts b/src/math/webgl/copy_gpu_test.ts index 6600995f3b..3a06ae2778 100644 --- a/src/math/webgl/copy_gpu_test.ts +++ b/src/math/webgl/copy_gpu_test.ts @@ -14,40 +14,37 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; +import {Array2D, initializeGPU} from '../ndarray'; + import * as copy_gpu from './copy_gpu'; +import {Copy2DProgram} from './copy_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; function uploadCopyDownload( - source: Float32Array, sourceShapeRowCol: [number, number], - sourceStartRowCol: [number, number], sourceSizeRowCol: [number, number], - destStartRowCol: [number, number], destSizeRowCol: [number, number], - dest: Float32Array, destShapeRowCol: [number, number]): Float32Array { + srcVals: Float32Array, srcShape: [number, number], + srcStart: [number, number], srcSize: [number, number], + destStart: [number, number], destSize: [number, number], + destVals: Float32Array, destShape: [number, number]): Float32Array { const gpgpu = new GPGPUContext(); - const fragmentShaderSource = copy_gpu.getFragmentShaderSource( - sourceShapeRowCol, sourceSizeRowCol, destSizeRowCol); - const program = gpgpu.createProgram(fragmentShaderSource); - - const sourceTex = - gpgpu.createMatrixTexture(sourceShapeRowCol[0], sourceShapeRowCol[1]); - const destTex = - gpgpu.createMatrixTexture(destShapeRowCol[0], destShapeRowCol[1]); - - gpgpu.uploadMatrixToTexture( - sourceTex, sourceShapeRowCol[0], sourceShapeRowCol[1], source); - gpgpu.uploadMatrixToTexture( - destTex, destShapeRowCol[0], destShapeRowCol[1], dest); - - copy_gpu.copy( - gpgpu, program, sourceTex, sourceShapeRowCol, sourceStartRowCol, - sourceSizeRowCol, destTex, destShapeRowCol, destStartRowCol, - destSizeRowCol); - - const result = gpgpu.downloadMatrixFromTexture( - destTex, destShapeRowCol[0], destShapeRowCol[1]); - - gpgpu.deleteMatrixTexture(sourceTex); - gpgpu.deleteMatrixTexture(destTex); - gpgpu.deleteProgram(program); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, 
texManager); + + const program = new Copy2DProgram(srcSize[1], destSize[1]); + const source = Array2D.new(srcShape, srcVals); + const dest = Array2D.new(destShape, destVals); + + const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest); + const customSetup = + copy_gpu.getCustomSetupFunc(srcStart, destStart, destSize); + gpgpu_math.runProgram(binary, [source], dest, customSetup); + const result = dest.getValues(); + + source.dispose(); + dest.dispose(); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); return result; @@ -157,33 +154,37 @@ describe('copy_gpu', () => { }); it('accumulates results from previous copies into dest texture', () => { - const shapeRC: [number, number] = [10, 10]; - const sizeRC: [number, number] = [10, 1]; - const source = new Float32Array(100); + const shape: [number, number] = [10, 10]; + const size: [number, number] = [10, 1]; + const sourceVals = new Float32Array(100); for (let i = 0; i < 100; ++i) { - source[i] = i; + sourceVals[i] = i; } + + const gpgpu = new GPGPUContext(); - const program = gpgpu.createProgram( - copy_gpu.getFragmentShaderSource(shapeRC, sizeRC, sizeRC)); - const sourceTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]); - const destTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]); - gpgpu.uploadMatrixToTexture(sourceTex, shapeRC[0], shapeRC[1], source); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); + + const program = new Copy2DProgram(size[1], size[1]); + const source = Array2D.new(shape, sourceVals); + const dest = Array2D.zeros(shape); + + const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest); for (let i = 0; i < 10; ++i) { - copy_gpu.copy( - gpgpu, program, sourceTex, shapeRC, [0, i], sizeRC, destTex, shapeRC, - [0, i], sizeRC); + const offset: [number, number] = [0, i]; + const customSetup = copy_gpu.getCustomSetupFunc(offset, offset, size); + gpgpu_math.runProgram(binary, [source], dest, 
customSetup); } + const res = dest.getValues(); - const dest = - gpgpu.downloadMatrixFromTexture(destTex, shapeRC[0], shapeRC[1]); - - gpgpu.deleteMatrixTexture(sourceTex); - gpgpu.deleteMatrixTexture(destTex); - gpgpu.deleteProgram(program); + source.dispose(); + dest.dispose(); + texManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); - test_util.expectArraysClose(dest, source, 0); + test_util.expectArraysClose(res, sourceVals, 0); }); }); diff --git a/src/math/webgl/mulmat_gpu_test.ts b/src/math/webgl/mulmat_gpu_test.ts index f6da798467..c39526290b 100644 --- a/src/math/webgl/mulmat_gpu_test.ts +++ b/src/math/webgl/mulmat_gpu_test.ts @@ -15,11 +15,12 @@ limitations under the License. import * as test_util from '../../test_util'; import {MatrixOrientation} from '../math'; -import {Array2D} from '../ndarray'; +import {Array2D, initializeGPU} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; import {MatMulProgram} from './mulmat_gpu'; +import {TextureManager} from './texture_manager'; describe('mulmat_gpu (1x1 * 1x1)', () => { it('returns a 1x1 matrix', () => { @@ -269,11 +270,11 @@ describe('mulmat_gpu (multiple matrices)', () => { const cArr = new Array2D(cShape, {texture: c, textureShapeRC: cShape}); const rArr = new Array2D(rShape, {texture: r, textureShapeRC: rShape}); const matMulProgram = new MatMulProgram(aArr.shape, bArr.shape); - const axbProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram, - [aArr, bArr], abArr); + const axbProgram = + gpgpu_math.compileProgram(gpgpu, matMulProgram, [aArr, bArr], abArr); const matMulProgram2 = new MatMulProgram(abArr.shape, cArr.shape); - const abxcProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram2, - [abArr, cArr], rArr); + const abxcProgram = + gpgpu_math.compileProgram(gpgpu, matMulProgram2, [abArr, cArr], rArr); gpgpu.uploadMatrixToTexture(a, aShape[0], aShape[1], aData); gpgpu.uploadMatrixToTexture(b, bShape[0], 
bShape[1], bData); @@ -335,41 +336,26 @@ export function uploadMultiplyMatrixDownload( bNumRows: number, bNumCols: number, aOrientation = MatrixOrientation.REGULAR, bOrientation = MatrixOrientation.REGULAR): Float32Array { - const outNumRows = - (aOrientation === MatrixOrientation.REGULAR) ? aNumRows : aNumCols; - const outNumCols = - (bOrientation === MatrixOrientation.REGULAR) ? bNumCols : bNumRows; const gpgpu = new GPGPUContext(); + const texManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, texManager); + const aShape: [number, number] = [aNumRows, aNumCols]; const bShape: [number, number] = [bNumRows, bNumCols]; - const outShape: [number, number] = [outNumRows, outNumCols]; - - const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols); - const aArr = new Array2D( - aShape, {texture: aTexture, textureShapeRC: [aNumRows, aNumCols]}); - const bTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols); - const bArr = new Array2D( - bShape, {texture: bTexture, textureShapeRC: [bNumRows, bNumCols]}); - const resultTexture: WebGLTexture = - gpgpu.createMatrixTexture(outNumRows, outNumCols); - const resArr = - new Array2D(outShape, {texture: resultTexture, textureShapeRC: outShape}); - - const program = - new MatMulProgram(aArr.shape, bArr.shape, aOrientation, bOrientation); + + const program = new MatMulProgram(aShape, bShape, aOrientation, bOrientation); + const resArr = Array2D.zeros(program.outputShape as [number, number]); + const aArr = Array2D.new(aShape, a); + const bArr = Array2D.new(bShape, b); + const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr, bArr], resArr); - gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a); - gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b); - gpgpu_math.runProgram(binary, [aArr, bArr], resArr); + const result = resArr.getValues(); - const result = - gpgpu.downloadMatrixFromTexture(resultTexture, outNumRows, outNumCols); - - gpgpu.deleteMatrixTexture(aTexture); - 
gpgpu.deleteMatrixTexture(bTexture); - gpgpu.deleteMatrixTexture(resultTexture); + aArr.dispose(); + bArr.dispose(); + texManager.dispose(); gpgpu.deleteProgram(binary.webGLProgram); gpgpu.dispose(); From e6b04b5ec43f6130aa340c73442f67d2887c972c Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Fri, 18 Aug 2017 21:18:03 -0400 Subject: [PATCH 08/10] remove duplicate copy file --- src/math/webgl/copy2D_gpu.ts | 55 ------------------------------------ 1 file changed, 55 deletions(-) delete mode 100644 src/math/webgl/copy2D_gpu.ts diff --git a/src/math/webgl/copy2D_gpu.ts b/src/math/webgl/copy2D_gpu.ts deleted file mode 100644 index af2cbdfc6f..0000000000 --- a/src/math/webgl/copy2D_gpu.ts +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; -import {GPGPUProgram} from './gpgpu_math'; - -export class Copy2DProgram implements GPGPUProgram { - variableNames = ['source']; - params: Array<{}>; - outputShape: number[]; - userCode: string; - - constructor( - sourceShape: [number, number], sourceSize: [number, number], - destSize: [number, number]) { - this.userCode = ` - uniform vec2 sourceStartCR; - uniform vec2 destStartCR; - - void main() { - vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR; - float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x; - vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x), - floor(destOffsetFlat / sourceSizeCR.x)); - vec2 sourceCR = sourceStartCR + sourceOffsetCR; - setOutput(getSource(sourceCR.y, sourceCR.x)); - } - `; - } -} - -export function getCustomSetupFunc( - sourceStart: [number, number], destStart: [number, number], - destSize: [number, number]) { - return (gpgpu: GPGPUContext) => { - gpgpu.setOutputMatrixWriteRegion( - destStart[0], destSize[0], destStart[1], destSize[1]); - const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR'); - gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[1], sourceStart[0]); - const destStartCRLoc = gpgpu.getUniformLocation('destStartCR'); - gpgpu.gl.uniform2f(destStartCRLoc, destStart[1], destStart[0]); - }; -} From 2c322d7204f69995fa8cd072af856f2a138eb0e6 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Sun, 20 Aug 2017 21:50:33 -0400 Subject: [PATCH 09/10] move the rest of math ops to logical --- src/math/math_gpu.ts | 209 +++------------------ src/math/ndarray.ts | 4 +- src/math/webgl/batchnorm_gpu.ts | 159 +++++----------- src/math/webgl/batchnorm_gpu_test.ts | 122 +++++------- src/math/webgl/binaryop_gpu_test.ts | 15 +- src/math/webgl/concat3d_gpu.ts | 92 ++++----- src/math/webgl/concat3d_gpu_test.ts | 75 +++----- src/math/webgl/mulbcast_gpu.ts | 90 
--------- src/math/webgl/mulbcast_gpu_test.ts | 140 -------------- src/math/webgl/reshape_gpu.ts | 65 ------- src/math/webgl/reshape_gpu_test.ts | 88 --------- src/math/webgl/resize_bilinear_gpu.ts | 133 ++++++------- src/math/webgl/resize_bilinear_gpu_test.ts | 72 +++---- 13 files changed, 270 insertions(+), 994 deletions(-) delete mode 100644 src/math/webgl/mulbcast_gpu.ts delete mode 100644 src/math/webgl/mulbcast_gpu_test.ts delete mode 100644 src/math/webgl/reshape_gpu.ts delete mode 100644 src/math/webgl/reshape_gpu_test.ts diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index be6ea4d3a0..f63f070f33 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -13,19 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -import * as util from '../util'; - -import * as concat3d_util from './concat3d_util'; -import * as conv_util from './conv_util'; import {MatrixOrientation, NDArrayMath} from './math'; import * as ndarray from './ndarray'; import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray'; import {AddScaledMatProgram} from './webgl/addscaledmat_gpu'; import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu'; import {ArgMinMaxProgram} from './webgl/argminmax_gpu'; -import * as batchnorm_gpu from './webgl/batchnorm_gpu'; +import {BatchNormProgram} from './webgl/batchnorm_gpu'; import {BinaryOpProgram} from './webgl/binaryop_gpu'; -import * as concat3d_gpu from './webgl/concat3d_gpu'; +import {Concat3DProgram} from './webgl/concat3d_gpu'; // tslint:disable-next-line:max-line-length import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu'; import {Conv2DProgram} from './webgl/conv_gpu'; @@ -41,21 +37,14 @@ import {MinMaxProgram} from './webgl/minmax_gpu'; import {MatMulProgram} from './webgl/mulmat_gpu'; import {Pool2DProgram} from 
'./webgl/pool_gpu'; import {ReduceSumProgram} from './webgl/reducesum_gpu'; -import * as reshape_gpu from './webgl/reshape_gpu'; -import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu'; +import {ResizeBilinearProgram} from './webgl/resize_bilinear_gpu'; import {TextureManager} from './webgl/texture_manager'; import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu'; import * as webgl_util from './webgl/webgl_util'; -const BATCHNORM_PROG = 'batchnorm'; -const CONCAT_PROG = 'concat'; -const RESHAPE_PROG = 'reshape'; -const RESIZE_BILINEAR_PROG = 'resizebilin'; - export class NDArrayMathGPU extends NDArrayMath { private gpgpu: GPGPUContext; private textureManager: TextureManager; - private programCache: {[key: string]: WebGLProgram} = {}; private binaryCache: {[key: string]: GPGPUBinary} = {}; private gpgpuCreatedLocally: boolean; @@ -114,51 +103,8 @@ export class NDArrayMathGPU extends NDArrayMath { } protected concat3DInternal(x1: Array3D, x2: Array3D, axis: number): Array3D { - const x1TexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(x1.shape); - const x2TexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(x2.shape); - - // If the texture shapes doesn't match the shapes that shaders expect, - // do physical texture reshapes on the GPU. 
- const actualX1TexShape = x1.getTextureShapeRC(x1TexShapeRC); - let cleanupX1 = false; - if (!util.arraysEqual(actualX1TexShape, x1TexShapeRC)) { - x1 = this.reshapeTexture(x1, x1TexShapeRC); - cleanupX1 = true; - } - const actualX2TexShape = x2.getTextureShapeRC(x2TexShapeRC); - let cleanupX2 = false; - if (!util.arraysEqual(actualX2TexShape, x2TexShapeRC)) { - x2 = this.reshapeTexture(x2, x2TexShapeRC); - cleanupX2 = true; - } - - const resultShapeRCD = - concat3d_util.computeConcat3DOutputShape(x1.shape, x2.shape, axis); - - const program = this.getAndSaveProgram( - `${CONCAT_PROG}_${x1.shape}_${x2.shape}_${axis}`, - () => concat3d_gpu.getFragmentShaderSource( - x1.shape, x2.shape, resultShapeRCD, axis)); - - const resultTexShape = conv_util.computeTexShapeFrom3D(resultShapeRCD); - const resultTex = this.textureManager.acquireTexture(resultTexShape); - - concat3d_gpu.concat3D( - this.gpgpu, program, x1.getTexture(), x2.getTexture(), resultTex, - resultTexShape); - - if (cleanupX1) { - x1.dispose(); - } - - if (cleanupX2) { - x2.dispose(); - } - - return NDArray.make( - resultShapeRCD, {texture: resultTex, textureShapeRC: resultTexShape}); + const program = new Concat3DProgram(x1.shape, x2.shape, axis); + return this.compileAndRun(program, [x1, x2]); } protected scaledArrayAddInternal( @@ -193,23 +139,6 @@ export class NDArrayMathGPU extends NDArrayMath { return output; } - private reshapeTexture(a: T, newTextureShape: [ - number, number - ]): T { - const aTexShape = a.getTextureShapeRC(); - - const program = this.getAndSaveProgram( - RESHAPE_PROG, () => reshape_gpu.getFragmentShaderSource()); - - const resultTexture = this.textureManager.acquireTexture(newTextureShape); - reshape_gpu.reshape( - this.gpgpu, program, a.getTexture(), aTexShape[0], aTexShape[1], - resultTexture, newTextureShape[0], newTextureShape[1]); - - return NDArray.make( - a.shape, {texture: resultTexture, textureShapeRC: newTextureShape}); - } - protected matMulInternal( a: Array2D, b: 
Array2D, aOrientation: MatrixOrientation, bOrientation: MatrixOrientation): Array2D { @@ -225,92 +154,26 @@ export class NDArrayMathGPU extends NDArrayMath { protected batchNormalization3DInternal( x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D, - varianceEpsilon: number, scale?: Array3D|Array1D, + varianceEpsilon = 0.000001, scale?: Array3D|Array1D, offset?: Array3D|Array1D): Array3D { - const xTexShape = x.getTextureShapeRC(); - - let cleanupMean = false; - const preferredMeanTexShape: [number, number] = - mean.rank === 1 ? [1, mean.size] : xTexShape; - let meanTexShape = mean.getTextureShapeRC(preferredMeanTexShape); - if (!util.arraysEqual(meanTexShape, preferredMeanTexShape)) { - mean = this.reshapeTexture(mean, preferredMeanTexShape); - meanTexShape = preferredMeanTexShape; - cleanupMean = true; - } + const inputs = [x, mean, variance]; - let cleanupVariance = false; - const preferredVarianceTexShape: [number, number] = - variance.rank === 1 ? [1, variance.size] : xTexShape; - let varianceTexShape = variance.getTextureShapeRC(preferredMeanTexShape); - if (!util.arraysEqual(varianceTexShape, preferredVarianceTexShape)) { - variance = this.reshapeTexture(variance, preferredVarianceTexShape); - varianceTexShape = preferredVarianceTexShape; - cleanupVariance = true; - } - - let scaleTexShape: [number, number]|null = null; - let cleanupScale = false; - if (scale != null) { - const preferredScaleTexShape: [number, number] = - scale.rank === 1 ? [1, scale.size] : xTexShape; - - scaleTexShape = scale.getTextureShapeRC(preferredScaleTexShape); - if (!util.arraysEqual(scaleTexShape, preferredScaleTexShape)) { - scale = this.reshapeTexture(scale, preferredScaleTexShape); - scaleTexShape = preferredScaleTexShape; - cleanupScale = true; - } - } - - let offsetTexShape: [number, number]|null = null; - let cleanupOffset = false; + let offsetShape = null; if (offset != null) { - const preferredOffsetTexShape: [number, number] = - offset.rank === 1 ? 
[1, offset.size] : xTexShape; - - offsetTexShape = offset.getTextureShapeRC(preferredOffsetTexShape); - if (!util.arraysEqual(offsetTexShape, preferredOffsetTexShape)) { - offset = this.reshapeTexture(offset, preferredOffsetTexShape); - offsetTexShape = preferredOffsetTexShape; - cleanupOffset = true; - } + offsetShape = offset.shape; + inputs.push(offset); } - const resultTexShape: [number, number] = x.getTextureShapeRC(); - - const program = this.getAndSaveProgram( - `${BATCHNORM_PROG}_${xTexShape}_${meanTexShape}_${varianceTexShape}_` + - `${scaleTexShape!}_${offsetTexShape!}_${varianceEpsilon}`, - () => batchnorm_gpu.getFragmentShaderSource( - xTexShape, meanTexShape, varianceTexShape, offsetTexShape, - scaleTexShape, varianceEpsilon)); - - const resultTexture = this.textureManager.acquireTexture(resultTexShape); - - batchnorm_gpu.batchNormalization( - this.gpgpu, program, x.getTexture(), xTexShape, mean.getTexture(), - meanTexShape, variance.getTexture(), varianceTexShape, - offset != null ? offset.getTexture() : null, - offset != null ? offsetTexShape : null, - scale != null ? scale.getTexture() : null, - scale != null ? 
scaleTexShape : null, resultTexture, resultTexShape); - - if (cleanupMean) { - mean.dispose(); - } - if (cleanupVariance) { - variance.dispose(); - } - if (cleanupScale) { - scale!.dispose(); - } - if (cleanupOffset) { - offset!.dispose(); + let scaleShape = null; + if (scale != null) { + scaleShape = scale.shape; + inputs.push(scale); } - return NDArray.make( - x.shape, {texture: resultTexture, textureShapeRC: resultTexShape}); + const program = new BatchNormProgram( + x.shape, mean.shape, variance.shape, offsetShape, scaleShape, + varianceEpsilon); + return this.compileAndRun(program, inputs); } protected switchDimInternal(a: T, newDim: number[]): T { @@ -492,25 +355,9 @@ export class NDArrayMathGPU extends NDArrayMath { protected resizeBilinear3DInternal( x: Array3D, newShape2D: [number, number], alignCorners: boolean): Array3D { - const programKey = - [RESIZE_BILINEAR_PROG, x.shape, newShape2D, alignCorners].join('_'); - - const newShapeRCD: [number, number, number] = - [newShape2D[0], newShape2D[1], x.shape[2]]; - const resultTexShape = conv_util.computeTexShapeFrom3D(newShapeRCD); - - const program = this.getAndSaveProgram( - programKey, - () => resize_bilinear_gpu.getFragmentShaderSource( - x.shape, newShape2D, alignCorners)); - - const resultTexture = this.textureManager.acquireTexture(resultTexShape); - - resize_bilinear_gpu.resizeBilinear( - this.gpgpu, program, x.getTexture(), resultTexture, resultTexShape); - - return NDArray.make( - newShapeRCD, {texture: resultTexture, textureShapeRC: resultTexShape}); + const program = + new ResizeBilinearProgram(x.shape, newShape2D, alignCorners); + return this.compileAndRun(program, [x]); } private getAndSaveBinary(key: string, getBinary: () => GPGPUBinary): @@ -521,25 +368,11 @@ export class NDArrayMathGPU extends NDArrayMath { return this.binaryCache[key]; } - private getAndSaveProgram(programKey: string, getShaderSource: () => string): - WebGLProgram { - if (!(programKey in this.programCache)) { - 
this.programCache[programKey] = - this.gpgpu.createProgram(getShaderSource()); - } - return this.programCache[programKey]; - } - getTextureManager(): TextureManager { return this.textureManager; } dispose() { - for (const programKey in this.programCache) { - if (this.programCache.hasOwnProperty(programKey)) { - this.gpgpu.deleteProgram(this.programCache[programKey]); - } - } for (const key in this.binaryCache) { this.gpgpu.deleteProgram(this.binaryCache[key].webGLProgram); } diff --git a/src/math/ndarray.ts b/src/math/ndarray.ts index e0104f0342..6c97994d00 100644 --- a/src/math/ndarray.ts +++ b/src/math/ndarray.ts @@ -135,8 +135,8 @@ export class NDArray { return new Array3D(shape as [number, number, number], data) as any; case 4: return new Array4D( - // tslint:disable-next-line:no-any - shape as [number, number, number, number], data) as any; + // tslint:disable-next-line:no-any + shape as [number, number, number, number], data) as any; default: // tslint:disable-next-line:no-any return new NDArray(shape, data) as any; diff --git a/src/math/webgl/batchnorm_gpu.ts b/src/math/webgl/batchnorm_gpu.ts index 6a93267a97..504ab05af3 100644 --- a/src/math/webgl/batchnorm_gpu.ts +++ b/src/math/webgl/batchnorm_gpu.ts @@ -13,119 +13,50 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource( - xTexShapeRC: [number, number], meanTexShapeRC: [number, number], - varianceTexShapeRC: [number, number], - offsetTexShapeRC: [number, number]|null, - scaleTexShapeRC?: [number, number]|null, varianceEpsilon = 0.001): string { - let offsetSamplerSnippet = ''; - let offsetShapeInitializationSnippet = ''; - let offsetCoordsSnippet = ''; - let offsetUVSnippet = ''; - let offsetValueSnippet = ''; - let offsetOperationSnippet = '0.0'; - - let scaleSamplerSnippet = ''; - let scaleShapeInitializationSnippet = ''; - let scaleCoordsSnippet = ''; - let scaleUVSnippet = ''; - let scaleValueSnippet = ''; - let scaleOperationSnippet = ''; - - if (offsetTexShapeRC != null) { - offsetSamplerSnippet = 'uniform sampler2D offset;'; - offsetShapeInitializationSnippet = `const vec2 offsetShapeCR = vec2( - ${offsetTexShapeRC[1]}, ${offsetTexShapeRC[0]});`; - offsetCoordsSnippet = 'vec2 offsetCoordsCR = mod(yTexCR, offsetShapeCR);'; - offsetUVSnippet = - 'vec2 offsetUV = (offsetCoordsCR + halfCR) / offsetShapeCR;'; - offsetValueSnippet = 'float offsetValue = texture2D(offset, offsetUV).r;'; - offsetOperationSnippet = 'offsetValue'; - } - - if (scaleTexShapeRC != null) { - scaleSamplerSnippet = 'uniform sampler2D scale;'; - scaleShapeInitializationSnippet = `const vec2 scaleShapeCR = vec2( - ${scaleTexShapeRC[1]}, ${scaleTexShapeRC[0]});`; - scaleCoordsSnippet = 'vec2 scaleCoordsCR = mod(yTexCR, scaleShapeCR);'; - scaleUVSnippet = 'vec2 scaleUV = (scaleCoordsCR + halfCR) / scaleShapeCR;'; - scaleValueSnippet = 'float scaleValue = texture2D(scale, scaleUV).r;'; - scaleOperationSnippet = 'inv *= scaleValue;'; +import * as util from '../../util'; +import {GPGPUProgram} from './gpgpu_math'; + +export class BatchNormProgram implements GPGPUProgram { + variableNames: string[]; + params: Array<{}> = []; + outputShape: 
number[] = []; + userCode: string; + supportsBroadcasting = true; + + constructor( + xShape: number[], meanShape: number[], varianceShape: number[], + offsetShape: number[]|null, scaleShape: number[]|null, + varianceEpsilon: number) { + this.variableNames = ['x', 'mean', 'variance']; + util.assertAndGetBroadcastedShape(xShape, meanShape); + util.assertAndGetBroadcastedShape(xShape, varianceShape); + + let offsetSnippet = '0.0'; + if (offsetShape != null) { + util.assertAndGetBroadcastedShape(xShape, offsetShape); + this.variableNames.push('offset'); + offsetSnippet = 'getOffsetAtOutCoords()'; + } + + let scaleSnippet = '1.0'; + if (scaleShape != null) { + util.assertAndGetBroadcastedShape(xShape, scaleShape); + this.variableNames.push('scale'); + scaleSnippet = 'getScaleAtOutCoords()'; + } + + this.params = [varianceEpsilon]; + this.outputShape = xShape; + this.userCode = ` + void main() { + float x = getXAtOutCoords(); + float mean = getMeanAtOutCoords(); + float variance = getVarianceAtOutCoords(); + float offset = ${offsetSnippet}; + float scale = ${scaleSnippet}; + float inv = scale / sqrt(variance + float(${varianceEpsilon})); + setOutput((x - mean) * inv + offset); + } + `; } - - return ` - precision highp float; - uniform sampler2D x; - uniform sampler2D mean; - uniform sampler2D variance; - ${offsetSamplerSnippet} - ${scaleSamplerSnippet} - - varying vec2 resultUV; - - const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]}); - const vec2 meanShapeCR = vec2(${meanTexShapeRC[1]}, ${meanTexShapeRC[0]}); - const vec2 varianceShapeCR = vec2( - ${varianceTexShapeRC[1]}, ${varianceTexShapeRC[0]}); - - ${offsetShapeInitializationSnippet} - ${scaleShapeInitializationSnippet} - - const vec2 halfCR = vec2(0.5, 0.5); - const float varianceEpsilon = ${varianceEpsilon}; - - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - vec2 meanCoordsCR = mod(yTexCR, meanShapeCR); - vec2 varianceCoordsCR = mod(yTexCR, varianceShapeCR); - ${offsetCoordsSnippet} - 
${scaleCoordsSnippet} - - vec2 meanUV = (meanCoordsCR + halfCR) / meanShapeCR; - vec2 varianceUV = (varianceCoordsCR + halfCR) / varianceShapeCR; - ${offsetUVSnippet} - ${scaleUVSnippet} - - float xValue = texture2D(x, resultUV).r; - float meanValue = texture2D(mean, meanUV).r; - float varianceValue = texture2D(variance, varianceUV).r; - ${offsetValueSnippet} - ${scaleValueSnippet} - - float inv = 1.0 / sqrt(varianceValue + varianceEpsilon); - ${scaleOperationSnippet} - float xTimesInv = xValue * inv; - float meanTimesInvWithOffset = ${offsetOperationSnippet} - - meanValue * inv; - - gl_FragColor = vec4(xTimesInv + meanTimesInvWithOffset, 0, 0, 0); - }`; } - -export function batchNormalization( - gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture, - xShapeRowCol: [number, number], mean: WebGLTexture, - meanShapeRowCol: [number, number], variance: WebGLTexture, - varianceShapeRowCol: [number, number], offset: WebGLTexture|null, - offsetShapeRowCol: [number, number]|null, scale: WebGLTexture|null, - scaleShapeRowCol: [number, number]|null, result: WebGLTexture, - resultShapeRowCol: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultShapeRowCol[0], resultShapeRowCol[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(x, 'x', 0); - gpgpu.setInputMatrixTexture(mean, 'mean', 1); - gpgpu.setInputMatrixTexture(variance, 'variance', 2); - let nextIndex = 3; - if (offset != null) { - gpgpu.setInputMatrixTexture(offset, 'offset', nextIndex); - nextIndex++; - } - if (scale != null) { - gpgpu.setInputMatrixTexture(scale, 'scale', nextIndex); - } - gpgpu.executeProgram(); -} \ No newline at end of file diff --git a/src/math/webgl/batchnorm_gpu_test.ts b/src/math/webgl/batchnorm_gpu_test.ts index 7903a9e9ac..aac8f27464 100644 --- a/src/math/webgl/batchnorm_gpu_test.ts +++ b/src/math/webgl/batchnorm_gpu_test.ts @@ -14,88 +14,14 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; +import {initializeGPU, NDArray} from '../ndarray'; -import * as batchnorm_gpu from './batchnorm_gpu'; +import {BatchNormProgram} from './batchnorm_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; describe('batchnorm gpu test', () => { - function uploadBatchNormDownload( - x: Float32Array, xTexShapeRowCol: [number, number], mean: Float32Array, - meanTexShapeRowCol: [number, number], variance: Float32Array, - varianceTexShapeRowCol: [number, number], offset: Float32Array|null, - offsetTexShapeRowCol: [number, number]|null, scale: Float32Array|null, - scaleTexShapeRowCol: [number, number]|null, - varianceEpsilon: number): Float32Array { - const resultTexShapeRC: [number, number] = xTexShapeRowCol; - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = batchnorm_gpu.getFragmentShaderSource( - xTexShapeRowCol, meanTexShapeRowCol, varianceTexShapeRowCol, - offsetTexShapeRowCol, scaleTexShapeRowCol, varianceEpsilon); - - const program = gpgpu.createProgram(shaderSource); - - const xTex = - gpgpu.createMatrixTexture(xTexShapeRowCol[0], xTexShapeRowCol[1]); - const meanTex = - gpgpu.createMatrixTexture(meanTexShapeRowCol[0], meanTexShapeRowCol[1]); - const varianceTex = gpgpu.createMatrixTexture( - varianceTexShapeRowCol[0], varianceTexShapeRowCol[1]); - - let offsetTex = null; - if (offset != null) { - offsetTex = gpgpu.createMatrixTexture( - offsetTexShapeRowCol![0], offsetTexShapeRowCol![1]); - } - let scaleTex = null; - if (scale != null) { - scaleTex = gpgpu.createMatrixTexture( - scaleTexShapeRowCol![0], scaleTexShapeRowCol![1]); - } - - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture( - xTex, xTexShapeRowCol[0], 
xTexShapeRowCol[1], x); - gpgpu.uploadMatrixToTexture( - meanTex, meanTexShapeRowCol[0], meanTexShapeRowCol[1], mean); - gpgpu.uploadMatrixToTexture( - varianceTex, varianceTexShapeRowCol[0], varianceTexShapeRowCol[1], - variance); - if (offset != null) { - gpgpu.uploadMatrixToTexture( - offsetTex!, offsetTexShapeRowCol![0], offsetTexShapeRowCol![1], - offset); - } - if (scale != null) { - gpgpu.uploadMatrixToTexture( - scaleTex!, scaleTexShapeRowCol![0], scaleTexShapeRowCol![1], scale); - } - - batchnorm_gpu.batchNormalization( - gpgpu, program, xTex, xTexShapeRowCol, meanTex, meanTexShapeRowCol, - varianceTex, varianceTexShapeRowCol, offsetTex, offsetTexShapeRowCol, - scaleTex, scaleTexShapeRowCol, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(xTex); - gpgpu.deleteMatrixTexture(meanTex); - gpgpu.deleteMatrixTexture(varianceTex); - if (offsetTex != null) { - gpgpu.deleteMatrixTexture(offsetTex); - } - if (scaleTex != null) { - gpgpu.deleteMatrixTexture(scaleTex); - } - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; - } it('simple batchnorm, no offset or scale, 2x1x2', () => { const x = new Float32Array([2, 100, 4, 400]); @@ -201,7 +127,7 @@ describe('batchnorm gpu test', () => { const varianceEpsilon = .001; const result = uploadBatchNormDownload( - x, [2, 9], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale, + x, [2, 3, 3], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale, [1, 3], varianceEpsilon); const expectedResult = new Float32Array([ @@ -212,3 +138,41 @@ describe('batchnorm gpu test', () => { test_util.expectArraysClose(result, expectedResult, 1e-5); }); }); + +function uploadBatchNormDownload( + x: Float32Array, xShape: number[], mean: Float32Array, meanShape: number[], + variance: Float32Array, varianceShape: number[], offset: Float32Array|null, + offsetShape: 
number[]|null, scale: Float32Array|null, + scaleShape: number[]|null, varianceEpsilon: number): Float32Array { + const gpgpu = new GPGPUContext(); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const program = new BatchNormProgram( + xShape, meanShape, varianceShape, offsetShape, scaleShape, + varianceEpsilon); + const xArr = NDArray.make(xShape, {values: x}); + const meanArr = NDArray.make(meanShape, {values: mean}); + const varianceArr = NDArray.make(varianceShape, {values: variance}); + const inputs = [xArr, meanArr, varianceArr]; + + if (offset != null) { + const offsetArr = NDArray.make(offsetShape, {values: offset}); + inputs.push(offsetArr); + } + if (scale != null) { + const scaleArr = NDArray.make(scaleShape, {values: scale}); + inputs.push(scaleArr); + } + + const res = NDArray.zeros(program.outputShape); + const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res); + gpgpu_math.runProgram(binary, inputs, res); + const resValues = res.getValues(); + + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); + gpgpu.dispose(); + + return resValues; +} diff --git a/src/math/webgl/binaryop_gpu_test.ts b/src/math/webgl/binaryop_gpu_test.ts index dd1ad320c0..cb2b14cf16 100644 --- a/src/math/webgl/binaryop_gpu_test.ts +++ b/src/math/webgl/binaryop_gpu_test.ts @@ -14,13 +14,12 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; +// tslint:disable-next-line:max-line-length +import {Array1D, Array2D, Array3D, initializeGPU, NDArray, Scalar} from '../ndarray'; import {BinaryOpProgram} from './binaryop_gpu'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; -import {NDArray, Array1D, Array2D, Array3D, Scalar, - initializeGPU} from '../ndarray'; -import * as util from '../../util'; import {TextureManager} from './texture_manager'; describe('binaryop_gpu Add', () => { @@ -92,7 +91,7 @@ describe('binaryop_gpu Sub', () => { // shape [3, 2] is not compatible with shape [3]. const res = uploadBinaryOpDownload(a, b, '-'); test_util.expectArraysClose( - res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4); + res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4); }); }); @@ -177,17 +176,15 @@ describe('binaryop_gpu Divide', () => { }); }); -export function uploadBinaryOpDownload( +function uploadBinaryOpDownload( a: NDArray, b: NDArray, op: '+'|'-'|'*'|'/'): Float32Array { const gpgpu = new GPGPUContext(); const textureManager = new TextureManager(gpgpu); initializeGPU(gpgpu, textureManager); - const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape); - const res = NDArray.zeros(outShape); const program = new BinaryOpProgram(op, a.shape, b.shape); - const binary = - gpgpu_math.compileProgram(gpgpu, program, [a, b], res); + const res = NDArray.zeros(program.outputShape); + const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b], res); gpgpu_math.runProgram(binary, [a, b], res); const resValues = res.getValues(); diff --git a/src/math/webgl/concat3d_gpu.ts b/src/math/webgl/concat3d_gpu.ts index ebe37d7ab3..e6c6840c8a 100644 --- a/src/math/webgl/concat3d_gpu.ts +++ b/src/math/webgl/concat3d_gpu.ts @@ -13,62 +13,40 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -import * as conv_util from '../conv_util'; -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource( - x1ShapeRCD: [number, number, number], x2ShapeRCD: [number, number, number], - resultShapeRCD: [number, number, number], axis: number): string { - const x1TexShapeRC = conv_util.computeTexShapeFrom3D(x1ShapeRCD); - const x2TexShapeRC = conv_util.computeTexShapeFrom3D(x2ShapeRCD); - - const yAxes = ['yR', 'yC', 'yD']; - const concatAxis = yAxes[axis]; - - return ` - precision highp float; - uniform sampler2D x1; - uniform sampler2D x2; - - const vec2 x1ShapeCR = vec2(${x1TexShapeRC[1]}, ${x1TexShapeRC[0]}); - const vec2 x2ShapeCR = vec2(${x2TexShapeRC[1]}.0, ${x2TexShapeRC[0]}.0); - - const vec2 halfCR = vec2(0.5, 0.5); - - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (yTexR, yTexC) to 3D (yR, yC, yD). - float yR = yTexCR.y; - float yC = floor(yTexCR.x / ${resultShapeRCD[2]}.0); - float yD = mod(yTexCR.x, ${resultShapeRCD[2]}.0); - - float value = 0.0; - - if (${concatAxis} < ${x1ShapeRCD[axis]}.0) { - // Map yR, yC, yD back to x1 coordinates. - vec2 x1CR = vec2(yC * ${x1ShapeRCD[2]}.0 + yD, yR); - vec2 x1UV = (x1CR + halfCR) / x1ShapeCR; - value = texture2D(x1, x1UV).r; - } else { - ${concatAxis} = ${concatAxis} - ${x1ShapeRCD[axis]}.0; - - // Map yR, yC, yD back to x2 coordinates. 
- vec2 x2CR = vec2(yC * ${x2ShapeRCD[2]}.0 + yD, yR); - vec2 x2UV = (x2CR + halfCR) / x2ShapeCR; - value = texture2D(x2, x2UV).r; +import * as concat3d_util from '../concat3d_util'; +import {GPGPUProgram} from './gpgpu_math'; + +export class Concat3DProgram implements GPGPUProgram { + variableNames = ['A', 'B']; + params: Array<{}> = []; + outputShape: number[] = []; + userCode: string; + + constructor( + x1Shape: [number, number, number], x2Shape: [number, number, number], + axis: number) { + const yAxes = ['yR', 'yC', 'yD']; + const concatAxis = yAxes[axis]; + this.params = [axis]; + this.outputShape = + concat3d_util.computeConcat3DOutputShape(x1Shape, x2Shape, axis); + this.userCode = ` + void main() { + vec3 coords = getOutputCoords(); + float yR = coords.x; + float yC = coords.y; + float yD = coords.z; + + float value = 0.0; + if (${concatAxis} < ${x1Shape[axis]}.0) { + value = getA(yR, yC, yD); + } else { + ${concatAxis} -= ${x1Shape[axis]}.0; + value = getB(yR, yC, yD); + } + + setOutput(value); } - - gl_FragColor = vec4(value, 0.0, 0.0, 0.0); - }`; -} - -export function concat3D( - gpgpu: GPGPUContext, program: WebGLProgram, x1: WebGLTexture, - x2: WebGLTexture, result: WebGLTexture, resultShapeRC: [number, number]) { - gpgpu.setOutputMatrixTexture(result, resultShapeRC[0], resultShapeRC[1]); - gpgpu.setProgram(program); - gpgpu.setInputMatrixTexture(x1, 'x1', 0); - gpgpu.setInputMatrixTexture(x2, 'x2', 1); - gpgpu.executeProgram(); + `; + } } diff --git a/src/math/webgl/concat3d_gpu_test.ts b/src/math/webgl/concat3d_gpu_test.ts index 00ee4b14d0..3f96a78d8f 100644 --- a/src/math/webgl/concat3d_gpu_test.ts +++ b/src/math/webgl/concat3d_gpu_test.ts @@ -14,54 +14,13 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; - -import * as concat3d_gpu from './concat3d_gpu'; +import {Array3D, initializeGPU, NDArray} from '../ndarray'; +import {Concat3DProgram} from './concat3d_gpu'; import {GPGPUContext} from './gpgpu_context'; +import * as gpgpu_math from './gpgpu_math'; +import {TextureManager} from './texture_manager'; describe('concat3d_gpu', () => { - - function uploadConcat3dDownload( - x1: Float32Array, x2: Float32Array, x1ShapeRCD: [number, number, number], - x2ShapeRCD: [number, number, number], axis: number): Float32Array { - const x1TexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(x1ShapeRCD); - const x2TexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(x2ShapeRCD); - - const resultShapeRCD = x1ShapeRCD.slice() as [number, number, number]; - resultShapeRCD[axis] += x2ShapeRCD[axis]; - const resultTexShapeRC = conv_util.computeTexShapeFrom3D(resultShapeRCD); - - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = concat3d_gpu.getFragmentShaderSource( - x1ShapeRCD, x2ShapeRCD, resultShapeRCD, axis); - const program = gpgpu.createProgram(shaderSource); - - const x1Tex = gpgpu.createMatrixTexture(x1TexShapeRC[0], x1TexShapeRC[1]); - const x2Tex = gpgpu.createMatrixTexture(x2TexShapeRC[0], x2TexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(x1Tex, x1TexShapeRC[0], x1TexShapeRC[1], x1); - gpgpu.uploadMatrixToTexture(x2Tex, x2TexShapeRC[0], x2TexShapeRC[1], x2); - - concat3d_gpu.concat3D( - gpgpu, program, x1Tex, x2Tex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(x1Tex); - 
gpgpu.deleteMatrixTexture(x2Tex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; - } - it('concat axis=0', () => { const x1 = new Float32Array([1, 11, 111, 2, 22, 222]); const x2 = @@ -103,3 +62,29 @@ describe('concat3d_gpu', () => { 1e-6); }); }); + +function uploadConcat3dDownload( + a: Float32Array, b: Float32Array, aShape: [number, number, number], + bShape: [number, number, number], axis: number): Float32Array { + const gpgpu = new GPGPUContext(); + gpgpu.enableAutomaticDebugValidation(true); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const program = new Concat3DProgram(aShape, bShape, axis); + const aArr = Array3D.new(aShape, a); + const bArr = Array3D.new(bShape, b); + const rArr = NDArray.zeros(program.outputShape); + const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr, bArr], rArr); + gpgpu_math.runProgram(binary, [aArr, bArr], rArr); + const result = rArr.getValues(); + + aArr.dispose(); + bArr.dispose(); + rArr.dispose(); + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); + gpgpu.dispose(); + + return result; +} diff --git a/src/math/webgl/mulbcast_gpu.ts b/src/math/webgl/mulbcast_gpu.ts deleted file mode 100644 index 8780720d0d..0000000000 --- a/src/math/webgl/mulbcast_gpu.ts +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource( - aNumRows: number, aNumCols: number, bNumRows: number, bNumCols: number, - resultNumRows: number, resultNumCols: number): string { - return ` - precision highp float; - uniform sampler2D matrixA; - uniform sampler2D matrixB; - varying vec2 resultUV; - - const vec2 aDimCR = vec2(${aNumCols}.0, ${aNumRows}.0); - const vec2 bDimCR = vec2(${bNumCols}.0, ${bNumRows}.0); - const vec2 resultDimCR = vec2(${resultNumCols}.0, ${resultNumRows}.0); - const vec4 halfCR = vec4(0.5, 0.5, 0.5, 0.5); - - void main() { - vec2 resultCR = floor(resultUV * resultDimCR); - vec4 resultCRBroadcast = vec4(resultCR, resultCR); - vec4 abDimsCR = vec4(aDimCR, bDimCR); - vec4 abCR = mod(resultCRBroadcast, abDimsCR); - vec4 abCRCenters = abCR + halfCR; - vec4 abUV = abCRCenters / abDimsCR; - vec4 a = texture2D(matrixA, abUV.rg); - vec4 b = texture2D(matrixB, abUV.ba); - float product = a.r * b.r; - gl_FragColor = vec4(product, 0, 0, 0); - }`; -} - -export function multiplyBroadcast( - gpgpu: GPGPUContext, multiplyBroadcastProgram: WebGLProgram, - a: WebGLTexture, aNumRows: number, aNumCols: number, b: WebGLTexture, - bNumRows: number, bNumCols: number, result: WebGLTexture, - resultNumRows: number, resultNumCols: number) { - gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols); - gpgpu.setProgram(multiplyBroadcastProgram); - gpgpu.setInputMatrixTexture(a, 'matrixA', 0); - gpgpu.setInputMatrixTexture(b, 'matrixB', 1); - gpgpu.executeProgram(); -} - -export function uploadMultiplyBroadcastDownload( - a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array, - bNumRows: number, bNumCols: number): Float32Array { - const resultNumRows = Math.max(aNumRows, bNumRows); - const resultNumCols = Math.max(aNumCols, bNumCols); - - const gpgpu = new GPGPUContext(); - const program: WebGLProgram = 
gpgpu.createProgram(getFragmentShaderSource( - aNumRows, aNumCols, bNumRows, bNumCols, resultNumRows, resultNumCols)); - - const aTexture: WebGLTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols); - const bTexture: WebGLTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols); - const resultTexture: WebGLTexture = - gpgpu.createMatrixTexture(resultNumRows, resultNumCols); - - gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a); - gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b); - - multiplyBroadcast( - gpgpu, program, aTexture, aNumRows, aNumCols, bTexture, bNumRows, - bNumCols, resultTexture, resultNumRows, resultNumCols); - - const result = gpgpu.downloadMatrixFromTexture( - resultTexture, resultNumRows, resultNumCols); - - gpgpu.deleteMatrixTexture(aTexture); - gpgpu.deleteMatrixTexture(bTexture); - gpgpu.deleteMatrixTexture(resultTexture); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - - return result; -} diff --git a/src/math/webgl/mulbcast_gpu_test.ts b/src/math/webgl/mulbcast_gpu_test.ts deleted file mode 100644 index e32c50179e..0000000000 --- a/src/math/webgl/mulbcast_gpu_test.ts +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import * as test_util from '../../test_util'; -import * as mulbcast_gpu from './mulbcast_gpu'; - -export function cpuMultiplyBroadcast( - a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array, - bNumRows: number, bNumCols: number): Float32Array { - const resultNumRows = Math.max(aNumRows, bNumRows); - const resultNumCols = Math.max(aNumCols, bNumCols); - const result = new Float32Array(resultNumRows * resultNumCols); - let dst = 0; - for (let r = 0; r < resultNumRows; ++r) { - for (let c = 0; c < resultNumCols; ++c) { - const ai = ((r % aNumRows) * aNumCols) + (c % aNumCols); - const bi = ((r % bNumRows) * bNumCols) + (c % bNumCols); - result[dst] = a[ai] * b[bi]; - ++dst; - } - } - return result; -} - -describe('mulbcast_gpu', () => { - it('returns a matrix dimensions [max(aRows, bRows), max(aCols, bCols)]', - () => { - const a = new Float32Array(13 * 100); - const b = new Float32Array(100 * 99); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 1, 100, b, 100, 1); - expect(result.length).toEqual(100 * 100); - }); - - it('returns [0] when A is [0], A and B same size', () => { - const a = new Float32Array(16 * 16); - const b = test_util.randomArrayInRange(16 * 16, -10, 10); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16); - test_util.expectArraysClose(a, result, 0.00001); - }); - - it('returns [0] when B is [0], A and B same size', () => { - const a = test_util.randomArrayInRange(16 * 16, -10, 10); - const b = new Float32Array(16 * 16); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16); - test_util.expectArraysClose(b, result, 0.00001); - }); - - it('returns A when B is [1] and matrices have the same size', () => { - const a = new Float32Array(16 * 16); - a.fill(1); - const b = test_util.randomArrayInRange(16 * 16, -10, 10); - const result = - 
mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16); - test_util.expectArraysClose(result, b, 0.00001); - }); - - it('returns B when A is [1] and matrices have the same size', () => { - const a = test_util.randomArrayInRange(16 * 16, -10, 10); - const b = new Float32Array(16 * 16); - b.fill(1); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16); - test_util.expectArraysClose(result, a, 0.00001); - }); - - it('returns B when A is [1] and A is narrower than B', () => { - const a = new Float32Array(16 * 8); - a.fill(1); - const b = test_util.randomArrayInRange(16 * 16, -10, 10); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 8, b, 16, 16); - test_util.expectArraysClose(result, b, 0.00001); - }); - - it('returns B when A is [1] and A is shorter than B', () => { - const a = new Float32Array(8 * 16); - a.fill(1); - const b = test_util.randomArrayInRange(16 * 16, -10, 10); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 8, 16, b, 16, 16); - test_util.expectArraysClose(result, b, 0.00001); - }); - - it('returns B when A is [1] and A is smaller than B', () => { - const a = new Float32Array(7 * 6); - a.fill(1); - const b = test_util.randomArrayInRange(18 * 21, -1, 1); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 7, 6, b, 18, 21); - test_util.expectArraysClose(result, b, 0.00001); - }); - - it('broadcasts a smaller A [2x2] across B [4x4]', () => { - const a = new Float32Array([1, 0, 1, 0]); - const b = new Float32Array(4 * 4); - for (let i = 0; i < b.length; ++i) { - b[i] = i + 1; - } - const expected = - new Float32Array([1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0]); - const gpuResult = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 2, 2, b, 4, 4); - const cpuResult = cpuMultiplyBroadcast(a, 2, 2, b, 4, 4); - test_util.expectArraysClose(cpuResult, expected, 0.0001); - test_util.expectArraysClose(gpuResult, expected, 0.0001); - }); - - it('broadcasts a 
non-square A [3x5] across a larger B [16x16]', () => { - const a = test_util.randomArrayInRange(3 * 5, -1, 1); - const b = test_util.randomArrayInRange(16 * 16, -1, 1); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 3, 5, b, 16, 16); - test_util.expectArraysClose( - result, cpuMultiplyBroadcast(a, 3, 5, b, 16, 16), 0.0001); - }); - - it('broadcasts a non-square A across a larger non-square B', () => { - const a = test_util.randomArrayInRange(37 * 63, -1, 1); - const b = test_util.randomArrayInRange(128 * 150, -1, 1); - const result = - mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 37, 63, b, 128, 150); - test_util.expectArraysClose( - result, cpuMultiplyBroadcast(a, 37, 63, b, 128, 150), 0.0001); - }); -}); diff --git a/src/math/webgl/reshape_gpu.ts b/src/math/webgl/reshape_gpu.ts deleted file mode 100644 index a451a78134..0000000000 --- a/src/math/webgl/reshape_gpu.ts +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -import * as util from '../../util'; -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource(): string { - return ` - precision highp float; - uniform sampler2D matrixA; - uniform vec2 inputDimCR; - uniform vec2 resultDimCR; - varying vec2 resultUV; - const vec2 halfCR = vec2(0.5, 0.5); - - void main() { - vec2 resultCR = floor(resultUV * resultDimCR); - // indexInFlat = row * stride + column, where stride == numOutputColumns - float indexInFlat = resultCR.y * resultDimCR.x + resultCR.x; - - vec2 inputCR = vec2( - mod(indexInFlat, inputDimCR.x), // col = indexInFlat % numInputColumns - floor(indexInFlat / inputDimCR.x) // row = indexInFlat / numInputColumns - ) + halfCR; - - vec2 inputUV = inputCR / inputDimCR; - gl_FragColor = texture2D(matrixA, inputUV); - }`; -} - -export function reshape( - gpgpu: GPGPUContext, reshapeProgram: WebGLProgram, a: WebGLTexture, - aNumRows: number, aNumCols: number, result: WebGLTexture, - resultNumRows: number, resultNumCols: number) { - const inputSize = aNumRows * aNumCols; - const outputSize = resultNumCols * resultNumRows; - util.assert( - inputSize === outputSize, - `The input size (${inputSize}) and output size (${outputSize}) ` + - `must match`); - - gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols); - gpgpu.setProgram(reshapeProgram); - gpgpu.setInputMatrixTexture(a, 'matrixA', 0); - - const inputDimCRLocation = gpgpu.getUniformLocation('inputDimCR'); - gpgpu.gl.uniform2f(inputDimCRLocation, aNumCols, aNumRows); - - const resultDimCRLocation = gpgpu.getUniformLocation('resultDimCR'); - gpgpu.gl.uniform2f(resultDimCRLocation, resultNumCols, resultNumRows); - - gpgpu.executeProgram(); -} diff --git a/src/math/webgl/reshape_gpu_test.ts b/src/math/webgl/reshape_gpu_test.ts deleted file mode 100644 index 0f83a6e69e..0000000000 --- a/src/math/webgl/reshape_gpu_test.ts +++ /dev/null @@ -1,88 +0,0 
@@ -/* Copyright 2017 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -import {GPGPUContext} from './gpgpu_context'; -import * as reshape_gpu from './reshape_gpu'; - -describe('reshape_gpu', () => { - let gpgpu: GPGPUContext; - - beforeEach(() => { - gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - }); - - afterEach(() => { - gpgpu.dispose(); - }); - - it('reshape a 2x3 matrix into the same size', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const result = uploadReshapeDownload(a, 2, 3, 2, 3); - expect(result).toEqual(a); - }); - - it('reshape a 2x3 matrix into a column (6x1)', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const result = uploadReshapeDownload(a, 2, 3, 6, 1); - expect(result).toEqual(a); - }); - - it('reshape a 2x3 matrix into a row (1x6) vector', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const result = uploadReshapeDownload(a, 2, 3, 1, 6); - expect(result).toEqual(a); - }); - - it('reshape a 2x3 into a 3x2 matrix', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const result = uploadReshapeDownload(a, 2, 3, 3, 2); - expect(result).toEqual(a); - }); - - it('reshape a 2x3 into a 3x1 causes an error', () => { - const a = new Float32Array([1, 2, 3, 4, 5, 6]); - const f = () => { - uploadReshapeDownload(a, 2, 3, 3, 1); - }; - - expect(f).toThrowError(); - }); - - function 
uploadReshapeDownload( - a: Float32Array, aNumRows: number, aNumCols: number, - resultNumRows: number, resultNumCols: number): Float32Array { - const program = gpgpu.createProgram(reshape_gpu.getFragmentShaderSource()); - - const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols); - gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a); - - const resultTexture: WebGLTexture = - gpgpu.createMatrixTexture(resultNumRows, resultNumCols); - - reshape_gpu.reshape( - gpgpu, program, aTexture, aNumRows, aNumCols, resultTexture, - resultNumRows, resultNumCols); - - const result = gpgpu.downloadMatrixFromTexture( - resultTexture, resultNumRows, resultNumCols); - - gpgpu.deleteMatrixTexture(aTexture); - gpgpu.deleteMatrixTexture(resultTexture); - gpgpu.deleteProgram(program); - - return result; - } -}); diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts index 3491da228d..acc819c3f6 100644 --- a/src/math/webgl/resize_bilinear_gpu.ts +++ b/src/math/webgl/resize_bilinear_gpu.ts @@ -13,79 +13,62 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -import * as conv_util from '../conv_util'; - -import {GPGPUContext} from './gpgpu_context'; - -export function getFragmentShaderSource( - inputShapeRCD: [number, number, number], - outputDimensionsRowCol: [number, number], alignCorners: boolean): string { - const depth = inputShapeRCD[2]; - - const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD); - - const effectiveInputShapeRCD = alignCorners ? - [inputShapeRCD[0] - 1, inputShapeRCD[1] - 1, depth] : - inputShapeRCD; - - const effectiveOutputShapeRCD = alignCorners ? 
- [outputDimensionsRowCol[0] - 1, outputDimensionsRowCol[1] - 1, depth] : - [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth]; - - return ` - precision highp float; - uniform sampler2D matrixA; - varying vec2 resultUV; - const vec2 halfCR = vec2(0.5, 0.5); - - const vec2 inputShapeCR = vec2(${inputShapeRCD[1]}, ${inputShapeRCD[0]}); - const vec2 inputShapeTexCR = vec2( - ${inputTexShapeRC[1]}, ${inputTexShapeRC[0]}); - - const vec2 effectiveInputOverOutputRatioCR = vec2( - ${effectiveInputShapeRCD[1] / effectiveOutputShapeRCD[1]}, - ${effectiveInputShapeRCD[0] / effectiveOutputShapeRCD[0]}); - - float sampleInput(float col, float row, float d) { - vec2 uv = (vec2(col * ${depth}.0 + d, row) + halfCR) / inputShapeTexCR; - return texture2D(matrixA, uv).r; - } - - void main() { - vec2 yTexCR = floor(gl_FragCoord.xy); - - // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d). - vec2 yCR = vec2(floor(yTexCR.x / ${depth}.0), yTexCR.y); - float d = mod(yTexCR.x, ${depth}.0); - - // Fractional source index. - vec2 sourceFracIndexCR = yCR * effectiveInputOverOutputRatioCR; - - // Compute the four integer indices. 
- vec2 sourceFloorCR = floor(sourceFracIndexCR); - vec2 sourceCeilCR = min(inputShapeCR - 1.0, ceil(sourceFracIndexCR)); - - float topLeft = sampleInput(sourceFloorCR[0], sourceFloorCR[1], d); - float bottomLeft = sampleInput(sourceFloorCR[0], sourceCeilCR[1], d); - float topRight = sampleInput(sourceCeilCR[0], sourceFloorCR[1], d); - float bottomRight = sampleInput(sourceCeilCR[0], sourceCeilCR[1], d); - - vec2 fracCR = sourceFracIndexCR - sourceFloorCR; - - float top = topLeft + (topRight - topLeft) * fracCR[0]; - float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR[0]; - float newValue = top + (bottom - top) * fracCR[1]; - - gl_FragColor = vec4(newValue, 0.0, 0.0, 0.0); - }`; -} - -export function resizeBilinear( - gpgpu: GPGPUContext, resizeBilinearProgram: WebGLProgram, a: WebGLTexture, - result: WebGLTexture, resultShapeRowCol: [number, number]) { - gpgpu.setOutputMatrixTexture( - result, resultShapeRowCol[0], resultShapeRowCol[1]); - gpgpu.setProgram(resizeBilinearProgram); - gpgpu.setInputMatrixTexture(a, 'matrixA', 0); - gpgpu.executeProgram(); +import {GPGPUProgram} from './gpgpu_math'; + +export class ResizeBilinearProgram implements GPGPUProgram { + variableNames = ['A']; + params: Array<{}> = []; + outputShape: number[] = []; + userCode: string; + + constructor( + inputShape: [number, number, number], + outputDimensionsRowCol: [number, number], alignCorners: boolean) { + const depth = inputShape[2]; + this.outputShape = + [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth]; + this.params = [alignCorners]; + + const effectiveInputShape = alignCorners ? + [inputShape[0] - 1, inputShape[1] - 1, depth] : + inputShape; + + const effectiveOutputShape = alignCorners ? 
+ [this.outputShape[0] - 1, this.outputShape[1] - 1, depth] : + this.outputShape; + this.userCode = ` + const vec2 effectiveInputOverOutputRatioRC = vec2( + ${effectiveInputShape[0] / + effectiveOutputShape[0]}, + ${effectiveInputShape[1] / + effectiveOutputShape[1]}); + const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0); + + void main() { + vec3 coords = getOutputCoords(); + vec2 yRC = coords.xy; + float d = coords.z; + + // Fractional source index. + vec2 sourceFracIndexRC = yRC * effectiveInputOverOutputRatioRC; + + // Compute the four integer indices. + vec2 sourceFloorRC = floor(sourceFracIndexRC); + vec2 sourceCeilRC = min(inputShapeRC - 1.0, ceil(sourceFracIndexRC)); + + float topLeft = getA(sourceFloorRC[0], sourceFloorRC[1], d); + float bottomLeft = getA(sourceCeilRC[0], sourceFloorRC[1], d); + float topRight = getA(sourceFloorRC[0], sourceCeilRC[1], d); + float bottomRight = getA(sourceCeilRC[0], sourceCeilRC[1], d); + + vec2 fracRC = sourceFracIndexRC - sourceFloorRC; + + float top = topLeft + (topRight - topLeft) * fracRC[1]; + float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC[1]; + float newValue = top + (bottom - top) * fracRC[0]; + + setOutput(newValue); + } + `; + } } diff --git a/src/math/webgl/resize_bilinear_gpu_test.ts b/src/math/webgl/resize_bilinear_gpu_test.ts index 3adb55af61..7b8a736af8 100644 --- a/src/math/webgl/resize_bilinear_gpu_test.ts +++ b/src/math/webgl/resize_bilinear_gpu_test.ts @@ -14,52 +14,14 @@ limitations under the License. 
==============================================================================*/ import * as test_util from '../../test_util'; -import * as conv_util from '../conv_util'; +import {Array3D, initializeGPU, NDArray} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; -import * as resize_bilinear_gpu from './resize_bilinear_gpu'; +import * as gpgpu_math from './gpgpu_math'; +import {ResizeBilinearProgram} from './resize_bilinear_gpu'; +import {TextureManager} from './texture_manager'; describe('resize bilinear', () => { - function uploadResizeBilinearDownload( - a: Float32Array, aShapeRowColDepth: [number, number, number], - outputDimensionsRowCol: [number, number], - alignCorners: boolean): Float32Array { - const aTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(aShapeRowColDepth); - - const resultShapeRCD: [number, number, number] = [ - outputDimensionsRowCol[0], outputDimensionsRowCol[1], aShapeRowColDepth[2] - ]; - - const resultTexShapeRC: [number, number] = - conv_util.computeTexShapeFrom3D(resultShapeRCD); - - const gpgpu = new GPGPUContext(); - gpgpu.enableAutomaticDebugValidation(true); - - const shaderSource = resize_bilinear_gpu.getFragmentShaderSource( - aShapeRowColDepth, outputDimensionsRowCol, alignCorners); - const program = gpgpu.createProgram(shaderSource); - - const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]); - const resultTex = - gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a); - - resize_bilinear_gpu.resizeBilinear( - gpgpu, program, aTex, resultTex, resultTexShapeRC); - - const result = gpgpu.downloadMatrixFromTexture( - resultTex, resultTexShapeRC[0], resultTexShapeRC[1]); - - gpgpu.deleteMatrixTexture(resultTex); - gpgpu.deleteMatrixTexture(aTex); - gpgpu.deleteProgram(program); - gpgpu.dispose(); - return result; - } - it('simple bilinear', () => { const a = new Float32Array([2, 2, 4, 4]); @@ -123,3 
+85,29 @@ describe('resize bilinear', () => { 1e-4); }); }); + +function uploadResizeBilinearDownload( + a: Float32Array, aShape: [number, number, number], + outputDimensionsRowCol: [number, number], + alignCorners: boolean): Float32Array { + const gpgpu = new GPGPUContext(); + gpgpu.enableAutomaticDebugValidation(true); + const textureManager = new TextureManager(gpgpu); + initializeGPU(gpgpu, textureManager); + + const program = + new ResizeBilinearProgram(aShape, outputDimensionsRowCol, alignCorners); + const aArr = Array3D.new(aShape, a); + const rArr = NDArray.zeros(program.outputShape); + const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr], rArr); + gpgpu_math.runProgram(binary, [aArr], rArr); + const result = rArr.getValues(); + + aArr.dispose(); + rArr.dispose(); + textureManager.dispose(); + gpgpu.deleteProgram(binary.webGLProgram); + gpgpu.dispose(); + + return result; +} From 9e890bbe5fdf6ab2c75ea01aa68ef5af652e9682 Mon Sep 17 00:00:00 2001 From: Daniel Smilkov Date: Mon, 21 Aug 2017 11:29:02 -0400 Subject: [PATCH 10/10] address comments --- src/math/math_gpu.ts | 13 +++++++----- src/math/webgl/copy_gpu.ts | 24 +++++++++++----------- src/math/webgl/copy_gpu_test.ts | 7 ++----- src/math/webgl/resize_bilinear_gpu.ts | 8 +++----- src/math/webgl/resize_bilinear_gpu_test.ts | 4 ++-- 5 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts index f63f070f33..9d00f22a67 100644 --- a/src/math/math_gpu.ts +++ b/src/math/math_gpu.ts @@ -25,7 +25,6 @@ import {Concat3DProgram} from './webgl/concat3d_gpu'; // tslint:disable-next-line:max-line-length import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu'; import {Conv2DProgram} from './webgl/conv_gpu'; -import * as copy_gpu from './webgl/copy_gpu'; import {Copy2DProgram} from './webgl/copy_gpu'; import {GPGPUContext} from './webgl/gpgpu_context'; import * as gpgpu_math from './webgl/gpgpu_math'; @@ 
-37,7 +36,7 @@ import {MinMaxProgram} from './webgl/minmax_gpu'; import {MatMulProgram} from './webgl/mulmat_gpu'; import {Pool2DProgram} from './webgl/pool_gpu'; import {ReduceSumProgram} from './webgl/reducesum_gpu'; -import {ResizeBilinearProgram} from './webgl/resize_bilinear_gpu'; +import {ResizeBilinear3DProgram} from './webgl/resize_bilinear_gpu'; import {TextureManager} from './webgl/texture_manager'; import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu'; import * as webgl_util from './webgl/webgl_util'; @@ -97,7 +96,7 @@ export class NDArrayMathGPU extends NDArrayMath { destBeginRowCol: [number, number], destSizeRowCol: [number, number]): void { const program = new Copy2DProgram(sourceSizeRowCol[1], destSizeRowCol[1]); - const customSetup = copy_gpu.getCustomSetupFunc( + const customSetup = program.getCustomSetupFunc( sourceBeginRowCol, destBeginRowCol, destSizeRowCol); this.compileAndRun(program, [source], dest, customSetup); } @@ -349,14 +348,18 @@ export class NDArrayMathGPU extends NDArrayMath { const maxPoolBackPropProgram = new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad); - return this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]); + + const result = + this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]); + maxPoolPositions.dispose(); + return result as Array3D; } protected resizeBilinear3DInternal( x: Array3D, newShape2D: [number, number], alignCorners: boolean): Array3D { const program = - new ResizeBilinearProgram(x.shape, newShape2D, alignCorners); + new ResizeBilinear3DProgram(x.shape, newShape2D, alignCorners); return this.compileAndRun(program, [x]); } diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts index 709e034d94..1ea1418c6b 100644 --- a/src/math/webgl/copy_gpu.ts +++ b/src/math/webgl/copy_gpu.ts @@ -40,17 +40,17 @@ export class Copy2DProgram implements GPGPUProgram { } `; } -} -export function getCustomSetupFunc( - sourceStart: [number, number], destStart: [number, 
number], - destSize: [number, number]) { - return (gpgpu: GPGPUContext) => { - gpgpu.setOutputMatrixWriteRegion( - destStart[0], destSize[0], destStart[1], destSize[1]); - const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart'); - gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]); - const destStartCRLoc = gpgpu.getUniformLocation('destStart'); - gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]); - }; + getCustomSetupFunc( + sourceStart: [number, number], destStart: [number, number], + destSize: [number, number]) { + return (gpgpu: GPGPUContext) => { + gpgpu.setOutputMatrixWriteRegion( + destStart[0], destSize[0], destStart[1], destSize[1]); + const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart'); + gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]); + const destStartCRLoc = gpgpu.getUniformLocation('destStart'); + gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]); + }; + } } diff --git a/src/math/webgl/copy_gpu_test.ts b/src/math/webgl/copy_gpu_test.ts index 3a06ae2778..59904530a7 100644 --- a/src/math/webgl/copy_gpu_test.ts +++ b/src/math/webgl/copy_gpu_test.ts @@ -15,8 +15,6 @@ limitations under the License. 
import * as test_util from '../../test_util'; import {Array2D, initializeGPU} from '../ndarray'; - -import * as copy_gpu from './copy_gpu'; import {Copy2DProgram} from './copy_gpu'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; @@ -36,8 +34,7 @@ function uploadCopyDownload( const dest = Array2D.new(destShape, destVals); const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest); - const customSetup = - copy_gpu.getCustomSetupFunc(srcStart, destStart, destSize); + const customSetup = program.getCustomSetupFunc(srcStart, destStart, destSize); gpgpu_math.runProgram(binary, [source], dest, customSetup); const result = dest.getValues(); @@ -174,7 +171,7 @@ describe('copy_gpu', () => { for (let i = 0; i < 10; ++i) { const offset: [number, number] = [0, i]; - const customSetup = copy_gpu.getCustomSetupFunc(offset, offset, size); + const customSetup = program.getCustomSetupFunc(offset, offset, size); gpgpu_math.runProgram(binary, [source], dest, customSetup); } const res = dest.getValues(); diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts index acc819c3f6..9ffb6707f7 100644 --- a/src/math/webgl/resize_bilinear_gpu.ts +++ b/src/math/webgl/resize_bilinear_gpu.ts @@ -15,7 +15,7 @@ limitations under the License. 
import {GPGPUProgram} from './gpgpu_math'; -export class ResizeBilinearProgram implements GPGPUProgram { +export class ResizeBilinear3DProgram implements GPGPUProgram { variableNames = ['A']; params: Array<{}> = []; outputShape: number[] = []; @@ -38,10 +38,8 @@ export class ResizeBilinearProgram implements GPGPUProgram { this.outputShape; this.userCode = ` const vec2 effectiveInputOverOutputRatioRC = vec2( - ${effectiveInputShape[0] / - effectiveOutputShape[0]}, - ${effectiveInputShape[1] / - effectiveOutputShape[1]}); + ${effectiveInputShape[0] / effectiveOutputShape[0]}, + ${effectiveInputShape[1] / effectiveOutputShape[1]}); const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0); void main() { diff --git a/src/math/webgl/resize_bilinear_gpu_test.ts b/src/math/webgl/resize_bilinear_gpu_test.ts index 7b8a736af8..9382b83db5 100644 --- a/src/math/webgl/resize_bilinear_gpu_test.ts +++ b/src/math/webgl/resize_bilinear_gpu_test.ts @@ -18,7 +18,7 @@ import {Array3D, initializeGPU, NDArray} from '../ndarray'; import {GPGPUContext} from './gpgpu_context'; import * as gpgpu_math from './gpgpu_math'; -import {ResizeBilinearProgram} from './resize_bilinear_gpu'; +import {ResizeBilinear3DProgram} from './resize_bilinear_gpu'; import {TextureManager} from './texture_manager'; describe('resize bilinear', () => { @@ -96,7 +96,7 @@ function uploadResizeBilinearDownload( initializeGPU(gpgpu, textureManager); const program = - new ResizeBilinearProgram(aShape, outputDimensionsRowCol, alignCorners); + new ResizeBilinear3DProgram(aShape, outputDimensionsRowCol, alignCorners); const aArr = Array3D.new(aShape, a); const rArr = NDArray.zeros(program.outputShape); const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr], rArr);