From 017ea941d7d266912bdae1d0c92539ce2900edf7 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Wed, 16 Aug 2017 17:14:25 -0400
Subject: [PATCH 01/10] migrate addScaledMat and conv2d to logical sampling and
 improve shader compiler

---
 .vscode/settings.json                   |   1 +
 src/math/math_gpu.ts                    |  40 +----
 src/math/webgl/addscaledmat_gpu.ts      |  90 +++---------
 src/math/webgl/addscaledmat_gpu_test.ts |  73 ++++++---
 src/math/webgl/binaryop_gpu.ts          |   3 +-
 src/math/webgl/conv_gpu.ts              | 152 +++++++------------
 src/math/webgl/gpgpu_math.ts            |  11 +-
 src/math/webgl/shader_compiler.ts       | 187 ++++++++++++++++--------
 tsconfig.json                           |   7 +-
 tslint.json                             |   1 +
 10 files changed, 267 insertions(+), 298 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index b1cd35c836..67a611b80a 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -9,6 +9,7 @@
   },
   "tslint.enable": true,
   "tslint.run": "onType",
+  "tslint.configFile": "tslint.json",
   "editor.tabSize": 2,
   "editor.insertSpaces": true,
   "files.insertFinalNewline": true,
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index eaf418dadd..8818f148f1 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -20,7 +20,7 @@ import * as conv_util from './conv_util';
 import {MatrixOrientation, NDArrayMath} from './math';
 import * as ndarray from './ndarray';
 import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray';
-import * as addscaledmat_gpu from './webgl/addscaledmat_gpu';
+import {AddScaledMatProgram} from './webgl/addscaledmat_gpu';
 import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
 import * as avg_pool_gpu from './webgl/avg_pool_gpu';
@@ -53,9 +53,6 @@ const BATCHNORM_PROG = 'batchnorm';
 const COPY_PROG = 'copy';
 const CONCAT_PROG = 'concat';
 
-// Matrix algebra.
-const ADD_SCALED_MAT_PROG = 'addscaledmat';
-
 // Element-wise ops.
 const RESHAPE_PROG = 'reshape';
 
@@ -238,27 +235,8 @@ export class NDArrayMathGPU extends NDArrayMath {
 
   protected scaledArrayAddInternal<T extends NDArray>(
       c1: Scalar, a: T, c2: Scalar, b: T) {
-    let cleanupB = false;
-    if (!this.doGPUShapesMatch(a, b)) {
-      b = this.reshapeTexture(b, a.getTextureShapeRC());
-      cleanupB = true;
-    }
-
-    const program = this.getAndSaveProgram(
-        ADD_SCALED_MAT_PROG, () => addscaledmat_gpu.getFragmentShaderSource());
-
-    const textureShapeRC = a.getTextureShapeRC();
-    const resultTexture = this.textureManager.acquireTexture(textureShapeRC);
-
-    addscaledmat_gpu.addScaledMatrices(
-        this.gpgpu, program, a.getTexture(), b.getTexture(), textureShapeRC[0],
-        textureShapeRC[1], c1.getTexture(), c2.getTexture(), resultTexture);
-
-    if (cleanupB) {
-      b.dispose();
-    }
-    // Bring the result back to the original shape.
-    return NDArray.make<T>(a.shape, {texture: resultTexture, textureShapeRC});
+    const program = new AddScaledMatProgram(a.shape, b.shape);
+    return this.compileAndRun<NDArray, T>(program, [a, b, c1, c2]);
   }
 
   protected negInternal<T extends NDArray>(a: T): T {
@@ -963,18 +941,6 @@ export class NDArrayMathGPU extends NDArrayMath {
     return this.programCache[programKey];
   }
 
-  private doGPUShapesMatch(a: NDArray, b: NDArray): boolean {
-    util.assertShapesMatch(a.shape, b.shape);
-    if (a.inGPU()) {
-      // Prefer B to have the shape of A.
-      b.getTextureShapeRC(a.getTextureShapeRC());
-    } else if (b.inGPU()) {
-      // Prefer A to have the shape of B.
-      a.getTextureShapeRC(b.getTextureShapeRC());
-    }
-    return util.arraysEqual(a.getTextureShapeRC(), b.getTextureShapeRC());
-  }
-
   getTextureManager(): TextureManager {
     return this.textureManager;
   }
diff --git a/src/math/webgl/addscaledmat_gpu.ts b/src/math/webgl/addscaledmat_gpu.ts
index 57dee24ad6..c1b2475d88 100644
--- a/src/math/webgl/addscaledmat_gpu.ts
+++ b/src/math/webgl/addscaledmat_gpu.ts
@@ -13,72 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(): string {
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    uniform sampler2D matrixB;
-    uniform sampler2D matrixAScalar;
-    uniform sampler2D matrixBScalar;
-    varying vec2 resultUV;
-
-    const vec2 halfTexel = vec2(0.5, 0.5);
-
-    void main() {
-      float a = texture2D(matrixA, resultUV).r;
-      float b = texture2D(matrixB, resultUV).r;
-      float aScalar = texture2D(matrixAScalar, halfTexel).r;
-      float bScalar = texture2D(matrixBScalar, halfTexel).r;
-      vec2 abScaled = vec2(a, b) * vec2(aScalar, bScalar);
-      gl_FragColor = vec4(abScaled.x + abScaled.y, 0, 0, 0);
-    }`;
-}
-
-export function addScaledMatrices(
-    gpgpu: GPGPUContext, addScaledMatricesProgram: WebGLProgram,
-    a: WebGLTexture, b: WebGLTexture, rows: number, columns: number,
-    aScalar: WebGLTexture, bScalar: WebGLTexture, result: WebGLTexture) {
-  gpgpu.setOutputMatrixTexture(result, rows, columns);
-  gpgpu.setProgram(addScaledMatricesProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-  gpgpu.setInputMatrixTexture(b, 'matrixB', 1);
-  gpgpu.setInputMatrixTexture(aScalar, 'matrixAScalar', 2);
-  gpgpu.setInputMatrixTexture(bScalar, 'matrixBScalar', 3);
-  gpgpu.executeProgram();
-}
-
-export function uploadAddScaledMatricesDownload(
-    a: Float32Array, b: Float32Array, rows: number, columns: number,
-    aScalar: number, bScalar: number): Float32Array {
-  const gpgpu = new GPGPUContext();
-  const program: WebGLProgram = gpgpu.createProgram(getFragmentShaderSource());
-
-  const aTex = gpgpu.createMatrixTexture(rows, columns);
-  const bTex = gpgpu.createMatrixTexture(rows, columns);
-  const aScalarTex = gpgpu.createMatrixTexture(1, 1);
-  const bScalarTex = gpgpu.createMatrixTexture(1, 1);
-  const resultTex = gpgpu.createMatrixTexture(rows, columns);
-
-  gpgpu.uploadMatrixToTexture(aTex, rows, columns, a);
-  gpgpu.uploadMatrixToTexture(bTex, rows, columns, b);
-  gpgpu.uploadMatrixToTexture(aScalarTex, 1, 1, new Float32Array([aScalar]));
-  gpgpu.uploadMatrixToTexture(bScalarTex, 1, 1, new Float32Array([bScalar]));
-
-  addScaledMatrices(
-      gpgpu, program, aTex, bTex, rows, columns, aScalarTex, bScalarTex,
-      resultTex);
-
-  const result = gpgpu.downloadMatrixFromTexture(resultTex, rows, columns);
-
-  gpgpu.deleteMatrixTexture(aTex);
-  gpgpu.deleteMatrixTexture(bTex);
-  gpgpu.deleteMatrixTexture(resultTex);
-  gpgpu.deleteMatrixTexture(aScalarTex);
-  gpgpu.deleteMatrixTexture(bScalarTex);
-  gpgpu.deleteProgram(program);
-  gpgpu.dispose();
-
-  return result;
+import {GPGPUProgram} from './gpgpu_math';
+import * as util from '../../util';
+
+export class AddScaledMatProgram implements GPGPUProgram {
+  variableNames = ['A', 'B', 'c1', 'c2'];
+  params: Array<{}> = [];
+  outputShape: number[];
+  userCode: string;
+  supportsBroadcasting = true;
+
+  constructor(aShape: number[], bShape: number[]) {
+    this.outputShape = util.assertAndGetBroadcastedShape(aShape, bShape);
+    this.userCode = `
+      void main() {
+        float a = getAAtOutCoords();
+        float b = getBAtOutCoords();
+        float c1 = getC1();
+        float c2 = getC2();
+        setOutput(dot(vec2(c1, c2), vec2(a, b)));
+      }
+    `;
+  }
 }
diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts
index 617bb17383..cf63e1a668 100644
--- a/src/math/webgl/addscaledmat_gpu_test.ts
+++ b/src/math/webgl/addscaledmat_gpu_test.ts
@@ -14,7 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as addscaledmat_gpu from './addscaledmat_gpu';
+import {AddScaledMatProgram} from './addscaledmat_gpu';
+import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray';
+import * as util from '../../util';
+import {TextureManager} from './texture_manager';
 
 function cpuAddScaledMatrices(
     a: Float32Array, aScalar: number, b: Float32Array,
@@ -28,48 +33,68 @@ function cpuAddScaledMatrices(
 
 describe('addscaledmat_gpu', () => {
   it('returns a matrix with the same shape as the input matrices', () => {
-    const a = new Float32Array(9 * 14);
-    const b = new Float32Array(a.length);
-    const result =
-        addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 9, 14, 0, 0);
+    const a = Array2D.zeros([9, 14]);
+    const b = Array2D.zerosLike(a);
+    const result = uploadAddScaledMatDownload(a, b, 0, 0);
     expect(result.length).toEqual(9 * 14);
   });
 
   it('returns A + B when scalars are 1', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const b = new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]);
-    const result =
-        addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 1, 1);
+    const a = Array1D.new([1, 2, 3, 4, 5, 6]);
+    const b = Array1D.new([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]);
+    const result = uploadAddScaledMatDownload(a, b, 1, 1);
     test_util.expectArraysClose(
         result, new Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001);
   });
 
   it('returns A * aScalar when B and bScalar are 0', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const b = new Float32Array(a.length);
-    const result =
-        addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 1.1, 0);
+    const a = Array1D.new([1, 2, 3, 4, 5, 6]);
+    const b = Array1D.zerosLike(a);
+    const result = uploadAddScaledMatDownload(a, b, 1.1, 0);
     test_util.expectArraysClose(
         result, new Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001);
   });
 
   it('returns B * bScalar when A and aScalar are 0', () => {
-    const b = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const a = new Float32Array(b.length);
-    const result =
-        addscaledmat_gpu.uploadAddScaledMatricesDownload(a, b, 3, 2, 0, 1.1);
+    const b = Array1D.new([1, 2, 3, 4, 5, 6]);
+    const a = Array1D.zerosLike(b);
+    const result = uploadAddScaledMatDownload(a, b, 0, 1.1);
     test_util.expectArraysClose(
         result, new Float32Array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6]), 0.0001);
   });
 
   it('returns (A * aScalar) + (B * bScalar)', () => {
-    const a = test_util.randomArrayInRange(12 * 12, -2, 2);
-    const b = test_util.randomArrayInRange(a.length, -10, 10);
-    const aScalar = 0.5;
-    const bScalar = 0.25;
-    const result = addscaledmat_gpu.uploadAddScaledMatricesDownload(
-        a, b, 12, 12, aScalar, bScalar);
+    const a = Array2D.randUniform([12, 12], -2, 2);
+    const aVals = a.getValues();
+    const b = Array2D.randUniform([12, 12], -10, 10);
+    const bVals = b.getValues();
+
+    const c1 = 0.5;
+    const c2 = 0.25;
+    const result = uploadAddScaledMatDownload(a, b, c1, c2);
     test_util.expectArraysClose(
-        result, cpuAddScaledMatrices(a, aScalar, b, bScalar), 0.001);
+        result, cpuAddScaledMatrices(aVals, c1, bVals, c2), 0.001);
   });
 });
+
+export function uploadAddScaledMatDownload(
+    a: NDArray, b: NDArray, c1Val: number, c2Val: number): Float32Array {
+  const c1 = Scalar.new(c1Val);
+  const c2 = Scalar.new(c2Val);
+  const gpgpu = new GPGPUContext();
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape);
+  const res = NDArray.zeros(outShape);
+  const program = new AddScaledMatProgram(a.shape, b.shape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b, c1, c2], res);
+  gpgpu_math.runProgram(binary, [a, b, c1, c2], res);
+
+  const resValues = res.getValues();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return resValues;
+}
diff --git a/src/math/webgl/binaryop_gpu.ts b/src/math/webgl/binaryop_gpu.ts
index 63298b83e6..33bf96b863 100644
--- a/src/math/webgl/binaryop_gpu.ts
+++ b/src/math/webgl/binaryop_gpu.ts
@@ -21,10 +21,9 @@ export class BinaryOpProgram implements GPGPUProgram {
   params: Array<{}>;
   outputShape: number[];
   userCode: string;
-  supportsBroadcasting: boolean;
+  supportsBroadcasting = true;
 
   constructor(op: '+' | '-' | '*' | '/', aShape: number[], bShape: number[]) {
-    this.supportsBroadcasting = true;
     this.params = [op];
     this.outputShape = util.assertAndGetBroadcastedShape(aShape, bShape);
     this.userCode = `
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index 53638c8ddd..5d7b222194 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -14,7 +14,60 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../conv_util';
-import {GPGPUContext} from './gpgpu_context';
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Conv2DProgram implements GPGPUProgram {
+  variableNames = ['x', 'W', 'bias'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(xShape: [number, number, number], fieldSize: number,
+      outputDepth: number, stride: number, pad: number, hasBias: boolean) {
+    this.outputShape = conv_util.computeOutputShape3D(xShape,
+      fieldSize, outputDepth, stride, pad);
+    const inputDepth = xShape[2];
+    this.params = [inputDepth, fieldSize, stride, pad, hasBias];
+
+    this.userCode = `
+      void main() {
+        vec3 output = getOutputCoords();
+        float yR = output.x;
+        float yC = output.y;
+        float d2 = output.z;
+
+        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) -
+            vec2(${pad}.0, ${pad}.0);
+        float xRCorner = xRCCorner.x;
+        float xCCorner = xRCCorner.y;
+
+        // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
+        // ? = to be determined. : = across all values in that axis.
+        float dotProd = 0.0;
+        for (int wR = 0; wR < ${fieldSize}; wR++) {
+          float wR_float = float(wR);
+          float xR = xRCorner + wR_float;
+
+          for (int wC = 0; wC < ${fieldSize}; wC++) {
+            float wC_float = float(wC);
+            float xC = xCCorner + wC_float;
+
+            for (int d1 = 0; d1 < ${inputDepth}; d1++) {
+              float d1_float = float(d1);
+              float xValue = getXOrZeroPad(xR, xC, d1_float);
+              float wValue = getW(wR_float, wC_float, d1_float, d2);
+              dotProd += xValue * wValue;
+            }
+          }
+        }
+        if (${hasBias}) {
+          dotProd += getBias(d2);
+        }
+        setOutput(dotProd);
+      }
+    `;
+  }
+}
 
 export function getFragmentShaderPrologueSource(): string {
   return `
@@ -38,69 +91,6 @@ export function getFragmentShaderGetMatrixValueOrZeroPadSource(): string {
     }`;
 }
 
-export function getFragmentShaderConvolveSource(
-    xShapeRCD: [number, number, number], fSize: number, outputDepth: number,
-    stride: number, pad: number, hasBias: boolean) {
-  const inputDepth = xShapeRCD[2];
-  const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD);
-  const wTexShapeRC =
-      conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize);
-
-  return `
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
-    const vec2 wShapeCR = vec2(${wTexShapeRC[1]}, ${wTexShapeRC[0]});
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2).
-      float yR = yTexCR.y;
-      float yC = floor(yTexCR.x / ${outputDepth}.0);
-      float d2 = mod(yTexCR.x, ${outputDepth}.0);
-      float wTexC = d2;
-
-      vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) -
-          vec2(${pad}.0, ${pad}.0);
-      float xRCorner = xRCCorner.x;
-      float xCCorner = xRCCorner.y;
-
-      // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
-      // ? = to be determined. : = across all values in that axis.
-      float dotProd = 0.0;
-      for (int wR = 0; wR < ${fSize}; wR++) {
-        float wR_float = float(wR);
-        float xR = xRCorner + wR_float;
-        float xTexR = xR;
-
-        for (int wC = 0; wC < ${fSize}; wC++) {
-          float wC_float = float(wC);
-          float xC = xCCorner + wC_float;
-
-          for (int d1 = 0; d1 < ${inputDepth}; d1++) {
-            float d1_float = float(d1);
-            float xTexC = xC * ${inputDepth}.0 + d1_float;
-            float wTexR = wR_float * ${fSize * inputDepth}.0 +
-                wC_float * ${inputDepth}.0 + d1_float;
-
-            float xValue =
-                getMatrixValueOrZeroPad(x, xShapeCR, vec2(xTexC, xTexR));
-
-            // Read w(wR, wC, d1, d2).
-            vec2 wUV = (vec2(wTexC, wTexR) + halfCR) / wShapeCR;
-            float wValue = texture2D(weights, wUV).r;
-
-            dotProd += xValue * wValue;
-          }
-        }
-      }
-      if (${hasBias}) {
-        dotProd += getBiasValue(biases, d2);
-      }
-      gl_FragColor = vec4(dotProd, 0, 0, 0);
-    }`;
-}
-
 export function getFragmentShaderGetBiasValueSource(outputDepth: number):
     string {
   return `
@@ -111,37 +101,3 @@ export function getFragmentShaderGetBiasValueSource(outputDepth: number):
       return texture2D(bias, biasUV).r;
     }`;
 }
-
-export function getFragmentShaderSource(
-    aShapeRowColDepth: [number, number, number], resultDepth: number,
-    fieldSize: number, stride: number, zeroPad: number,
-    hasBias: boolean): string {
-  const prologue = getFragmentShaderPrologueSource();
-  const getMatrixValueOrZeroPad =
-      getFragmentShaderGetMatrixValueOrZeroPadSource();
-  const convolve = getFragmentShaderConvolveSource(
-      aShapeRowColDepth, fieldSize, resultDepth, stride, zeroPad, hasBias);
-  const getBiasValue = getFragmentShaderGetBiasValueSource(resultDepth);
-
-  return [
-    prologue,
-    getMatrixValueOrZeroPad,
-    getBiasValue,
-    convolve,
-  ].join('\n');
-}
-
-export function convolve(
-    gpgpu: GPGPUContext, program: WebGLProgram, a: WebGLTexture,
-    weights: WebGLTexture, biases: WebGLTexture|null, result: WebGLTexture,
-    resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(a, 'x', 0);
-  gpgpu.setInputMatrixTexture(weights, 'weights', 1);
-  if (biases != null) {
-    gpgpu.setInputMatrixTexture(biases, 'biases', 2);
-  }
-  gpgpu.executeProgram();
-}
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index b112f4b9a9..b5b2fe9d5a 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -66,12 +66,17 @@ export function compileProgram<T extends NDArray, K extends NDArray>(
   };
 }
 
-function validateBinaryAndProgram(shapeInfos: ShapeInfo[], bArrays: NDArray[]) {
+function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) {
+  if (shapeInfos.length !== inputs.length) {
+    throw Error(`Binary was compiled with ${shapeInfos.length} inputs, but ` +
+                `was executed with ${inputs.length} inputs`);
+  }
+
   shapeInfos.forEach((s, i) => {
     const shapeA = s.logicalShape;
     const texShapeA = s.texShape;
-    const shapeB = bArrays[i].shape;
-    const texShapeB = bArrays[i].getTextureShapeRC();
+    const shapeB = inputs[i].shape;
+    const texShapeB = inputs[i].getTextureShapeRC();
 
     if (!util.arraysEqual(shapeA, shapeB)) {
       throw Error(`Binary was compiled with different shapes than ` +
diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts
index 89e2eae914..109efa6c3c 100644
--- a/src/math/webgl/shader_compiler.ts
+++ b/src/math/webgl/shader_compiler.ts
@@ -36,8 +36,7 @@ export function makeShader(inputsInfo: InputInfo[], outputShape: ShapeInfo,
   const outputSamplingSnippet =
       getOutputSamplingSnippet(outputShape.logicalShape, outTexShape);
   const source = [
-    SHADER_PREFIX, inputPrefixSnippet, SAMPLE_1D_SNIPPET, SAMPLE_2D_SNIPPET,
-    SAMPLE_3D_SNIPPET, SAMPLE_4D_SNIPPET, inputSamplingSnippet,
+    SHADER_PREFIX, inputPrefixSnippet, inputSamplingSnippet,
     outputSamplingSnippet, userCode
   ].join('\n');
   return source;
@@ -107,62 +106,74 @@ function getOutputSamplingSnippet(
   }
 }
 
+const SAMPLE_1D_SNIPPET = `
+vec2 UVfrom1D(float texNumR, float texNumC, float index) {
+  float texR = floor(index / texNumC);
+  float texC = mod(index, texNumC);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+}
+`;
+
+const SAMPLE_2D_SNIPPET = `
+vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row,
+    float col) {
+  float index = dot(vec2(row, col), vec2(numC, 1.0));
+  float texR = floor(index / texNumC);
+  float texC = mod(index, texNumC);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+}
+`;
+
+const SAMPLE_3D_SNIPPET = `
+vec2 UVfrom3D(float texNumR, float texNumC, float stride0,
+    float stride1, float row, float col, float depth) {
+  float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0));
+  float texR = floor(index / texNumC);
+  float texC = mod(index, texNumC);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+}
+`;
+
+const SAMPLE_4D_SNIPPET = `
+vec2 UVfrom4D(float texNumR, float texNumC, float stride0,
+    float stride1, float stride2, float row, float col, float depth,
+    float depth2) {
+  float index = dot(vec4(row, col, depth, depth2),
+                    vec4(stride0, stride1, stride2, 1.0));
+  float texR = floor(index / texNumC);
+  float texC = mod(index, texNumC);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+}
+`;
+
 const SHADER_PREFIX = `
   precision highp float;
   varying vec2 resultUV;
   const vec2 halfCR = vec2(0.5, 0.5);
 
-  void setOutput(float val) {
-    gl_FragColor = vec4(val, 0, 0, 0);
-  }
-
-  bool isNaN(float val) {
-    return val == val ? false : true;
-  }
-`;
-
-const SAMPLE_1D_SNIPPET = `
-  float sample1D(sampler2D texture, float texNumR, float texNumC, float index) {
-    float texR = floor(index / texNumC);
-    float texC = mod(index, texNumC);
-    vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+  float sample(sampler2D texture, vec2 uv) {
     return texture2D(texture, uv).r;
   }
-`;
 
-const SAMPLE_2D_SNIPPET = `
-  float sample2D(sampler2D texture, float texNumR, float texNumC, float numC,
-      float row, float col) {
-    float index = dot(vec2(row, col), vec2(numC, 1.0));
-    float texR = floor(index / texNumC);
-    float texC = mod(index, texNumC);
-    vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
-    return texture2D(texture, uv).r;
+  float sampleOrZeroPad(sampler2D texture, vec2 uv) {
+    bool lessThanZero = any(lessThan(uv, vec2(0, 0)));
+    bool greaterThanOne = any(greaterThan(uv, vec2(1, 1)));
+    bool outside = lessThanZero || greaterThanOne;
+    float value = sample(texture, uv);
+    return mix(value, 0.0, float(outside));
   }
-`;
 
-const SAMPLE_3D_SNIPPET = `
-  float sample3D(sampler2D texture, float texNumR, float texNumC, float stride0,
-      float stride1, float row, float col, float depth) {
-    float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0));
-    float texR = floor(index / texNumC);
-    float texC = mod(index, texNumC);
-    vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
-    return texture2D(texture, uv).r;
+  void setOutput(float val) {
+    gl_FragColor = vec4(val, 0, 0, 0);
   }
-`;
 
-const SAMPLE_4D_SNIPPET = `
-  float sample4D(sampler2D texture, float texNumR, float texNumC, float stride0,
-      float stride1, float stride2, float row, float col, float depth,
-      float depth2) {
-    float index = dot(vec4(row, col, depth, depth2),
-                      vec4(stride0, stride1, stride2, 1.0));
-    float texR = floor(index / texNumC);
-    float texC = mod(index, texNumC);
-    vec2 uv = (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
-    return texture2D(texture, uv).r;
+  bool isNaN(float val) {
+    return val == val ? false : true;
   }
+  ${SAMPLE_1D_SNIPPET}
+  ${SAMPLE_2D_SNIPPET}
+  ${SAMPLE_3D_SNIPPET}
+  ${SAMPLE_4D_SNIPPET}
 `;
 
 function getOutput1DCoords(
@@ -254,7 +265,7 @@ function getSamplerScalar(texName: string): string {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   return `
     float ${funcName}() {
-      return texture2D(${texName}, halfCR).r;
+      return sample(${texName}, halfCR);
     }
   `;
 }
@@ -267,7 +278,11 @@ function getSampler1D(
   if (texShape[0] === 1 && texShape[1] === 1) {
     return `
       float ${funcName}(float index) {
-        return texture2D(${texName}, halfCR).r;
+        return sample(${texName}, halfCR);
+      }
+
+      float ${funcName}OrZeroPad(float index) {
+        return sampleOrZeroPad(${texName}, halfCR);
       }
     `;
   }
@@ -275,7 +290,12 @@ function getSampler1D(
     return `
       float ${funcName}(float index) {
         vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
-        return texture2D(${texName}, uv).r;
+        return sample(${texName}, uv);
+      }
+
+      float ${funcName}OrZeroPad(float index) {
+        vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
+        return sampleOrZeroPad(${texName}, uv);
       }
     `;
   }
@@ -283,13 +303,24 @@ function getSampler1D(
     return `
       float ${funcName}(float index) {
         vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
-        return texture2D(${texName}, uv).r;
+        return sample(${texName}, uv);
+      }
+
+      float ${funcName}OrZeroPad(float index) {
+        vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
+        return sampleOrZeroPad(${texName}, uv);
       }
     `;
   }
   return `
     float ${funcName}(float index) {
-      return sample1D(${texName}, ${tR}.0, ${tC}.0, index);
+      vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index);
+      return sample(${texName}, uv);
+    }
+
+    float ${funcName}OrZeroPad(float index) {
+      vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index);
+      return sampleOrZeroPad(${texName}, uv);
     }
   `;
 }
@@ -304,8 +335,15 @@ function getSampler3D(
   const stride1 = shape[2];
   return `
     float ${funcName}(float row, float col, float depth) {
-      return sample3D(${texName}, ${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
-          row, col, depth);
+      vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
+        col, depth);
+      return sample(${texName}, uv);
+    }
+
+    float ${funcName}OrZeroPad(float row, float col, float depth) {
+      vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
+        col, depth);
+      return sampleOrZeroPad(${texName}, uv);
     }
   `;
 }
@@ -321,11 +359,19 @@ function getSampler4D(
   const stride0 = shape[1] * stride1;
 
   return `
-  float ${funcName}(float row, float col, float depth, float depth2) {
-    return sample4D(${texName}, ${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
-        ${stride2}.0, row, col, depth, depth2);
-  }
-`;
+    float ${funcName}(float row, float col, float depth, float depth2) {
+      vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
+          ${stride2}.0, row, col, depth, depth2);
+      return sample(${texName}, uv);
+    }
+
+    float ${funcName}OrZeroPad(float row, float col, float depth,
+        float depth2) {
+      vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
+          ${stride2}.0, row, col, depth, depth2);
+      return sampleOrZeroPad(${texName}, uv);
+    }
+  `;
 }
 
 function getSampler2D(
@@ -338,13 +384,24 @@ function getSampler2D(
     return `
       float ${funcName}(float row, float col) {
         vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
-        return texture2D(${texName}, uv).r;
+        return sample(${texName}, uv);
+      }
+
+      float ${funcName}OrZeroPad(float row, float col) {
+        vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
+        return sampleOrZeroPad(${texName}, uv);
       }
     `;
   }
   return `
     float ${funcName}(float row, float col) {
-      return sample2D(${texName}, ${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
+      vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
+      return sample(${texName}, uv);
+    }
+
+    float ${funcName}OrZeroPad(float row, float col) {
+      vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
+      return sampleOrZeroPad(${texName}, uv);
     }
   `;
 }
@@ -357,7 +414,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
   if (tNumC === 1 && tNumR === 1) {
     return `
       float ${funcName}(float index) {
-        return texture2D(${texName}, halfCR).r;
+        return sample(${texName}, halfCR);
       }
     `;
   }
@@ -365,7 +422,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
     return `
       float ${funcName}(float index) {
         vec2 uv = vec2(0.5, (index + 0.5) / ${tNumR}.0);
-        return texture2D(${texName}, uv).r;
+        return sample(${texName}, uv);
       }
     `;
   }
@@ -373,7 +430,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
     return `
       float ${funcName}(float index) {
         vec2 uv = vec2((index + 0.5) / ${tNumC}.0, 0.5);
-        return texture2D(${texName}, uv).r;
+        return sample(${texName}, uv);
       }
     `;
   }
@@ -382,7 +439,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
       float texR = floor(index / ${tNumC}.0);
       float texC = mod(index, ${tNumC}.0);
       vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0);
-      return texture2D(${texName}, uv).r;
+      return sample(${texName}, uv);
     }
   `;
 }
@@ -394,7 +451,7 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number],
   if (util.arraysEqual(inTexShape, outTexShape)) {
     return `
       float ${funcName}() {
-        return texture2D(${texName}, resultUV).r;
+        return sample(${texName}, resultUV);
       }
     `;
   }
@@ -410,7 +467,7 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number],
       float texC = mod(index, ${inTexShape[1]}.0);
       vec2 uv = (vec2(texC, texR) + halfCR) /
                  vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
-      return texture2D(${texName}, uv).r;
+      return sample(${texName}, uv);
     }
   `;
 }
diff --git a/tsconfig.json b/tsconfig.json
index 93bc6fb9e4..3f59e958de 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -10,6 +10,11 @@
         "lib": ["es2015", "dom"],
         "outDir": "./dist",
         "noUnusedLocals": true,
-        "noImplicitReturns": true
+        "noImplicitReturns": true,
+        "noImplicitThis": true,
+        "noUnusedParameters": false,
+        "pretty": true,
+        "noFallthroughCasesInSwitch": true,
+        "allowUnreachableCode": false
     }
 }
diff --git a/tslint.json b/tslint.json
index 6b67c36556..546ec936f5 100644
--- a/tslint.json
+++ b/tslint.json
@@ -21,6 +21,7 @@
     "class-name": true,
     "interface-name": [true, "never-prefix"],
     "jsdoc-format": true,
+    "forin": false,
     "label-position": true,
     "max-line-length": [true, 80],
     "new-parens": true,

From bef3392c5d42da264ddf535f52936f07ed6b8627 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Wed, 16 Aug 2017 21:39:49 -0400
Subject: [PATCH 02/10] fix conv2d zero padding and make the project build

---
 demos/benchmarks/conv_gpu_benchmark.ts      |  85 ++++++----------
 demos/benchmarks/logsumexp_gpu_benchmark.ts |   2 +-
 src/math/math_gpu.ts                        |  69 ++-----------
 src/math/webgl/addscaledmat_gpu_test.ts     |   4 +-
 src/math/webgl/conv_gpu.ts                  |  14 ++-
 src/math/webgl/conv_gpu_test.ts             | 105 ++++++--------------
 src/math/webgl/gpgpu_math.ts                |   8 +-
 src/math/webgl/shader_compiler.ts           |  58 ++---------
 8 files changed, 88 insertions(+), 257 deletions(-)

diff --git a/demos/benchmarks/conv_gpu_benchmark.ts b/demos/benchmarks/conv_gpu_benchmark.ts
index fffd644e2c..8d583bed2f 100644
--- a/demos/benchmarks/conv_gpu_benchmark.ts
+++ b/demos/benchmarks/conv_gpu_benchmark.ts
@@ -14,76 +14,49 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../../src/math/conv_util';
-import * as conv_gpu from '../../src/math/webgl/conv_gpu';
+import {Conv2DProgram} from '../../src/math/webgl/conv_gpu';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
-import * as test_util from '../../src/test_util';
-
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
 import {BenchmarkTest} from './benchmark';
 
 const OP_RUNS = 40;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
-  const inputShapeRCD: [number, number, number] = [size, size, 1];
+  const inputDepth = 1;
+  const inputShape: [number, number, number] = [size, size, inputDepth];
   const outputDepth = 1;
   const fieldSize = 11;
   const stride = 1;
-  const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride);
-  const outputShapeRCD: [number, number, number] =
-      conv_util.computeOutputShape3D(
-          inputShapeRCD, fieldSize, outputDepth, stride, zeroPad);
-
-  const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD);
-  const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD);
-  const weightsTexShapeRC = conv_util.computeWeightsTexShape(
-      inputShapeRCD[2], outputDepth, fieldSize);
-  const biasesTexShapeRC = conv_util.computeBiasesTexShape(outputDepth);
-
-  const hasBias = true;
+  const zeroPad = conv_util.computeDefaultPad(inputShape, fieldSize, stride);
   const gpgpu = new GPGPUContext();
-  const program = gpgpu.createProgram(conv_gpu.getFragmentShaderSource(
-      inputShapeRCD, outputDepth, fieldSize, stride, zeroPad, hasBias));
-
-  const inputTexture =
-      gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]);
-  const weightsTexture =
-      gpgpu.createMatrixTexture(weightsTexShapeRC[0], weightsTexShapeRC[1]);
-  const biasesTexture =
-      gpgpu.createMatrixTexture(biasesTexShapeRC[0], biasesTexShapeRC[1]);
-  const outputTexture =
-      gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]);
-
-  const inputData = test_util.randomArrayInRange(
-      inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1);
-  const weightsData = test_util.randomArrayInRange(
-      weightsTexShapeRC[0] * weightsTexShapeRC[1], -1, 1);
-  const biasesData = test_util.randomArrayInRange(
-      biasesTexShapeRC[0] * biasesTexShapeRC[1], -1, 1);
-
-  gpgpu.uploadMatrixToTexture(
-      inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData);
-  gpgpu.uploadMatrixToTexture(
-      weightsTexture, weightsTexShapeRC[0], weightsTexShapeRC[1], weightsData);
-  gpgpu.uploadMatrixToTexture(
-      biasesTexture, biasesTexShapeRC[0], biasesTexShapeRC[1], biasesData);
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+  const program = new Conv2DProgram(
+      inputShape, fieldSize, outputDepth, stride, zeroPad, true);
+  const outputShape = program.outputShape as [number, number, number];
+  const out = Array3D.zeros(outputShape);
+  const x = Array3D.randUniform(inputShape, -1, 1);
+  const wShape = conv_util.computeWeightsShape4D(1, outputDepth, fieldSize);
+  const W = Array4D.randUniform(wShape, -1, 1);
+  const b = Array1D.randUniform([outputDepth], -1, 1);
+  const inputs = [x, W, b];
+  const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);
 
   const start = performance.now();
   for (let i = 0; i < OP_RUNS; i++) {
-    conv_gpu.convolve(
-        gpgpu, program, inputTexture, weightsTexture, biasesTexture,
-        outputTexture, outputTexShapeRC);
+    gpgpu_math.runProgram(binary, inputs, out);
   }
-
-  gpgpu.downloadMatrixFromTexture(
-      outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]);
-  const end = performance.now();
-
-  const avgTime = (end - start) / OP_RUNS;
-
-  gpgpu.deleteMatrixTexture(inputTexture);
-  gpgpu.deleteMatrixTexture(weightsTexture);
-  gpgpu.deleteMatrixTexture(biasesTexture);
-  gpgpu.deleteMatrixTexture(outputTexture);
-  gpgpu.deleteProgram(program);
+  out.getValues();
+  const avgTime = (performance.now() - start) / OP_RUNS;
+
+  x.dispose();
+  W.dispose();
+  b.dispose();
+  out.dispose();
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
   return avgTime;
diff --git a/demos/benchmarks/logsumexp_gpu_benchmark.ts b/demos/benchmarks/logsumexp_gpu_benchmark.ts
index 007a6228c1..6afc05f4d0 100644
--- a/demos/benchmarks/logsumexp_gpu_benchmark.ts
+++ b/demos/benchmarks/logsumexp_gpu_benchmark.ts
@@ -35,7 +35,7 @@ export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
   for (let i = 0; i < OP_RUNS; i++) {
     gpgpu_math.runProgram(binary, [a], out);
   }
-
+  out.getValues();
   const avgTime = (performance.now() - start) / OP_RUNS;
   a.dispose();
   out.dispose();
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index 8818f148f1..a311acf435 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -27,7 +27,7 @@ import * as avg_pool_gpu from './webgl/avg_pool_gpu';
 import * as batchnorm_gpu from './webgl/batchnorm_gpu';
 import * as concat3d_gpu from './webgl/concat3d_gpu';
 import * as conv_backprop_gpu from './webgl/conv_backprop_gpu';
-import * as conv_gpu from './webgl/conv_gpu';
+import {Conv2DProgram} from './webgl/conv_gpu';
 import * as copy_gpu from './webgl/copy_gpu';
 import {GPGPUContext} from './webgl/gpgpu_context';
 import {BinaryOpProgram} from './webgl/binaryop_gpu';
@@ -57,7 +57,6 @@ const CONCAT_PROG = 'concat';
 const RESHAPE_PROG = 'reshape';
 
 // Convolution.
-const CONV2D_PROG = 'conv';
 const CONV2D_TRANSPOSE_PROG = 'conv_transpose';
 const CONV2D_DERW_PROG = 'conv_derw';
 const CONV2D_DERB_PROG = 'conv_derb';
@@ -477,70 +476,14 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   protected conv2dInternal(
-      x: Array3D, weights: Array4D, biases: Array1D|null, stride: number,
+      x: Array3D, weights: Array4D, bias: Array1D|null, stride: number,
       zeroPad: number): Array3D {
     const fieldSize = weights.shape[0];
-    const inputDepth = weights.shape[2];
     const outputDepth = weights.shape[3];
-    const progKey = [
-      CONV2D_PROG, x.shape, outputDepth, fieldSize, stride, biases != null
-    ].join('_');
-    const program = this.getAndSaveProgram(progKey, () => {
-      return conv_gpu.getFragmentShaderSource(
-          x.shape, outputDepth, fieldSize, stride, zeroPad, biases != null);
-    });
-
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-    const wTexShape =
-        conv_util.computeWeightsTexShape(inputDepth, outputDepth, fieldSize);
-    const biasTexShape = conv_util.computeBiasesTexShape(outputDepth);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    let cleanupX = false;
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    let cleanupW = false;
-    const actualWTexShape = weights.getTextureShapeRC(wTexShape);
-    if (!util.arraysEqual(actualWTexShape, wTexShape)) {
-      weights = this.reshapeTexture(weights, wTexShape);
-      cleanupW = true;
-    }
-
-    let cleanupB = false;
-    if (biases != null) {
-      const actualBTexShape = biases.getTextureShapeRC(biasTexShape);
-      if (!util.arraysEqual(actualBTexShape, biasTexShape)) {
-        biases = this.reshapeTexture(biases, biasTexShape);
-        cleanupB = true;
-      }
-    }
-
-    const resultShape = conv_util.computeOutputShape3D(
-        x.shape, fieldSize, outputDepth, stride, zeroPad);
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    conv_gpu.convolve(
-        this.gpgpu, program, x.getTexture(), weights.getTexture(),
-        biases != null ? biases.getTexture() : null, resultTex, resultTexShape);
-
-    if (cleanupX) {
-      x.dispose();
-    }
-    if (cleanupW) {
-      weights.dispose();
-    }
-    if (cleanupB && biases != null) {
-      biases.dispose();
-    }
-
-    return NDArray.make<Array3D>(
-        resultShape, {texture: resultTex, textureShapeRC: resultTexShape});
+    const program = new Conv2DProgram(
+        x.shape, fieldSize, outputDepth, stride, zeroPad, bias != null);
+    const inputs = bias != null ? [x, weights, bias] : [x, weights];
+    return this.compileAndRun(program, inputs);
   }
 
   protected conv2dBackPropInternal(
diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts
index cf63e1a668..2196f9347d 100644
--- a/src/math/webgl/addscaledmat_gpu_test.ts
+++ b/src/math/webgl/addscaledmat_gpu_test.ts
@@ -18,7 +18,6 @@ import {AddScaledMatProgram} from './addscaledmat_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
 import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray';
-import * as util from '../../util';
 import {TextureManager} from './texture_manager';
 
 function cpuAddScaledMatrices(
@@ -85,9 +84,8 @@ export function uploadAddScaledMatDownload(
   const textureManager = new TextureManager(gpgpu);
   initializeGPU(gpgpu, textureManager);
 
-  const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape);
-  const res = NDArray.zeros(outShape);
   const program = new AddScaledMatProgram(a.shape, b.shape);
+  const res = NDArray.zeros(program.outputShape);
   const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b, c1, c2], res);
   gpgpu_math.runProgram(binary, [a, b, c1, c2], res);
 
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index 5d7b222194..e05bc1b048 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -31,12 +31,12 @@ export class Conv2DProgram implements GPGPUProgram {
 
     this.userCode = `
       void main() {
-        vec3 output = getOutputCoords();
-        float yR = output.x;
-        float yC = output.y;
-        float d2 = output.z;
+        vec3 coords = getOutputCoords();
+        float yR = coords.x;
+        float yC = coords.y;
+        float d2 = coords.z;
 
-        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) -
+        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) -
             vec2(${pad}.0, ${pad}.0);
         float xRCorner = xRCCorner.x;
         float xCCorner = xRCCorner.y;
@@ -60,9 +60,7 @@ export class Conv2DProgram implements GPGPUProgram {
             }
           }
         }
-        if (${hasBias}) {
-          dotProd += getBias(d2);
-        }
+        ${hasBias ? 'dotProd += getBias(d2);' : ''}
         setOutput(dotProd);
       }
     `;
diff --git a/src/math/webgl/conv_gpu_test.ts b/src/math/webgl/conv_gpu_test.ts
index ac41c6a4ea..3d5a4c4542 100644
--- a/src/math/webgl/conv_gpu_test.ts
+++ b/src/math/webgl/conv_gpu_test.ts
@@ -16,78 +16,49 @@ limitations under the License.
 import * as test_util from '../../test_util';
 import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array1D, Array3D, Array4D, NDArray} from '../ndarray';
 
-import * as conv_gpu from './conv_gpu';
+import {Conv2DProgram} from './conv_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {NDArray, Array1D, Array3D, Array4D, initializeGPU} from '../ndarray';
+import {TextureManager} from './texture_manager';
 
 describe('conv_gpu', () => {
 
   function uploadConvolveDownload(
-      x: Float32Array, aShapeRowColDepth: [number, number, number],
-      weights: Float32Array, biases: Float32Array|null, resultDepth: number,
+      xVals: Float32Array, xShapeRCD: [number, number, number],
+      weights: Float32Array, biasVals: Float32Array|null, resultDepth: number,
       fieldSize: number, stride: number, zeroPad?: number): Float32Array {
     zeroPad = zeroPad != null ?
         zeroPad :
-        conv_util.computeDefaultPad(aShapeRowColDepth, fieldSize, stride);
+        conv_util.computeDefaultPad(xShapeRCD, fieldSize, stride);
 
-    const xTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
+    const x = Array3D.new(xShapeRCD, xVals);
+    const wShape =
+        conv_util.computeWeightsShape4D(xShapeRCD[2], resultDepth, fieldSize);
+    const W = Array4D.new(wShape, weights);
 
-    const resultShapeRCD: [number, number, number] =
-        conv_util.computeOutputShape3D(
-            aShapeRowColDepth, fieldSize, resultDepth, stride, zeroPad);
-
-    const weightsTexShapeRC: [number, number] =
-        conv_util.computeWeightsTexShape(
-            aShapeRowColDepth[2], resultDepth, fieldSize);
-
-    const biasesTexShapeRC: [number, number] = [1, resultDepth];
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
+    const b = biasVals != null ? Array1D.new(biasVals) : null;
 
     const gpgpu = new GPGPUContext();
     gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = conv_gpu.getFragmentShaderSource(
-        aShapeRowColDepth, resultDepth, fieldSize, stride, zeroPad,
-        biases != null);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-    const weightsTex =
-        gpgpu.createMatrixTexture(weightsTexShapeRC[0], weightsTexShapeRC[1]);
-    const biasesTex = biases != null ?
-        gpgpu.createMatrixTexture(biasesTexShapeRC[0], biasesTexShapeRC[1]) :
-        null;
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], x);
-    gpgpu.uploadMatrixToTexture(
-        weightsTex, weightsTexShapeRC[0], weightsTexShapeRC[1], weights);
-
-    if (biases != null) {
-      gpgpu.uploadMatrixToTexture(
-          biasesTex!, biasesTexShapeRC[0], biasesTexShapeRC[1], biases);
-    }
-
-    conv_gpu.convolve(
-        gpgpu, program, xTex, weightsTex, biasesTex, resultTex,
-        resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    if (biasesTex != null) {
-      gpgpu.deleteMatrixTexture(biasesTex);
-    }
-    gpgpu.deleteMatrixTexture(weightsTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteProgram(program);
+    const textureManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, textureManager);
+
+    const program = new Conv2DProgram(
+        xShapeRCD, fieldSize, resultDepth, stride, zeroPad,
+        biasVals != null);
+    const res = NDArray.zeros(program.outputShape);
+    const inputs = biasVals != null ? [x, W, b] : [x, W];
+    const binary =
+        gpgpu_math.compileProgram(gpgpu, program, inputs, res);
+    gpgpu_math.runProgram(binary, inputs, res);
+    const resValues = res.getValues();
+
+    textureManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
-    return result;
+    return resValues;
   }
 
   function compareToCPU(
@@ -251,7 +222,7 @@ describe('conv_gpu', () => {
     expect(result[5]).toBeCloseTo(a[2] * weights[2] + a[3] * weights[5]);
   });
 
-  it('2x2x1 in, 1d out, 2x2 filter, 1 stride', () => {
+  it('2x2x1 in, 1d out, 2x2 filter, s=2, bias=0, p=1', () => {
     const x = new Float32Array([1, 2, 3, 4]);
     const w = new Float32Array([3, 1, 5, 0]);
     const bias = new Float32Array([0]);
@@ -263,7 +234,7 @@ describe('conv_gpu', () => {
     expect(result[3]).toBe(12);
   });
 
-  it('2x2x1 in, 1d out, 2x2 filter, 1 stride', () => {
+  it('2x2x1 in, 1d out, 2x2 filter, 1 stride, bias=-1', () => {
     const x = new Float32Array([1, 2, 3, 4]);
     const w = new Float32Array([3, 1, 5, 0]);
     const bias = new Float32Array([-1]);
@@ -272,7 +243,7 @@ describe('conv_gpu', () => {
     expect(result[0]).toBe(19);
   });
 
-  it('2x2x1 in, 1d out, 2x2 filter, 1 stride, null bias', () => {
+  it('2x2x1 in, 1d out, 2x2 filter, 1 stride, no bias', () => {
     const x = new Float32Array([1, 2, 3, 4]);
     const w = new Float32Array([3, 1, 5, 0]);
     const bias: Float32Array|null = null;
@@ -281,19 +252,7 @@ describe('conv_gpu', () => {
     expect(result[0]).toBe(20);
   });
 
-  it('2x2x1 in, 1d out, 2x2 filter, 1 stride, zeropad = 1', () => {
-    const x = new Float32Array([1, 2, 3, 4]);
-    const w = new Float32Array([3, 1, 5, 0]);
-    const bias = new Float32Array([0]);
-    const result = uploadConvolveDownload(x, [2, 2, 1], w, bias, 1, 2, 2, 1);
-    expect(result.length).toEqual(4);
-    expect(result[0]).toBe(0);
-    expect(result[1]).toBe(10);
-    expect(result[2]).toBe(3);
-    expect(result[3]).toBe(12);
-  });
-
-  it('5x5x3 in, 2d out, 3x3 filter, 2 stride', () => {
+  it('5x5x3 in, 2d out, 3x3 filter, s=2, p=1', () => {
     /*
       weights:       input:
         [ 1, -1,       [1, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2,
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index b5b2fe9d5a..d6b069eafa 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -41,12 +41,12 @@ export function compileProgram<T extends NDArray, K extends NDArray>(
     gpgpu: GPGPUContext, program: GPGPUProgram, inputs: T[],
     output: K): GPGPUBinary {
   const userCode = program.userCode;
-  const inputInfos = program.variableNames.map((x, i) => {
+  const inputInfos = inputs.map((input, i) => {
     const shapeInfo = {
-      logicalShape: inputs[i].shape,
-      texShape: inputs[i].getTextureShapeRC()
+      logicalShape: input.shape,
+      texShape: input.getTextureShapeRC()
     };
-    return {name: x, shapeInfo};
+    return {name: program.variableNames[i], shapeInfo};
   });
   const inShapeInfos = inputInfos.map(x => x.shapeInfo);
   const outShapeInfo = {
diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts
index 109efa6c3c..19d91e2ea3 100644
--- a/src/math/webgl/shader_compiler.ts
+++ b/src/math/webgl/shader_compiler.ts
@@ -54,7 +54,7 @@ function getInputSamplingSnippet(
       res += getSamplerScalar(inInfo.name);
       break;
     case 1:
-      res += getSampler1D(inInfo.name, texShape);
+      res += getSampler1D(inInfo.name, texShape, shape[0]);
       break;
     case 2:
       res += getSampler2D(inInfo.name, shape as [number, number], texShape);
@@ -155,14 +155,6 @@ const SHADER_PREFIX = `
     return texture2D(texture, uv).r;
   }
 
-  float sampleOrZeroPad(sampler2D texture, vec2 uv) {
-    bool lessThanZero = any(lessThan(uv, vec2(0, 0)));
-    bool greaterThanOne = any(greaterThan(uv, vec2(1, 1)));
-    bool outside = lessThanZero || greaterThanOne;
-    float value = sample(texture, uv);
-    return mix(value, 0.0, float(outside));
-  }
-
   void setOutput(float val) {
     gl_FragColor = vec4(val, 0, 0, 0);
   }
@@ -271,7 +263,7 @@ function getSamplerScalar(texName: string): string {
 }
 
 function getSampler1D(
-    texName: string, texShape: [number, number]): string {
+    texName: string, texShape: [number, number], size: number): string {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   const tR = texShape[0];
   const tC = texShape[1];
@@ -280,10 +272,6 @@ function getSampler1D(
       float ${funcName}(float index) {
         return sample(${texName}, halfCR);
       }
-
-      float ${funcName}OrZeroPad(float index) {
-        return sampleOrZeroPad(${texName}, halfCR);
-      }
     `;
   }
   if (texShape[1] === 1) {
@@ -292,11 +280,6 @@ function getSampler1D(
         vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
         return sample(${texName}, uv);
       }
-
-      float ${funcName}OrZeroPad(float index) {
-        vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
-        return sampleOrZeroPad(${texName}, uv);
-      }
     `;
   }
   if (texShape[0] === 1) {
@@ -305,11 +288,6 @@ function getSampler1D(
         vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
         return sample(${texName}, uv);
       }
-
-      float ${funcName}OrZeroPad(float index) {
-        vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
-        return sampleOrZeroPad(${texName}, uv);
-      }
     `;
   }
   return `
@@ -317,11 +295,6 @@ function getSampler1D(
       vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index);
       return sample(${texName}, uv);
     }
-
-    float ${funcName}OrZeroPad(float index) {
-      vec2 uv = UVfrom1D(${tR}.0, ${tC}.0, index);
-      return sampleOrZeroPad(${texName}, uv);
-    }
   `;
 }
 
@@ -333,6 +306,7 @@ function getSampler3D(
   const tC = texShape[1];
   const stride0 = shape[1] * shape[2];
   const stride1 = shape[2];
+  const [numRows, numCols, numDepths] = shape;
   return `
     float ${funcName}(float row, float col, float depth) {
       vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
@@ -341,9 +315,12 @@ function getSampler3D(
     }
 
     float ${funcName}OrZeroPad(float row, float col, float depth) {
-      vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
-        col, depth);
-      return sampleOrZeroPad(${texName}, uv);
+      vec3 coords = vec3(row, col, depth);
+      bool lessThanZero = any(lessThan(coords, vec3(0.0, 0.0, 0.0)));
+      bool greaterThanSize = any(greaterThan(coords,
+          vec3(${numRows}.0 - 0.5, ${numCols}.0 - 0.5, ${numDepths}.0 - 0.5)));
+      bool outside = lessThanZero || greaterThanSize;
+      return mix(${funcName}(row, col, depth), 0.0, float(outside));
     }
   `;
 }
@@ -364,13 +341,6 @@ function getSampler4D(
           ${stride2}.0, row, col, depth, depth2);
       return sample(${texName}, uv);
     }
-
-    float ${funcName}OrZeroPad(float row, float col, float depth,
-        float depth2) {
-      vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
-          ${stride2}.0, row, col, depth, depth2);
-      return sampleOrZeroPad(${texName}, uv);
-    }
   `;
 }
 
@@ -386,11 +356,6 @@ function getSampler2D(
         vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
-
-      float ${funcName}OrZeroPad(float row, float col) {
-        vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
-        return sampleOrZeroPad(${texName}, uv);
-      }
     `;
   }
   return `
@@ -398,11 +363,6 @@ function getSampler2D(
       vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
       return sample(${texName}, uv);
     }
-
-    float ${funcName}OrZeroPad(float row, float col) {
-      vec2 uv = UVfrom2D(${tR}.0, ${tC}.0, ${shape[1]}.0, row, col);
-      return sampleOrZeroPad(${texName}, uv);
-    }
   `;
 }
 

From 85c4bcef3eefbd0f9f6a3ea9cceea208f4757d2b Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Thu, 17 Aug 2017 01:10:45 -0400
Subject: [PATCH 03/10] migrate rest of conv shaders to logical sampling

---
 .../conv_transpose_gpu_benchmark.ts           |  67 ++--
 src/math/math_gpu.ts                          | 170 +--------
 src/math/webgl/conv_backprop_gpu.ts           | 347 +++++++-----------
 .../webgl/conv_backprop_gpu_derbias_test.ts   |  38 +-
 .../conv_backprop_gpu_derweights_test.ts      |  48 +--
 .../webgl/conv_backprop_transpose_gpu_test.ts |  74 +---
 src/math/webgl/conv_gpu.ts                    |  50 +--
 src/math/webgl/conv_gpu_getbiasvalue_test.ts  |  85 -----
 .../conv_gpu_getmatrixvalueorzeropad_test.ts  | 139 -------
 9 files changed, 225 insertions(+), 793 deletions(-)
 delete mode 100644 src/math/webgl/conv_gpu_getbiasvalue_test.ts
 delete mode 100644 src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts

diff --git a/demos/benchmarks/conv_transpose_gpu_benchmark.ts b/demos/benchmarks/conv_transpose_gpu_benchmark.ts
index a68cb068ba..86074a08fc 100644
--- a/demos/benchmarks/conv_transpose_gpu_benchmark.ts
+++ b/demos/benchmarks/conv_transpose_gpu_benchmark.ts
@@ -14,71 +14,48 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../../src/math/conv_util';
-import * as conv_backprop_gpu from '../../src/math/webgl/conv_backprop_gpu';
+import {Array3D, Array4D, initializeGPU} from '../../src/math/ndarray';
+import {Conv2DTransposeProgram} from '../../src/math/webgl/conv_backprop_gpu';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
-import * as test_util from '../../src/test_util';
-
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
 import {BenchmarkTest} from './benchmark';
 
 const OP_RUNS = 100;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
-  const xShapeRCD: [number, number, number] = [size, size, 1];
+  const origInputDepth = 1;
   const origOutputDepth = 2;
+  const xShape: [number, number, number] = [size, size, origOutputDepth];
   const fieldSize = 11;
   const origStride = 1;
   const origPad = 1;
 
   const gpgpu = new GPGPUContext();
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
   gpgpu.enableAutomaticDebugValidation(true);
-  const origInputDepth = xShapeRCD[2];
-  const src = conv_backprop_gpu.getFragmentShaderConvTransposeSource(
-      xShapeRCD, fieldSize, origInputDepth, origStride, origPad, false);
-  const program = gpgpu.createProgram(src);
 
-  // Upload x.
-  const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD);
-  const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-  const xData =
-      test_util.randomArrayInRange(xTexShapeRC[0] * xTexShapeRC[1], -1, 1);
-  gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], xData);
 
-  // Upload weights.
-  const wTexShapeRC = conv_util.computeWeightsTexShape(
+  const program = new Conv2DTransposeProgram(
+      xShape, fieldSize, origInputDepth, origStride, origPad, false);
+  const outputShape = program.outputShape as [number, number, number];
+  const out = Array3D.zeros(outputShape);
+  const x = Array3D.randUniform(xShape, -1, 1);
+  const wShape = conv_util.computeWeightsShape4D(
       origInputDepth, origOutputDepth, fieldSize);
-  const wData =
-      test_util.randomArrayInRange(wTexShapeRC[0] * wTexShapeRC[1], -1, 1);
-  const wTex = gpgpu.createMatrixTexture(wTexShapeRC[0], wTexShapeRC[1]);
-  gpgpu.uploadMatrixToTexture(wTex, wTexShapeRC[0], wTexShapeRC[1], wData);
-
-  // Figure out the output shape by dilating the input.
-  const dilatedRC =
-      conv_util.computeDilatedRC([xShapeRCD[0], xShapeRCD[1]], origStride);
-  const pad = fieldSize - 1 - origPad;
-  const resultShapeRCD = conv_util.computeOutputShape3D(
-      [dilatedRC[0], dilatedRC[1], origOutputDepth], fieldSize, origInputDepth,
-      1, pad);
-
-  const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-  const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]);
-
+  const W = Array4D.randUniform(wShape, -1, 1);
+  const inputs = [x, W];
+  const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, out);
   const start = performance.now();
   for (let i = 0; i < OP_RUNS; i++) {
-    conv_backprop_gpu.convTranspose(
-        gpgpu, program, xTex, wTex, null, resultTex, resultTexRC);
+    gpgpu_math.runProgram(binary, inputs, out);
   }
+  out.getValues();
+  const avgTime = (performance.now() - start) / OP_RUNS;
 
-  gpgpu.downloadMatrixFromTexture(resultTex, resultTexRC[0], resultTexRC[1]);
-
-  const end = performance.now();
-
-  const avgTime = (end - start) / OP_RUNS;
-
-  gpgpu.deleteMatrixTexture(resultTex);
-  gpgpu.deleteMatrixTexture(xTex);
-  gpgpu.deleteMatrixTexture(wTex);
-  gpgpu.deleteProgram(program);
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
-
   return avgTime;
 };
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index a311acf435..808bf1954a 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -25,14 +25,15 @@ import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
 import * as avg_pool_gpu from './webgl/avg_pool_gpu';
 import * as batchnorm_gpu from './webgl/batchnorm_gpu';
+import {BinaryOpProgram} from './webgl/binaryop_gpu';
 import * as concat3d_gpu from './webgl/concat3d_gpu';
-import * as conv_backprop_gpu from './webgl/conv_backprop_gpu';
+// tslint:disable-next-line:max-line-length
+import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu';
 import {Conv2DProgram} from './webgl/conv_gpu';
 import * as copy_gpu from './webgl/copy_gpu';
 import {GPGPUContext} from './webgl/gpgpu_context';
-import {BinaryOpProgram} from './webgl/binaryop_gpu';
-import {GPGPUProgram, GPGPUBinary} from './webgl/gpgpu_math';
 import * as gpgpu_math from './webgl/gpgpu_math';
+import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math';
 import * as gpgpu_util from './webgl/gpgpu_util';
 import {LogSumExpProgram} from './webgl/logsumexp_gpu';
 import * as max_pool_backprop_gpu from './webgl/max_pool_backprop_gpu';
@@ -45,8 +46,8 @@ import {ReduceSumProgram} from './webgl/reducesum_gpu';
 import * as reshape_gpu from './webgl/reshape_gpu';
 import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu';
 import {TextureManager} from './webgl/texture_manager';
+import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
-import {UnaryOpProgram, UnaryOp} from './webgl/unaryop_gpu';
 
 const BATCHNORM_PROG = 'batchnorm';
 
@@ -57,9 +58,6 @@ const CONCAT_PROG = 'concat';
 const RESHAPE_PROG = 'reshape';
 
 // Convolution.
-const CONV2D_TRANSPOSE_PROG = 'conv_transpose';
-const CONV2D_DERW_PROG = 'conv_derw';
-const CONV2D_DERB_PROG = 'conv_derb';
 const MAX_POOL_PROG = 'maxpool';
 const MAX_POOL_POSITIONS_PROG = 'maxpool_posn';
 const MAX_POOL_BACKPROP_PROG = 'maxpool_backprop';
@@ -281,8 +279,8 @@ export class NDArrayMathGPU extends NDArrayMath {
   protected matMulInternal(
       a: Array2D, b: Array2D, aOrientation: MatrixOrientation,
       bOrientation: MatrixOrientation): Array2D {
-    const program = new MatMulProgram(a.shape, b.shape, aOrientation,
-        bOrientation);
+    const program =
+        new MatMulProgram(a.shape, b.shape, aOrientation, bOrientation);
     return this.compileAndRun<Array2D, Array2D>(program, [a, b]);
   }
 
@@ -538,164 +536,28 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   protected conv2dTransposeInternal(
-      x: Array3D, weights: Array4D, biases: Array1D|null, origStride: number,
+      x: Array3D, weights: Array4D, bias: Array1D|null, origStride: number,
       origPad: number): Array3D {
     const origInputDepth = weights.shape[2];
-    const origOutputDepth = weights.shape[3];
     const fieldSize = weights.shape[0];
-
-    const progKey = [
-      CONV2D_TRANSPOSE_PROG, x.shape, fieldSize, origInputDepth, origStride,
-      origPad, biases != null
-    ].join('_');
-    const program = this.getAndSaveProgram(progKey, () => {
-      return conv_backprop_gpu.getFragmentShaderConvTransposeSource(
-          x.shape, fieldSize, origInputDepth, origStride, origPad,
-          biases != null);
-    });
-
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-    const wTexShape = conv_util.computeWeightsTexShape(
-        origInputDepth, origOutputDepth, fieldSize);
-    const biasTexShape = conv_util.computeBiasesTexShape(origInputDepth);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    let cleanupX = false;
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    let cleanupW = false;
-    const actualWTexShape = weights.getTextureShapeRC(wTexShape);
-    if (!util.arraysEqual(actualWTexShape, wTexShape)) {
-      weights = this.reshapeTexture(weights, wTexShape);
-      cleanupW = true;
-    }
-
-    let cleanupB = false;
-    if (biases != null) {
-      const actualBiasTexShape = biases.getTextureShapeRC(biasTexShape);
-      if (!util.arraysEqual(actualBiasTexShape, biasTexShape)) {
-        biases = this.reshapeTexture(biases, biasTexShape);
-        cleanupB = true;
-      }
-    }
-
-    // Figure out the output shape by dilating the input.
-    const dilatedRC =
-        conv_util.computeDilatedRC([x.shape[0], x.shape[1]], origStride);
-    const pad = fieldSize - 1 - origPad;
-    const resultShape = conv_util.computeOutputShape3D(
-        [dilatedRC[0], dilatedRC[1], origOutputDepth], fieldSize,
-        origInputDepth, 1, pad);
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    conv_backprop_gpu.convTranspose(
-        this.gpgpu, program, x.getTexture(), weights.getTexture(),
-        biases != null ? biases.getTexture() : null, resultTex, resultTexShape);
-
-    if (cleanupX) {
-      x.dispose();
-    }
-    if (cleanupW) {
-      weights.dispose();
-    }
-    if (cleanupB) {
-      biases!.dispose();
-    }
-
-    return NDArray.make<Array3D>(
-        resultShape, {texture: resultTex, textureShapeRC: resultTexShape});
+    const program = new Conv2DTransposeProgram(
+        x.shape, fieldSize, origInputDepth, origStride, origPad, bias != null);
+    const inputs = bias != null ? [x, weights, bias] : [x, weights];
+    return this.compileAndRun(program, inputs);
   }
 
   conv2dDerWeights(
       x: Array3D, dY: Array3D, fSize: number, stride: number,
       zeroPad: number): Array4D {
-    const inputDepth = x.shape[2];
     const outputDepth = dY.shape[2];
-    const progKey = [
-      CONV2D_DERW_PROG, x.shape, fSize, outputDepth, stride, zeroPad
-    ].join('_');
-    const program = this.getAndSaveProgram(progKey, () => {
-      return conv_backprop_gpu.getFragmentShaderDerWeightsSource(
-          x.shape, fSize, outputDepth, stride, zeroPad);
-    });
-
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-    const yShape = conv_util.computeOutputShape3D(
+    const program = new Conv2DDerWeightsProgram(
         x.shape, fSize, outputDepth, stride, zeroPad);
-    const yTexShape = conv_util.computeTexShapeFrom3D(yShape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    let cleanupX = false;
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    let cleanupY = false;
-    const actualYTexShape = dY.getTextureShapeRC(yTexShape);
-    if (!util.arraysEqual(actualYTexShape, yTexShape)) {
-      dY = this.reshapeTexture(dY, yTexShape);
-      cleanupY = true;
-    }
-
-    const resultTexShape =
-        conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    conv_backprop_gpu.derWeights(
-        this.gpgpu, program, x.getTexture(), dY.getTexture(), resultTex,
-        resultTexShape);
-
-    if (cleanupX) {
-      x.dispose();
-    }
-    if (cleanupY) {
-      dY.dispose();
-    }
-
-    const weightsShape =
-        conv_util.computeWeightsShape4D(inputDepth, outputDepth, fSize);
-    return NDArray.make<Array4D>(
-        weightsShape, {texture: resultTex, textureShapeRC: resultTexShape});
+    return this.compileAndRun(program, [x, dY]);
   }
 
   conv2dDerBias(dY: Array3D): Array1D {
-    const outputDepth = dY.shape[2];
-    const progKey = [CONV2D_DERB_PROG, dY.shape].join('_');
-    const program = this.getAndSaveProgram(progKey, () => {
-      return conv_backprop_gpu.getFragmentShaderDerBiasSource(dY.shape);
-    });
-    const yTexShape = conv_util.computeTexShapeFrom3D(dY.shape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    let cleanupY = false;
-    const actualYTexShape = dY.getTextureShapeRC(yTexShape);
-    if (!util.arraysEqual(actualYTexShape, yTexShape)) {
-      dY = this.reshapeTexture(dY, yTexShape);
-      cleanupY = true;
-    }
-
-    const resultTexShape = conv_util.computeBiasesTexShape(outputDepth);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    conv_backprop_gpu.derBias(
-        this.gpgpu, program, dY.getTexture(), resultTex, resultTexShape);
-
-    if (cleanupY) {
-      dY.dispose();
-    }
-
-    return NDArray.make<Array1D>(
-        [outputDepth], {texture: resultTex, textureShapeRC: resultTexShape});
+    const program = new Conv2DDerBiasProgram(dY.shape);
+    return this.compileAndRun(program, [dY]);
   }
 
   private pool(
diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts
index c707f98aba..b3df023d3c 100644
--- a/src/math/webgl/conv_backprop_gpu.ts
+++ b/src/math/webgl/conv_backprop_gpu.ts
@@ -14,244 +14,147 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../conv_util';
-
-import * as conv_gpu from './conv_gpu';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderDerWeightsSource(
-    xShapeRowColDepth: [number, number, number], fSize: number,
-    outputDepth: number, stride: number, zeroPad: number) {
-  const getMatrixValueOrZeroPad =
-      conv_gpu.getFragmentShaderGetMatrixValueOrZeroPadSource();
-  const inputDepth = xShapeRowColDepth[2];
-
-  const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRowColDepth);
-
-  const yShape = conv_util.computeOutputShape3D(
-      xShapeRowColDepth, fSize, outputDepth, stride, zeroPad);
-  const yNumRows = yShape[0];
-  const yNumCols = yShape[1];
-  const yTexShapeRC = conv_util.computeTexShapeFrom3D(yShape);
-
-  const fSizeTimesInputDepth = fSize * inputDepth;
-
-  const prologue = `
-    precision highp float;
-    uniform sampler2D x;
-    uniform sampler2D dy;
-  `;
-
-  return prologue + '\n' + getMatrixValueOrZeroPad + '\n' +
-      `
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
-    const vec2 dyShapeCR = vec2(${yTexShapeRC[1]}, ${yTexShapeRC[0]});
-
-    void main() {
-      vec2 wTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (wTexR, wTexC) to 4D (wR, wC, d1, d2).
-      float wR = floor(wTexCR.y / ${fSizeTimesInputDepth}.0);
-      float wTexRLeftover = wTexCR.y - wR * ${fSizeTimesInputDepth}.0;
-      float wC = floor(wTexRLeftover / ${inputDepth}.0);
-      float d1 = mod(wTexRLeftover, ${inputDepth}.0);
-      float d2 = wTexCR.x;
-
-      // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
-      // ? = to be determined. : = across all values in that axis.
-      float dotProd = 0.0;
-      for (int yR = 0; yR < ${yNumRows}; yR++) {
-        float yTexR = float(yR);
-        float xR = wR + yTexR * ${stride}.0 - ${zeroPad}.0;
-        float xTexR = xR;
-
-        for (int yC = 0; yC < ${yNumCols}; yC++) {
-          float yC_float = float(yC);
-          float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0;
-
-          // Map from 3D (xR, xC, d1) to 2D (xTexR, xTexC).
-          // Map from 3D (yR, yC, d2) to 2D (yTexR, yTexC).
-          vec2 xyTexC =
-              vec2(xC, yC_float) * vec2(${inputDepth}.0, ${outputDepth}.0) +
-              vec2(d1, d2);
-          float xTexC = xyTexC.x;
-          float yTexC = xyTexC.y;
-
-          // Read dy(yR, yC, d2).
-          vec2 dyUV = (vec2(yTexC, yTexR) + halfCR) / dyShapeCR;
-          float dyValue = texture2D(dy, dyUV).r;
-
-          // Read x(xR, xC, d1) (potentially zero-padded).
-          float xValue =
-            getMatrixValueOrZeroPad(x, xShapeCR, vec2(xTexC, xTexR));
-
-          dotProd += (xValue * dyValue);
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Conv2DDerWeightsProgram implements GPGPUProgram {
+  variableNames = ['x', 'dy'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(
+      xShape: [number, number, number], fSize: number, outputDepth: number,
+      stride: number, zeroPad: number) {
+    const yShape = conv_util.computeOutputShape3D(
+        xShape, fSize, outputDepth, stride, zeroPad);
+    const yNumRows = yShape[0];
+    const yNumCols = yShape[1];
+    this.outputShape =
+        conv_util.computeWeightsShape4D(xShape[2], outputDepth, fSize);
+    this.params = [stride, zeroPad];
+    this.userCode = `
+      void main() {
+        vec4 coords = getOutputCoords();
+        float wR = coords.x;
+        float wC = coords.y;
+        float d1 = coords.z;
+        float d2 = coords.w;
+
+        // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
+        // ? = to be determined. : = across all values in that axis.
+        float dotProd = 0.0;
+        for (int yR = 0; yR < ${yNumRows}; yR++) {
+          float yR_float = float(yR);
+          float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0;
+
+          for (int yC = 0; yC < ${yNumCols}; yC++) {
+            float yC_float = float(yC);
+            float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0;
+
+            float dyValue = getDy(yR_float, yC_float, d2);
+            float xValue = getXOrZeroPad(xR, xC, d1);
+            dotProd += (xValue * dyValue);
+          }
         }
+        setOutput(dotProd);
       }
-      gl_FragColor = vec4(dotProd, 0, 0, 0);
-    }`;
-}
-
-export function getFragmentShaderConvTransposeSource(
-    xShapeRCD: [number, number, number], fSize: number, origInputDepth: number,
-    origStride: number, origPad: number, hasBias: boolean) {
-  const pad = fSize - 1 - origPad;
-  const [xRows, xCols, origOutputDepth] = xShapeRCD;
-
-  const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD);
-  const wTexShapeRC =
-      conv_util.computeWeightsTexShape(origInputDepth, origOutputDepth, fSize);
-
-  const getBiasValue = hasBias ?
-      conv_gpu.getFragmentShaderGetBiasValueSource(origInputDepth) :
-      '';
-  const biasPrologue = hasBias ? 'uniform sampler2D biases;' : '';
-  const biasOperation = hasBias ? 'dotProd += getBiasValue(biases, d2);' : '';
-
-  const prologue = `
-    precision highp float;
-    uniform sampler2D x;
-    uniform sampler2D weights;
-    ${biasPrologue}
     `;
+  }
+}
 
-  return prologue + '\n' + getBiasValue + '\n' +
-      `
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
-    const vec2 wShapeCR = vec2(${wTexShapeRC[1]}, ${wTexShapeRC[0]});
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2).
-      float yR = yTexCR.y;
-      float yC = floor(yTexCR.x / ${origInputDepth}.0);
-      float d2 = mod(yTexCR.x, ${origInputDepth}.0);
-
-      vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0);
-      float xRCorner = xRCCorner.x;
-      float xCCorner = xRCCorner.y;
-
-      // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2).
-      // ? = to be determined. : = across all values in that axis.
-      float dotProd = 0.0;
-      for (int wR = 0; wR < ${fSize}; wR++) {
-        float wR_float = float(wR);
-        float xR = (xRCorner + wR_float) / ${origStride}.0;
-        // TODO(smilkov): Splice this with another version where you call
-        // getMatrixValueOrZeroPad(). Here and below.
-        if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) {
-          continue;
-        }
-
-        float wRPerm = ${fSize}.0 - 1.0 - wR_float;
-        float xTexR = xR;
-
-        for (int wC = 0; wC < ${fSize}; wC++) {
-          float wC_float = float(wC);
-          float xC = (xCCorner + wC_float) / ${origStride}.0;
-          if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) {
+export class Conv2DTransposeProgram implements GPGPUProgram {
+  variableNames = ['x', 'W', 'bias'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(
+      xShape: [number, number, number], fSize: number, origInputDepth: number,
+      origStride: number, origPad: number, hasBias: boolean) {
+    const [xRows, xCols, origOutputDepth] = xShape;
+    const biasSnippet = hasBias ? 'dotProd += getBias(d2);' : '';
+
+    // Figure out the output shape by dilating the input.
+    const xRowsDilated = (xRows - 1) * origStride + 1;
+    const xColsDilated = (xCols - 1) * origStride + 1;
+    const pad = fSize - 1 - origPad;
+    this.outputShape = conv_util.computeOutputShape3D(
+        [xRowsDilated, xColsDilated, origOutputDepth], fSize, origInputDepth, 1,
+        pad);
+    this.params = [pad, fSize, origStride, hasBias];
+
+    this.userCode = `
+      void main() {
+        vec3 coords = getOutputCoords();
+        float yR = coords.x;
+        float yC = coords.y;
+        float d2 = coords.z;
+
+        vec2 xRCCorner = vec2(yR, yC) - vec2(${pad}.0, ${pad}.0);
+        float xRCorner = xRCCorner.x;
+        float xCCorner = xRCCorner.y;
+
+        // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2).
+        // ? = to be determined. : = across all values in that axis.
+        float dotProd = 0.0;
+        for (int wR = 0; wR < ${fSize}; wR++) {
+          float wR_float = float(wR);
+          float xR = (xRCorner + wR_float) / ${origStride}.0;
+          // TODO(smilkov): Splice this with another version where you call
+          // getMatrixValueOrZeroPad(). Here and below.
+          if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) {
             continue;
           }
 
-          float wCPerm = ${fSize}.0 - 1.0 - wC_float;
-          float wTexR = wRPerm * ${fSize}.0 * ${origInputDepth}.0 +
-                        wCPerm * ${origInputDepth}.0 + d2;
-
-          for (int d1 = 0; d1 < ${origOutputDepth}; d1++) {
-            float d1_float = float(d1);
-            float xTexC = xC * ${origOutputDepth}.0 + d1_float;
-            float wTexC = d1_float;
+          float wRPerm = ${fSize}.0 - 1.0 - wR_float;
 
-            // Read x(xR, xC, d1).
-            vec2 xUV = (vec2(xTexC, xTexR) + halfCR) / xShapeCR;
-            float xValue = texture2D(x, xUV).r;
+          for (int wC = 0; wC < ${fSize}; wC++) {
+            float wC_float = float(wC);
+            float xC = (xCCorner + wC_float) / ${origStride}.0;
+            if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) {
+              continue;
+            }
 
-            // Read w(wRPerm, wCPerm, d2, d1).
-            vec2 wUV = (vec2(wTexC, wTexR) + halfCR) / wShapeCR;
-            float wValue = texture2D(weights, wUV).r;
+            float wCPerm = ${fSize}.0 - 1.0 - wC_float;
 
-            dotProd += xValue * wValue;
+            for (int d1 = 0; d1 < ${origOutputDepth}; d1++) {
+              float d1_float = float(d1);
+              float xValue = getX(xR, xC, d1_float);
+              float wValue = getW(wRPerm, wCPerm, d2, d1_float);
+              dotProd += xValue * wValue;
+            }
           }
         }
+        ${biasSnippet}
+        setOutput(dotProd);
       }
-      ${biasOperation}
-      gl_FragColor = vec4(dotProd, 0, 0, 0);
-    }`;
+    `;
+  }
 }
 
-export function getFragmentShaderDerBiasSource(
-    dyShapeRCD: [number, number, number]) {
-  const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD);
-  const [yNumRows, yNumCols, outputDepth] = dyShapeRCD;
-
-  return `
-    precision highp float;
-    uniform sampler2D dy;
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 dyShapeCR = vec2(${dyTexShapeRC[1]}, ${dyTexShapeRC[0]});
-
-    void main() {
-      vec2 biasTexCR = floor(gl_FragCoord.xy);
-
-      // The bias texture RC shape is [1, d2].
-      float d2 = biasTexCR.x;
-
-      float derBias = 0.0;
-      for (int yR = 0; yR < ${yNumRows}; yR++) {
-        float yTexR = float(yR);
-
-        for (int yC = 0; yC < ${yNumCols}; yC++) {
-          float yC_float = float(yC);
-          // Map from 3D (yR, yC, d2) to 2D (yTexR, yTexC).
-          float yTexC = yC_float * ${outputDepth}.0 + d2;
-
-          // Read dy(yR, yC, d2).
-          vec2 dyUV = (vec2(yTexC, yTexR) + halfCR) / dyShapeCR;
-          float dyValue = texture2D(dy, dyUV).r;
-
-          derBias += dyValue;
+export class Conv2DDerBiasProgram implements GPGPUProgram {
+  variableNames = ['dy'];
+  params: Array<{}> = [];
+  outputShape: number[];
+  userCode: string;
+
+  constructor(yShape: [number, number, number]) {
+    const [yNumRows, yNumCols, outputDepth] = yShape;
+    this.outputShape = [outputDepth];
+    this.userCode = `
+      void main() {
+        float d2 = getOutputCoords();
+
+        float derBias = 0.0;
+        for (int iyR = 0; iyR < ${yNumRows}; iyR++) {
+          float yR = float(iyR);
+          for (int iyC = 0; iyC < ${yNumCols}; iyC++) {
+            float yC = float(iyC);
+            derBias += getDy(yR, yC, d2);
+          }
         }
+        setOutput(derBias);
       }
-      gl_FragColor = vec4(derBias, 0, 0, 0);
-    }`;
-}
-
-export function derBias(
-    gpgpu: GPGPUContext, program: WebGLProgram, dyTex: WebGLTexture,
-    result: WebGLTexture, resultTexShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultTexShapeRC[0], resultTexShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(dyTex, 'dy', 0);
-  gpgpu.executeProgram();
-}
-
-export function derWeights(
-    gpgpu: GPGPUContext, program: WebGLProgram, xTex: WebGLTexture,
-    dyTex: WebGLTexture, result: WebGLTexture,
-    resultTexShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultTexShapeRC[0], resultTexShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(xTex, 'x', 0);
-  gpgpu.setInputMatrixTexture(dyTex, 'dy', 1);
-  gpgpu.executeProgram();
-}
-
-export function convTranspose(
-    gpgpu: GPGPUContext, program: WebGLProgram, xTex: WebGLTexture,
-    weightsTex: WebGLTexture, biasesTex: WebGLTexture|null,
-    resultTex: WebGLTexture, resultTexShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(xTex, 'x', 0);
-  gpgpu.setInputMatrixTexture(weightsTex, 'weights', 1);
-  if (biasesTex != null) {
-    gpgpu.setInputMatrixTexture(biasesTex, 'biases', 2);
+    `;
   }
-  gpgpu.executeProgram();
 }
diff --git a/src/math/webgl/conv_backprop_gpu_derbias_test.ts b/src/math/webgl/conv_backprop_gpu_derbias_test.ts
index efd9c0cb92..75e6f1a690 100644
--- a/src/math/webgl/conv_backprop_gpu_derbias_test.ts
+++ b/src/math/webgl/conv_backprop_gpu_derbias_test.ts
@@ -14,40 +14,32 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array3D, NDArray} from '../ndarray';
+import {Array1D, Array3D, initializeGPU, NDArray} from '../ndarray';
 
-import * as conv_backprop_gpu from './conv_backprop_gpu';
+import {Conv2DDerBiasProgram} from './conv_backprop_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('conv_gpu derBias', () => {
 
   function uploadDerBiasDownload(dy: Array3D): Float32Array {
     const gpgpu = new GPGPUContext();
+    const texManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, texManager);
     gpgpu.enableAutomaticDebugValidation(true);
-    const src = conv_backprop_gpu.getFragmentShaderDerBiasSource(dy.shape);
-    const program = gpgpu.createProgram(src);
-
-    // Upload dy.
-    const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape);
-    const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues());
-
-    const outputDepth = dy.shape[2];
-    const resultTexRC = conv_util.computeBiasesTexShape(outputDepth);
-    const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]);
-    conv_backprop_gpu.derBias(gpgpu, program, dyTex, resultTex, resultTexRC);
-    const db = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexRC[0], resultTexRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(dyTex);
-    gpgpu.deleteProgram(program);
+    const program = new Conv2DDerBiasProgram(dy.shape);
+    const out = Array1D.zeros([dy.shape[2]]);
+    const binary = gpgpu_math.compileProgram(gpgpu, program, [dy], out);
+    gpgpu_math.runProgram(binary, [dy], out);
+    const result = out.getValues();
+
+    texManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
 
-    return db;
+    return result;
   }
 
   function compareToCPU(dyShapeRCD: [number, number, number]) {
diff --git a/src/math/webgl/conv_backprop_gpu_derweights_test.ts b/src/math/webgl/conv_backprop_gpu_derweights_test.ts
index 03129d77ac..33e93cf9bb 100644
--- a/src/math/webgl/conv_backprop_gpu_derweights_test.ts
+++ b/src/math/webgl/conv_backprop_gpu_derweights_test.ts
@@ -16,10 +16,12 @@ limitations under the License.
 import * as test_util from '../../test_util';
 import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array3D, NDArray} from '../ndarray';
+import {Array3D, Array4D, initializeGPU, NDArray} from '../ndarray';
 
-import * as conv_backprop_gpu from './conv_backprop_gpu';
+import {Conv2DDerWeightsProgram} from './conv_backprop_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('conv_gpu derWeights', () => {
 
@@ -27,40 +29,24 @@ describe('conv_gpu derWeights', () => {
       x: Array3D, dy: Array3D, fSize: number, stride: number,
       zeroPad: number): Float32Array {
     const gpgpu = new GPGPUContext();
+    const texManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, texManager);
     gpgpu.enableAutomaticDebugValidation(true);
     const outputDepth = dy.shape[2];
-    const src = conv_backprop_gpu.getFragmentShaderDerWeightsSource(
+    const inDepth = x.shape[2];
+    const program = new Conv2DDerWeightsProgram(
         x.shape, fSize, outputDepth, stride, zeroPad);
-    const program = gpgpu.createProgram(src);
-    const inputDepth = x.shape[2];
-
-    // Upload x.
-    const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape);
-    const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues());
-
-    // Upload dy.
-    const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape);
-    const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues());
-
-    const resultTexRC =
-        conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize);
-    const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]);
-    conv_backprop_gpu.derWeights(
-        gpgpu, program, xTex, dyTex, resultTex, resultTexRC);
-    const dw = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexRC[0], resultTexRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteMatrixTexture(dyTex);
-    gpgpu.deleteProgram(program);
+    const out = Array4D.zeros(
+        conv_util.computeWeightsShape4D(inDepth, outputDepth, fSize));
+    const binary = gpgpu_math.compileProgram(gpgpu, program, [x, dy], out);
+    gpgpu_math.runProgram(binary, [x, dy], out);
+    const result = out.getValues();
+
+    texManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
 
-    return dw;
+    return result;
   }
 
   function compareToCPU(
diff --git a/src/math/webgl/conv_backprop_transpose_gpu_test.ts b/src/math/webgl/conv_backprop_transpose_gpu_test.ts
index 9cf4ba3c9a..0227f80ab4 100644
--- a/src/math/webgl/conv_backprop_transpose_gpu_test.ts
+++ b/src/math/webgl/conv_backprop_transpose_gpu_test.ts
@@ -14,72 +14,36 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array1D, Array3D, Array4D, NDArray} from '../ndarray';
+import {Array1D, Array3D, Array4D, initializeGPU, NDArray} from '../ndarray';
 
-import * as conv_backprop_gpu from './conv_backprop_gpu';
+import {Conv2DTransposeProgram} from './conv_backprop_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('conv_gpu transpose', () => {
 
   function uploadConvTransposeDownload(
-      x: Array3D, weights: Array4D, biases: Array1D|null, fSize: number,
+      x: Array3D, W: Array4D, bias: Array1D|null, fSize: number,
       origStride: number, origPad: number): Float32Array {
     const gpgpu = new GPGPUContext();
     gpgpu.enableAutomaticDebugValidation(true);
-    const origInputDepth = weights.shape[2];
-    const origOutputDepth = weights.shape[3];
-    const src = conv_backprop_gpu.getFragmentShaderConvTransposeSource(
-        x.shape, fSize, origInputDepth, origStride, origPad, biases != null);
-    const program = gpgpu.createProgram(src);
-
-    // Upload x.
-    const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape);
-    const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues());
-
-    // Upload weights.
-    const wTexShapeRC = conv_util.computeWeightsTexShape(
-        origInputDepth, origOutputDepth, fSize);
-    const wTex = gpgpu.createMatrixTexture(wTexShapeRC[0], wTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        wTex, wTexShapeRC[0], wTexShapeRC[1], weights.getValues());
-
-    const biasTexShapeRC = conv_util.computeBiasesTexShape(origInputDepth);
-    const biasTex = biases != null ?
-        gpgpu.createMatrixTexture(biasTexShapeRC[0], biasTexShapeRC[1]) :
-        null;
-    if (biasTex != null) {
-      gpgpu.uploadMatrixToTexture(
-          biasTex, biasTexShapeRC[0], biasTexShapeRC[1], biases!.getValues());
-    }
-
-    // Figure out the output shape by dilating the input.
-    const xRowsDilated = (x.shape[0] - 1) * origStride + 1;
-    const xColsDilated = (x.shape[1] - 1) * origStride + 1;
-    const pad = fSize - 1 - origPad;
-    const resultShapeRCD = conv_util.computeOutputShape3D(
-        [xRowsDilated, xColsDilated, origOutputDepth], fSize, origInputDepth, 1,
-        pad);
-    const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-    const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]);
-    conv_backprop_gpu.convTranspose(
-        gpgpu, program, xTex, wTex, biasTex, resultTex, resultTexRC);
-    const y = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexRC[0], resultTexRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteMatrixTexture(wTex);
-    if (biasTex != null) {
-      gpgpu.deleteMatrixTexture(biasTex);
-    }
-    gpgpu.deleteProgram(program);
+    const textureManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, textureManager);
+    const origInputDepth = W.shape[2];
+    const program = new Conv2DTransposeProgram(
+        x.shape, fSize, origInputDepth, origStride, origPad, bias != null);
+    const res = NDArray.zeros(program.outputShape);
+    const inputs = bias != null ? [x, W, bias] : [x, W];
+    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res);
+    gpgpu_math.runProgram(binary, inputs, res);
+    const resValues = res.getValues();
+
+    textureManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
-
-    return y;
+    return resValues;
   }
 
   function compareToCPU(
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index e05bc1b048..6345fc50fe 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -22,12 +22,13 @@ export class Conv2DProgram implements GPGPUProgram {
   outputShape: number[];
   userCode: string;
 
-  constructor(xShape: [number, number, number], fieldSize: number,
-      outputDepth: number, stride: number, pad: number, hasBias: boolean) {
-    this.outputShape = conv_util.computeOutputShape3D(xShape,
-      fieldSize, outputDepth, stride, pad);
+  constructor(
+      xShape: [number, number, number], fieldSize: number, outputDepth: number,
+      stride: number, pad: number, hasBias: boolean) {
+    this.outputShape = conv_util.computeOutputShape3D(
+        xShape, fieldSize, outputDepth, stride, pad);
     const inputDepth = xShape[2];
-    this.params = [inputDepth, fieldSize, stride, pad, hasBias];
+    this.params = [fieldSize, stride, pad, hasBias];
 
     this.userCode = `
       void main() {
@@ -60,42 +61,13 @@ export class Conv2DProgram implements GPGPUProgram {
             }
           }
         }
-        ${hasBias ? 'dotProd += getBias(d2);' : ''}
+        ${hasBias ?
+        'dotProd += getBias(d2);' :
+        ''
+        }
+
         setOutput(dotProd);
       }
     `;
   }
 }
-
-export function getFragmentShaderPrologueSource(): string {
-  return `
-    precision highp float;
-    uniform sampler2D x;
-    uniform sampler2D weights;
-    uniform sampler2D biases;
-    varying vec2 resultUV;`;
-}
-
-export function getFragmentShaderGetMatrixValueOrZeroPadSource(): string {
-  return `
-    float getMatrixValueOrZeroPad(in sampler2D matrix, vec2 matrixShapeCR,
-        vec2 requestedCR) {
-      vec2 uv = (requestedCR + vec2(0.5, 0.5)) / matrixShapeCR;
-      float value = texture2D(matrix, uv).r;
-      bool lessThanZero = any(lessThan(uv, vec2(0, 0)));
-      bool greaterThanOne = any(greaterThan(uv, vec2(1, 1)));
-      bool outside = lessThanZero || greaterThanOne;
-      return mix(value, 0.0, float(outside));
-    }`;
-}
-
-export function getFragmentShaderGetBiasValueSource(outputDepth: number):
-    string {
-  return `
-    float getBiasValue(in sampler2D bias, float biasC) {
-      const vec2 biasShapeCR = vec2(${outputDepth}, 1);
-      vec2 biasCR = vec2(mod(biasC, ${outputDepth}.0), 0);
-      vec2 biasUV = (biasCR + vec2(0.5, 0.5)) / biasShapeCR;
-      return texture2D(bias, biasUV).r;
-    }`;
-}
diff --git a/src/math/webgl/conv_gpu_getbiasvalue_test.ts b/src/math/webgl/conv_gpu_getbiasvalue_test.ts
deleted file mode 100644
index 62046c36c7..0000000000
--- a/src/math/webgl/conv_gpu_getbiasvalue_test.ts
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as conv_gpu from './conv_gpu';
-import {GPGPUContext} from './gpgpu_context';
-
-describe('conv_gpu getBiasValue', () => {
-  function createGetBiasValueProgram(
-      gpgpu: GPGPUContext, outputDepth: number): WebGLProgram {
-    const prologue = conv_gpu.getFragmentShaderPrologueSource();
-    const uniforms = 'uniform float biasC;';
-    const getBiasValue =
-        conv_gpu.getFragmentShaderGetBiasValueSource(outputDepth);
-    const main = `
-      void main() {
-        gl_FragColor = vec4(getBiasValue(biases, biasC), 0, 0, 0);
-      }`;
-
-    const src = [prologue, uniforms, getBiasValue, main].join('\n');
-    return gpgpu.createProgram(src);
-  }
-
-  function uploadGetBiasValueDownload(
-      biases: Float32Array, biasCol: number, outputDepth: number): number {
-    const gpgpu = new GPGPUContext();
-    const program = createGetBiasValueProgram(gpgpu, outputDepth);
-    const biasesTex = gpgpu.createMatrixTexture(1, outputDepth);
-    const resultTex = gpgpu.createMatrixTexture(1, 1);
-    gpgpu.uploadMatrixToTexture(biasesTex, 1, outputDepth, biases);
-    gpgpu.setOutputMatrixTexture(resultTex, 1, 1);
-    gpgpu.setProgram(program);
-    gpgpu.setInputMatrixTexture(biasesTex, 'biases', 2);
-    gpgpu.gl.uniform1f(gpgpu.getUniformLocation('biasC'), biasCol);
-    gpgpu.executeProgram();
-    const result = gpgpu.downloadMatrixFromTexture(resultTex, 1, 1)[0];
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(biasesTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
-
-  it('returns the only bias value if output depth is 1', () => {
-    const biases = new Float32Array([4]);
-    const result = uploadGetBiasValueDownload(biases, 0, 1);
-    expect(result).toEqual(4);
-  });
-
-  it('returns the requested column if < output depth', () => {
-    const biases = new Float32Array([1, 2, 3, 4, 5]);
-    const result =
-        uploadGetBiasValueDownload(biases, biases.length - 1, biases.length);
-    expect(result).toEqual(5);
-  });
-
-  it('wraps around to column 0 if column == output depth', () => {
-    const biases = new Float32Array([6, 0, 0]);
-    const result = uploadGetBiasValueDownload(biases, 3, 3);
-    expect(result).toEqual(6);
-  });
-
-  it('wraps around twice if column == 2*output depth', () => {
-    const biases = new Float32Array([7, 0, 0]);
-    const result = uploadGetBiasValueDownload(biases, 6, 3);
-    expect(result).toEqual(7);
-  });
-
-  it('selects value from column mod(biasC, outputDepth)', () => {
-    const biases = new Float32Array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
-    const result = uploadGetBiasValueDownload(biases, 2017, biases.length);
-    expect(result).toEqual(biases[2017 % biases.length]);
-  });
-});
diff --git a/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts b/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts
deleted file mode 100644
index 48d8c3d687..0000000000
--- a/src/math/webgl/conv_gpu_getmatrixvalueorzeropad_test.ts
+++ /dev/null
@@ -1,139 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as conv_gpu from './conv_gpu';
-import {GPGPUContext} from './gpgpu_context';
-
-describe('conv_gpu getMatrixValueOrZeroPad', () => {
-  function createGetMatrixValueOrZeroPadProgram(
-      gpgpu: GPGPUContext, shapeRowCol: [number, number]): WebGLProgram {
-    const prologue = conv_gpu.getFragmentShaderPrologueSource();
-
-    const uniformColRow = 'uniform vec2 colRow;';
-
-    const getMatrixValueOrZeroPad =
-        conv_gpu.getFragmentShaderGetMatrixValueOrZeroPadSource();
-
-    const main = `
-        void main() {
-          const vec2 aShapeCR = vec2(${shapeRowCol[1]}, ${shapeRowCol[0]});
-          float value = getMatrixValueOrZeroPad(x, aShapeCR, colRow);
-          gl_FragColor = vec4(value, 0, 0, 0);
-        }`;
-
-    const src =
-        [prologue, uniformColRow, getMatrixValueOrZeroPad, main].join('\n');
-    return gpgpu.createProgram(src);
-  }
-
-  function uploadGetMatrixValueOrZeroPadDownload(
-      matrix: Float32Array, shapeRowCol: [number, number],
-      paramRowCol: [number, number]): number {
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const program: WebGLProgram =
-        createGetMatrixValueOrZeroPadProgram(gpgpu, shapeRowCol);
-
-    const matrixTexture =
-        gpgpu.createMatrixTexture(shapeRowCol[0], shapeRowCol[1]);
-    const resultTexture = gpgpu.createMatrixTexture(1, 1);
-
-    gpgpu.uploadMatrixToTexture(
-        matrixTexture, shapeRowCol[0], shapeRowCol[1], matrix);
-
-    gpgpu.setOutputMatrixTexture(resultTexture, 1, 1);
-    gpgpu.setProgram(program);
-    gpgpu.setInputMatrixTexture(matrixTexture, 'x', 0);
-    const loc = gpgpu.getUniformLocation('colRow');
-    gpgpu.gl.uniform2f(loc, paramRowCol[1], paramRowCol[0]);
-    gpgpu.executeProgram();
-    const result = gpgpu.downloadMatrixFromTexture(resultTexture, 1, 1);
-    gpgpu.deleteMatrixTexture(resultTexture);
-    gpgpu.deleteMatrixTexture(matrixTexture);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result[0];
-  }
-
-  it('returns only value of a 1x1 matrix when row and column are 0', () => {
-    const a = new Float32Array([1.23]);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [1, 1], [0, 0]);
-    expect(result).toBeCloseTo(a[0]);
-  });
-
-  it('returns value of matrix cell at specified row and column', () => {
-    const a = new Float32Array(32 * 64);
-    a[5 + (30 * 64)] = Math.PI;
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [32, 64], [30, 5]);
-    expect(result).toBeCloseTo(Math.PI);
-  });
-
-  it('returns zero if sampling out-of-bounds left', () => {
-    const a = new Float32Array(4 * 4);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [4, 4], [0, -1]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds right', () => {
-    const a = new Float32Array(4 * 4);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [4, 4], [0, 15]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds top', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, 0]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds bottom', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, 0]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds upper-left', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, -1]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds upper-right', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [-1, 36]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds lower-left', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, -1]);
-    expect(result).toEqual(0);
-  });
-
-  it('returns zero if sampling out-of-bounds lower-right', () => {
-    const a = new Float32Array(19 * 35);
-    a.fill(1);
-    const result = uploadGetMatrixValueOrZeroPadDownload(a, [19, 35], [20, 36]);
-    expect(result).toEqual(0);
-  });
-});

From d0007df908fdb0a4304fd5d03c45a3251c2b64d3 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Thu, 17 Aug 2017 09:24:35 -0400
Subject: [PATCH 04/10] replace zero-pad sampling with explicit bounds checks

---
 .../conv_transpose_gpu_benchmark.ts           |   4 +-
 demos/benchmarks/logsumexp_gpu_benchmark.ts   |   7 +-
 src/math/ndarray_test.ts                      |   4 +-
 src/math/webgl/addscaledmat_gpu_test.ts       |   3 +-
 src/math/webgl/conv_backprop_gpu.ts           |  14 ++-
 src/math/webgl/conv_gpu.ts                    |  20 +--
 src/math/webgl/gpgpu_math.ts                  |  21 ++--
 src/math/webgl/shader_compiler.ts             | 114 +++++++++++-------
 src/math/webgl/webgl_util.ts                  |  36 +++---
 9 files changed, 132 insertions(+), 91 deletions(-)

diff --git a/demos/benchmarks/conv_transpose_gpu_benchmark.ts b/demos/benchmarks/conv_transpose_gpu_benchmark.ts
index 86074a08fc..bd34e097cf 100644
--- a/demos/benchmarks/conv_transpose_gpu_benchmark.ts
+++ b/demos/benchmarks/conv_transpose_gpu_benchmark.ts
@@ -21,12 +21,12 @@ import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
 import {TextureManager} from '../../src/math/webgl/texture_manager';
 import {BenchmarkTest} from './benchmark';
 
-const OP_RUNS = 100;
+const OP_RUNS = 40;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
   const origInputDepth = 1;
   const origOutputDepth = 2;
-  const xShape: [number, number, number] = [size, size, origOutputDepth];
+  const xShape: [number, number, number] = [size, size, 1];
   const fieldSize = 11;
   const origStride = 1;
   const origPad = 1;
diff --git a/demos/benchmarks/logsumexp_gpu_benchmark.ts b/demos/benchmarks/logsumexp_gpu_benchmark.ts
index 6afc05f4d0..651d992c78 100644
--- a/demos/benchmarks/logsumexp_gpu_benchmark.ts
+++ b/demos/benchmarks/logsumexp_gpu_benchmark.ts
@@ -13,14 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+import {Array2D, initializeGPU, Scalar} from '../../src/math/ndarray';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
-import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu';
 import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
-import {Scalar, Array2D, initializeGPU} from '../../src/math/ndarray';
+import {LogSumExpProgram} from '../../src/math/webgl/logsumexp_gpu';
 import {TextureManager} from '../../src/math/webgl/texture_manager';
+
 import {BenchmarkTest} from './benchmark';
 
-const OP_RUNS = 100;
+const OP_RUNS = 2;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
   const gpgpu = new GPGPUContext();
diff --git a/src/math/ndarray_test.ts b/src/math/ndarray_test.ts
index 143c51f063..48e966dd04 100644
--- a/src/math/ndarray_test.ts
+++ b/src/math/ndarray_test.ts
@@ -285,9 +285,9 @@ describe('NDArray', () => {
     expect(t.getTextureShapeRC()).toEqual([2, 4]);
   });
 
-  it('preferred texture shape, Array4D is squareish', () => {
+  it('preferred texture shape, Array4D d1 and d2 strided along columns', () => {
     const t = Array4D.zeros([8, 2, 4, 4]);
-    expect(t.getTextureShapeRC()).toEqual([16, 16]);
+    expect(t.getTextureShapeRC()).toEqual([8, 2 * 4 * 4]);
   });
 });  // Close describe.
 
diff --git a/src/math/webgl/addscaledmat_gpu_test.ts b/src/math/webgl/addscaledmat_gpu_test.ts
index 2196f9347d..13f886e830 100644
--- a/src/math/webgl/addscaledmat_gpu_test.ts
+++ b/src/math/webgl/addscaledmat_gpu_test.ts
@@ -14,10 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+import {Array1D, Array2D, initializeGPU, NDArray, Scalar} from '../ndarray';
+
 import {AddScaledMatProgram} from './addscaledmat_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {NDArray, Array1D, Array2D, Scalar, initializeGPU} from '../ndarray';
 import {TextureManager} from './texture_manager';
 
 function cpuAddScaledMatrices(
diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts
index b3df023d3c..568ff131e0 100644
--- a/src/math/webgl/conv_backprop_gpu.ts
+++ b/src/math/webgl/conv_backprop_gpu.ts
@@ -29,6 +29,8 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram {
         xShape, fSize, outputDepth, stride, zeroPad);
     const yNumRows = yShape[0];
     const yNumCols = yShape[1];
+    const xRowsLimit = xShape[0] - 0.5;
+    const xColsLimit = xShape[1] - 0.5;
     this.outputShape =
         conv_util.computeWeightsShape4D(xShape[2], outputDepth, fSize);
     this.params = [stride, zeroPad];
@@ -46,13 +48,17 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram {
         for (int yR = 0; yR < ${yNumRows}; yR++) {
           float yR_float = float(yR);
           float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0;
-
+          if (xR < 0.0 || xR > ${xRowsLimit}) {
+            continue;
+          }
           for (int yC = 0; yC < ${yNumCols}; yC++) {
             float yC_float = float(yC);
             float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0;
-
+            if (xC < 0.0 || xC > ${xColsLimit}) {
+              continue;
+            }
             float dyValue = getDy(yR_float, yC_float, d2);
-            float xValue = getXOrZeroPad(xR, xC, d1);
+            float xValue = getX(xR, xC, d1);
             dotProd += (xValue * dyValue);
           }
         }
@@ -100,8 +106,6 @@ export class Conv2DTransposeProgram implements GPGPUProgram {
         for (int wR = 0; wR < ${fSize}; wR++) {
           float wR_float = float(wR);
           float xR = (xRCorner + wR_float) / ${origStride}.0;
-          // TODO(smilkov): Splice this with another version where you call
-          // getMatrixValueOrZeroPad(). Here and below.
           if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) {
             continue;
           }
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index 6345fc50fe..acf92d10bd 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -29,7 +29,9 @@ export class Conv2DProgram implements GPGPUProgram {
         xShape, fieldSize, outputDepth, stride, pad);
     const inputDepth = xShape[2];
     this.params = [fieldSize, stride, pad, hasBias];
-
+    const biasSnippet = hasBias ? 'dotProd += getBias(d2);' : '';
+    const xRowsLimit = xShape[0] - 0.5;
+    const xColsLimit = xShape[1] - 0.5;
     this.userCode = `
       void main() {
         vec3 coords = getOutputCoords();
@@ -48,24 +50,24 @@ export class Conv2DProgram implements GPGPUProgram {
         for (int wR = 0; wR < ${fieldSize}; wR++) {
           float wR_float = float(wR);
           float xR = xRCorner + wR_float;
-
+          if (xR < 0.0 || xR > ${xRowsLimit}) {
+            continue;
+          }
           for (int wC = 0; wC < ${fieldSize}; wC++) {
             float wC_float = float(wC);
             float xC = xCCorner + wC_float;
-
+            if (xC < 0.0 || xC > ${xColsLimit}) {
+              continue;
+            }
             for (int d1 = 0; d1 < ${inputDepth}; d1++) {
               float d1_float = float(d1);
-              float xValue = getXOrZeroPad(xR, xC, d1_float);
+              float xValue = getX(xR, xC, d1_float);
               float wValue = getW(wR_float, wC_float, d1_float, d2);
               dotProd += xValue * wValue;
             }
           }
         }
-        ${hasBias ?
-        'dotProd += getBias(d2);' :
-        ''
-        }
-
+        ${biasSnippet}
         setOutput(dotProd);
       }
     `;
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index d6b069eafa..56ac04f08e 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+import * as util from '../../util';
 import {NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
 import * as shader_compiler from './shader_compiler';
 import {ShapeInfo} from './shader_compiler';
-import * as util from '../../util';
 
 export interface GPGPUProgram {
   variableNames: string[];
@@ -59,17 +59,15 @@ export function compileProgram<T extends NDArray, K extends NDArray>(
   return {
     program,
     source,
-    webGLProgram: gpgpu.createProgram(source),
-    gpgpu,
-    inShapeInfos,
-    outShapeInfo
+    webGLProgram: gpgpu.createProgram(source), gpgpu, inShapeInfos, outShapeInfo
   };
 }
 
 function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) {
   if (shapeInfos.length !== inputs.length) {
-    throw Error(`Binary was compiled with ${shapeInfos.length} inputs, but ` +
-                `was executed with ${inputs.length} inputs`);
+    throw Error(
+        `Binary was compiled with ${shapeInfos.length} inputs, but ` +
+        `was executed with ${inputs.length} inputs`);
   }
 
   shapeInfos.forEach((s, i) => {
@@ -79,11 +77,13 @@ function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) {
     const texShapeB = inputs[i].getTextureShapeRC();
 
     if (!util.arraysEqual(shapeA, shapeB)) {
-      throw Error(`Binary was compiled with different shapes than ` +
+      throw Error(
+          `Binary was compiled with different shapes than ` +
           `the current args. Shapes ${shapeA} and ${shapeB} must match`);
     }
     if (!util.arraysEqual(texShapeA, texShapeB)) {
-      throw Error(`Binary was compiled with different texture shapes than the` +
+      throw Error(
+          `Binary was compiled with different texture shapes than the` +
           ` current args. Shape ${texShapeA} and ${texShapeB} must match`);
     }
   });
@@ -107,8 +107,7 @@ export function runProgram<T extends NDArray, K extends NDArray>(
 }
 
 export function makeShaderKey(
-    program: GPGPUProgram, inputs: NDArray[],
-    output: NDArray): string {
+    program: GPGPUProgram, inputs: NDArray[], output: NDArray): string {
   const params = program.params;
   const keyStart =
       inputs.concat(output).map(x => x.shape + '_' + x.getTextureShapeRC());
diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts
index 19d91e2ea3..a3af0f741a 100644
--- a/src/math/webgl/shader_compiler.ts
+++ b/src/math/webgl/shader_compiler.ts
@@ -16,8 +16,7 @@ limitations under the License.
 import * as util from '../../util';
 
 export type ShapeInfo = {
-  logicalShape: number[],
-  texShape: [number, number];
+  logicalShape: number[]; texShape: [number, number];
 };
 
 export type InputInfo = {
@@ -25,8 +24,9 @@ export type InputInfo = {
   shapeInfo: ShapeInfo
 };
 
-export function makeShader(inputsInfo: InputInfo[], outputShape: ShapeInfo,
-    userCode: string, broadcast: boolean): string {
+export function makeShader(
+    inputsInfo: InputInfo[], outputShape: ShapeInfo, userCode: string,
+    broadcast: boolean): string {
   const inputPrefixSnippet =
       inputsInfo.map(x => `uniform sampler2D ${x.name};`).join('\n');
   const inputSamplingSnippet =
@@ -54,7 +54,7 @@ function getInputSamplingSnippet(
       res += getSamplerScalar(inInfo.name);
       break;
     case 1:
-      res += getSampler1D(inInfo.name, texShape, shape[0]);
+      res += getSampler1D(inInfo.name, texShape);
       break;
     case 2:
       res += getSampler2D(inInfo.name, shape as [number, number], texShape);
@@ -75,7 +75,8 @@ function getInputSamplingSnippet(
   // If input and output have matching logical shapes, add
   // getTexNameAtOutCoord() method that samples the input texture using the
   // output coordinates.
-  if (broadcast || util.arraysEqual(
+  if (broadcast ||
+      util.arraysEqual(
           inInfo.shapeInfo.logicalShape, outShapeInfo.logicalShape)) {
     res +=
         getSamplerAtOutputCoords(inInfo.name, texShape, outTexShape, broadcast);
@@ -95,11 +96,11 @@ function getOutputSamplingSnippet(
     case 2:
       return getOutput2DCoords(outShape as [number, number], outTexShape);
     case 3:
-      return getOutput3DCoords(outShape as [number, number, number],
-          outTexShape);
+      return getOutput3DCoords(
+          outShape as [number, number, number], outTexShape);
     case 4:
-      return getOutput4DCoords(outShape as [number, number, number, number],
-        outTexShape);
+      return getOutput4DCoords(
+          outShape as [number, number, number, number], outTexShape);
     default:
       throw new Error(
           `${outShape.length}-D output sampling is not yet supported`);
@@ -110,7 +111,7 @@ const SAMPLE_1D_SNIPPET = `
 vec2 UVfrom1D(float texNumR, float texNumC, float index) {
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+  return vec2(texC, texR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -120,7 +121,7 @@ vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row,
   float index = dot(vec2(row, col), vec2(numC, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+  return vec2(texC, texR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -130,7 +131,7 @@ vec2 UVfrom3D(float texNumR, float texNumC, float stride0,
   float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+  return vec2(texC, texR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -142,14 +143,13 @@ vec2 UVfrom4D(float texNumR, float texNumC, float stride0,
                     vec4(stride0, stride1, stride2, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
+  return vec2(texC, texR) / vec2(texNumC, texNumR);
 }
 `;
 
 const SHADER_PREFIX = `
   precision highp float;
   varying vec2 resultUV;
-  const vec2 halfCR = vec2(0.5, 0.5);
 
   float sample(sampler2D texture, vec2 uv) {
     return texture2D(texture, uv).r;
@@ -192,8 +192,8 @@ function getOutput1DCoords(
   `;
 }
 
-function getOutput3DCoords(shape: [number, number, number],
-    texShape: [number, number]): string {
+function getOutput3DCoords(
+    shape: [number, number, number], texShape: [number, number]): string {
   const stride0 = shape[1] * shape[2];
   const stride1 = shape[2];
   return `
@@ -209,8 +209,9 @@ function getOutput3DCoords(shape: [number, number, number],
   `;
 }
 
-function getOutput4DCoords(shape: [number, number, number, number],
-  texShape: [number, number]): string {
+function getOutput4DCoords(
+    shape: [number, number, number, number],
+    texShape: [number, number]): string {
   const stride2 = shape[3];
   const stride1 = shape[2] * stride2;
   const stride0 = shape[1] * stride1;
@@ -257,20 +258,19 @@ function getSamplerScalar(texName: string): string {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   return `
     float ${funcName}() {
-      return sample(${texName}, halfCR);
+      return sample(${texName}, vec2(0.0, 0.0));
     }
   `;
 }
 
-function getSampler1D(
-    texName: string, texShape: [number, number], size: number): string {
+function getSampler1D(texName: string, texShape: [number, number]): string {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   const tR = texShape[0];
   const tC = texShape[1];
   if (texShape[0] === 1 && texShape[1] === 1) {
     return `
       float ${funcName}(float index) {
-        return sample(${texName}, halfCR);
+        return sample(${texName}, vec2(0.0, 0.0));
       }
     `;
   }
@@ -306,22 +306,22 @@ function getSampler3D(
   const tC = texShape[1];
   const stride0 = shape[1] * shape[2];
   const stride1 = shape[2];
-  const [numRows, numCols, numDepths] = shape;
+  if (tC === stride0) {
+    return `
+      float ${funcName}(float row, float col, float depth) {
+        float texR = row;
+        float texC = dot(vec2(col, depth), vec2(${stride1}, 1.0));
+        vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0);
+        return sample(${texName}, uv);
+      }
+    `;
+  }
   return `
     float ${funcName}(float row, float col, float depth) {
       vec2 uv = UVfrom3D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0, row,
         col, depth);
       return sample(${texName}, uv);
     }
-
-    float ${funcName}OrZeroPad(float row, float col, float depth) {
-      vec3 coords = vec3(row, col, depth);
-      bool lessThanZero = any(lessThan(coords, vec3(0.0, 0.0, 0.0)));
-      bool greaterThanSize = any(greaterThan(coords,
-          vec3(${numRows}.0 - 0.5, ${numCols}.0 - 0.5, ${numDepths}.0 - 0.5)));
-      bool outside = lessThanZero || greaterThanSize;
-      return mix(${funcName}(row, col, depth), 0.0, float(outside));
-    }
   `;
 }
 
@@ -335,6 +335,17 @@ function getSampler4D(
   const stride1 = shape[2] * stride2;
   const stride0 = shape[1] * stride1;
 
+  if (tC === stride0) {
+    return `
+      float ${funcName}(float row, float col, float depth, float depth2) {
+        float texR = row;
+        float texC = dot(vec3(col, depth, depth2),
+                         vec3(${stride1}.0, ${stride2}.0, 1.0));
+        vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0);
+        return sample(${texName}, uv);
+      }
+    `;
+  }
   return `
     float ${funcName}(float row, float col, float depth, float depth2) {
       vec2 uv = UVfrom4D(${tR}.0, ${tC}.0, ${stride0}.0, ${stride1}.0,
@@ -353,7 +364,25 @@ function getSampler2D(
   if (util.arraysEqual(shape, texShape)) {
     return `
       float ${funcName}(float row, float col) {
-        vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
+        vec2 uv = vec2(col, row) / vec2(${tC}.0, ${tR}.0);
+        return sample(${texName}, uv);
+      }
+    `;
+  }
+  if (tC === 1) {
+    return `
+      float ${funcName}(float row, float col) {
+        float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0));
+        vec2 uv = vec2(0.5, (index + 0.5) / ${tR}.0);
+        return sample(${texName}, uv);
+      }
+    `;
+  }
+  if (tR === 1) {
+    return `
+      float ${funcName}(float row, float col) {
+        float index = dot(vec2(row, col), vec2(${shape[1]}.0, 1.0));
+        vec2 uv = vec2((index + 0.5) / ${tC}.0, 0.5);
         return sample(${texName}, uv);
       }
     `;
@@ -367,14 +396,14 @@ function getSampler2D(
 }
 
 function getSamplerFlat(texName: string, texShape: [number, number]): string {
-  const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1) +
-      'Flat';
+  const funcName =
+      'get' + texName.charAt(0).toUpperCase() + texName.slice(1) + 'Flat';
   const tNumR = texShape[0];
   const tNumC = texShape[1];
   if (tNumC === 1 && tNumR === 1) {
     return `
       float ${funcName}(float index) {
-        return sample(${texName}, halfCR);
+        return sample(${texName}, vec2(0.0, 0.0));
       }
     `;
   }
@@ -398,16 +427,17 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
     float ${funcName}(float index) {
       float texR = floor(index / ${tNumC}.0);
       float texC = mod(index, ${tNumC}.0);
-      vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0);
+      vec2 uv = vec2(texC, texR) / vec2(${tNumC}.0, ${tNumR}.0);
       return sample(${texName}, uv);
     }
   `;
 }
 
-function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number],
+function getSamplerAtOutputCoords(
+    texName: string, inTexShape: [number, number],
     outTexShape: [number, number], broadcast: boolean) {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1) +
-    'AtOutCoords';
+      'AtOutCoords';
   if (util.arraysEqual(inTexShape, outTexShape)) {
     return `
       float ${funcName}() {
@@ -425,8 +455,8 @@ function getSamplerAtOutputCoords(texName: string, inTexShape: [number, number],
       ${broadcastSnippet}
       float texR = floor(index / ${inTexShape[1]}.0);
       float texC = mod(index, ${inTexShape[1]}.0);
-      vec2 uv = (vec2(texC, texR) + halfCR) /
-                 vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
+      vec2 uv = vec2(texC, texR) /
+                vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
       return sample(${texName}, uv);
     }
   `;
diff --git a/src/math/webgl/webgl_util.ts b/src/math/webgl/webgl_util.ts
index 2f2eac7d9c..39afc20ea7 100644
--- a/src/math/webgl/webgl_util.ts
+++ b/src/math/webgl/webgl_util.ts
@@ -67,10 +67,9 @@ export function isWebGL2Enabled() {
     if (gl != null) {
       WEBGL2_ENABLED = true;
 
-      const loseContextExtension =
-          getExtensionOrThrow(
-              gl as WebGLRenderingContext, 'WEBGL_lose_context') as
-          WebGLLoseContextExtension;
+      const loseContextExtension = getExtensionOrThrow(
+          gl as WebGLRenderingContext,
+          'WEBGL_lose_context') as WebGLLoseContextExtension;
       loseContextExtension.loseContext();
     } else {
       WEBGL2_ENABLED = false;
@@ -86,9 +85,10 @@ export function createWebGLRenderingContextFromCanvas(
   if (isWebGL2Enabled()) {
     gl = canvas.getContext('webgl2', attributes) as WebGLRenderingContext;
   } else {
-    gl = (canvas.getContext('webgl', attributes) ||
-          canvas.getContext('experimental-webgl', attributes)) as
-        WebGLRenderingContext;
+    gl =
+        (canvas.getContext('webgl', attributes) ||
+         canvas.getContext(
+             'experimental-webgl', attributes)) as WebGLRenderingContext;
   }
 
   if (gl == null) {
@@ -379,10 +379,10 @@ function validateTextureUnit(gl: WebGLRenderingContext, textureUnit: number) {
 }
 
 export function getTextureShapeFromLogicalShape(
-    gl: WebGLRenderingContext, logicalShape: number[],
+    gl: WebGLRenderingContext, logShape: number[],
     preferredTexShape?: [number, number]): [number, number] {
   const maxTexSize = queryMaxTextureSize(gl);
-  const size = util.sizeFromShape(logicalShape);
+  const size = util.sizeFromShape(logShape);
   if (preferredTexShape != null) {
     const sizePreferred = util.sizeFromShape(preferredTexShape);
     util.assert(
@@ -395,16 +395,20 @@ export function getTextureShapeFromLogicalShape(
     }
   }
 
-  if (logicalShape.length <= 1 && size <= maxTexSize) {
+  if (logShape.length <= 1 && size <= maxTexSize) {
     return [size, 1];
   } else if (
-      logicalShape.length === 2 && logicalShape[0] <= maxTexSize &&
-      logicalShape[1] <= maxTexSize) {
-    return logicalShape as [number, number];
+      logShape.length === 2 && logShape[0] <= maxTexSize &&
+      logShape[1] <= maxTexSize) {
+    return logShape as [number, number];
   } else if (
-      logicalShape.length === 3 && logicalShape[0] <= maxTexSize &&
-      logicalShape[1] * logicalShape[2] <= maxTexSize) {
-    return [logicalShape[0], logicalShape[1] * logicalShape[2]];
+      logShape.length === 3 && logShape[0] <= maxTexSize &&
+      logShape[1] * logShape[2] <= maxTexSize) {
+    return [logShape[0], logShape[1] * logShape[2]];
+  } else if (
+      logShape.length === 4 && logShape[0] <= maxTexSize &&
+      logShape[1] * logShape[2] * logShape[3] <= maxTexSize) {
+    return [logShape[0], logShape[1] * logShape[2] * logShape[3]];
   } else {
     return util.sizeToSquarishShape(size);
   }

From ad3e6c24770fa0dfe9273c0e4266cf70fcf9afd9 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Thu, 17 Aug 2017 13:57:19 -0400
Subject: [PATCH 05/10] migrate pool ops

---
 demos/benchmarks/conv_gpu_benchmark.ts        |  11 +-
 .../max_pool_backprop_gpu_benchmark.ts        |  80 +++----
 demos/benchmarks/max_pool_gpu_benchmark.ts    | 109 +++-------
 src/math/conv_util.ts                         |  10 -
 src/math/math_gpu.ts                          | 199 ++----------------
 src/math/webgl/argminmax_gpu.ts               |  12 +-
 src/math/webgl/avg_pool_gpu.ts                |  30 ---
 src/math/webgl/avg_pool_gpu_test.ts           |  46 +---
 src/math/webgl/conv_backprop_gpu.ts           |  44 ++--
 src/math/webgl/conv_gpu.ts                    |  24 ++-
 src/math/webgl/conv_gpu_test.ts               |   9 +-
 src/math/webgl/max_pool_backprop_gpu.ts       | 133 ++++++------
 src/math/webgl/max_pool_backprop_gpu_test.ts  |  84 +++-----
 src/math/webgl/max_pool_gpu.ts                |  44 ----
 src/math/webgl/max_pool_gpu_test.ts           |  46 +---
 src/math/webgl/max_pool_positions_gpu_test.ts |  54 ++---
 src/math/webgl/min_pool_gpu.ts                |  30 ---
 src/math/webgl/min_pool_gpu_test.ts           |  47 +----
 src/math/webgl/mulmat_gpu.ts                  |  18 +-
 src/math/webgl/mulmat_packed_gpu.ts           |   6 +-
 src/math/webgl/pool_gpu.ts                    | 175 +++++++--------
 src/math/webgl/pool_gpu_test_util.ts          |  42 ++++
 src/math/webgl/shader_compiler.ts             |  27 +--
 23 files changed, 398 insertions(+), 882 deletions(-)
 delete mode 100644 src/math/webgl/avg_pool_gpu.ts
 delete mode 100644 src/math/webgl/max_pool_gpu.ts
 delete mode 100644 src/math/webgl/min_pool_gpu.ts
 create mode 100644 src/math/webgl/pool_gpu_test_util.ts

diff --git a/demos/benchmarks/conv_gpu_benchmark.ts b/demos/benchmarks/conv_gpu_benchmark.ts
index 8d583bed2f..736563e768 100644
--- a/demos/benchmarks/conv_gpu_benchmark.ts
+++ b/demos/benchmarks/conv_gpu_benchmark.ts
@@ -14,25 +14,28 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../../src/math/conv_util';
+import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray';
 import {Conv2DProgram} from '../../src/math/webgl/conv_gpu';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
 import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
-import {Array1D, Array3D, Array4D, initializeGPU} from '../../src/math/ndarray';
 import {TextureManager} from '../../src/math/webgl/texture_manager';
+
 import {BenchmarkTest} from './benchmark';
 
 const OP_RUNS = 40;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
+  const gpgpu = new GPGPUContext();
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+
   const inputDepth = 1;
   const inputShape: [number, number, number] = [size, size, inputDepth];
   const outputDepth = 1;
   const fieldSize = 11;
   const stride = 1;
   const zeroPad = conv_util.computeDefaultPad(inputShape, fieldSize, stride);
-  const gpgpu = new GPGPUContext();
-  const texManager = new TextureManager(gpgpu);
-  initializeGPU(gpgpu, texManager);
+
   const program = new Conv2DProgram(
       inputShape, fieldSize, outputDepth, stride, zeroPad, true);
   const outputShape = program.outputShape as [number, number, number];
diff --git a/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts b/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts
index 82076e06dd..96ef41ce27 100644
--- a/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts
+++ b/demos/benchmarks/max_pool_backprop_gpu_benchmark.ts
@@ -14,69 +14,49 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../../src/math/conv_util';
+import {Array3D, initializeGPU, NDArray} from '../../src/math/ndarray';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
 // tslint:disable-next-line:max-line-length
-import * as max_pool_backprop_gpu from '../../src/math/webgl/max_pool_backprop_gpu';
-import * as test_util from '../../src/test_util';
-import * as util from '../../src/util';
-
+import {MaxPool2DBackpropProgram} from '../../src/math/webgl/max_pool_backprop_gpu';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
 import {BenchmarkTest} from './benchmark';
 
-const OP_RUNS = 100;
+const OP_RUNS = 40;
 
 export const BENCHMARK_TEST: BenchmarkTest = (size: number) => {
-  const dyShapeRCD: [number, number, number] = [size, size, 1];
-  const outputDepth = 1;
-  const fieldSize = 11;
-  const stride = 1;
-  const zeroPad = conv_util.computeDefaultPad(dyShapeRCD, fieldSize, stride);
-  const outputShapeRCD: [number, number, number] =
-      conv_util.computeOutputShape3D(
-          dyShapeRCD, fieldSize, outputDepth, stride, zeroPad);
-
-  const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD);
-  const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD);
-
   const gpgpu = new GPGPUContext();
-  const program = gpgpu.createProgram(
-      max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop(
-          dyShapeRCD, fieldSize, stride, zeroPad));
-
-  const dyTexture = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]);
-  const maxPositionsTexture =
-      gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]);
-  const outputTexture =
-      gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]);
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
 
-  const dyData =
-      test_util.randomArrayInRange(dyTexShapeRC[0] * dyTexShapeRC[1], -1, 1);
-  const maxPositionsData = new Float32Array(util.sizeFromShape(dyShapeRCD));
-  for (let i = 0; i < maxPositionsData.length; i++) {
-    maxPositionsData[i] = Math.floor(Math.random() * fieldSize * fieldSize);
+  const outputDepth = 1;
+  const dyShape: [number, number, number] = [size, size, outputDepth];
+  const fSize = 11;
+  const stride = 1;
+  const zeroPad = conv_util.computeDefaultPad(dyShape, fSize, stride);
+  const program = new MaxPool2DBackpropProgram(dyShape, fSize, stride, zeroPad);
+  const res = NDArray.zeros(program.outputShape);
+  const dy = Array3D.randUniform(dyShape, -1, 1);
+  const positionsData = new Float32Array(dy.size);
+  for (let i = 0; i < positionsData.length; i++) {
+    positionsData[i] = Math.floor(Math.random() * fSize * fSize);
   }
-
-  gpgpu.uploadMatrixToTexture(
-      dyTexture, dyTexShapeRC[0], dyTexShapeRC[1], dyData);
-  gpgpu.uploadMatrixToTexture(
-      maxPositionsTexture, dyTexShapeRC[0], dyTexShapeRC[1], maxPositionsData);
+  const positions = Array3D.new(dyShape, positionsData);
+  const binary =
+      gpgpu_math.compileProgram(gpgpu, program, [dy, positions], res);
 
   const start = performance.now();
   for (let i = 0; i < OP_RUNS; i++) {
-    max_pool_backprop_gpu.maxPoolBackprop(
-        gpgpu, program, dyTexture, maxPositionsTexture, outputTexture,
-        outputTexShapeRC);
+    gpgpu_math.runProgram(binary, [dy, positions], res);
   }
-
-  gpgpu.downloadMatrixFromTexture(
-      outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]);
-  const end = performance.now();
-
-  const avgTime = (end - start) / OP_RUNS;
-
-  gpgpu.deleteMatrixTexture(dyTexture);
-  gpgpu.deleteMatrixTexture(maxPositionsTexture);
-  gpgpu.deleteMatrixTexture(outputTexture);
-  gpgpu.deleteProgram(program);
+  res.getValues();
+  const avgTime = (performance.now() - start) / OP_RUNS;
+
+  dy.dispose();
+  positions.dispose();
+  res.dispose();
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
   return avgTime;
diff --git a/demos/benchmarks/max_pool_gpu_benchmark.ts b/demos/benchmarks/max_pool_gpu_benchmark.ts
index bb1e6a6e24..ce1b683bfd 100644
--- a/demos/benchmarks/max_pool_gpu_benchmark.ts
+++ b/demos/benchmarks/max_pool_gpu_benchmark.ts
@@ -14,108 +14,53 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../../src/math/conv_util';
+import {Array3D, initializeGPU, NDArray} from '../../src/math/ndarray';
 import {GPGPUContext} from '../../src/math/webgl/gpgpu_context';
-import * as max_pool_gpu from '../../src/math/webgl/max_pool_gpu';
-import * as test_util from '../../src/test_util';
+import * as gpgpu_math from '../../src/math/webgl/gpgpu_math';
+import {Pool2DProgram} from '../../src/math/webgl/pool_gpu';
+import {TextureManager} from '../../src/math/webgl/texture_manager';
 
 import {BenchmarkTest} from './benchmark';
 
 const OP_RUNS = 40;
 
 export const MAX_POOL_BENCHMARK_TEST: BenchmarkTest = (size: number) => {
-  const inputShapeRCD: [number, number, number] = [size, size, 1];
-  const outputDepth = 1;
-  const fieldSize = 11;
-  const stride = 1;
-  const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride);
-  const outputShapeRCD: [number, number, number] =
-      conv_util.computeOutputShape3D(
-          inputShapeRCD, fieldSize, outputDepth, stride, zeroPad);
+  return testMaxPool(size, false);
+};
 
-  const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD);
-  const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD);
+export const MAX_POOL_POSNS_BENCHMARK_TEST: BenchmarkTest = (size: number) => {
+  return testMaxPool(size, true);
+};
 
+function testMaxPool(size: number, positions: boolean): number {
   const gpgpu = new GPGPUContext();
-  const program =
-      gpgpu.createProgram(max_pool_gpu.getFragmentShaderMaxPoolSource(
-          inputShapeRCD, fieldSize, stride, zeroPad));
-
-  const inputTexture =
-      gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]);
-  const outputTexture =
-      gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]);
-
-  const inputData = test_util.randomArrayInRange(
-      inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1);
-
-  gpgpu.uploadMatrixToTexture(
-      inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData);
-
-  const start = performance.now();
-  for (let i = 0; i < OP_RUNS; i++) {
-    max_pool_gpu.maxPoolCommon(
-        gpgpu, program, inputTexture, outputTexture, outputTexShapeRC);
-  }
-
-  gpgpu.downloadMatrixFromTexture(
-      outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]);
-  const end = performance.now();
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
 
-  const avgTime = (end - start) / OP_RUNS;
-
-  gpgpu.deleteMatrixTexture(inputTexture);
-  gpgpu.deleteMatrixTexture(outputTexture);
-  gpgpu.deleteProgram(program);
-  gpgpu.dispose();
-
-  return avgTime;
-};
-
-export const MAX_POOL_POSNS_BENCHMARK_TEST: BenchmarkTest = (size: number) => {
-  const inputShapeRCD: [number, number, number] = [size, size, 1];
   const outputDepth = 1;
+  const xShape: [number, number, number] = [size, size, outputDepth];
   const fieldSize = 11;
   const stride = 1;
-  const zeroPad = conv_util.computeDefaultPad(inputShapeRCD, fieldSize, stride);
-  const outputShapeRCD: [number, number, number] =
-      conv_util.computeOutputShape3D(
-          inputShapeRCD, fieldSize, outputDepth, stride, zeroPad);
-
-  const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD);
-  const outputTexShapeRC = conv_util.computeTexShapeFrom3D(outputShapeRCD);
+  const zeroPad = conv_util.computeDefaultPad(xShape, fieldSize, stride);
 
-  const gpgpu = new GPGPUContext();
-  const program: WebGLProgram =
-      gpgpu.createProgram(max_pool_gpu.getFragmentShaderMaxPoolPositionsSource(
-          inputShapeRCD, fieldSize, stride, zeroPad));
-
-  const inputTexture =
-      gpgpu.createMatrixTexture(inputTexShapeRC[0], inputTexShapeRC[1]);
-  const outputTexture =
-      gpgpu.createMatrixTexture(outputTexShapeRC[0], outputTexShapeRC[1]);
-
-  const inputData = test_util.randomArrayInRange(
-      inputTexShapeRC[0] * inputTexShapeRC[1], -1, 1);
-
-  gpgpu.uploadMatrixToTexture(
-      inputTexture, inputTexShapeRC[0], inputTexShapeRC[1], inputData);
+  const program =
+      new Pool2DProgram(xShape, fieldSize, stride, zeroPad, 'max', positions);
+  const res = NDArray.zeros(program.outputShape);
+  const x = Array3D.randUniform(xShape, -1, 1);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res);
 
   const start = performance.now();
   for (let i = 0; i < OP_RUNS; i++) {
-    max_pool_gpu.maxPoolCommon(
-        gpgpu, program, inputTexture, outputTexture, outputTexShapeRC);
+    gpgpu_math.runProgram(binary, [x], res);
   }
+  res.getValues();
+  const avgTime = (performance.now() - start) / OP_RUNS;
 
-  gpgpu.downloadMatrixFromTexture(
-      outputTexture, outputTexShapeRC[0], outputTexShapeRC[1]);
-  const end = performance.now();
-
-  const avgTime = (end - start) / OP_RUNS;
-
-  gpgpu.deleteMatrixTexture(inputTexture);
-  gpgpu.deleteMatrixTexture(outputTexture);
-  gpgpu.deleteProgram(program);
+  x.dispose();
+  res.dispose();
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
   return avgTime;
-};
\ No newline at end of file
+}
diff --git a/src/math/conv_util.ts b/src/math/conv_util.ts
index 36d8455cda..15f4ecba3d 100644
--- a/src/math/conv_util.ts
+++ b/src/math/conv_util.ts
@@ -55,16 +55,6 @@ export function computeWeightsShape4D(
   return [fSize, fSize, inputDepth, outputDepth];
 }
 
-export function computeWeightsTexShape(
-    inputDepth: number, outputDepth: number,
-    fieldSize: number): [number, number] {
-  return [fieldSize * fieldSize * inputDepth, outputDepth];
-}
-
-export function computeBiasesTexShape(outputDepth: number): [number, number] {
-  return [1, outputDepth];
-}
-
 export function computeDilatedRC(
     rc: [number, number], origStride: number): [number, number] {
   const rowsDilated = (rc[0] - 1) * origStride + 1;
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index 808bf1954a..cd8319d91c 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -23,7 +23,6 @@ import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray';
 import {AddScaledMatProgram} from './webgl/addscaledmat_gpu';
 import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
-import * as avg_pool_gpu from './webgl/avg_pool_gpu';
 import * as batchnorm_gpu from './webgl/batchnorm_gpu';
 import {BinaryOpProgram} from './webgl/binaryop_gpu';
 import * as concat3d_gpu from './webgl/concat3d_gpu';
@@ -36,12 +35,10 @@ import * as gpgpu_math from './webgl/gpgpu_math';
 import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math';
 import * as gpgpu_util from './webgl/gpgpu_util';
 import {LogSumExpProgram} from './webgl/logsumexp_gpu';
-import * as max_pool_backprop_gpu from './webgl/max_pool_backprop_gpu';
-import * as max_pool_gpu from './webgl/max_pool_gpu';
-import * as min_pool_gpu from './webgl/min_pool_gpu';
+import {MaxPool2DBackpropProgram} from './webgl/max_pool_backprop_gpu';
 import {MinMaxProgram} from './webgl/minmax_gpu';
 import {MatMulProgram} from './webgl/mulmat_gpu';
-import * as pool_gpu from './webgl/pool_gpu';
+import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceSumProgram} from './webgl/reducesum_gpu';
 import * as reshape_gpu from './webgl/reshape_gpu';
 import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu';
@@ -50,20 +47,9 @@ import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
 
 const BATCHNORM_PROG = 'batchnorm';
-
 const COPY_PROG = 'copy';
 const CONCAT_PROG = 'concat';
-
-// Element-wise ops.
 const RESHAPE_PROG = 'reshape';
-
-// Convolution.
-const MAX_POOL_PROG = 'maxpool';
-const MAX_POOL_POSITIONS_PROG = 'maxpool_posn';
-const MAX_POOL_BACKPROP_PROG = 'maxpool_backprop';
-const MIN_POOL_PROG = 'minpool';
-const AVG_POOL_PROG = 'avgpool';
-
 const RESIZE_BILINEAR_PROG = 'resizebilin';
 
 function makeCopyProgramName(
@@ -488,50 +474,10 @@ export class NDArrayMathGPU extends NDArrayMath {
       x: Array3D, dy: Array3D, weights: Array4D, stride: number,
       pad: number): {dx: Array3D, dw: Array4D, db: Array1D} {
     const fSize = weights.shape[0];
-    const inputDepth = weights.shape[2];
-    const outputDepth = weights.shape[3];
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-    const wTexShape =
-        conv_util.computeWeightsTexShape(inputDepth, outputDepth, fSize);
-    const yTexShape = conv_util.computeTexShapeFrom3D(dy.shape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    let cleanupX = false;
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    let cleanupW = false;
-    const actualWTexShape = weights.getTextureShapeRC(wTexShape);
-    if (!util.arraysEqual(actualWTexShape, wTexShape)) {
-      weights = this.reshapeTexture(weights, wTexShape);
-      cleanupW = true;
-    }
-
-    let cleanupY = false;
-    const actualYTexShape = dy.getTextureShapeRC(yTexShape);
-    if (!util.arraysEqual(actualYTexShape, yTexShape)) {
-      dy = this.reshapeTexture(dy, yTexShape);
-      cleanupY = true;
-    }
-
     const dw = this.conv2dDerWeights(x, dy, fSize, stride, pad);
     const db = this.conv2dDerBias(dy);
     const dx = this.conv2dTransposeInternal(
         dy, weights, null /** biases */, stride, pad);
-
-    if (cleanupX) {
-      x.dispose();
-    }
-    if (cleanupW) {
-      weights.dispose();
-    }
-    if (cleanupY) {
-      dy.dispose();
-    }
     return {dx, db, dw};
   }
 
@@ -560,149 +506,38 @@ export class NDArrayMathGPU extends NDArrayMath {
     return this.compileAndRun(program, [dY]);
   }
 
-  private pool(
-      program: WebGLProgram, x: Array3D, fSize: number, stride: number,
-      pad: number): Array3D {
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    let cleanupX = false;
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    const resultShape =
-        conv_util.computeOutputShape3D(x.shape, fSize, x.shape[2], stride, pad);
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShape);
-    const poolResultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    pool_gpu.poolCommon(
-        this.gpgpu, program, x.getTexture(), poolResultTex, resultTexShape);
-
-    if (cleanupX) {
-      x.dispose();
-    }
-
-    return NDArray.make<Array3D>(
-        resultShape, {texture: poolResultTex, textureShapeRC: resultTexShape});
-  }
-
   protected maxPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D {
-    const maxPoolProgKey =
-        [MAX_POOL_PROG, x.shape, fSize, stride, pad].join('_');
-    const maxPoolProgram = this.getAndSaveProgram(maxPoolProgKey, () => {
-      return max_pool_gpu.getFragmentShaderMaxPoolSource(
-          x.shape, fSize, stride, pad);
-    });
-
-    return this.pool(maxPoolProgram, x, fSize, stride, pad);
+    const program =
+        new Pool2DProgram(x.shape, fSize, stride, pad, 'max', false);
+    return this.compileAndRun(program, [x]);
   }
 
   protected minPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D {
-    const minPoolProgKey =
-        [MIN_POOL_PROG, x.shape, fSize, stride, pad].join('_');
-    const minPoolProgram = this.getAndSaveProgram(minPoolProgKey, () => {
-      return min_pool_gpu.getFragmentShaderMinPoolSource(
-          x.shape, fSize, stride, pad);
-    });
-
-    return this.pool(minPoolProgram, x, fSize, stride, pad);
+    const program =
+        new Pool2DProgram(x.shape, fSize, stride, pad, 'min', false);
+    return this.compileAndRun(program, [x]);
   }
 
   protected avgPoolInternal(
       x: Array3D, fSize: number, stride: number, pad: number): Array3D {
-    const avgPoolProgKey =
-        [AVG_POOL_PROG, x.shape, fSize, stride, pad].join('_');
-    const avgPoolProgram = this.getAndSaveProgram(avgPoolProgKey, () => {
-      return avg_pool_gpu.getFragmentShaderAvgPoolSource(
-          x.shape, fSize, stride, pad);
-    });
-
-    return this.pool(avgPoolProgram, x, fSize, stride, pad);
+    const program =
+        new Pool2DProgram(x.shape, fSize, stride, pad, 'avg', false);
+    return this.compileAndRun(program, [x]);
   }
 
   protected maxPoolBackpropInternal(
       dy: Array3D, x: Array3D, fSize: number, origStride: number,
       origPad: number): Array3D {
-    const maxPoolPositionsProgKey = [
-      MAX_POOL_POSITIONS_PROG, x.shape, fSize, origStride, origPad
-    ].join('_');
     const maxPoolPositionsProgram =
-        this.getAndSaveProgram(maxPoolPositionsProgKey, () => {
-          return max_pool_gpu.getFragmentShaderMaxPoolPositionsSource(
-              x.shape, fSize, origStride, origPad);
-        });
-
-    const maxPoolResultShape = conv_util.computeOutputShape3D(
-        x.shape, fSize, x.shape[2], origStride, origPad);
-    const maxPoolResultTexShape =
-        conv_util.computeTexShapeFrom3D(maxPoolResultShape);
-    const maxPoolPositionsResultTex =
-        this.textureManager.acquireTexture(maxPoolResultTexShape);
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const xTexShape = conv_util.computeTexShapeFrom3D(x.shape);
-    const actualXTexShape = x.getTextureShapeRC(xTexShape);
-    let cleanupX = false;
-    if (!util.arraysEqual(actualXTexShape, xTexShape)) {
-      x = this.reshapeTexture(x, xTexShape);
-      cleanupX = true;
-    }
-
-    max_pool_gpu.maxPoolCommon(
-        this.gpgpu, maxPoolPositionsProgram, x.getTexture(),
-        maxPoolPositionsResultTex, maxPoolResultTexShape);
-
-    const maxPoolBackpropProgKey = [
-      MAX_POOL_BACKPROP_PROG, dy.shape, fSize, origStride, origPad
-    ].join('_');
-    const program = this.getAndSaveProgram(maxPoolBackpropProgKey, () => {
-      return max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop(
-          dy.shape, fSize, origStride, origPad);
-    });
-
-    const dyTexShape = conv_util.computeTexShapeFrom3D(dy.shape);
+        new Pool2DProgram(x.shape, fSize, origStride, origPad, 'max', true);
+    const maxPoolPositions: Array3D =
+        this.compileAndRun(maxPoolPositionsProgram, [x]);
 
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualDyTexShape = dy.getTextureShapeRC(dyTexShape);
-    let cleanupDy = false;
-    if (!util.arraysEqual(actualDyTexShape, dyTexShape)) {
-      dy = this.reshapeTexture(dy, dyTexShape);
-      cleanupDy = true;
-    }
-
-    const dilatedDyRC =
-        conv_util.computeDilatedRC([dy.shape[0], dy.shape[1]], origStride);
-    const pad = fSize - 1 - origPad;
-    const resultShapeRCD = conv_util.computeOutputShape3D(
-        [dilatedDyRC[0], dilatedDyRC[1], dy.shape[2]], fSize, dy.shape[2], 1,
-        pad);
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    max_pool_backprop_gpu.maxPoolBackprop(
-        this.gpgpu, program, dy.getTexture(), maxPoolPositionsResultTex,
-        resultTex, resultTexShape);
-
-    if (cleanupDy) {
-      dy.dispose();
-    }
-
-    if (cleanupX) {
-      x.dispose();
-    }
-
-    this.textureManager.releaseTexture(
-        maxPoolPositionsResultTex, maxPoolResultTexShape);
-
-    return NDArray.make<Array3D>(
-        resultShapeRCD, {texture: resultTex, textureShapeRC: resultTexShape});
+    const maxPoolBackPropProgram =
+        new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad);
+    return this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]);
   }
 
   protected resizeBilinear3DInternal(
diff --git a/src/math/webgl/argminmax_gpu.ts b/src/math/webgl/argminmax_gpu.ts
index f55e34f01c..7f50b08ab4 100644
--- a/src/math/webgl/argminmax_gpu.ts
+++ b/src/math/webgl/argminmax_gpu.ts
@@ -15,23 +15,23 @@ limitations under the License.
 
 import {GPGPUProgram} from './gpgpu_math';
 
-export function getArgMinMaxSnippet(op: 'min'|'max', texName: string,
-    size: number): string {
+export function getArgMinMaxSnippet(
+    op: 'min'|'max', texName: string, size: number): string {
   const compOp = (op === 'min') ? '<' : '>';
   return `
     float getArgMinMax${texName}() {
       float bestIndex = 0.0;
       float bestValue = get${texName}Flat(0.0);
 
-      for (int i = 0; i < ${size}; i++) {
-        float i_float = float(i);
-        float candidate = get${texName}Flat(i_float);
+      for (int ii = 0; ii < ${size}; ii++) {
+        float i = float(ii);
+        float candidate = get${texName}Flat(i);
         if (isNaN(candidate)) {
           return candidate;
         }
         if (candidate ${compOp} bestValue) {
           bestValue = candidate;
-          bestIndex = i_float;
+          bestIndex = i;
         }
       }
       return bestIndex;
diff --git a/src/math/webgl/avg_pool_gpu.ts b/src/math/webgl/avg_pool_gpu.ts
deleted file mode 100644
index eb59aaf4e9..0000000000
--- a/src/math/webgl/avg_pool_gpu.ts
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import * as pool_gpu from './pool_gpu';
-
-export function getFragmentShaderAvgPoolSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number) {
-  return pool_gpu.getFragmentShaderPoolCommonSource(
-      xShapeRCD, fSize, stride, pad, 'avg', false);
-}
-
-export function avgPool(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol);
-}
\ No newline at end of file
diff --git a/src/math/webgl/avg_pool_gpu_test.ts b/src/math/webgl/avg_pool_gpu_test.ts
index d74c98a38e..db9083cc21 100644
--- a/src/math/webgl/avg_pool_gpu_test.ts
+++ b/src/math/webgl/avg_pool_gpu_test.ts
@@ -14,51 +14,17 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
 import {Array3D, NDArray} from '../ndarray';
+import * as pool_gpu_test_util from './pool_gpu_test_util';
 
-import * as avg_pool_gpu from './avg_pool_gpu';
-import {GPGPUContext} from './gpgpu_context';
 
 describe('avg_pool_gpu', () => {
   function uploadAvgPoolDownload(
-      a: Float32Array, aShapeRowColDepth: [number, number, number],
-      fieldSize: number, stride: number, zeroPad: number): Float32Array {
-    const aTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] =
-        conv_util.computeOutputShape3D(
-            aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride,
-            zeroPad);
-
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = avg_pool_gpu.getFragmentShaderAvgPoolSource(
-        aShapeRowColDepth, fieldSize, stride, zeroPad);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a);
-
-    avg_pool_gpu.avgPool(gpgpu, program, aTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(aTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
+      a: Float32Array, xShape: [number, number, number], fieldSize: number,
+      stride: number, zeroPad: number): Float32Array {
+    return pool_gpu_test_util.uploadPoolDownload(
+        a, xShape, fieldSize, stride, zeroPad, 'avg');
   }
 
   function compareToCPU(
@@ -109,4 +75,4 @@ describe('avg_pool_gpu', () => {
     const zeroPad = 1;
     compareToCPU(inputShape, fSize, stride, zeroPad);
   });
-});
\ No newline at end of file
+});
diff --git a/src/math/webgl/conv_backprop_gpu.ts b/src/math/webgl/conv_backprop_gpu.ts
index 568ff131e0..c662d0e848 100644
--- a/src/math/webgl/conv_backprop_gpu.ts
+++ b/src/math/webgl/conv_backprop_gpu.ts
@@ -45,19 +45,23 @@ export class Conv2DDerWeightsProgram implements GPGPUProgram {
         // Convolve x(?, ?, d1) with dy(:, :, d2) to get dw(wR, wC, d1, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int yR = 0; yR < ${yNumRows}; yR++) {
-          float yR_float = float(yR);
-          float xR = wR + yR_float * ${stride}.0 - ${zeroPad}.0;
+        for (int iyR = 0; iyR < ${yNumRows}; iyR++) {
+          float yR = float(iyR);
+          float xR = wR + yR * ${stride}.0 - ${zeroPad}.0;
+
           if (xR < 0.0 || xR > ${xRowsLimit}) {
             continue;
           }
-          for (int yC = 0; yC < ${yNumCols}; yC++) {
-            float yC_float = float(yC);
-            float xC = wC + yC_float * ${stride}.0 - ${zeroPad}.0;
+
+          for (int iyC = 0; iyC < ${yNumCols}; iyC++) {
+            float yC = float(iyC);
+            float xC = wC + yC * ${stride}.0 - ${zeroPad}.0;
+
             if (xC < 0.0 || xC > ${xColsLimit}) {
               continue;
             }
-            float dyValue = getDy(yR_float, yC_float, d2);
+
+            float dyValue = getDy(yR, yC, d2);
             float xValue = getX(xR, xC, d1);
             dotProd += (xValue * dyValue);
           }
@@ -103,28 +107,30 @@ export class Conv2DTransposeProgram implements GPGPUProgram {
         // Convolve x(?, ?, d1) with w(:, :, d2, d1) to get y(yR, yC, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int wR = 0; wR < ${fSize}; wR++) {
-          float wR_float = float(wR);
-          float xR = (xRCorner + wR_float) / ${origStride}.0;
+        for (int iwR = 0; iwR < ${fSize}; iwR++) {
+          float wR = float(iwR);
+          float xR = (xRCorner + wR) / ${origStride}.0;
+
           if (xR < 0.0 || xR >= ${xRows}.0 || fract(xR) > 0.0) {
             continue;
           }
 
-          float wRPerm = ${fSize}.0 - 1.0 - wR_float;
+          float wRPerm = ${fSize}.0 - 1.0 - wR;
+
+          for (int iwC = 0; iwC < ${fSize}; iwC++) {
+            float wC = float(iwC);
+            float xC = (xCCorner + wC) / ${origStride}.0;
 
-          for (int wC = 0; wC < ${fSize}; wC++) {
-            float wC_float = float(wC);
-            float xC = (xCCorner + wC_float) / ${origStride}.0;
             if (xC < 0.0 || xC >= ${xCols}.0 || fract(xC) > 0.0) {
               continue;
             }
 
-            float wCPerm = ${fSize}.0 - 1.0 - wC_float;
+            float wCPerm = ${fSize}.0 - 1.0 - wC;
 
-            for (int d1 = 0; d1 < ${origOutputDepth}; d1++) {
-              float d1_float = float(d1);
-              float xValue = getX(xR, xC, d1_float);
-              float wValue = getW(wRPerm, wCPerm, d2, d1_float);
+            for (int id1 = 0; id1 < ${origOutputDepth}; id1++) {
+              float d1 = float(id1);
+              float xValue = getX(xR, xC, d1);
+              float wValue = getW(wRPerm, wCPerm, d2, d1);
               dotProd += xValue * wValue;
             }
           }
diff --git a/src/math/webgl/conv_gpu.ts b/src/math/webgl/conv_gpu.ts
index acf92d10bd..5223f9e9ac 100644
--- a/src/math/webgl/conv_gpu.ts
+++ b/src/math/webgl/conv_gpu.ts
@@ -47,22 +47,26 @@ export class Conv2DProgram implements GPGPUProgram {
         // Convolve x(?, ?, d1) with w(:, :, d1, d2) to get y(yR, yC, d2).
         // ? = to be determined. : = across all values in that axis.
         float dotProd = 0.0;
-        for (int wR = 0; wR < ${fieldSize}; wR++) {
-          float wR_float = float(wR);
-          float xR = xRCorner + wR_float;
+        for (int iwR = 0; iwR < ${fieldSize}; iwR++) {
+          float wR = float(iwR);
+          float xR = xRCorner + wR;
+
           if (xR < 0.0 || xR > ${xRowsLimit}) {
             continue;
           }
-          for (int wC = 0; wC < ${fieldSize}; wC++) {
-            float wC_float = float(wC);
-            float xC = xCCorner + wC_float;
+
+          for (int iwC = 0; iwC < ${fieldSize}; iwC++) {
+            float wC = float(iwC);
+            float xC = xCCorner + wC;
+
             if (xC < 0.0 || xC > ${xColsLimit}) {
               continue;
             }
-            for (int d1 = 0; d1 < ${inputDepth}; d1++) {
-              float d1_float = float(d1);
-              float xValue = getX(xR, xC, d1_float);
-              float wValue = getW(wR_float, wC_float, d1_float, d2);
+
+            for (int id1 = 0; id1 < ${inputDepth}; id1++) {
+              float d1 = float(id1);
+              float xValue = getX(xR, xC, d1);
+              float wValue = getW(wR, wC, d1, d2);
               dotProd += xValue * wValue;
             }
           }
diff --git a/src/math/webgl/conv_gpu_test.ts b/src/math/webgl/conv_gpu_test.ts
index 3d5a4c4542..ef33bca443 100644
--- a/src/math/webgl/conv_gpu_test.ts
+++ b/src/math/webgl/conv_gpu_test.ts
@@ -16,11 +16,11 @@ limitations under the License.
 import * as test_util from '../../test_util';
 import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
+import {Array1D, Array3D, Array4D, initializeGPU, NDArray} from '../ndarray';
 
 import {Conv2DProgram} from './conv_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {NDArray, Array1D, Array3D, Array4D, initializeGPU} from '../ndarray';
 import {TextureManager} from './texture_manager';
 
 describe('conv_gpu', () => {
@@ -37,7 +37,6 @@ describe('conv_gpu', () => {
     const wShape =
         conv_util.computeWeightsShape4D(xShapeRCD[2], resultDepth, fieldSize);
     const W = Array4D.new(wShape, weights);
-
     const b = biasVals != null ? Array1D.new(biasVals) : null;
 
     const gpgpu = new GPGPUContext();
@@ -46,12 +45,10 @@ describe('conv_gpu', () => {
     initializeGPU(gpgpu, textureManager);
 
     const program = new Conv2DProgram(
-        xShapeRCD, fieldSize, resultDepth, stride, zeroPad,
-        biasVals != null);
+        xShapeRCD, fieldSize, resultDepth, stride, zeroPad, biasVals != null);
     const res = NDArray.zeros(program.outputShape);
     const inputs = biasVals != null ? [x, W, b] : [x, W];
-    const binary =
-        gpgpu_math.compileProgram(gpgpu, program, inputs, res);
+    const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res);
     gpgpu_math.runProgram(binary, inputs, res);
     const resValues = res.getValues();
 
diff --git a/src/math/webgl/max_pool_backprop_gpu.ts b/src/math/webgl/max_pool_backprop_gpu.ts
index 2b18d3a007..66a97db9ac 100644
--- a/src/math/webgl/max_pool_backprop_gpu.ts
+++ b/src/math/webgl/max_pool_backprop_gpu.ts
@@ -14,88 +14,73 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../conv_util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderMaxPoolBackprop(
-    dyShapeRCD: [number, number, number], fSize: number, origStride: number,
-    origPad: number) {
-  const origInputDepth = dyShapeRCD[2];
-  const pad = fSize - 1 - origPad;
-  const [dyRows, dyCols, depth] = dyShapeRCD;
-
-  const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dyShapeRCD);
-
-  return `
-    precision highp float;
-    uniform sampler2D dy;
-    uniform sampler2D maxPos;
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 dyShapeCR = vec2(${dyTexShapeRC[1]}, ${dyTexShapeRC[0]});
-
-    void main() {
-      vec2 dxTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (dxTexR, dxTexC) to 3D (dxR, dxC, d).
-      float dxR = dxTexCR.y;
-      float dxC = floor(dxTexCR.x / ${origInputDepth}.0);
-      float d = mod(dxTexCR.x, ${origInputDepth}.0);
-
-      vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0);
-      float dyRCorner = dyRCCorner.x;
-      float dyCCorner = dyRCCorner.y;
-
-      // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(yR, dxC, d).
-      // ? = to be determined. : = across all values in that axis.
-      float dotProd = 0.0;
-      for (int wR = 0; wR < ${fSize}; wR++) {
-        float wR_float = float(wR);
-        float dyR = (dyRCorner + wR_float) / ${origStride}.0;
-        // TODO(nsthorat): Splice this with another version where you call
-        // getMatrixValueOrZeroPad(). Here and below.
-        if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) {
-          continue;
-        }
-
-        float dyTexR = dyR;
 
-        for (int wC = 0; wC < ${fSize}; wC++) {
-          float wC_float = float(wC);
-          float dyC = (dyCCorner + wC_float) / ${origStride}.0;
-          if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) {
+import {GPGPUProgram} from './gpgpu_math';
+
+export class MaxPool2DBackpropProgram implements GPGPUProgram {
+  variableNames = ['dy', 'maxPos'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(
+      dyShape: [number, number, number], fSize: number, origStride: number,
+      origPad: number) {
+    const pad = fSize - 1 - origPad;
+    const dyRows = dyShape[0];
+    const dyCols = dyShape[1];
+    this.params = [fSize, origStride, origPad];
+
+    const dilatedDyRC =
+        conv_util.computeDilatedRC([dyShape[0], dyShape[1]], origStride);
+    this.outputShape = conv_util.computeOutputShape3D(
+        [dilatedDyRC[0], dilatedDyRC[1], dyShape[2]], fSize, dyShape[2], 1,
+        pad);
+
+    this.userCode = `
+      void main() {
+        vec3 coords = getOutputCoords();
+        float dxR = coords.x;
+        float dxC = coords.y;
+        float d = coords.z;
+
+        vec2 dyRCCorner = vec2(dxR, dxC) - vec2(${pad}.0, ${pad}.0);
+        float dyRCorner = dyRCCorner.x;
+        float dyCCorner = dyRCCorner.y;
+
+        // Convolve dy(?, ?, d) with pos mask(:, :, d) to get dx(dxR, dxC, d).
+        // ? = to be determined. : = across all values in that axis.
+        float dotProd = 0.0;
+        for (int iwR = 0; iwR < ${fSize}; iwR++) {
+          float wR = float(iwR);
+          float dyR = (dyRCorner + wR) / ${origStride}.0;
+
+          if (dyR < 0.0 || dyR >= ${dyRows}.0 || fract(dyR) > 0.0) {
             continue;
           }
 
-          float dyTexC = dyC * ${depth}.0 + d;
+          for (int iwC = 0; iwC < ${fSize}; iwC++) {
+            float wC = float(iwC);
+            float dyC = (dyCCorner + wC) / ${origStride}.0;
 
-          // Read dy(dyR, dyC, d).
-          vec2 dyUV = (vec2(dyTexC, dyTexR) + halfCR) / dyShapeCR;
-          float dyValue = texture2D(dy, dyUV).r;
+            if (dyC < 0.0 || dyC >= ${dyCols}.0 || fract(dyC) > 0.0) {
+              continue;
+            }
 
-          // Read maxPos(dyR, dyC, d).
-          float maxPosValue =
-              ${fSize * fSize - 1}.0 - texture2D(maxPos, dyUV).r;
+            float dyValue = getDy(dyR, dyC, d);
+            float maxPosValue =
+                ${fSize * fSize - 1}.0 - getMaxPos(dyR, dyC, d);
 
-          // Get the current value, check it against the value from the
-          // position matrix.
-          float curPosValue = wR_float * ${fSize}.0 + wC_float;
-          float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
+            // Get the current value, check it against the value from the
+            // position matrix.
+            float curPosValue = wR * ${fSize}.0 + wC;
+            float mask = float(maxPosValue == curPosValue ? 1.0 : 0.0);
 
-          dotProd += dyValue * mask;
+            dotProd += dyValue * mask;
+          }
         }
+        setOutput(dotProd);
       }
-      gl_FragColor = vec4(dotProd, 0, 0, 0);
-    }`;
-}
-
-export function maxPoolBackprop(
-    gpgpu: GPGPUContext, program: WebGLProgram, dyTex: WebGLTexture,
-    maxPositionsTex: WebGLTexture, resultTex: WebGLTexture,
-    resultTexShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(dyTex, 'dy', 0);
-  gpgpu.setInputMatrixTexture(maxPositionsTex, 'maxPos', 1);
-  gpgpu.executeProgram();
+    `;
+  }
 }
diff --git a/src/math/webgl/max_pool_backprop_gpu_test.ts b/src/math/webgl/max_pool_backprop_gpu_test.ts
index 7eabe43a56..82d73779a4 100644
--- a/src/math/webgl/max_pool_backprop_gpu_test.ts
+++ b/src/math/webgl/max_pool_backprop_gpu_test.ts
@@ -14,13 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array3D, NDArray} from '../ndarray';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
-import * as max_pool_backprop_gpu from './max_pool_backprop_gpu';
-import * as max_pool_gpu from './max_pool_gpu';
+import * as gpgpu_math from './gpgpu_math';
+import {MaxPool2DBackpropProgram} from './max_pool_backprop_gpu';
+import {Pool2DProgram} from './pool_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('max_pool_backprop_gpu', () => {
 
@@ -29,60 +30,29 @@ describe('max_pool_backprop_gpu', () => {
       origPad: number): Float32Array {
     const gpgpu = new GPGPUContext();
     gpgpu.enableAutomaticDebugValidation(true);
-
-    const depth = dy.shape[2];
-    const src = max_pool_backprop_gpu.getFragmentShaderMaxPoolBackprop(
-        dy.shape, fSize, origStride, origPad);
-    const program = gpgpu.createProgram(src);
-
-    // Upload dy.
-    const dyTexShapeRC = conv_util.computeTexShapeFrom3D(dy.shape);
-    const dyTex = gpgpu.createMatrixTexture(dyTexShapeRC[0], dyTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        dyTex, dyTexShapeRC[0], dyTexShapeRC[1], dy.getValues());
-
-    // Upload x.
-    const xTexShapeRC = conv_util.computeTexShapeFrom3D(x.shape);
-    const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-    gpgpu.uploadMatrixToTexture(
-        xTex, xTexShapeRC[0], xTexShapeRC[1], x.getValues());
-
-    // Compute max positions.
-    const maxPoolResultShape = conv_util.computeOutputShape3D(
-        x.shape, fSize, x.shape[2], origStride, origPad);
-    const maxPoolResultTexShape =
-        conv_util.computeTexShapeFrom3D(maxPoolResultShape);
-    const maxPoolPositionsResultTex = gpgpu.createMatrixTexture(
-        maxPoolResultTexShape[0], maxPoolResultTexShape[1]);
-    const maxPoolPositionsSrc =
-        max_pool_gpu.getFragmentShaderMaxPoolPositionsSource(
-            x.shape, fSize, origStride, origPad);
-    const maxPoolPositionsProgram = gpgpu.createProgram(maxPoolPositionsSrc);
-    max_pool_gpu.maxPoolCommon(
-        gpgpu, maxPoolPositionsProgram, xTex, maxPoolPositionsResultTex,
-        maxPoolResultTexShape);
-
-    // Figure out the output shape by dilating the input.
-    const dyRowsDilated = (dy.shape[0] - 1) * origStride + 1;
-    const dyColsDilated = (dy.shape[1] - 1) * origStride + 1;
-    const pad = fSize - 1 - origPad;
-    const resultShapeRCD = conv_util.computeOutputShape3D(
-        [dyRowsDilated, dyColsDilated, depth], fSize, depth, 1, pad);
-    const resultTexRC = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-    const resultTex = gpgpu.createMatrixTexture(resultTexRC[0], resultTexRC[1]);
-    max_pool_backprop_gpu.maxPoolBackprop(
-        gpgpu, program, dyTex, maxPoolPositionsResultTex, resultTex,
-        resultTexRC);
-    const y = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexRC[0], resultTexRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(dyTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteProgram(program);
+    const textureManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, textureManager);
+
+    const positionsProgram =
+        new Pool2DProgram(x.shape, fSize, origStride, origPad, 'max', true);
+    const positionsRes = NDArray.zeros(positionsProgram.outputShape);
+    const positionsBinary =
+        gpgpu_math.compileProgram(gpgpu, positionsProgram, [x], positionsRes);
+    gpgpu_math.runProgram(positionsBinary, [x], positionsRes);
+
+    const program =
+        new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad);
+    const res = NDArray.zeros(program.outputShape);
+    const binary =
+        gpgpu_math.compileProgram(gpgpu, program, [dy, positionsRes], res);
+    gpgpu_math.runProgram(binary, [dy, positionsRes], res);
+
+    const resValues = res.getValues();
+
+    textureManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
-
-    return y;
+    return resValues;
   }
 
   function compareToCPU(
diff --git a/src/math/webgl/max_pool_gpu.ts b/src/math/webgl/max_pool_gpu.ts
deleted file mode 100644
index 7cac3f68fc..0000000000
--- a/src/math/webgl/max_pool_gpu.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import * as pool_gpu from './pool_gpu';
-
-export function getFragmentShaderMaxPoolPositionsSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number) {
-  return getFragmentShaderMaxPoolCommonSource(
-      xShapeRCD, fSize, stride, pad, true);
-}
-
-export function getFragmentShaderMaxPoolSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number) {
-  return getFragmentShaderMaxPoolCommonSource(
-      xShapeRCD, fSize, stride, pad, false);
-}
-
-function getFragmentShaderMaxPoolCommonSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number, computeMaxPositions: boolean) {
-  return pool_gpu.getFragmentShaderPoolCommonSource(
-      xShapeRCD, fSize, stride, pad, 'max', computeMaxPositions);
-}
-
-export function maxPoolCommon(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol);
-}
\ No newline at end of file
diff --git a/src/math/webgl/max_pool_gpu_test.ts b/src/math/webgl/max_pool_gpu_test.ts
index 33ae1dfb6e..8f9380ca80 100644
--- a/src/math/webgl/max_pool_gpu_test.ts
+++ b/src/math/webgl/max_pool_gpu_test.ts
@@ -14,52 +14,16 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
 import {Array3D, NDArray} from '../ndarray';
-
-import {GPGPUContext} from './gpgpu_context';
-import * as max_pool_gpu from './max_pool_gpu';
+import * as pool_gpu_test_util from './pool_gpu_test_util';
 
 describe('max_pool_gpu', () => {
   function uploadMaxPoolDownload(
-      a: Float32Array, aShapeRowColDepth: [number, number, number],
-      fieldSize: number, stride: number, zeroPad: number): Float32Array {
-    const aTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] =
-        conv_util.computeOutputShape3D(
-            aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride,
-            zeroPad);
-
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = max_pool_gpu.getFragmentShaderMaxPoolSource(
-        aShapeRowColDepth, fieldSize, stride, zeroPad);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a);
-
-    max_pool_gpu.maxPoolCommon(
-        gpgpu, program, aTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(aTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
+      a: Float32Array, xShape: [number, number, number], fieldSize: number,
+      stride: number, zeroPad: number): Float32Array {
+    return pool_gpu_test_util.uploadPoolDownload(
+        a, xShape, fieldSize, stride, zeroPad, 'max');
   }
 
   function compareToCPU(
diff --git a/src/math/webgl/max_pool_positions_gpu_test.ts b/src/math/webgl/max_pool_positions_gpu_test.ts
index 86f3a8a390..246bbb3586 100644
--- a/src/math/webgl/max_pool_positions_gpu_test.ts
+++ b/src/math/webgl/max_pool_positions_gpu_test.ts
@@ -14,50 +14,34 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
-import {Array3D, NDArray} from '../ndarray';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
-import * as max_pool_gpu from './max_pool_gpu';
+import * as gpgpu_math from './gpgpu_math';
+import {Pool2DProgram} from './pool_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('max_pool_position', () => {
   function uploadMaxPoolPositionDownload(
-      x: Float32Array, xShapeRowColDepth: [number, number, number],
-      fieldSize: number, stride: number, pad: number): Float32Array {
-    const xTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(xShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] =
-        conv_util.computeOutputShape3D(
-            xShapeRowColDepth, fieldSize, xShapeRowColDepth[2], stride, pad);
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
+      xVals: Float32Array, xShape: [number, number, number], fieldSize: number,
+      stride: number, pad: number): Float32Array {
     const gpgpu = new GPGPUContext();
     gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = max_pool_gpu.getFragmentShaderMaxPoolPositionsSource(
-        xShapeRowColDepth, fieldSize, stride, pad);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const xTex = gpgpu.createMatrixTexture(xTexShapeRC[0], xTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(xTex, xTexShapeRC[0], xTexShapeRC[1], x);
-
-    max_pool_gpu.maxPoolCommon(
-        gpgpu, program, xTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteProgram(program);
+    const textureManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, textureManager);
+    const program =
+        new Pool2DProgram(xShape, fieldSize, stride, pad, 'max', true);
+    const res = NDArray.zeros(program.outputShape);
+    const x = Array3D.new(xShape, xVals);
+    const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res);
+    gpgpu_math.runProgram(binary, [x], res);
+    const resValues = res.getValues();
+
+    textureManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
-    return result;
+    return resValues;
   }
 
   function compareToCPU(
diff --git a/src/math/webgl/min_pool_gpu.ts b/src/math/webgl/min_pool_gpu.ts
deleted file mode 100644
index f3b8888356..0000000000
--- a/src/math/webgl/min_pool_gpu.ts
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import * as pool_gpu from './pool_gpu';
-
-export function getFragmentShaderMinPoolSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number) {
-  return pool_gpu.getFragmentShaderPoolCommonSource(
-      xShapeRCD, fSize, stride, pad, 'min', false);
-}
-
-export function minPool(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  pool_gpu.poolCommon(gpgpu, program, x, result, resultShapeRowCol);
-}
\ No newline at end of file
diff --git a/src/math/webgl/min_pool_gpu_test.ts b/src/math/webgl/min_pool_gpu_test.ts
index 1911c649a4..b275cc1035 100644
--- a/src/math/webgl/min_pool_gpu_test.ts
+++ b/src/math/webgl/min_pool_gpu_test.ts
@@ -14,51 +14,16 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
 import {NDArrayMathCPU} from '../math_cpu';
 import {Array3D, NDArray} from '../ndarray';
-
-import {GPGPUContext} from './gpgpu_context';
-import * as min_pool_gpu from './min_pool_gpu';
+import * as pool_gpu_test_util from './pool_gpu_test_util';
 
 describe('min_pool_gpu', () => {
   function uploadMinPoolDownload(
-      a: Float32Array, aShapeRowColDepth: [number, number, number],
-      fieldSize: number, stride: number, zeroPad: number): Float32Array {
-    const aTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] =
-        conv_util.computeOutputShape3D(
-            aShapeRowColDepth, fieldSize, aShapeRowColDepth[2], stride,
-            zeroPad);
-
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = min_pool_gpu.getFragmentShaderMinPoolSource(
-        aShapeRowColDepth, fieldSize, stride, zeroPad);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a);
-
-    min_pool_gpu.minPool(gpgpu, program, aTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(aTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
+      a: Float32Array, xShape: [number, number, number], fieldSize: number,
+      stride: number, zeroPad: number): Float32Array {
+    return pool_gpu_test_util.uploadPoolDownload(
+        a, xShape, fieldSize, stride, zeroPad, 'min');
   }
 
   function compareToCPU(
@@ -109,4 +74,4 @@ describe('min_pool_gpu', () => {
     const zeroPad = 1;
     compareToCPU(inputShape, fSize, stride, zeroPad);
   });
-});
\ No newline at end of file
+});
diff --git a/src/math/webgl/mulmat_gpu.ts b/src/math/webgl/mulmat_gpu.ts
index 069f95cf5d..366991a24a 100644
--- a/src/math/webgl/mulmat_gpu.ts
+++ b/src/math/webgl/mulmat_gpu.ts
@@ -22,7 +22,8 @@ export class MatMulProgram implements GPGPUProgram {
   outputShape: number[];
   userCode: string;
 
-  constructor(aShape: [number, number], bShape: [number, number],
+  constructor(
+      aShape: [number, number], bShape: [number, number],
       aOrient = MatrixOrientation.REGULAR,
       bOrient = MatrixOrientation.REGULAR) {
     this.params = [aOrient, bOrient];
@@ -34,19 +35,19 @@ export class MatMulProgram implements GPGPUProgram {
     this.outputShape = [outerShapeA, outerShapeB];
 
     const sharedDim =
-      (aOrient === MatrixOrientation.REGULAR ? aShape[1] : aShape[0]);
-    const aSnippet = (aOrient === MatrixOrientation.REGULAR) ?
-        'aRow, i_float' : 'i_float, aRow';
-    const bSnippet = (bOrient === MatrixOrientation.REGULAR) ?
-        'i_float, bCol' : 'bCol, i_float';
+        (aOrient === MatrixOrientation.REGULAR ? aShape[1] : aShape[0]);
+    const aSnippet =
+        (aOrient === MatrixOrientation.REGULAR) ? 'aRow, i' : 'i, aRow';
+    const bSnippet =
+        (bOrient === MatrixOrientation.REGULAR) ? 'i, bCol' : 'bCol, i';
 
     this.userCode = `
       const int sharedDim = ${sharedDim};
 
       float dotARowBCol(float aRow, float bCol) {
         float result = 0.0;
-        for (int i = 0; i < sharedDim; i++) {
-          float i_float = float(i);
+        for (int ii = 0; ii < sharedDim; ii++) {
+          float i = float(ii);
           float a = getMatrixA(${aSnippet});
           float b = getMatrixB(${bSnippet});
           result += (a * b);
@@ -61,4 +62,3 @@ export class MatMulProgram implements GPGPUProgram {
     `;
   }
 }
-
diff --git a/src/math/webgl/mulmat_packed_gpu.ts b/src/math/webgl/mulmat_packed_gpu.ts
index e0d29e1dd2..6a7a15fd3d 100644
--- a/src/math/webgl/mulmat_packed_gpu.ts
+++ b/src/math/webgl/mulmat_packed_gpu.ts
@@ -56,9 +56,9 @@ export function getFragmentShaderSource(
 
     vec4 dot2x2ARowBCol() {
       vec4 result = vec4(0, 0, 0, 0);
-      for (int i = 0; i < ${sharedDimensionPacked}; i++) {
-        float i_float = float(i);
-        float center = (i_float + 0.5) / sharedDimension;
+      for (int ii = 0; ii < ${sharedDimensionPacked}; ii++) {
+        float i = float(ii);
+        float center = (i + 0.5) / sharedDimension;
         vec4 a = texture2D(matrixA, vec2(${aSample}));
         vec4 b = texture2D(matrixB, vec2(${bSample}));
         result +=
diff --git a/src/math/webgl/pool_gpu.ts b/src/math/webgl/pool_gpu.ts
index 7de03a4a47..dbac7f5341 100644
--- a/src/math/webgl/pool_gpu.ts
+++ b/src/math/webgl/pool_gpu.ts
@@ -14,111 +14,94 @@ limitations under the License.
 ==============================================================================*/
 
 import * as conv_util from '../conv_util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderPoolCommonSource(
-    xShapeRCD: [number, number, number], fSize: number, stride: number,
-    pad: number, poolType: 'max'|'min'|'avg', computePositions: boolean) {
-  if (poolType === 'avg' && computePositions) {
-    throw new Error('Cannot compute positions for average pool.');
-  }
-
-  const depth = xShapeRCD[2];
-
-  const xTexShapeRC = conv_util.computeTexShapeFrom3D(xShapeRCD);
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Pool2DProgram implements GPGPUProgram {
+  variableNames = ['x'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(
+      xShape: [number, number, number], fSize: number, stride: number,
+      pad: number, poolType: 'max'|'min'|'avg', computePositions: boolean) {
+    if (poolType === 'avg' && computePositions) {
+      throw new Error('Cannot compute positions for average pool.');
+    }
 
-  let returnValue = 'minMaxValue';
-  if (computePositions) {
-    returnValue = 'minMaxPosition';
-  } else if (poolType === 'avg') {
-    returnValue = 'avgValue';
-  }
+    let returnValue = 'minMaxValue';
+    if (computePositions) {
+      returnValue = 'minMaxPosition';
+    } else if (poolType === 'avg') {
+      returnValue = `avgValue / ${fSize * fSize}.0`;
+    }
+    const xRowsLimit = xShape[0] - 0.5;
+    const xColsLimit = xShape[1] - 0.5;
+    this.params = [stride, pad, fSize, computePositions];
+    this.outputShape =
+        conv_util.computeOutputShape3D(xShape, fSize, xShape[2], stride, pad);
+
+    this.userCode = `
+      void main() {
+        vec3 coords = getOutputCoords();
+        float yR = coords.x;
+        float yC = coords.y;
+        float d = coords.z;
+
+        vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}.0, ${stride}.0) -
+            vec2(${pad}.0, ${pad}.0);
+        float xRCorner = xRCCorner.x;
+        float xCCorner = xRCCorner.y;
+
+        // max/min x(?, ?, d) to get y(yR, yC, d).
+        // ? = to be determined
+        float minMaxValue = 0.0;
+        float minMaxValueFound = 0.0;
+        float minMaxPosition = 0.0;
+        float avgValue = 0.0;
+
+        for (int iwR = 0; iwR < ${fSize}; iwR++) {
+          float wR = float(iwR);
+          float xR = xRCorner + wR;
+
+          if (xR < 0.0 || xR > ${xRowsLimit}) {
+            continue;
+          }
 
-  return `
-    precision highp float;
-    uniform sampler2D x;
-    varying vec2 resultUV;
+          for (int iwC = 0; iwC < ${fSize}; iwC++) {
+            float wC = float(iwC);
+            float xC = xCCorner + wC;
 
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
+            if (xC < 0.0 || xC > ${xColsLimit}) {
+              continue;
+            }
 
-    bool isNaN(float val) {
-      return val == val ? false : true;
-    }
+            float value = getX(xR, xC, d);
 
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d2).
-      float yR = yTexCR.y;
-      float yC = floor(yTexCR.x / ${depth}.0);
-      float d = mod(yTexCR.x, ${depth}.0);
-
-      vec2 xRCCorner = vec2(yR, yC) * vec2(${stride}, ${stride}) -
-          vec2(${pad}.0, ${pad}.0);
-      float xRCorner = xRCCorner.x;
-      float xCCorner = xRCCorner.y;
-
-      // max/min x(?, ?, d) to get y(yR, yC, d).
-      // ? = to be determined
-      float minMaxValue = 0.0;
-      float minMaxValueFound = 0.0;
-      float minMaxPosition = 0.0;
-      float avgValue = 0.0;
-
-      for (int wR = 0; wR < ${fSize}; wR++) {
-        float wR_float = float(wR);
-        float xR = xRCorner + wR_float;
-        float xTexR = xR;
-
-        for (int wC = 0; wC < ${fSize}; wC++) {
-          float wC_float = float(wC);
-          float xC = xCCorner + wC_float;
-          float xTexC = xC * ${depth}.0 + d;
-
-          vec2 texCR = vec2(xTexC, xTexR);
-
-          // Check if the requested UV is invalid.
-          vec2 uv = (texCR + halfCR) / xShapeCR;
-          bool lessThanZero = any(lessThan(uv, vec2(0, 0)));
-          bool greaterThanOne = any(greaterThan(uv, vec2(1, 1)));
-          bool outside = lessThanZero || greaterThanOne;
-          if (outside) {
-            continue;
-          }
+            if (isNaN(value)) {
+              setOutput(value);
+              return;
+            }
 
-          float value = texture2D(x, uv).r;
-          if (isNaN(value)) {
-            gl_FragColor = vec4(value, 0, 0, 0);
-            return;
-          }
-          if (${poolType === 'avg'}) {
-            avgValue += value / ${fSize * fSize}.0;
-          } else {
-            // If a min / max value has already been found, use it. If not, use
-            // the current value.
-            float currentMinMaxValue = mix(
-                value, minMaxValue, minMaxValueFound);
-            if (value ${poolType === 'min' ? '<=' : '>='} currentMinMaxValue) {
-              minMaxValue = value;
-              minMaxValueFound = 1.0;
-              if (${computePositions}) {
-                minMaxPosition = wR_float * ${fSize}.0 + wC_float;
+            if (${poolType === 'avg'}) {
+              avgValue += value;
+            } else {
+              // If a min / max value has already been found, use it. If not,
+              // use the current value.
+              float currMinMaxValue = mix(
+                  value, minMaxValue, minMaxValueFound);
+              if (value ${poolType === 'min' ? '<=' : '>='} currMinMaxValue) {
+                minMaxValue = value;
+                minMaxValueFound = 1.0;
+                if (${computePositions}) {
+                  minMaxPosition = wR * ${fSize}.0 + wC;
+                }
               }
             }
           }
         }
+        setOutput(${returnValue});
       }
-      gl_FragColor = vec4(${returnValue}, 0, 0, 0);
-    }`;
-}
-
-export function poolCommon(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(x, 'x', 0);
-  gpgpu.executeProgram();
+    `;
+  }
 }
diff --git a/src/math/webgl/pool_gpu_test_util.ts b/src/math/webgl/pool_gpu_test_util.ts
new file mode 100644
index 0000000000..a06be73b31
--- /dev/null
+++ b/src/math/webgl/pool_gpu_test_util.ts
@@ -0,0 +1,42 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
+
+import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {Pool2DProgram} from './pool_gpu';
+import {TextureManager} from './texture_manager';
+
+export function uploadPoolDownload(
+    a: Float32Array, xShape: [number, number, number], fieldSize: number,
+    stride: number, zeroPad: number, op: 'min'|'max'|'avg'): Float32Array {
+  const gpgpu = new GPGPUContext();
+  gpgpu.enableAutomaticDebugValidation(true);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const x = Array3D.new(xShape, a);
+  const program =
+      new Pool2DProgram(xShape, fieldSize, stride, zeroPad, op, false);
+  const res = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [x], res);
+  gpgpu_math.runProgram(binary, [x], res);
+  const resValues = res.getValues();
+
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+  return resValues;
+}
diff --git a/src/math/webgl/shader_compiler.ts b/src/math/webgl/shader_compiler.ts
index a3af0f741a..19cdbad46a 100644
--- a/src/math/webgl/shader_compiler.ts
+++ b/src/math/webgl/shader_compiler.ts
@@ -111,7 +111,7 @@ const SAMPLE_1D_SNIPPET = `
 vec2 UVfrom1D(float texNumR, float texNumC, float index) {
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return vec2(texC, texR) / vec2(texNumC, texNumR);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -121,7 +121,7 @@ vec2 UVfrom2D(float texNumR, float texNumC, float numC, float row,
   float index = dot(vec2(row, col), vec2(numC, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return vec2(texC, texR) / vec2(texNumC, texNumR);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -131,7 +131,7 @@ vec2 UVfrom3D(float texNumR, float texNumC, float stride0,
   float index = dot(vec3(row, col, depth), vec3(stride0, stride1, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return vec2(texC, texR) / vec2(texNumC, texNumR);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
@@ -143,13 +143,14 @@ vec2 UVfrom4D(float texNumR, float texNumC, float stride0,
                     vec4(stride0, stride1, stride2, 1.0));
   float texR = floor(index / texNumC);
   float texC = mod(index, texNumC);
-  return vec2(texC, texR) / vec2(texNumC, texNumR);
+  return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
 }
 `;
 
 const SHADER_PREFIX = `
   precision highp float;
   varying vec2 resultUV;
+  const vec2 halfCR = vec2(0.5, 0.5);
 
   float sample(sampler2D texture, vec2 uv) {
     return texture2D(texture, uv).r;
@@ -258,7 +259,7 @@ function getSamplerScalar(texName: string): string {
   const funcName = 'get' + texName.charAt(0).toUpperCase() + texName.slice(1);
   return `
     float ${funcName}() {
-      return sample(${texName}, vec2(0.0, 0.0));
+      return sample(${texName}, halfCR);
     }
   `;
 }
@@ -270,7 +271,7 @@ function getSampler1D(texName: string, texShape: [number, number]): string {
   if (texShape[0] === 1 && texShape[1] === 1) {
     return `
       float ${funcName}(float index) {
-        return sample(${texName}, vec2(0.0, 0.0));
+        return sample(${texName}, halfCR);
       }
     `;
   }
@@ -311,7 +312,7 @@ function getSampler3D(
       float ${funcName}(float row, float col, float depth) {
         float texR = row;
         float texC = dot(vec2(col, depth), vec2(${stride1}, 1.0));
-        vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0);
+        vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
@@ -341,7 +342,7 @@ function getSampler4D(
         float texR = row;
         float texC = dot(vec3(col, depth, depth2),
                          vec3(${stride1}.0, ${stride2}.0, 1.0));
-        vec2 uv = vec2(texC, texR) / vec2(${tC}.0, ${tR}.0);
+        vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
@@ -364,7 +365,7 @@ function getSampler2D(
   if (util.arraysEqual(shape, texShape)) {
     return `
       float ${funcName}(float row, float col) {
-        vec2 uv = vec2(col, row) / vec2(${tC}.0, ${tR}.0);
+        vec2 uv = (vec2(col, row) + halfCR) / vec2(${tC}.0, ${tR}.0);
         return sample(${texName}, uv);
       }
     `;
@@ -403,7 +404,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
   if (tNumC === 1 && tNumR === 1) {
     return `
       float ${funcName}(float index) {
-        return sample(${texName}, vec2(0.0, 0.0));
+        return sample(${texName}, halfCR);
       }
     `;
   }
@@ -427,7 +428,7 @@ function getSamplerFlat(texName: string, texShape: [number, number]): string {
     float ${funcName}(float index) {
       float texR = floor(index / ${tNumC}.0);
       float texC = mod(index, ${tNumC}.0);
-      vec2 uv = vec2(texC, texR) / vec2(${tNumC}.0, ${tNumR}.0);
+      vec2 uv = (vec2(texC, texR) + halfCR) / vec2(${tNumC}.0, ${tNumR}.0);
       return sample(${texName}, uv);
     }
   `;
@@ -455,8 +456,8 @@ function getSamplerAtOutputCoords(
       ${broadcastSnippet}
       float texR = floor(index / ${inTexShape[1]}.0);
       float texC = mod(index, ${inTexShape[1]}.0);
-      vec2 uv = vec2(texC, texR) /
-                vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
+      vec2 uv = (vec2(texC, texR) + halfCR) /
+                 vec2(${inTexShape[1]}.0, ${inTexShape[0]}.0);
       return sample(${texName}, uv);
     }
   `;

From 373b27a07b6c35b306c95c714e6677cfd997edac Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Thu, 17 Aug 2017 18:59:30 -0400
Subject: [PATCH 06/10] removing math.reshape

---
 src/graph_runner.ts          |  4 +--
 src/math/math.ts             | 62 +++++++++++++++++-------------------
 src/math/math_cpu.ts         |  5 ---
 src/math/math_gpu.ts         | 33 -------------------
 src/math/webgl/copy2D_gpu.ts | 55 ++++++++++++++++++++++++++++++++
 src/math/webgl/gpgpu_math.ts | 11 +++++--
 src/ops/reshape.ts           | 10 +++---
 7 files changed, 99 insertions(+), 81 deletions(-)
 create mode 100644 src/math/webgl/copy2D_gpu.ts

diff --git a/src/graph_runner.ts b/src/graph_runner.ts
index f5404541a5..03cdfbfdc4 100644
--- a/src/graph_runner.ts
+++ b/src/graph_runner.ts
@@ -211,7 +211,7 @@ export class GraphRunner {
       }
 
     });
-    setTimeout(() => this.trainNetwork());
+    requestAnimationFrame(() => this.trainNetwork());
   }
 
   infer(
@@ -243,7 +243,7 @@ export class GraphRunner {
     this.currentInferenceLoopNumPasses = numPasses;
     if (!this.isInferring) {
       this.inferencePassesThisRun = 0;
-      setTimeout(() => this.inferNetwork());
+      requestAnimationFrame(() => this.inferNetwork());
     }
     this.isInferring = true;
   }
diff --git a/src/math/math.ts b/src/math/math.ts
index 13a02935e7..645f314c71 100644
--- a/src/math/math.ts
+++ b/src/math/math.ts
@@ -71,9 +71,10 @@ export abstract class NDArrayMath {
    */
   enableDebugMode() {
     this.debugMode = true;
-    console.warn('Debugging mode is ON. The output of every math call will ' +
-                  'be downloaded to CPU and checked for NaNs. ' +
-                  'This significantly impacts performance.');
+    console.warn(
+        'Debugging mode is ON. The output of every math call will ' +
+        'be downloaded to CPU and checked for NaNs. ' +
+        'This significantly impacts performance.');
   }
 
   /**
@@ -97,7 +98,7 @@ export abstract class NDArrayMath {
   endScope(result: ScopeResult) {
     let arraysToKeep = this.activeScopeNDArraysToKeep;
     if (result != null) {
-      arraysToKeep = arraysToKeep.concat(result as NDArray|NDArray[]);
+      arraysToKeep = arraysToKeep.concat(result as NDArray | NDArray[]);
     }
     // Dispose the current scope.
     for (let i = 0; i < this.activeScope.length; i++) {
@@ -321,22 +322,15 @@ export abstract class NDArrayMath {
   protected abstract cloneInternal<T extends NDArray>(ndarray: T): T;
 
   /**
-   * Reshapes an NDArray to a new shape. The size of the input NDArray must
-   * match the size of the requested shape.
-   * @param ndarray The input NDArray.
-   * @param newShape The new shape to reshape the NDArray to. Must be the same
-   * size as the NDArray.
+   * @deprecated Please call reshape() directly on the ndarray object.
    */
   reshape<T1 extends NDArray, T2 extends NDArray>(
       ndarray: T1, newShape: number[]): T2 {
-    util.assert(
-        ndarray.size === util.sizeFromShape(newShape),
-        `Error in reshape: old size ${ndarray.size} must match new size ` +
-            `${util.sizeFromShape(newShape)}.`);
-    return this.track(this.reshapeInternal<T1, T2>(ndarray, newShape));
+    console.warn(
+        'math.reshape() is deprecated. Please call reshape() ' +
+        'directly on the ndarray object');
+    return ndarray.reshape(newShape);
   }
-  protected abstract reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2;
 
   /**
    * Extracts a slice from a matrix. The operation extraces a slice from input
@@ -1148,7 +1142,8 @@ export abstract class NDArrayMath {
    * @param h Array of previous cell outputs.
    * @return Tuple [nextCellStates, cellOutputs]
    */
-  multiRNNCell(lstmCells: LSTMCell[], data: Array2D, c: Array2D[],
+  multiRNNCell(
+      lstmCells: LSTMCell[], data: Array2D, c: Array2D[],
       h: Array2D[]): [Array2D[], Array2D[]] {
     util.assert(
         data.shape[0] === 1,
@@ -1187,8 +1182,9 @@ export abstract class NDArrayMath {
    * @param h Previous cell output.
    * @return Tuple [nextCellState, cellOutput]
    */
-  basicLSTMCell(forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D,
-      data: Array2D, c: Array2D, h: Array2D): [Array2D, Array2D] {
+  basicLSTMCell(
+      forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D, data: Array2D,
+      c: Array2D, h: Array2D): [Array2D, Array2D] {
     const res = this.scope(() => {
       util.assert(
           data.shape[0] === 1,
@@ -1207,25 +1203,25 @@ export abstract class NDArrayMath {
 
       // i = input_gate, j = new_input, f = forget_gate, o = output_gate
       const i = this.slice2D(res, [0, 0], [res.shape[0], res.shape[1] / 4]);
-      const j = this.slice2D(res, [0, res.shape[1] / 4 * 1],
-          [res.shape[0], res.shape[1] / 4]);
-      const f = this.slice2D(res, [0, res.shape[1] / 4 * 2],
-          [res.shape[0], res.shape[1] / 4]);
-      const o = this.slice2D(res, [0, res.shape[1] / 4 * 3],
-          [res.shape[0], res.shape[1] / 4]);
-
-      const newC = this.add(
-          this.multiplyStrict(c,
-              this.sigmoid(this.scalarPlusArray(forgetBias, f))),
-          this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
-      const newH = this.multiplyStrict(
-          this.tanh(newC), this.sigmoid(o)) as Array2D;
+      const j = this.slice2D(
+          res, [0, res.shape[1] / 4 * 1], [res.shape[0], res.shape[1] / 4]);
+      const f = this.slice2D(
+          res, [0, res.shape[1] / 4 * 2], [res.shape[0], res.shape[1] / 4]);
+      const o = this.slice2D(
+          res, [0, res.shape[1] / 4 * 3], [res.shape[0], res.shape[1] / 4]);
+
+      const newC =
+          this.add(
+              this.multiplyStrict(
+                  c, this.sigmoid(this.scalarPlusArray(forgetBias, f))),
+              this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
+      const newH =
+          this.multiplyStrict(this.tanh(newC), this.sigmoid(o)) as Array2D;
 
       return [newC, newH];
     });
     return [res[0], res[1]];
   }
-
 }
 
 export enum MatrixOrientation {
diff --git a/src/math/math_cpu.ts b/src/math/math_cpu.ts
index 4298b1974b..7231268000 100644
--- a/src/math/math_cpu.ts
+++ b/src/math/math_cpu.ts
@@ -31,11 +31,6 @@ export class NDArrayMathCPU extends NDArrayMath {
         ndarray.shape, {values: new Float32Array(ndarray.getValues())});
   }
 
-  protected reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2 {
-    return this.cloneInternal(ndarray).reshape<T2>(newShape);
-  }
-
   protected slice2DInternal(
       input: Array2D, beginRowCol: [number, number],
       sizeRowCol: [number, number]): Array2D {
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index cd8319d91c..ac31e7998d 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -105,39 +105,6 @@ export class NDArrayMathGPU extends NDArrayMath {
         ndarray.shape, {texture: resultTexture, textureShapeRC});
   }
 
-  protected reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2 {
-    let newTexShape: [number, number];
-
-    switch (newShape.length) {
-      case 0:
-        newTexShape = [1, 1];
-        break;
-      case 1:
-        newTexShape = [newShape[0], 1];
-        break;
-      case 2:
-        newTexShape = [newShape[0], newShape[1]];
-        break;
-      case 3:
-        newTexShape = [newShape[0], newShape[1] * newShape[2]];
-        break;
-      default:
-        throw Error(
-            `Reshapes into ${newShape.length}-dim ndarray is not yet ` +
-            `supported on GPU`);
-    }
-
-    const actualTexShape = ndarray.getTextureShapeRC(newTexShape);
-    let clonedArray: T1;
-    if (!util.arraysEqual(actualTexShape, newTexShape)) {
-      clonedArray = this.reshapeTexture(ndarray, newTexShape);
-    } else {
-      clonedArray = this.cloneInternal(ndarray);
-    }
-    return clonedArray.reshape<T2>(newShape);
-  }
-
   protected slice2DInternal(
       input: Array2D, beginRowCol: [number, number],
       sizeRowCol: [number, number]): Array2D {
diff --git a/src/math/webgl/copy2D_gpu.ts b/src/math/webgl/copy2D_gpu.ts
new file mode 100644
index 0000000000..af2cbdfc6f
--- /dev/null
+++ b/src/math/webgl/copy2D_gpu.ts
@@ -0,0 +1,55 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {GPGPUContext} from './gpgpu_context';
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Copy2DProgram implements GPGPUProgram {
+  variableNames = ['source'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(
+      sourceShape: [number, number], sourceSize: [number, number],
+      destSize: [number, number]) {
+    this.userCode = `
+      uniform vec2 sourceStartCR;
+      uniform vec2 destStartCR;
+
+      void main() {
+        vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR;
+        float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x;
+        vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x),
+          floor(destOffsetFlat / sourceSizeCR.x));
+        vec2 sourceCR = sourceStartCR + sourceOffsetCR;
+        setOutput(getSource(sourceCR.y, sourceCR.x));
+      }
+    `;
+  }
+}
+
+export function getCustomSetupFunc(
+    sourceStart: [number, number], destStart: [number, number],
+    destSize: [number, number]) {
+  return (gpgpu: GPGPUContext) => {
+    gpgpu.setOutputMatrixWriteRegion(
+        destStart[0], destSize[0], destStart[1], destSize[1]);
+    const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR');
+    gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[1], sourceStart[0]);
+    const destStartCRLoc = gpgpu.getUniformLocation('destStartCR');
+    gpgpu.gl.uniform2f(destStartCRLoc, destStart[1], destStart[0]);
+  };
+}
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index 56ac04f08e..8126f9190b 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -59,7 +59,10 @@ export function compileProgram<T extends NDArray, K extends NDArray>(
   return {
     program,
     source,
-    webGLProgram: gpgpu.createProgram(source), gpgpu, inShapeInfos, outShapeInfo
+    webGLProgram: gpgpu.createProgram(source),
+    gpgpu,
+    inShapeInfos,
+    outShapeInfo
   };
 }
 
@@ -90,7 +93,8 @@ function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) {
 }
 
 export function runProgram<T extends NDArray, K extends NDArray>(
-    binary: GPGPUBinary, inputs: T[], output: K): void {
+    binary: GPGPUBinary, inputs: T[], output: K,
+    customSetup?: (gpgpu: GPGPUContext) => void): void {
   validateBinaryAndProgram(binary.inShapeInfos, inputs);
   validateBinaryAndProgram([binary.outShapeInfo], [output]);
 
@@ -103,6 +107,9 @@ export function runProgram<T extends NDArray, K extends NDArray>(
     const tex = input.getTexture();
     gpgpu.setInputMatrixTexture(tex, binary.program.variableNames[i], i);
   });
+  if (customSetup != null) {
+    customSetup(gpgpu);
+  }
   gpgpu.executeProgram();
 }
 
diff --git a/src/ops/reshape.ts b/src/ops/reshape.ts
index 1a90af7262..2d5204b5e6 100644
--- a/src/ops/reshape.ts
+++ b/src/ops/reshape.ts
@@ -34,9 +34,8 @@ export class Reshape<T1 extends NDArray, T2 extends NDArray> extends Operation {
   feedForward(math: NDArrayMath, inferenceArrays: TensorArrayMap) {
     const x = inferenceArrays.get(this.xTensor) as T1;
 
-    math.scope((keep) => {
-      inferenceArrays.set(
-          this.yTensor, keep(math.reshape<T1, T2>(x, this.yTensor.shape)));
+    math.scope(keep => {
+      inferenceArrays.set(this.yTensor, keep(x.reshape(this.yTensor.shape)));
     });
   }
 
@@ -45,9 +44,8 @@ export class Reshape<T1 extends NDArray, T2 extends NDArray> extends Operation {
       gradientArrays: TensorArrayMap) {
     const dy = gradientArrays.get(this.yTensor) as T2;
 
-    math.scope((keep) => {
-      gradientArrays.set(
-          this.xTensor, keep(math.reshape<T2, T1>(dy, this.xTensor.shape)));
+    math.scope(keep => {
+      gradientArrays.set(this.xTensor, keep(dy.reshape(this.xTensor.shape)));
     });
   }
 }

From 768e0f7ac14292e47cd2d07e616767f00dc7abf7 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Fri, 18 Aug 2017 21:17:02 -0400
Subject: [PATCH 07/10] migrate copy op to logical

---
 .vscode/settings.json             |  1 +
 src/math/math_gpu.ts              | 57 ++++++------------
 src/math/webgl/copy_gpu.ts        | 81 ++++++++++++--------------
 src/math/webgl/copy_gpu_test.ts   | 97 ++++++++++++++++---------------
 src/math/webgl/mulmat_gpu_test.ts | 52 ++++++-----------
 5 files changed, 125 insertions(+), 163 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 67a611b80a..e1084bbac4 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -14,5 +14,6 @@
   "editor.insertSpaces": true,
   "files.insertFinalNewline": true,
   "editor.detectIndentation": false,
+  "editor.wrappingIndent": "none",
   "typescript.tsdk": "node_modules/typescript/lib"
 }
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index 4d0735f15e..be6ea4d3a0 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -30,6 +30,7 @@ import * as concat3d_gpu from './webgl/concat3d_gpu';
 import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu';
 import {Conv2DProgram} from './webgl/conv_gpu';
 import * as copy_gpu from './webgl/copy_gpu';
+import {Copy2DProgram} from './webgl/copy_gpu';
 import {GPGPUContext} from './webgl/gpgpu_context';
 import * as gpgpu_math from './webgl/gpgpu_math';
 import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math';
@@ -47,20 +48,10 @@ import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
 
 const BATCHNORM_PROG = 'batchnorm';
-const COPY_PROG = 'copy';
 const CONCAT_PROG = 'concat';
 const RESHAPE_PROG = 'reshape';
 const RESIZE_BILINEAR_PROG = 'resizebilin';
 
-function makeCopyProgramName(
-    sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destSizeRowCol: [number, number]): string {
-  const shapeName = `${sourceShapeRowCol[0]}_${sourceShapeRowCol[1]}`;
-  const srcSizeName = `${sourceSizeRowCol[0]}_${sourceSizeRowCol[1]}`;
-  const dstSizeName = `${destSizeRowCol[0]}_${destSizeRowCol[1]}`;
-  return `${COPY_PROG}_${shapeName}_${srcSizeName}_${dstSizeName}`;
-}
-
 export class NDArrayMathGPU extends NDArrayMath {
   private gpgpu: GPGPUContext;
   private textureManager: TextureManager;
@@ -89,20 +80,14 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   protected cloneInternal<T extends NDArray>(ndarray: T): T {
-    const textureShapeRC = ndarray.getTextureShapeRC();
-    const program = this.getAndSaveProgram(
-        makeCopyProgramName(textureShapeRC, textureShapeRC, textureShapeRC),
-        () => copy_gpu.getFragmentShaderSource(
-            textureShapeRC, textureShapeRC, textureShapeRC));
-
-    const resultTexture = this.textureManager.acquireTexture(textureShapeRC);
-
-    copy_gpu.copy(
-        this.gpgpu, program, ndarray.getTexture(), textureShapeRC, [0, 0],
-        textureShapeRC, resultTexture, textureShapeRC, [0, 0], textureShapeRC);
-
-    return NDArray.make<T>(
-        ndarray.shape, {texture: resultTexture, textureShapeRC});
+    const texShape = ndarray.getTextureShapeRC();
+    // Pretend the source was in logical shape that matches the texture shape.
+    const source = ndarray.as2D(texShape[0], texShape[1]);
+    // Do the same for output.
+    const output = this.makeOutputArray(texShape) as Array2D;
+    this.copy2D(source, [0, 0], texShape, output, [0, 0], texShape);
+    // Get back to the original logical shape.
+    return output.reshape(ndarray.shape);
   }
 
   protected slice2DInternal(
@@ -122,17 +107,10 @@ export class NDArrayMathGPU extends NDArrayMath {
       sourceSizeRowCol: [number, number], dest: Array2D,
       destBeginRowCol: [number, number],
       destSizeRowCol: [number, number]): void {
-    const sourceShapeRC = source.getTextureShapeRC();
-    const destShapeRC = dest.getTextureShapeRC();
-    const program = this.getAndSaveProgram(
-        makeCopyProgramName(sourceShapeRC, sourceSizeRowCol, destSizeRowCol),
-        () => copy_gpu.getFragmentShaderSource(
-            sourceShapeRC, sourceSizeRowCol, destSizeRowCol));
-
-    copy_gpu.copy(
-        this.gpgpu, program, source.getTexture(), sourceShapeRC,
-        sourceBeginRowCol, sourceSizeRowCol, dest.getTexture(), destShapeRC,
-        destBeginRowCol, destSizeRowCol);
+    const program = new Copy2DProgram(sourceSizeRowCol[1], destSizeRowCol[1]);
+    const customSetup = copy_gpu.getCustomSetupFunc(
+        sourceBeginRowCol, destBeginRowCol, destSizeRowCol);
+    this.compileAndRun(program, [source], dest, customSetup);
   }
 
   protected concat3DInternal(x1: Array3D, x2: Array3D, axis: number): Array3D {
@@ -202,13 +180,16 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   private compileAndRun<T extends NDArray, K extends NDArray>(
-      program: GPGPUProgram, inputs: T[]): K {
-    const output = this.makeOutputArray<K>(program.outputShape);
+      program: GPGPUProgram, inputs: T[], output?: K,
+      customSetup?: (gpgpu: GPGPUContext) => void): K {
+    if (output == null) {
+      output = this.makeOutputArray<K>(program.outputShape);
+    }
     const key = gpgpu_math.makeShaderKey(program, inputs, output);
     const binary = this.getAndSaveBinary(key, () => {
       return gpgpu_math.compileProgram(this.gpgpu, program, inputs, output);
     });
-    gpgpu_math.runProgram(binary, inputs, output);
+    gpgpu_math.runProgram(binary, inputs, output, customSetup);
     return output;
   }
 
diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts
index 3210644704..709e034d94 100644
--- a/src/math/webgl/copy_gpu.ts
+++ b/src/math/webgl/copy_gpu.ts
@@ -14,50 +14,43 @@ limitations under the License.
 ==============================================================================*/
 
 import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destSizeRowCol: [number, number]): string {
-  return `
-    precision highp float;
-    uniform sampler2D source;
-    uniform vec2 sourceStartCR;
-    uniform vec2 destStartCR;
-
-    const vec2 sourceShapeCR =
-      vec2(${sourceShapeRowCol[1]}, ${sourceShapeRowCol[0]});
-    const vec2 sourceSizeCR =
-      vec2(${sourceSizeRowCol[1]}, ${sourceSizeRowCol[0]});
-    const vec2 destSizeCR =
-      vec2(${destSizeRowCol[1]}, ${destSizeRowCol[0]});
-
-    void main() {
-      vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR;
-      float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x;
-      vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x),
-        floor(destOffsetFlat / sourceSizeCR.x));
-      vec2 sourceCR = sourceStartCR + sourceOffsetCR;
-      vec2 sourceUV = (sourceCR + vec2(0.5, 0.5)) / sourceShapeCR;
-      gl_FragColor = texture2D(source, sourceUV);
-    }`;
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Copy2DProgram implements GPGPUProgram {
+  variableNames = ['source'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  constructor(srcNumCols: number, destNumCols: number) {
+    this.outputShape = null;
+    this.params = [srcNumCols, destNumCols];
+    this.userCode = `
+      uniform vec2 sourceStart;
+      uniform vec2 destStart;
+
+      void main() {
+        vec2 destCoords = getOutputCoords() - destStart;
+        float index = dot(destCoords, vec2(${destNumCols}.0, 1.0));
+        vec2 sourceCoords = sourceStart + vec2(
+          floor(index / ${srcNumCols}.0),
+          mod(index, ${srcNumCols}.0)
+        );
+        setOutput(getSource(sourceCoords.x, sourceCoords.y));
+      }
+    `;
+  }
 }
 
-export function copy(
-    gpgpu: GPGPUContext, program: WebGLProgram, source: WebGLTexture,
-    sourceShapeRowCol: [number, number], sourceStartRowCol: [number, number],
-    sourceSizeRowCol: [number, number], dest: WebGLTexture,
-    destShapeRowCol: [number, number], destStartRowCol: [number, number],
-    destSizeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(dest, destShapeRowCol[0], destShapeRowCol[1]);
-  gpgpu.setOutputMatrixWriteRegion(
-      destStartRowCol[0], destSizeRowCol[0], destStartRowCol[1],
-      destSizeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(source, 'source', 0);
-  const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR');
-  gpgpu.gl.uniform2f(
-      sourceStartCRLoc, sourceStartRowCol[1], sourceStartRowCol[0]);
-  const destStartCRLoc = gpgpu.getUniformLocation('destStartCR');
-  gpgpu.gl.uniform2f(destStartCRLoc, destStartRowCol[1], destStartRowCol[0]);
-  gpgpu.executeProgram();
+export function getCustomSetupFunc(
+    sourceStart: [number, number], destStart: [number, number],
+    destSize: [number, number]) {
+  return (gpgpu: GPGPUContext) => {
+    gpgpu.setOutputMatrixWriteRegion(
+        destStart[0], destSize[0], destStart[1], destSize[1]);
+    const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart');
+    gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]);
+    const destStartCRLoc = gpgpu.getUniformLocation('destStart');
+    gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]);
+  };
 }
diff --git a/src/math/webgl/copy_gpu_test.ts b/src/math/webgl/copy_gpu_test.ts
index 6600995f3b..3a06ae2778 100644
--- a/src/math/webgl/copy_gpu_test.ts
+++ b/src/math/webgl/copy_gpu_test.ts
@@ -14,40 +14,37 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+import {Array2D, initializeGPU} from '../ndarray';
+
 import * as copy_gpu from './copy_gpu';
+import {Copy2DProgram} from './copy_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 function uploadCopyDownload(
-    source: Float32Array, sourceShapeRowCol: [number, number],
-    sourceStartRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destStartRowCol: [number, number], destSizeRowCol: [number, number],
-    dest: Float32Array, destShapeRowCol: [number, number]): Float32Array {
+    srcVals: Float32Array, srcShape: [number, number],
+    srcStart: [number, number], srcSize: [number, number],
+    destStart: [number, number], destSize: [number, number],
+    destVals: Float32Array, destShape: [number, number]): Float32Array {
   const gpgpu = new GPGPUContext();
-  const fragmentShaderSource = copy_gpu.getFragmentShaderSource(
-      sourceShapeRowCol, sourceSizeRowCol, destSizeRowCol);
-  const program = gpgpu.createProgram(fragmentShaderSource);
-
-  const sourceTex =
-      gpgpu.createMatrixTexture(sourceShapeRowCol[0], sourceShapeRowCol[1]);
-  const destTex =
-      gpgpu.createMatrixTexture(destShapeRowCol[0], destShapeRowCol[1]);
-
-  gpgpu.uploadMatrixToTexture(
-      sourceTex, sourceShapeRowCol[0], sourceShapeRowCol[1], source);
-  gpgpu.uploadMatrixToTexture(
-      destTex, destShapeRowCol[0], destShapeRowCol[1], dest);
-
-  copy_gpu.copy(
-      gpgpu, program, sourceTex, sourceShapeRowCol, sourceStartRowCol,
-      sourceSizeRowCol, destTex, destShapeRowCol, destStartRowCol,
-      destSizeRowCol);
-
-  const result = gpgpu.downloadMatrixFromTexture(
-      destTex, destShapeRowCol[0], destShapeRowCol[1]);
-
-  gpgpu.deleteMatrixTexture(sourceTex);
-  gpgpu.deleteMatrixTexture(destTex);
-  gpgpu.deleteProgram(program);
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+
+  const program = new Copy2DProgram(srcSize[1], destSize[1]);
+  const source = Array2D.new(srcShape, srcVals);
+  const dest = Array2D.new(destShape, destVals);
+
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest);
+  const customSetup =
+      copy_gpu.getCustomSetupFunc(srcStart, destStart, destSize);
+  gpgpu_math.runProgram(binary, [source], dest, customSetup);
+  const result = dest.getValues();
+
+  source.dispose();
+  dest.dispose();
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
   return result;
@@ -157,33 +154,37 @@ describe('copy_gpu', () => {
   });
 
   it('accumulates results from previous copies into dest texture', () => {
-    const shapeRC: [number, number] = [10, 10];
-    const sizeRC: [number, number] = [10, 1];
-    const source = new Float32Array(100);
+    const shape: [number, number] = [10, 10];
+    const size: [number, number] = [10, 1];
+    const sourceVals = new Float32Array(100);
     for (let i = 0; i < 100; ++i) {
-      source[i] = i;
+      sourceVals[i] = i;
     }
+
+
     const gpgpu = new GPGPUContext();
-    const program = gpgpu.createProgram(
-        copy_gpu.getFragmentShaderSource(shapeRC, sizeRC, sizeRC));
-    const sourceTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]);
-    const destTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]);
-    gpgpu.uploadMatrixToTexture(sourceTex, shapeRC[0], shapeRC[1], source);
+    const texManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, texManager);
+
+    const program = new Copy2DProgram(size[1], size[1]);
+    const source = Array2D.new(shape, sourceVals);
+    const dest = Array2D.zeros(shape);
+
+    const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest);
 
     for (let i = 0; i < 10; ++i) {
-      copy_gpu.copy(
-          gpgpu, program, sourceTex, shapeRC, [0, i], sizeRC, destTex, shapeRC,
-          [0, i], sizeRC);
+      const offset: [number, number] = [0, i];
+      const customSetup = copy_gpu.getCustomSetupFunc(offset, offset, size);
+      gpgpu_math.runProgram(binary, [source], dest, customSetup);
     }
+    const res = dest.getValues();
 
-    const dest =
-        gpgpu.downloadMatrixFromTexture(destTex, shapeRC[0], shapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(sourceTex);
-    gpgpu.deleteMatrixTexture(destTex);
-    gpgpu.deleteProgram(program);
+    source.dispose();
+    dest.dispose();
+    texManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
 
-    test_util.expectArraysClose(dest, source, 0);
+    test_util.expectArraysClose(res, sourceVals, 0);
   });
 });
diff --git a/src/math/webgl/mulmat_gpu_test.ts b/src/math/webgl/mulmat_gpu_test.ts
index f6da798467..c39526290b 100644
--- a/src/math/webgl/mulmat_gpu_test.ts
+++ b/src/math/webgl/mulmat_gpu_test.ts
@@ -15,11 +15,12 @@ limitations under the License.
 
 import * as test_util from '../../test_util';
 import {MatrixOrientation} from '../math';
-import {Array2D} from '../ndarray';
+import {Array2D, initializeGPU} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
 import {MatMulProgram} from './mulmat_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('mulmat_gpu (1x1 * 1x1)', () => {
   it('returns a 1x1 matrix', () => {
@@ -269,11 +270,11 @@ describe('mulmat_gpu (multiple matrices)', () => {
     const cArr = new Array2D(cShape, {texture: c, textureShapeRC: cShape});
     const rArr = new Array2D(rShape, {texture: r, textureShapeRC: rShape});
     const matMulProgram = new MatMulProgram(aArr.shape, bArr.shape);
-    const axbProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram,
-        [aArr, bArr], abArr);
+    const axbProgram =
+        gpgpu_math.compileProgram(gpgpu, matMulProgram, [aArr, bArr], abArr);
     const matMulProgram2 = new MatMulProgram(abArr.shape, cArr.shape);
-    const abxcProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram2,
-        [abArr, cArr], rArr);
+    const abxcProgram =
+        gpgpu_math.compileProgram(gpgpu, matMulProgram2, [abArr, cArr], rArr);
 
     gpgpu.uploadMatrixToTexture(a, aShape[0], aShape[1], aData);
     gpgpu.uploadMatrixToTexture(b, bShape[0], bShape[1], bData);
@@ -335,41 +336,26 @@ export function uploadMultiplyMatrixDownload(
     bNumRows: number, bNumCols: number,
     aOrientation = MatrixOrientation.REGULAR,
     bOrientation = MatrixOrientation.REGULAR): Float32Array {
-  const outNumRows =
-      (aOrientation === MatrixOrientation.REGULAR) ? aNumRows : aNumCols;
-  const outNumCols =
-      (bOrientation === MatrixOrientation.REGULAR) ? bNumCols : bNumRows;
   const gpgpu = new GPGPUContext();
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+
   const aShape: [number, number] = [aNumRows, aNumCols];
   const bShape: [number, number] = [bNumRows, bNumCols];
-  const outShape: [number, number] = [outNumRows, outNumCols];
-
-  const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-  const aArr = new Array2D(
-      aShape, {texture: aTexture, textureShapeRC: [aNumRows, aNumCols]});
-  const bTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols);
-  const bArr = new Array2D(
-      bShape, {texture: bTexture, textureShapeRC: [bNumRows, bNumCols]});
-  const resultTexture: WebGLTexture =
-      gpgpu.createMatrixTexture(outNumRows, outNumCols);
-  const resArr =
-      new Array2D(outShape, {texture: resultTexture, textureShapeRC: outShape});
-
-  const program =
-      new MatMulProgram(aArr.shape, bArr.shape, aOrientation, bOrientation);
+
+  const program = new MatMulProgram(aShape, bShape, aOrientation, bOrientation);
+  const resArr = Array2D.zeros(program.outputShape as [number, number]);
+  const aArr = Array2D.new(aShape, a);
+  const bArr = Array2D.new(bShape, b);
+
   const binary =
       gpgpu_math.compileProgram(gpgpu, program, [aArr, bArr], resArr);
-  gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-  gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b);
-
   gpgpu_math.runProgram(binary, [aArr, bArr], resArr);
+  const result = resArr.getValues();
 
-  const result =
-      gpgpu.downloadMatrixFromTexture(resultTexture, outNumRows, outNumCols);
-
-  gpgpu.deleteMatrixTexture(aTexture);
-  gpgpu.deleteMatrixTexture(bTexture);
-  gpgpu.deleteMatrixTexture(resultTexture);
+  aArr.dispose();
+  bArr.dispose();
+  texManager.dispose();
   gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 

From e6b04b5ec43f6130aa340c73442f67d2887c972c Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Fri, 18 Aug 2017 21:18:03 -0400
Subject: [PATCH 08/10] remove duplicate copy file

---
 src/math/webgl/copy2D_gpu.ts | 55 ------------------------------------
 1 file changed, 55 deletions(-)
 delete mode 100644 src/math/webgl/copy2D_gpu.ts

diff --git a/src/math/webgl/copy2D_gpu.ts b/src/math/webgl/copy2D_gpu.ts
deleted file mode 100644
index af2cbdfc6f..0000000000
--- a/src/math/webgl/copy2D_gpu.ts
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import {GPGPUProgram} from './gpgpu_math';
-
-export class Copy2DProgram implements GPGPUProgram {
-  variableNames = ['source'];
-  params: Array<{}>;
-  outputShape: number[];
-  userCode: string;
-
-  constructor(
-      sourceShape: [number, number], sourceSize: [number, number],
-      destSize: [number, number]) {
-    this.userCode = `
-      uniform vec2 sourceStartCR;
-      uniform vec2 destStartCR;
-
-      void main() {
-        vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR;
-        float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x;
-        vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x),
-          floor(destOffsetFlat / sourceSizeCR.x));
-        vec2 sourceCR = sourceStartCR + sourceOffsetCR;
-        setOutput(getSource(sourceCR.y, sourceCR.x));
-      }
-    `;
-  }
-}
-
-export function getCustomSetupFunc(
-    sourceStart: [number, number], destStart: [number, number],
-    destSize: [number, number]) {
-  return (gpgpu: GPGPUContext) => {
-    gpgpu.setOutputMatrixWriteRegion(
-        destStart[0], destSize[0], destStart[1], destSize[1]);
-    const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR');
-    gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[1], sourceStart[0]);
-    const destStartCRLoc = gpgpu.getUniformLocation('destStartCR');
-    gpgpu.gl.uniform2f(destStartCRLoc, destStart[1], destStart[0]);
-  };
-}

From 2c322d7204f69995fa8cd072af856f2a138eb0e6 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Sun, 20 Aug 2017 21:50:33 -0400
Subject: [PATCH 09/10] move the rest of math ops to logical sampling

---
 src/math/math_gpu.ts                       | 209 +++------------------
 src/math/ndarray.ts                        |   4 +-
 src/math/webgl/batchnorm_gpu.ts            | 159 +++++-----------
 src/math/webgl/batchnorm_gpu_test.ts       | 122 +++++-------
 src/math/webgl/binaryop_gpu_test.ts        |  15 +-
 src/math/webgl/concat3d_gpu.ts             |  92 ++++-----
 src/math/webgl/concat3d_gpu_test.ts        |  75 +++-----
 src/math/webgl/mulbcast_gpu.ts             |  90 ---------
 src/math/webgl/mulbcast_gpu_test.ts        | 140 --------------
 src/math/webgl/reshape_gpu.ts              |  65 -------
 src/math/webgl/reshape_gpu_test.ts         |  88 ---------
 src/math/webgl/resize_bilinear_gpu.ts      | 133 ++++++-------
 src/math/webgl/resize_bilinear_gpu_test.ts |  72 +++----
 13 files changed, 270 insertions(+), 994 deletions(-)
 delete mode 100644 src/math/webgl/mulbcast_gpu.ts
 delete mode 100644 src/math/webgl/mulbcast_gpu_test.ts
 delete mode 100644 src/math/webgl/reshape_gpu.ts
 delete mode 100644 src/math/webgl/reshape_gpu_test.ts

diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index be6ea4d3a0..f63f070f33 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -13,19 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as util from '../util';
-
-import * as concat3d_util from './concat3d_util';
-import * as conv_util from './conv_util';
 import {MatrixOrientation, NDArrayMath} from './math';
 import * as ndarray from './ndarray';
 import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray';
 import {AddScaledMatProgram} from './webgl/addscaledmat_gpu';
 import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
-import * as batchnorm_gpu from './webgl/batchnorm_gpu';
+import {BatchNormProgram} from './webgl/batchnorm_gpu';
 import {BinaryOpProgram} from './webgl/binaryop_gpu';
-import * as concat3d_gpu from './webgl/concat3d_gpu';
+import {Concat3DProgram} from './webgl/concat3d_gpu';
 // tslint:disable-next-line:max-line-length
 import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu';
 import {Conv2DProgram} from './webgl/conv_gpu';
@@ -41,21 +37,14 @@ import {MinMaxProgram} from './webgl/minmax_gpu';
 import {MatMulProgram} from './webgl/mulmat_gpu';
 import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceSumProgram} from './webgl/reducesum_gpu';
-import * as reshape_gpu from './webgl/reshape_gpu';
-import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu';
+import {ResizeBilinearProgram} from './webgl/resize_bilinear_gpu';
 import {TextureManager} from './webgl/texture_manager';
 import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
 
-const BATCHNORM_PROG = 'batchnorm';
-const CONCAT_PROG = 'concat';
-const RESHAPE_PROG = 'reshape';
-const RESIZE_BILINEAR_PROG = 'resizebilin';
-
 export class NDArrayMathGPU extends NDArrayMath {
   private gpgpu: GPGPUContext;
   private textureManager: TextureManager;
-  private programCache: {[key: string]: WebGLProgram} = {};
   private binaryCache: {[key: string]: GPGPUBinary} = {};
   private gpgpuCreatedLocally: boolean;
 
@@ -114,51 +103,8 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   protected concat3DInternal(x1: Array3D, x2: Array3D, axis: number): Array3D {
-    const x1TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x1.shape);
-    const x2TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x2.shape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualX1TexShape = x1.getTextureShapeRC(x1TexShapeRC);
-    let cleanupX1 = false;
-    if (!util.arraysEqual(actualX1TexShape, x1TexShapeRC)) {
-      x1 = this.reshapeTexture(x1, x1TexShapeRC);
-      cleanupX1 = true;
-    }
-    const actualX2TexShape = x2.getTextureShapeRC(x2TexShapeRC);
-    let cleanupX2 = false;
-    if (!util.arraysEqual(actualX2TexShape, x2TexShapeRC)) {
-      x2 = this.reshapeTexture(x2, x2TexShapeRC);
-      cleanupX2 = true;
-    }
-
-    const resultShapeRCD =
-        concat3d_util.computeConcat3DOutputShape(x1.shape, x2.shape, axis);
-
-    const program = this.getAndSaveProgram(
-        `${CONCAT_PROG}_${x1.shape}_${x2.shape}_${axis}`,
-        () => concat3d_gpu.getFragmentShaderSource(
-            x1.shape, x2.shape, resultShapeRCD, axis));
-
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    concat3d_gpu.concat3D(
-        this.gpgpu, program, x1.getTexture(), x2.getTexture(), resultTex,
-        resultTexShape);
-
-    if (cleanupX1) {
-      x1.dispose();
-    }
-
-    if (cleanupX2) {
-      x2.dispose();
-    }
-
-    return NDArray.make<Array3D>(
-        resultShapeRCD, {texture: resultTex, textureShapeRC: resultTexShape});
+    const program = new Concat3DProgram(x1.shape, x2.shape, axis);
+    return this.compileAndRun(program, [x1, x2]);
   }
 
   protected scaledArrayAddInternal<T extends NDArray>(
@@ -193,23 +139,6 @@ export class NDArrayMathGPU extends NDArrayMath {
     return output;
   }
 
-  private reshapeTexture<T extends NDArray>(a: T, newTextureShape: [
-    number, number
-  ]): T {
-    const aTexShape = a.getTextureShapeRC();
-
-    const program = this.getAndSaveProgram(
-        RESHAPE_PROG, () => reshape_gpu.getFragmentShaderSource());
-
-    const resultTexture = this.textureManager.acquireTexture(newTextureShape);
-    reshape_gpu.reshape(
-        this.gpgpu, program, a.getTexture(), aTexShape[0], aTexShape[1],
-        resultTexture, newTextureShape[0], newTextureShape[1]);
-
-    return NDArray.make<T>(
-        a.shape, {texture: resultTexture, textureShapeRC: newTextureShape});
-  }
-
   protected matMulInternal(
       a: Array2D, b: Array2D, aOrientation: MatrixOrientation,
       bOrientation: MatrixOrientation): Array2D {
@@ -225,92 +154,26 @@ export class NDArrayMathGPU extends NDArrayMath {
 
   protected batchNormalization3DInternal(
       x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D,
-      varianceEpsilon: number, scale?: Array3D|Array1D,
+      varianceEpsilon = 0.000001, scale?: Array3D|Array1D,
       offset?: Array3D|Array1D): Array3D {
-    const xTexShape = x.getTextureShapeRC();
-
-    let cleanupMean = false;
-    const preferredMeanTexShape: [number, number] =
-        mean.rank === 1 ? [1, mean.size] : xTexShape;
-    let meanTexShape = mean.getTextureShapeRC(preferredMeanTexShape);
-    if (!util.arraysEqual(meanTexShape, preferredMeanTexShape)) {
-      mean = this.reshapeTexture(mean, preferredMeanTexShape);
-      meanTexShape = preferredMeanTexShape;
-      cleanupMean = true;
-    }
+    const inputs = [x, mean, variance];
 
-    let cleanupVariance = false;
-    const preferredVarianceTexShape: [number, number] =
-        variance.rank === 1 ? [1, variance.size] : xTexShape;
-    let varianceTexShape = variance.getTextureShapeRC(preferredMeanTexShape);
-    if (!util.arraysEqual(varianceTexShape, preferredVarianceTexShape)) {
-      variance = this.reshapeTexture(variance, preferredVarianceTexShape);
-      varianceTexShape = preferredVarianceTexShape;
-      cleanupVariance = true;
-    }
-
-    let scaleTexShape: [number, number]|null = null;
-    let cleanupScale = false;
-    if (scale != null) {
-      const preferredScaleTexShape: [number, number] =
-          scale.rank === 1 ? [1, scale.size] : xTexShape;
-
-      scaleTexShape = scale.getTextureShapeRC(preferredScaleTexShape);
-      if (!util.arraysEqual(scaleTexShape, preferredScaleTexShape)) {
-        scale = this.reshapeTexture(scale, preferredScaleTexShape);
-        scaleTexShape = preferredScaleTexShape;
-        cleanupScale = true;
-      }
-    }
-
-    let offsetTexShape: [number, number]|null = null;
-    let cleanupOffset = false;
+    let offsetShape = null;
     if (offset != null) {
-      const preferredOffsetTexShape: [number, number] =
-          offset.rank === 1 ? [1, offset.size] : xTexShape;
-
-      offsetTexShape = offset.getTextureShapeRC(preferredOffsetTexShape);
-      if (!util.arraysEqual(offsetTexShape, preferredOffsetTexShape)) {
-        offset = this.reshapeTexture(offset, preferredOffsetTexShape);
-        offsetTexShape = preferredOffsetTexShape;
-        cleanupOffset = true;
-      }
+      offsetShape = offset.shape;
+      inputs.push(offset);
     }
 
-    const resultTexShape: [number, number] = x.getTextureShapeRC();
-
-    const program = this.getAndSaveProgram(
-        `${BATCHNORM_PROG}_${xTexShape}_${meanTexShape}_${varianceTexShape}_` +
-            `${scaleTexShape!}_${offsetTexShape!}_${varianceEpsilon}`,
-        () => batchnorm_gpu.getFragmentShaderSource(
-            xTexShape, meanTexShape, varianceTexShape, offsetTexShape,
-            scaleTexShape, varianceEpsilon));
-
-    const resultTexture = this.textureManager.acquireTexture(resultTexShape);
-
-    batchnorm_gpu.batchNormalization(
-        this.gpgpu, program, x.getTexture(), xTexShape, mean.getTexture(),
-        meanTexShape, variance.getTexture(), varianceTexShape,
-        offset != null ? offset.getTexture() : null,
-        offset != null ? offsetTexShape : null,
-        scale != null ? scale.getTexture() : null,
-        scale != null ? scaleTexShape : null, resultTexture, resultTexShape);
-
-    if (cleanupMean) {
-      mean.dispose();
-    }
-    if (cleanupVariance) {
-      variance.dispose();
-    }
-    if (cleanupScale) {
-      scale!.dispose();
-    }
-    if (cleanupOffset) {
-      offset!.dispose();
+    let scaleShape = null;
+    if (scale != null) {
+      scaleShape = scale.shape;
+      inputs.push(scale);
     }
 
-    return NDArray.make<Array3D>(
-        x.shape, {texture: resultTexture, textureShapeRC: resultTexShape});
+    const program = new BatchNormProgram(
+        x.shape, mean.shape, variance.shape, offsetShape, scaleShape,
+        varianceEpsilon);
+    return this.compileAndRun(program, inputs);
   }
 
   protected switchDimInternal<T extends NDArray>(a: T, newDim: number[]): T {
@@ -492,25 +355,9 @@ export class NDArrayMathGPU extends NDArrayMath {
   protected resizeBilinear3DInternal(
       x: Array3D, newShape2D: [number, number],
       alignCorners: boolean): Array3D {
-    const programKey =
-        [RESIZE_BILINEAR_PROG, x.shape, newShape2D, alignCorners].join('_');
-
-    const newShapeRCD: [number, number, number] =
-        [newShape2D[0], newShape2D[1], x.shape[2]];
-    const resultTexShape = conv_util.computeTexShapeFrom3D(newShapeRCD);
-
-    const program = this.getAndSaveProgram(
-        programKey,
-        () => resize_bilinear_gpu.getFragmentShaderSource(
-            x.shape, newShape2D, alignCorners));
-
-    const resultTexture = this.textureManager.acquireTexture(resultTexShape);
-
-    resize_bilinear_gpu.resizeBilinear(
-        this.gpgpu, program, x.getTexture(), resultTexture, resultTexShape);
-
-    return NDArray.make<Array3D>(
-        newShapeRCD, {texture: resultTexture, textureShapeRC: resultTexShape});
+    const program =
+        new ResizeBilinearProgram(x.shape, newShape2D, alignCorners);
+    return this.compileAndRun(program, [x]);
   }
 
   private getAndSaveBinary(key: string, getBinary: () => GPGPUBinary):
@@ -521,25 +368,11 @@ export class NDArrayMathGPU extends NDArrayMath {
     return this.binaryCache[key];
   }
 
-  private getAndSaveProgram(programKey: string, getShaderSource: () => string):
-      WebGLProgram {
-    if (!(programKey in this.programCache)) {
-      this.programCache[programKey] =
-          this.gpgpu.createProgram(getShaderSource());
-    }
-    return this.programCache[programKey];
-  }
-
   getTextureManager(): TextureManager {
     return this.textureManager;
   }
 
   dispose() {
-    for (const programKey in this.programCache) {
-      if (this.programCache.hasOwnProperty(programKey)) {
-        this.gpgpu.deleteProgram(this.programCache[programKey]);
-      }
-    }
     for (const key in this.binaryCache) {
       this.gpgpu.deleteProgram(this.binaryCache[key].webGLProgram);
     }
diff --git a/src/math/ndarray.ts b/src/math/ndarray.ts
index e0104f0342..6c97994d00 100644
--- a/src/math/ndarray.ts
+++ b/src/math/ndarray.ts
@@ -135,8 +135,8 @@ export class NDArray {
         return new Array3D(shape as [number, number, number], data) as any;
       case 4:
         return new Array4D(
-                   // tslint:disable-next-line:no-any
-                   shape as [number, number, number, number], data) as any;
+            // tslint:disable-next-line:no-any
+            shape as [number, number, number, number], data) as any;
       default:
         // tslint:disable-next-line:no-any
         return new NDArray(shape, data) as any;
diff --git a/src/math/webgl/batchnorm_gpu.ts b/src/math/webgl/batchnorm_gpu.ts
index 6a93267a97..504ab05af3 100644
--- a/src/math/webgl/batchnorm_gpu.ts
+++ b/src/math/webgl/batchnorm_gpu.ts
@@ -13,119 +13,50 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    xTexShapeRC: [number, number], meanTexShapeRC: [number, number],
-    varianceTexShapeRC: [number, number],
-    offsetTexShapeRC: [number, number]|null,
-    scaleTexShapeRC?: [number, number]|null, varianceEpsilon = 0.001): string {
-  let offsetSamplerSnippet = '';
-  let offsetShapeInitializationSnippet = '';
-  let offsetCoordsSnippet = '';
-  let offsetUVSnippet = '';
-  let offsetValueSnippet = '';
-  let offsetOperationSnippet = '0.0';
-
-  let scaleSamplerSnippet = '';
-  let scaleShapeInitializationSnippet = '';
-  let scaleCoordsSnippet = '';
-  let scaleUVSnippet = '';
-  let scaleValueSnippet = '';
-  let scaleOperationSnippet = '';
-
-  if (offsetTexShapeRC != null) {
-    offsetSamplerSnippet = 'uniform sampler2D offset;';
-    offsetShapeInitializationSnippet = `const vec2 offsetShapeCR = vec2(
-            ${offsetTexShapeRC[1]}, ${offsetTexShapeRC[0]});`;
-    offsetCoordsSnippet = 'vec2 offsetCoordsCR = mod(yTexCR, offsetShapeCR);';
-    offsetUVSnippet =
-        'vec2 offsetUV = (offsetCoordsCR + halfCR) / offsetShapeCR;';
-    offsetValueSnippet = 'float offsetValue = texture2D(offset, offsetUV).r;';
-    offsetOperationSnippet = 'offsetValue';
-  }
-
-  if (scaleTexShapeRC != null) {
-    scaleSamplerSnippet = 'uniform sampler2D scale;';
-    scaleShapeInitializationSnippet = `const vec2 scaleShapeCR = vec2(
-            ${scaleTexShapeRC[1]}, ${scaleTexShapeRC[0]});`;
-    scaleCoordsSnippet = 'vec2 scaleCoordsCR = mod(yTexCR, scaleShapeCR);';
-    scaleUVSnippet = 'vec2 scaleUV = (scaleCoordsCR + halfCR) / scaleShapeCR;';
-    scaleValueSnippet = 'float scaleValue = texture2D(scale, scaleUV).r;';
-    scaleOperationSnippet = 'inv *= scaleValue;';
+import * as util from '../../util';
+import {GPGPUProgram} from './gpgpu_math';
+
+export class BatchNormProgram implements GPGPUProgram {
+  variableNames: string[];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+  supportsBroadcasting = true;
+
+  constructor(
+      xShape: number[], meanShape: number[], varianceShape: number[],
+      offsetShape: number[]|null, scaleShape: number[]|null,
+      varianceEpsilon: number) {
+    this.variableNames = ['x', 'mean', 'variance'];
+    util.assertAndGetBroadcastedShape(xShape, meanShape);
+    util.assertAndGetBroadcastedShape(xShape, varianceShape);
+
+    let offsetSnippet = '0.0';
+    if (offsetShape != null) {
+      util.assertAndGetBroadcastedShape(xShape, offsetShape);
+      this.variableNames.push('offset');
+      offsetSnippet = 'getOffsetAtOutCoords()';
+    }
+
+    let scaleSnippet = '1.0';
+    if (scaleShape != null) {
+      util.assertAndGetBroadcastedShape(xShape, scaleShape);
+      this.variableNames.push('scale');
+      scaleSnippet = 'getScaleAtOutCoords()';
+    }
+
+    this.params = [varianceEpsilon];
+    this.outputShape = xShape;
+    this.userCode = `
+      void main() {
+        float x = getXAtOutCoords();
+        float mean = getMeanAtOutCoords();
+        float variance = getVarianceAtOutCoords();
+        float offset = ${offsetSnippet};
+        float scale = ${scaleSnippet};
+        float inv = scale / sqrt(variance + float(${varianceEpsilon}));
+        setOutput((x - mean) * inv + offset);
+      }
+    `;
   }
-
-  return `
-    precision highp float;
-    uniform sampler2D x;
-    uniform sampler2D mean;
-    uniform sampler2D variance;
-    ${offsetSamplerSnippet}
-    ${scaleSamplerSnippet}
-
-    varying vec2 resultUV;
-
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
-    const vec2 meanShapeCR = vec2(${meanTexShapeRC[1]}, ${meanTexShapeRC[0]});
-    const vec2 varianceShapeCR = vec2(
-        ${varianceTexShapeRC[1]}, ${varianceTexShapeRC[0]});
-
-    ${offsetShapeInitializationSnippet}
-    ${scaleShapeInitializationSnippet}
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const float varianceEpsilon = ${varianceEpsilon};
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      vec2 meanCoordsCR = mod(yTexCR, meanShapeCR);
-      vec2 varianceCoordsCR = mod(yTexCR, varianceShapeCR);
-      ${offsetCoordsSnippet}
-      ${scaleCoordsSnippet}
-
-      vec2 meanUV = (meanCoordsCR + halfCR) / meanShapeCR;
-      vec2 varianceUV = (varianceCoordsCR + halfCR) / varianceShapeCR;
-      ${offsetUVSnippet}
-      ${scaleUVSnippet}
-
-      float xValue = texture2D(x, resultUV).r;
-      float meanValue = texture2D(mean, meanUV).r;
-      float varianceValue = texture2D(variance, varianceUV).r;
-      ${offsetValueSnippet}
-      ${scaleValueSnippet}
-
-      float inv = 1.0 / sqrt(varianceValue + varianceEpsilon);
-      ${scaleOperationSnippet}
-      float xTimesInv = xValue * inv;
-      float meanTimesInvWithOffset = ${offsetOperationSnippet}
-          - meanValue * inv;
-
-      gl_FragColor = vec4(xTimesInv + meanTimesInvWithOffset, 0, 0, 0);
-    }`;
 }
-
-export function batchNormalization(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    xShapeRowCol: [number, number], mean: WebGLTexture,
-    meanShapeRowCol: [number, number], variance: WebGLTexture,
-    varianceShapeRowCol: [number, number], offset: WebGLTexture|null,
-    offsetShapeRowCol: [number, number]|null, scale: WebGLTexture|null,
-    scaleShapeRowCol: [number, number]|null, result: WebGLTexture,
-    resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(x, 'x', 0);
-  gpgpu.setInputMatrixTexture(mean, 'mean', 1);
-  gpgpu.setInputMatrixTexture(variance, 'variance', 2);
-  let nextIndex = 3;
-  if (offset != null) {
-    gpgpu.setInputMatrixTexture(offset, 'offset', nextIndex);
-    nextIndex++;
-  }
-  if (scale != null) {
-    gpgpu.setInputMatrixTexture(scale, 'scale', nextIndex);
-  }
-  gpgpu.executeProgram();
-}
\ No newline at end of file
diff --git a/src/math/webgl/batchnorm_gpu_test.ts b/src/math/webgl/batchnorm_gpu_test.ts
index 7903a9e9ac..aac8f27464 100644
--- a/src/math/webgl/batchnorm_gpu_test.ts
+++ b/src/math/webgl/batchnorm_gpu_test.ts
@@ -14,88 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+import {initializeGPU, NDArray} from '../ndarray';
 
-import * as batchnorm_gpu from './batchnorm_gpu';
+import {BatchNormProgram} from './batchnorm_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('batchnorm gpu test', () => {
-  function uploadBatchNormDownload(
-      x: Float32Array, xTexShapeRowCol: [number, number], mean: Float32Array,
-      meanTexShapeRowCol: [number, number], variance: Float32Array,
-      varianceTexShapeRowCol: [number, number], offset: Float32Array|null,
-      offsetTexShapeRowCol: [number, number]|null, scale: Float32Array|null,
-      scaleTexShapeRowCol: [number, number]|null,
-      varianceEpsilon: number): Float32Array {
-    const resultTexShapeRC: [number, number] = xTexShapeRowCol;
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = batchnorm_gpu.getFragmentShaderSource(
-        xTexShapeRowCol, meanTexShapeRowCol, varianceTexShapeRowCol,
-        offsetTexShapeRowCol, scaleTexShapeRowCol, varianceEpsilon);
-
-    const program = gpgpu.createProgram(shaderSource);
-
-    const xTex =
-        gpgpu.createMatrixTexture(xTexShapeRowCol[0], xTexShapeRowCol[1]);
-    const meanTex =
-        gpgpu.createMatrixTexture(meanTexShapeRowCol[0], meanTexShapeRowCol[1]);
-    const varianceTex = gpgpu.createMatrixTexture(
-        varianceTexShapeRowCol[0], varianceTexShapeRowCol[1]);
-
-    let offsetTex = null;
-    if (offset != null) {
-      offsetTex = gpgpu.createMatrixTexture(
-          offsetTexShapeRowCol![0], offsetTexShapeRowCol![1]);
-    }
-    let scaleTex = null;
-    if (scale != null) {
-      scaleTex = gpgpu.createMatrixTexture(
-          scaleTexShapeRowCol![0], scaleTexShapeRowCol![1]);
-    }
-
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(
-        xTex, xTexShapeRowCol[0], xTexShapeRowCol[1], x);
-    gpgpu.uploadMatrixToTexture(
-        meanTex, meanTexShapeRowCol[0], meanTexShapeRowCol[1], mean);
-    gpgpu.uploadMatrixToTexture(
-        varianceTex, varianceTexShapeRowCol[0], varianceTexShapeRowCol[1],
-        variance);
-    if (offset != null) {
-      gpgpu.uploadMatrixToTexture(
-          offsetTex!, offsetTexShapeRowCol![0], offsetTexShapeRowCol![1],
-          offset);
-    }
-    if (scale != null) {
-      gpgpu.uploadMatrixToTexture(
-          scaleTex!, scaleTexShapeRowCol![0], scaleTexShapeRowCol![1], scale);
-    }
-
-    batchnorm_gpu.batchNormalization(
-        gpgpu, program, xTex, xTexShapeRowCol, meanTex, meanTexShapeRowCol,
-        varianceTex, varianceTexShapeRowCol, offsetTex, offsetTexShapeRowCol,
-        scaleTex, scaleTexShapeRowCol, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteMatrixTexture(meanTex);
-    gpgpu.deleteMatrixTexture(varianceTex);
-    if (offsetTex != null) {
-      gpgpu.deleteMatrixTexture(offsetTex);
-    }
-    if (scaleTex != null) {
-      gpgpu.deleteMatrixTexture(scaleTex);
-    }
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
 
   it('simple batchnorm, no offset or scale, 2x1x2', () => {
     const x = new Float32Array([2, 100, 4, 400]);
@@ -201,7 +127,7 @@ describe('batchnorm gpu test', () => {
     const varianceEpsilon = .001;
 
     const result = uploadBatchNormDownload(
-        x, [2, 9], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale,
+        x, [2, 3, 3], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale,
         [1, 3], varianceEpsilon);
 
     const expectedResult = new Float32Array([
@@ -212,3 +138,41 @@ describe('batchnorm gpu test', () => {
     test_util.expectArraysClose(result, expectedResult, 1e-5);
   });
 });
+
+function uploadBatchNormDownload(
+    x: Float32Array, xShape: number[], mean: Float32Array, meanShape: number[],
+    variance: Float32Array, varianceShape: number[], offset: Float32Array|null,
+    offsetShape: number[]|null, scale: Float32Array|null,
+    scaleShape: number[]|null, varianceEpsilon: number): Float32Array {
+  const gpgpu = new GPGPUContext();
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const program = new BatchNormProgram(
+      xShape, meanShape, varianceShape, offsetShape, scaleShape,
+      varianceEpsilon);
+  const xArr = NDArray.make(xShape, {values: x});
+  const meanArr = NDArray.make(meanShape, {values: mean});
+  const varianceArr = NDArray.make(varianceShape, {values: variance});
+  const inputs = [xArr, meanArr, varianceArr];
+
+  if (offset != null) {
+    const offsetArr = NDArray.make(offsetShape, {values: offset});
+    inputs.push(offsetArr);
+  }
+  if (scale != null) {
+    const scaleArr = NDArray.make(scaleShape, {values: scale});
+    inputs.push(scaleArr);
+  }
+
+  const res = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res);
+  gpgpu_math.runProgram(binary, inputs, res);
+  const resValues = res.getValues();
+
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return resValues;
+}
diff --git a/src/math/webgl/binaryop_gpu_test.ts b/src/math/webgl/binaryop_gpu_test.ts
index dd1ad320c0..cb2b14cf16 100644
--- a/src/math/webgl/binaryop_gpu_test.ts
+++ b/src/math/webgl/binaryop_gpu_test.ts
@@ -14,13 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+// tslint:disable-next-line:max-line-length
+import {Array1D, Array2D, Array3D, initializeGPU, NDArray, Scalar} from '../ndarray';
 
 import {BinaryOpProgram} from './binaryop_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {NDArray, Array1D, Array2D, Array3D, Scalar,
-  initializeGPU} from '../ndarray';
-import * as util from '../../util';
 import {TextureManager} from './texture_manager';
 
 describe('binaryop_gpu Add', () => {
@@ -92,7 +91,7 @@ describe('binaryop_gpu Sub', () => {
     // shape [3, 2] is not compatible with shape [3].
     const res = uploadBinaryOpDownload(a, b, '-');
     test_util.expectArraysClose(
-      res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4);
+        res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4);
   });
 });
 
@@ -177,17 +176,15 @@ describe('binaryop_gpu Divide', () => {
   });
 });
 
-export function uploadBinaryOpDownload(
+function uploadBinaryOpDownload(
     a: NDArray, b: NDArray, op: '+'|'-'|'*'|'/'): Float32Array {
   const gpgpu = new GPGPUContext();
   const textureManager = new TextureManager(gpgpu);
   initializeGPU(gpgpu, textureManager);
 
-  const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape);
-  const res = NDArray.zeros(outShape);
   const program = new BinaryOpProgram(op, a.shape, b.shape);
-  const binary =
-      gpgpu_math.compileProgram(gpgpu, program, [a, b], res);
+  const res = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b], res);
   gpgpu_math.runProgram(binary, [a, b], res);
 
   const resValues = res.getValues();
diff --git a/src/math/webgl/concat3d_gpu.ts b/src/math/webgl/concat3d_gpu.ts
index ebe37d7ab3..e6c6840c8a 100644
--- a/src/math/webgl/concat3d_gpu.ts
+++ b/src/math/webgl/concat3d_gpu.ts
@@ -13,62 +13,40 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as conv_util from '../conv_util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    x1ShapeRCD: [number, number, number], x2ShapeRCD: [number, number, number],
-    resultShapeRCD: [number, number, number], axis: number): string {
-  const x1TexShapeRC = conv_util.computeTexShapeFrom3D(x1ShapeRCD);
-  const x2TexShapeRC = conv_util.computeTexShapeFrom3D(x2ShapeRCD);
-
-  const yAxes = ['yR', 'yC', 'yD'];
-  const concatAxis = yAxes[axis];
-
-  return `
-    precision highp float;
-    uniform sampler2D x1;
-    uniform sampler2D x2;
-
-    const vec2 x1ShapeCR = vec2(${x1TexShapeRC[1]}, ${x1TexShapeRC[0]});
-    const vec2 x2ShapeCR = vec2(${x2TexShapeRC[1]}.0, ${x2TexShapeRC[0]}.0);
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, yD).
-      float yR = yTexCR.y;
-      float yC = floor(yTexCR.x / ${resultShapeRCD[2]}.0);
-      float yD = mod(yTexCR.x, ${resultShapeRCD[2]}.0);
-
-      float value = 0.0;
-
-      if (${concatAxis} < ${x1ShapeRCD[axis]}.0) {
-        // Map yR, yC, yD back to x1 coordinates.
-        vec2 x1CR = vec2(yC * ${x1ShapeRCD[2]}.0 + yD, yR);
-        vec2 x1UV = (x1CR + halfCR) / x1ShapeCR;
-        value = texture2D(x1, x1UV).r;
-      } else {
-        ${concatAxis} = ${concatAxis} - ${x1ShapeRCD[axis]}.0;
-
-        // Map yR, yC, yD back to x2 coordinates.
-        vec2 x2CR = vec2(yC * ${x2ShapeRCD[2]}.0 + yD, yR);
-        vec2 x2UV = (x2CR + halfCR) / x2ShapeCR;
-        value = texture2D(x2, x2UV).r;
+import * as concat3d_util from '../concat3d_util';
+import {GPGPUProgram} from './gpgpu_math';
+
+export class Concat3DProgram implements GPGPUProgram {
+  variableNames = ['A', 'B'];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+
+  constructor(
+      x1Shape: [number, number, number], x2Shape: [number, number, number],
+      axis: number) {
+    const yAxes = ['yR', 'yC', 'yD'];
+    const concatAxis = yAxes[axis];
+    this.params = [axis];
+    this.outputShape =
+        concat3d_util.computeConcat3DOutputShape(x1Shape, x2Shape, axis);
+    this.userCode = `
+      void main() {
+        vec3 coords = getOutputCoords();
+        float yR = coords.x;
+        float yC = coords.y;
+        float yD = coords.z;
+
+        float value = 0.0;
+        if (${concatAxis} < ${x1Shape[axis]}.0) {
+          value = getA(yR, yC, yD);
+        } else {
+          ${concatAxis} -= ${x1Shape[axis]}.0;
+          value = getB(yR, yC, yD);
+        }
+
+        setOutput(value);
       }
-
-      gl_FragColor = vec4(value, 0.0, 0.0, 0.0);
-    }`;
-}
-
-export function concat3D(
-    gpgpu: GPGPUContext, program: WebGLProgram, x1: WebGLTexture,
-    x2: WebGLTexture, result: WebGLTexture, resultShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(result, resultShapeRC[0], resultShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(x1, 'x1', 0);
-  gpgpu.setInputMatrixTexture(x2, 'x2', 1);
-  gpgpu.executeProgram();
+    `;
+  }
 }
diff --git a/src/math/webgl/concat3d_gpu_test.ts b/src/math/webgl/concat3d_gpu_test.ts
index 00ee4b14d0..3f96a78d8f 100644
--- a/src/math/webgl/concat3d_gpu_test.ts
+++ b/src/math/webgl/concat3d_gpu_test.ts
@@ -14,54 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
-
-import * as concat3d_gpu from './concat3d_gpu';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
+import {Concat3DProgram} from './concat3d_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('concat3d_gpu', () => {
-
-  function uploadConcat3dDownload(
-      x1: Float32Array, x2: Float32Array, x1ShapeRCD: [number, number, number],
-      x2ShapeRCD: [number, number, number], axis: number): Float32Array {
-    const x1TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x1ShapeRCD);
-    const x2TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x2ShapeRCD);
-
-    const resultShapeRCD = x1ShapeRCD.slice() as [number, number, number];
-    resultShapeRCD[axis] += x2ShapeRCD[axis];
-    const resultTexShapeRC = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = concat3d_gpu.getFragmentShaderSource(
-        x1ShapeRCD, x2ShapeRCD, resultShapeRCD, axis);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const x1Tex = gpgpu.createMatrixTexture(x1TexShapeRC[0], x1TexShapeRC[1]);
-    const x2Tex = gpgpu.createMatrixTexture(x2TexShapeRC[0], x2TexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(x1Tex, x1TexShapeRC[0], x1TexShapeRC[1], x1);
-    gpgpu.uploadMatrixToTexture(x2Tex, x2TexShapeRC[0], x2TexShapeRC[1], x2);
-
-    concat3d_gpu.concat3D(
-        gpgpu, program, x1Tex, x2Tex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(x1Tex);
-    gpgpu.deleteMatrixTexture(x2Tex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
-
   it('concat axis=0', () => {
     const x1 = new Float32Array([1, 11, 111, 2, 22, 222]);
     const x2 =
@@ -103,3 +62,29 @@ describe('concat3d_gpu', () => {
         1e-6);
   });
 });
+
+function uploadConcat3dDownload(
+    a: Float32Array, b: Float32Array, aShape: [number, number, number],
+    bShape: [number, number, number], axis: number): Float32Array {
+  const gpgpu = new GPGPUContext();
+  gpgpu.enableAutomaticDebugValidation(true);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const program = new Concat3DProgram(aShape, bShape, axis);
+  const aArr = Array3D.new(aShape, a);
+  const bArr = Array3D.new(bShape, b);
+  const rArr = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr, bArr], rArr);
+  gpgpu_math.runProgram(binary, [aArr, bArr], rArr);
+  const result = rArr.getValues();
+
+  aArr.dispose();
+  bArr.dispose();
+  rArr.dispose();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return result;
+}
diff --git a/src/math/webgl/mulbcast_gpu.ts b/src/math/webgl/mulbcast_gpu.ts
deleted file mode 100644
index 8780720d0d..0000000000
--- a/src/math/webgl/mulbcast_gpu.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    aNumRows: number, aNumCols: number, bNumRows: number, bNumCols: number,
-    resultNumRows: number, resultNumCols: number): string {
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    uniform sampler2D matrixB;
-    varying vec2 resultUV;
-
-    const vec2 aDimCR = vec2(${aNumCols}.0, ${aNumRows}.0);
-    const vec2 bDimCR = vec2(${bNumCols}.0, ${bNumRows}.0);
-    const vec2 resultDimCR = vec2(${resultNumCols}.0, ${resultNumRows}.0);
-    const vec4 halfCR = vec4(0.5, 0.5, 0.5, 0.5);
-
-    void main() {
-      vec2 resultCR = floor(resultUV * resultDimCR);
-      vec4 resultCRBroadcast = vec4(resultCR, resultCR);
-      vec4 abDimsCR = vec4(aDimCR, bDimCR);
-      vec4 abCR = mod(resultCRBroadcast, abDimsCR);
-      vec4 abCRCenters = abCR + halfCR;
-      vec4 abUV = abCRCenters / abDimsCR;
-      vec4 a = texture2D(matrixA, abUV.rg);
-      vec4 b = texture2D(matrixB, abUV.ba);
-      float product = a.r * b.r;
-      gl_FragColor = vec4(product, 0, 0, 0);
-    }`;
-}
-
-export function multiplyBroadcast(
-    gpgpu: GPGPUContext, multiplyBroadcastProgram: WebGLProgram,
-    a: WebGLTexture, aNumRows: number, aNumCols: number, b: WebGLTexture,
-    bNumRows: number, bNumCols: number, result: WebGLTexture,
-    resultNumRows: number, resultNumCols: number) {
-  gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols);
-  gpgpu.setProgram(multiplyBroadcastProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-  gpgpu.setInputMatrixTexture(b, 'matrixB', 1);
-  gpgpu.executeProgram();
-}
-
-export function uploadMultiplyBroadcastDownload(
-    a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array,
-    bNumRows: number, bNumCols: number): Float32Array {
-  const resultNumRows = Math.max(aNumRows, bNumRows);
-  const resultNumCols = Math.max(aNumCols, bNumCols);
-
-  const gpgpu = new GPGPUContext();
-  const program: WebGLProgram = gpgpu.createProgram(getFragmentShaderSource(
-      aNumRows, aNumCols, bNumRows, bNumCols, resultNumRows, resultNumCols));
-
-  const aTexture: WebGLTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-  const bTexture: WebGLTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols);
-  const resultTexture: WebGLTexture =
-      gpgpu.createMatrixTexture(resultNumRows, resultNumCols);
-
-  gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-  gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b);
-
-  multiplyBroadcast(
-      gpgpu, program, aTexture, aNumRows, aNumCols, bTexture, bNumRows,
-      bNumCols, resultTexture, resultNumRows, resultNumCols);
-
-  const result = gpgpu.downloadMatrixFromTexture(
-      resultTexture, resultNumRows, resultNumCols);
-
-  gpgpu.deleteMatrixTexture(aTexture);
-  gpgpu.deleteMatrixTexture(bTexture);
-  gpgpu.deleteMatrixTexture(resultTexture);
-  gpgpu.deleteProgram(program);
-  gpgpu.dispose();
-
-  return result;
-}
diff --git a/src/math/webgl/mulbcast_gpu_test.ts b/src/math/webgl/mulbcast_gpu_test.ts
deleted file mode 100644
index e32c50179e..0000000000
--- a/src/math/webgl/mulbcast_gpu_test.ts
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as test_util from '../../test_util';
-import * as mulbcast_gpu from './mulbcast_gpu';
-
-export function cpuMultiplyBroadcast(
-    a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array,
-    bNumRows: number, bNumCols: number): Float32Array {
-  const resultNumRows = Math.max(aNumRows, bNumRows);
-  const resultNumCols = Math.max(aNumCols, bNumCols);
-  const result = new Float32Array(resultNumRows * resultNumCols);
-  let dst = 0;
-  for (let r = 0; r < resultNumRows; ++r) {
-    for (let c = 0; c < resultNumCols; ++c) {
-      const ai = ((r % aNumRows) * aNumCols) + (c % aNumCols);
-      const bi = ((r % bNumRows) * bNumCols) + (c % bNumCols);
-      result[dst] = a[ai] * b[bi];
-      ++dst;
-    }
-  }
-  return result;
-}
-
-describe('mulbcast_gpu', () => {
-  it('returns a matrix dimensions [max(aRows, bRows), max(aCols, bCols)]',
-     () => {
-       const a = new Float32Array(13 * 100);
-       const b = new Float32Array(100 * 99);
-       const result =
-           mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 1, 100, b, 100, 1);
-       expect(result.length).toEqual(100 * 100);
-     });
-
-  it('returns [0] when A is [0], A and B same size', () => {
-    const a = new Float32Array(16 * 16);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(a, result, 0.00001);
-  });
-
-  it('returns [0] when B is [0], A and B same size', () => {
-    const a = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const b = new Float32Array(16 * 16);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(b, result, 0.00001);
-  });
-
-  it('returns A when B is [1] and matrices have the same size', () => {
-    const a = new Float32Array(16 * 16);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and matrices have the same size', () => {
-    const a = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const b = new Float32Array(16 * 16);
-    b.fill(1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(result, a, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is narrower than B', () => {
-    const a = new Float32Array(16 * 8);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 8, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is shorter than B', () => {
-    const a = new Float32Array(8 * 16);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 8, 16, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is smaller than B', () => {
-    const a = new Float32Array(7 * 6);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(18 * 21, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 7, 6, b, 18, 21);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('broadcasts a smaller A [2x2] across B [4x4]', () => {
-    const a = new Float32Array([1, 0, 1, 0]);
-    const b = new Float32Array(4 * 4);
-    for (let i = 0; i < b.length; ++i) {
-      b[i] = i + 1;
-    }
-    const expected =
-        new Float32Array([1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0]);
-    const gpuResult =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 2, 2, b, 4, 4);
-    const cpuResult = cpuMultiplyBroadcast(a, 2, 2, b, 4, 4);
-    test_util.expectArraysClose(cpuResult, expected, 0.0001);
-    test_util.expectArraysClose(gpuResult, expected, 0.0001);
-  });
-
-  it('broadcasts a non-square A [3x5] across a larger B [16x16]', () => {
-    const a = test_util.randomArrayInRange(3 * 5, -1, 1);
-    const b = test_util.randomArrayInRange(16 * 16, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 3, 5, b, 16, 16);
-    test_util.expectArraysClose(
-        result, cpuMultiplyBroadcast(a, 3, 5, b, 16, 16), 0.0001);
-  });
-
-  it('broadcasts a non-square A across a larger non-square B', () => {
-    const a = test_util.randomArrayInRange(37 * 63, -1, 1);
-    const b = test_util.randomArrayInRange(128 * 150, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 37, 63, b, 128, 150);
-    test_util.expectArraysClose(
-        result, cpuMultiplyBroadcast(a, 37, 63, b, 128, 150), 0.0001);
-  });
-});
diff --git a/src/math/webgl/reshape_gpu.ts b/src/math/webgl/reshape_gpu.ts
deleted file mode 100644
index a451a78134..0000000000
--- a/src/math/webgl/reshape_gpu.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as util from '../../util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(): string {
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    uniform vec2 inputDimCR;
-    uniform vec2 resultDimCR;
-    varying vec2 resultUV;
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    void main() {
-      vec2 resultCR = floor(resultUV * resultDimCR);
-      // indexInFlat = row * stride + column, where stride == numOutputColumns
-      float indexInFlat = resultCR.y * resultDimCR.x + resultCR.x;
-
-      vec2 inputCR = vec2(
-        mod(indexInFlat, inputDimCR.x), // col = indexInFlat % numInputColumns
-        floor(indexInFlat / inputDimCR.x) // row = indexInFlat / numInputColumns
-      ) + halfCR;
-
-      vec2 inputUV = inputCR / inputDimCR;
-      gl_FragColor = texture2D(matrixA, inputUV);
-    }`;
-}
-
-export function reshape(
-    gpgpu: GPGPUContext, reshapeProgram: WebGLProgram, a: WebGLTexture,
-    aNumRows: number, aNumCols: number, result: WebGLTexture,
-    resultNumRows: number, resultNumCols: number) {
-  const inputSize = aNumRows * aNumCols;
-  const outputSize = resultNumCols * resultNumRows;
-  util.assert(
-      inputSize === outputSize,
-      `The input size (${inputSize}) and output size (${outputSize}) ` +
-          `must match`);
-
-  gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols);
-  gpgpu.setProgram(reshapeProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-
-  const inputDimCRLocation = gpgpu.getUniformLocation('inputDimCR');
-  gpgpu.gl.uniform2f(inputDimCRLocation, aNumCols, aNumRows);
-
-  const resultDimCRLocation = gpgpu.getUniformLocation('resultDimCR');
-  gpgpu.gl.uniform2f(resultDimCRLocation, resultNumCols, resultNumRows);
-
-  gpgpu.executeProgram();
-}
diff --git a/src/math/webgl/reshape_gpu_test.ts b/src/math/webgl/reshape_gpu_test.ts
deleted file mode 100644
index 0f83a6e69e..0000000000
--- a/src/math/webgl/reshape_gpu_test.ts
+++ /dev/null
@@ -1,88 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import * as reshape_gpu from './reshape_gpu';
-
-describe('reshape_gpu', () => {
-  let gpgpu: GPGPUContext;
-
-  beforeEach(() => {
-    gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-  });
-
-  afterEach(() => {
-    gpgpu.dispose();
-  });
-
-  it('reshape a 2x3 matrix into the same size', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 2, 3);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 matrix into a column (6x1)', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 6, 1);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 matrix into a row (1x6) vector', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 1, 6);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 into a 3x2 matrix', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 3, 2);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 into a 3x1 causes an error', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const f = () => {
-      uploadReshapeDownload(a, 2, 3, 3, 1);
-    };
-
-    expect(f).toThrowError();
-  });
-
-  function uploadReshapeDownload(
-      a: Float32Array, aNumRows: number, aNumCols: number,
-      resultNumRows: number, resultNumCols: number): Float32Array {
-    const program = gpgpu.createProgram(reshape_gpu.getFragmentShaderSource());
-
-    const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-    gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-
-    const resultTexture: WebGLTexture =
-        gpgpu.createMatrixTexture(resultNumRows, resultNumCols);
-
-    reshape_gpu.reshape(
-        gpgpu, program, aTexture, aNumRows, aNumCols, resultTexture,
-        resultNumRows, resultNumCols);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTexture, resultNumRows, resultNumCols);
-
-    gpgpu.deleteMatrixTexture(aTexture);
-    gpgpu.deleteMatrixTexture(resultTexture);
-    gpgpu.deleteProgram(program);
-
-    return result;
-  }
-});
diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts
index 3491da228d..acc819c3f6 100644
--- a/src/math/webgl/resize_bilinear_gpu.ts
+++ b/src/math/webgl/resize_bilinear_gpu.ts
@@ -13,79 +13,62 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as conv_util from '../conv_util';
-
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    inputShapeRCD: [number, number, number],
-    outputDimensionsRowCol: [number, number], alignCorners: boolean): string {
-  const depth = inputShapeRCD[2];
-
-  const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD);
-
-  const effectiveInputShapeRCD = alignCorners ?
-      [inputShapeRCD[0] - 1, inputShapeRCD[1] - 1, depth] :
-      inputShapeRCD;
-
-  const effectiveOutputShapeRCD = alignCorners ?
-      [outputDimensionsRowCol[0] - 1, outputDimensionsRowCol[1] - 1, depth] :
-      [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth];
-
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    varying vec2 resultUV;
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    const vec2 inputShapeCR = vec2(${inputShapeRCD[1]}, ${inputShapeRCD[0]});
-    const vec2 inputShapeTexCR = vec2(
-        ${inputTexShapeRC[1]}, ${inputTexShapeRC[0]});
-
-    const vec2 effectiveInputOverOutputRatioCR = vec2(
-        ${effectiveInputShapeRCD[1] / effectiveOutputShapeRCD[1]},
-        ${effectiveInputShapeRCD[0] / effectiveOutputShapeRCD[0]});
-
-    float sampleInput(float col, float row, float d) {
-      vec2 uv = (vec2(col * ${depth}.0 + d, row) + halfCR) / inputShapeTexCR;
-      return texture2D(matrixA, uv).r;
-    }
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d).
-      vec2 yCR = vec2(floor(yTexCR.x / ${depth}.0), yTexCR.y);
-      float d = mod(yTexCR.x, ${depth}.0);
-
-      // Fractional source index.
-      vec2 sourceFracIndexCR = yCR * effectiveInputOverOutputRatioCR;
-
-      // Compute the four integer indices.
-      vec2 sourceFloorCR = floor(sourceFracIndexCR);
-      vec2 sourceCeilCR = min(inputShapeCR - 1.0, ceil(sourceFracIndexCR));
-
-      float topLeft = sampleInput(sourceFloorCR[0], sourceFloorCR[1], d);
-      float bottomLeft = sampleInput(sourceFloorCR[0], sourceCeilCR[1], d);
-      float topRight = sampleInput(sourceCeilCR[0], sourceFloorCR[1], d);
-      float bottomRight = sampleInput(sourceCeilCR[0], sourceCeilCR[1], d);
-
-      vec2 fracCR = sourceFracIndexCR - sourceFloorCR;
-
-      float top = topLeft + (topRight - topLeft) * fracCR[0];
-      float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR[0];
-      float newValue = top + (bottom - top) * fracCR[1];
-
-      gl_FragColor = vec4(newValue, 0.0, 0.0, 0.0);
-    }`;
-}
-
-export function resizeBilinear(
-    gpgpu: GPGPUContext, resizeBilinearProgram: WebGLProgram, a: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(resizeBilinearProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-  gpgpu.executeProgram();
+import {GPGPUProgram} from './gpgpu_math';
+
+export class ResizeBilinearProgram implements GPGPUProgram {
+  variableNames = ['A'];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+
+  constructor(
+      inputShape: [number, number, number],
+      outputDimensionsRowCol: [number, number], alignCorners: boolean) {
+    const depth = inputShape[2];
+    this.outputShape =
+        [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth];
+    this.params = [alignCorners];
+
+    const effectiveInputShape = alignCorners ?
+        [inputShape[0] - 1, inputShape[1] - 1, depth] :
+        inputShape;
+
+    const effectiveOutputShape = alignCorners ?
+        [this.outputShape[0] - 1, this.outputShape[1] - 1, depth] :
+        this.outputShape;
+    this.userCode = `
+      const vec2 effectiveInputOverOutputRatioRC = vec2(
+          ${effectiveInputShape[0] /
+        effectiveOutputShape[0]},
+          ${effectiveInputShape[1] /
+        effectiveOutputShape[1]});
+      const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0);
+
+      void main() {
+        vec3 coords = getOutputCoords();
+        vec2 yRC = coords.xy;
+        float d = coords.z;
+
+        // Fractional source index.
+        vec2 sourceFracIndexRC = yRC * effectiveInputOverOutputRatioRC;
+
+        // Compute the four integer indices.
+        vec2 sourceFloorRC = floor(sourceFracIndexRC);
+        vec2 sourceCeilRC = min(inputShapeRC - 1.0, ceil(sourceFracIndexRC));
+
+        float topLeft = getA(sourceFloorRC[0], sourceFloorRC[1], d);
+        float bottomLeft = getA(sourceCeilRC[0], sourceFloorRC[1], d);
+        float topRight = getA(sourceFloorRC[0], sourceCeilRC[1], d);
+        float bottomRight = getA(sourceCeilRC[0], sourceCeilRC[1], d);
+
+        vec2 fracRC = sourceFracIndexRC - sourceFloorRC;
+
+        float top = topLeft + (topRight - topLeft) * fracRC[1];
+        float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC[1];
+        float newValue = top + (bottom - top) * fracRC[0];
+
+        setOutput(newValue);
+      }
+    `;
+  }
 }
diff --git a/src/math/webgl/resize_bilinear_gpu_test.ts b/src/math/webgl/resize_bilinear_gpu_test.ts
index 3adb55af61..7b8a736af8 100644
--- a/src/math/webgl/resize_bilinear_gpu_test.ts
+++ b/src/math/webgl/resize_bilinear_gpu_test.ts
@@ -14,52 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
-import * as resize_bilinear_gpu from './resize_bilinear_gpu';
+import * as gpgpu_math from './gpgpu_math';
+import {ResizeBilinearProgram} from './resize_bilinear_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('resize bilinear', () => {
-  function uploadResizeBilinearDownload(
-      a: Float32Array, aShapeRowColDepth: [number, number, number],
-      outputDimensionsRowCol: [number, number],
-      alignCorners: boolean): Float32Array {
-    const aTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] = [
-      outputDimensionsRowCol[0], outputDimensionsRowCol[1], aShapeRowColDepth[2]
-    ];
-
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = resize_bilinear_gpu.getFragmentShaderSource(
-        aShapeRowColDepth, outputDimensionsRowCol, alignCorners);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a);
-
-    resize_bilinear_gpu.resizeBilinear(
-        gpgpu, program, aTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(aTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
-
   it('simple bilinear', () => {
     const a = new Float32Array([2, 2, 4, 4]);
 
@@ -123,3 +85,29 @@ describe('resize bilinear', () => {
         1e-4);
   });
 });
+
+function uploadResizeBilinearDownload(
+    a: Float32Array, aShape: [number, number, number],
+    outputDimensionsRowCol: [number, number],
+    alignCorners: boolean): Float32Array {
+  const gpgpu = new GPGPUContext();
+  gpgpu.enableAutomaticDebugValidation(true);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const program =
+      new ResizeBilinearProgram(aShape, outputDimensionsRowCol, alignCorners);
+  const aArr = Array3D.new(aShape, a);
+  const rArr = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr], rArr);
+  gpgpu_math.runProgram(binary, [aArr], rArr);
+  const result = rArr.getValues();
+
+  aArr.dispose();
+  rArr.dispose();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return result;
+}

From 9e890bbe5fdf6ab2c75ea01aa68ef5af652e9682 Mon Sep 17 00:00:00 2001
From: Daniel Smilkov <dsmilkov@gmail.com>
Date: Mon, 21 Aug 2017 11:29:02 -0400
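Subject: [PATCH 10/10] address comments

Follow-up changes from review of this series:

- copy_gpu: make getCustomSetupFunc a method of Copy2DProgram and update
  its callers in math_gpu.ts and copy_gpu_test.ts.
- resize_bilinear_gpu: rename ResizeBilinearProgram to
  ResizeBilinear3DProgram and keep each input/output ratio expression on
  a single line.
- math_gpu: dispose maxPoolPositions after the MaxPool2DBackpropProgram
  run, and remove the `import * as copy_gpu` namespace import.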

---
 src/math/math_gpu.ts                       | 13 +++++++-----
 src/math/webgl/copy_gpu.ts                 | 24 +++++++++++-----------
 src/math/webgl/copy_gpu_test.ts            |  7 ++-----
 src/math/webgl/resize_bilinear_gpu.ts      |  8 +++-----
 src/math/webgl/resize_bilinear_gpu_test.ts |  4 ++--
 5 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index f63f070f33..9d00f22a67 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -25,7 +25,6 @@ import {Concat3DProgram} from './webgl/concat3d_gpu';
 // tslint:disable-next-line:max-line-length
 import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu';
 import {Conv2DProgram} from './webgl/conv_gpu';
-import * as copy_gpu from './webgl/copy_gpu';
 import {Copy2DProgram} from './webgl/copy_gpu';
 import {GPGPUContext} from './webgl/gpgpu_context';
 import * as gpgpu_math from './webgl/gpgpu_math';
@@ -37,7 +36,7 @@ import {MinMaxProgram} from './webgl/minmax_gpu';
 import {MatMulProgram} from './webgl/mulmat_gpu';
 import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceSumProgram} from './webgl/reducesum_gpu';
-import {ResizeBilinearProgram} from './webgl/resize_bilinear_gpu';
+import {ResizeBilinear3DProgram} from './webgl/resize_bilinear_gpu';
 import {TextureManager} from './webgl/texture_manager';
 import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
@@ -97,7 +96,7 @@ export class NDArrayMathGPU extends NDArrayMath {
       destBeginRowCol: [number, number],
       destSizeRowCol: [number, number]): void {
     const program = new Copy2DProgram(sourceSizeRowCol[1], destSizeRowCol[1]);
-    const customSetup = copy_gpu.getCustomSetupFunc(
+    const customSetup = program.getCustomSetupFunc(
         sourceBeginRowCol, destBeginRowCol, destSizeRowCol);
     this.compileAndRun(program, [source], dest, customSetup);
   }
@@ -349,14 +348,18 @@ export class NDArrayMathGPU extends NDArrayMath {
 
     const maxPoolBackPropProgram =
         new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad);
-    return this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]);
+
+    const result =
+        this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]);
+    maxPoolPositions.dispose();
+    return result as Array3D;
   }
 
   protected resizeBilinear3DInternal(
       x: Array3D, newShape2D: [number, number],
       alignCorners: boolean): Array3D {
     const program =
-        new ResizeBilinearProgram(x.shape, newShape2D, alignCorners);
+        new ResizeBilinear3DProgram(x.shape, newShape2D, alignCorners);
     return this.compileAndRun(program, [x]);
   }
 
diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts
index 709e034d94..1ea1418c6b 100644
--- a/src/math/webgl/copy_gpu.ts
+++ b/src/math/webgl/copy_gpu.ts
@@ -40,17 +40,17 @@ export class Copy2DProgram implements GPGPUProgram {
       }
     `;
   }
-}
 
-export function getCustomSetupFunc(
-    sourceStart: [number, number], destStart: [number, number],
-    destSize: [number, number]) {
-  return (gpgpu: GPGPUContext) => {
-    gpgpu.setOutputMatrixWriteRegion(
-        destStart[0], destSize[0], destStart[1], destSize[1]);
-    const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart');
-    gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]);
-    const destStartCRLoc = gpgpu.getUniformLocation('destStart');
-    gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]);
-  };
+  getCustomSetupFunc(
+      sourceStart: [number, number], destStart: [number, number],
+      destSize: [number, number]) {
+    return (gpgpu: GPGPUContext) => {
+      gpgpu.setOutputMatrixWriteRegion(
+          destStart[0], destSize[0], destStart[1], destSize[1]);
+      const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStart');
+      gpgpu.gl.uniform2f(sourceStartCRLoc, sourceStart[0], sourceStart[1]);
+      const destStartCRLoc = gpgpu.getUniformLocation('destStart');
+      gpgpu.gl.uniform2f(destStartCRLoc, destStart[0], destStart[1]);
+    };
+  }
 }
diff --git a/src/math/webgl/copy_gpu_test.ts b/src/math/webgl/copy_gpu_test.ts
index 3a06ae2778..59904530a7 100644
--- a/src/math/webgl/copy_gpu_test.ts
+++ b/src/math/webgl/copy_gpu_test.ts
@@ -15,8 +15,6 @@ limitations under the License.
 
 import * as test_util from '../../test_util';
 import {Array2D, initializeGPU} from '../ndarray';
-
-import * as copy_gpu from './copy_gpu';
 import {Copy2DProgram} from './copy_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
@@ -36,8 +34,7 @@ function uploadCopyDownload(
   const dest = Array2D.new(destShape, destVals);
 
   const binary = gpgpu_math.compileProgram(gpgpu, program, [source], dest);
-  const customSetup =
-      copy_gpu.getCustomSetupFunc(srcStart, destStart, destSize);
+  const customSetup = program.getCustomSetupFunc(srcStart, destStart, destSize);
   gpgpu_math.runProgram(binary, [source], dest, customSetup);
   const result = dest.getValues();
 
@@ -174,7 +171,7 @@ describe('copy_gpu', () => {
 
     for (let i = 0; i < 10; ++i) {
       const offset: [number, number] = [0, i];
-      const customSetup = copy_gpu.getCustomSetupFunc(offset, offset, size);
+      const customSetup = program.getCustomSetupFunc(offset, offset, size);
       gpgpu_math.runProgram(binary, [source], dest, customSetup);
     }
     const res = dest.getValues();
diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts
index acc819c3f6..9ffb6707f7 100644
--- a/src/math/webgl/resize_bilinear_gpu.ts
+++ b/src/math/webgl/resize_bilinear_gpu.ts
@@ -15,7 +15,7 @@ limitations under the License.
 
 import {GPGPUProgram} from './gpgpu_math';
 
-export class ResizeBilinearProgram implements GPGPUProgram {
+export class ResizeBilinear3DProgram implements GPGPUProgram {
   variableNames = ['A'];
   params: Array<{}> = [];
   outputShape: number[] = [];
@@ -38,10 +38,8 @@ export class ResizeBilinearProgram implements GPGPUProgram {
         this.outputShape;
     this.userCode = `
       const vec2 effectiveInputOverOutputRatioRC = vec2(
-          ${effectiveInputShape[0] /
-        effectiveOutputShape[0]},
-          ${effectiveInputShape[1] /
-        effectiveOutputShape[1]});
+          ${effectiveInputShape[0] / effectiveOutputShape[0]},
+          ${effectiveInputShape[1] / effectiveOutputShape[1]});
       const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0);
 
       void main() {
diff --git a/src/math/webgl/resize_bilinear_gpu_test.ts b/src/math/webgl/resize_bilinear_gpu_test.ts
index 7b8a736af8..9382b83db5 100644
--- a/src/math/webgl/resize_bilinear_gpu_test.ts
+++ b/src/math/webgl/resize_bilinear_gpu_test.ts
@@ -18,7 +18,7 @@ import {Array3D, initializeGPU, NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {ResizeBilinearProgram} from './resize_bilinear_gpu';
+import {ResizeBilinear3DProgram} from './resize_bilinear_gpu';
 import {TextureManager} from './texture_manager';
 
 describe('resize bilinear', () => {
@@ -96,7 +96,7 @@ function uploadResizeBilinearDownload(
   initializeGPU(gpgpu, textureManager);
 
   const program =
-      new ResizeBilinearProgram(aShape, outputDimensionsRowCol, alignCorners);
+      new ResizeBilinear3DProgram(aShape, outputDimensionsRowCol, alignCorners);
   const aArr = Array3D.new(aShape, a);
   const rArr = NDArray.zeros(program.outputShape);
   const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr], rArr);