diff --git a/.vscode/settings.json b/.vscode/settings.json
index 67a611b80a..e1084bbac4 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -14,5 +14,6 @@
   "editor.insertSpaces": true,
   "files.insertFinalNewline": true,
   "editor.detectIndentation": false,
+  "editor.wrappingIndent": "none",
   "typescript.tsdk": "node_modules/typescript/lib"
 }
diff --git a/src/graph_runner.ts b/src/graph_runner.ts
index f5404541a5..03cdfbfdc4 100644
--- a/src/graph_runner.ts
+++ b/src/graph_runner.ts
@@ -211,7 +211,7 @@ export class GraphRunner {
       }
 
     });
-    setTimeout(() => this.trainNetwork());
+    requestAnimationFrame(() => this.trainNetwork());
   }
 
   infer(
@@ -243,7 +243,7 @@ export class GraphRunner {
     this.currentInferenceLoopNumPasses = numPasses;
     if (!this.isInferring) {
       this.inferencePassesThisRun = 0;
-      setTimeout(() => this.inferNetwork());
+      requestAnimationFrame(() => this.inferNetwork());
     }
     this.isInferring = true;
   }
diff --git a/src/math/math.ts b/src/math/math.ts
index 13a02935e7..645f314c71 100644
--- a/src/math/math.ts
+++ b/src/math/math.ts
@@ -71,9 +71,10 @@ export abstract class NDArrayMath {
    */
   enableDebugMode() {
     this.debugMode = true;
-    console.warn('Debugging mode is ON. The output of every math call will ' +
-                  'be downloaded to CPU and checked for NaNs. ' +
-                  'This significantly impacts performance.');
+    console.warn(
+        'Debugging mode is ON. The output of every math call will ' +
+        'be downloaded to CPU and checked for NaNs. ' +
+        'This significantly impacts performance.');
   }
 
   /**
@@ -97,7 +98,7 @@ export abstract class NDArrayMath {
   endScope(result: ScopeResult) {
     let arraysToKeep = this.activeScopeNDArraysToKeep;
     if (result != null) {
-      arraysToKeep = arraysToKeep.concat(result as NDArray|NDArray[]);
+      arraysToKeep = arraysToKeep.concat(result as NDArray | NDArray[]);
     }
     // Dispose the current scope.
     for (let i = 0; i < this.activeScope.length; i++) {
@@ -321,22 +322,15 @@ export abstract class NDArrayMath {
   protected abstract cloneInternal<T extends NDArray>(ndarray: T): T;
 
   /**
-   * Reshapes an NDArray to a new shape. The size of the input NDArray must
-   * match the size of the requested shape.
-   * @param ndarray The input NDArray.
-   * @param newShape The new shape to reshape the NDArray to. Must be the same
-   * size as the NDArray.
+   * @deprecated Please call reshape() directly on the ndarray object.
    */
   reshape<T1 extends NDArray, T2 extends NDArray>(
       ndarray: T1, newShape: number[]): T2 {
-    util.assert(
-        ndarray.size === util.sizeFromShape(newShape),
-        `Error in reshape: old size ${ndarray.size} must match new size ` +
-            `${util.sizeFromShape(newShape)}.`);
-    return this.track(this.reshapeInternal<T1, T2>(ndarray, newShape));
+    console.warn(
+        'math.reshape() is deprecated. Please call reshape() ' +
+        'directly on the ndarray object');
+    return ndarray.reshape(newShape);
   }
-  protected abstract reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2;
 
   /**
    * Extracts a slice from a matrix. The operation extraces a slice from input
@@ -1148,7 +1142,8 @@ export abstract class NDArrayMath {
    * @param h Array of previous cell outputs.
    * @return Tuple [nextCellStates, cellOutputs]
    */
-  multiRNNCell(lstmCells: LSTMCell[], data: Array2D, c: Array2D[],
+  multiRNNCell(
+      lstmCells: LSTMCell[], data: Array2D, c: Array2D[],
       h: Array2D[]): [Array2D[], Array2D[]] {
     util.assert(
         data.shape[0] === 1,
@@ -1187,8 +1182,9 @@ export abstract class NDArrayMath {
    * @param h Previous cell output.
    * @return Tuple [nextCellState, cellOutput]
    */
-  basicLSTMCell(forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D,
-      data: Array2D, c: Array2D, h: Array2D): [Array2D, Array2D] {
+  basicLSTMCell(
+      forgetBias: Scalar, lstmKernel: Array2D, lstmBias: Array1D, data: Array2D,
+      c: Array2D, h: Array2D): [Array2D, Array2D] {
     const res = this.scope(() => {
       util.assert(
           data.shape[0] === 1,
@@ -1207,25 +1203,25 @@ export abstract class NDArrayMath {
 
       // i = input_gate, j = new_input, f = forget_gate, o = output_gate
       const i = this.slice2D(res, [0, 0], [res.shape[0], res.shape[1] / 4]);
-      const j = this.slice2D(res, [0, res.shape[1] / 4 * 1],
-          [res.shape[0], res.shape[1] / 4]);
-      const f = this.slice2D(res, [0, res.shape[1] / 4 * 2],
-          [res.shape[0], res.shape[1] / 4]);
-      const o = this.slice2D(res, [0, res.shape[1] / 4 * 3],
-          [res.shape[0], res.shape[1] / 4]);
-
-      const newC = this.add(
-          this.multiplyStrict(c,
-              this.sigmoid(this.scalarPlusArray(forgetBias, f))),
-          this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
-      const newH = this.multiplyStrict(
-          this.tanh(newC), this.sigmoid(o)) as Array2D;
+      const j = this.slice2D(
+          res, [0, res.shape[1] / 4 * 1], [res.shape[0], res.shape[1] / 4]);
+      const f = this.slice2D(
+          res, [0, res.shape[1] / 4 * 2], [res.shape[0], res.shape[1] / 4]);
+      const o = this.slice2D(
+          res, [0, res.shape[1] / 4 * 3], [res.shape[0], res.shape[1] / 4]);
+
+      const newC =
+          this.add(
+              this.multiplyStrict(
+                  c, this.sigmoid(this.scalarPlusArray(forgetBias, f))),
+              this.multiplyStrict(this.sigmoid(i), this.tanh(j))) as Array2D;
+      const newH =
+          this.multiplyStrict(this.tanh(newC), this.sigmoid(o)) as Array2D;
 
       return [newC, newH];
     });
     return [res[0], res[1]];
   }
-
 }
 
 export enum MatrixOrientation {
diff --git a/src/math/math_cpu.ts b/src/math/math_cpu.ts
index 4298b1974b..7231268000 100644
--- a/src/math/math_cpu.ts
+++ b/src/math/math_cpu.ts
@@ -31,11 +31,6 @@ export class NDArrayMathCPU extends NDArrayMath {
         ndarray.shape, {values: new Float32Array(ndarray.getValues())});
   }
 
-  protected reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2 {
-    return this.cloneInternal(ndarray).reshape<T2>(newShape);
-  }
-
   protected slice2DInternal(
       input: Array2D, beginRowCol: [number, number],
       sizeRowCol: [number, number]): Array2D {
diff --git a/src/math/math_gpu.ts b/src/math/math_gpu.ts
index ceed2748b2..9d00f22a67 100644
--- a/src/math/math_gpu.ts
+++ b/src/math/math_gpu.ts
@@ -13,23 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as util from '../util';
-
-import * as concat3d_util from './concat3d_util';
-import * as conv_util from './conv_util';
 import {MatrixOrientation, NDArrayMath} from './math';
 import * as ndarray from './ndarray';
 import {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './ndarray';
 import {AddScaledMatProgram} from './webgl/addscaledmat_gpu';
 import {ArgMaxEqualsProgram} from './webgl/argmaxequals_gpu';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
-import * as batchnorm_gpu from './webgl/batchnorm_gpu';
+import {BatchNormProgram} from './webgl/batchnorm_gpu';
 import {BinaryOpProgram} from './webgl/binaryop_gpu';
-import * as concat3d_gpu from './webgl/concat3d_gpu';
+import {Concat3DProgram} from './webgl/concat3d_gpu';
 // tslint:disable-next-line:max-line-length
 import {Conv2DDerBiasProgram, Conv2DDerWeightsProgram, Conv2DTransposeProgram} from './webgl/conv_backprop_gpu';
 import {Conv2DProgram} from './webgl/conv_gpu';
-import * as copy_gpu from './webgl/copy_gpu';
+import {Copy2DProgram} from './webgl/copy_gpu';
 import {GPGPUContext} from './webgl/gpgpu_context';
 import * as gpgpu_math from './webgl/gpgpu_math';
 import {GPGPUBinary, GPGPUProgram} from './webgl/gpgpu_math';
@@ -40,31 +36,14 @@ import {MinMaxProgram} from './webgl/minmax_gpu';
 import {MatMulProgram} from './webgl/mulmat_gpu';
 import {Pool2DProgram} from './webgl/pool_gpu';
 import {ReduceSumProgram} from './webgl/reducesum_gpu';
-import * as reshape_gpu from './webgl/reshape_gpu';
-import * as resize_bilinear_gpu from './webgl/resize_bilinear_gpu';
+import {ResizeBilinear3DProgram} from './webgl/resize_bilinear_gpu';
 import {TextureManager} from './webgl/texture_manager';
 import {UnaryOp, UnaryOpProgram} from './webgl/unaryop_gpu';
 import * as webgl_util from './webgl/webgl_util';
 
-const BATCHNORM_PROG = 'batchnorm';
-const COPY_PROG = 'copy';
-const CONCAT_PROG = 'concat';
-const RESHAPE_PROG = 'reshape';
-const RESIZE_BILINEAR_PROG = 'resizebilin';
-
-function makeCopyProgramName(
-    sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destSizeRowCol: [number, number]): string {
-  const shapeName = `${sourceShapeRowCol[0]}_${sourceShapeRowCol[1]}`;
-  const srcSizeName = `${sourceSizeRowCol[0]}_${sourceSizeRowCol[1]}`;
-  const dstSizeName = `${destSizeRowCol[0]}_${destSizeRowCol[1]}`;
-  return `${COPY_PROG}_${shapeName}_${srcSizeName}_${dstSizeName}`;
-}
-
 export class NDArrayMathGPU extends NDArrayMath {
   private gpgpu: GPGPUContext;
   private textureManager: TextureManager;
-  private programCache: {[key: string]: WebGLProgram} = {};
   private binaryCache: {[key: string]: GPGPUBinary} = {};
   private gpgpuCreatedLocally: boolean;
 
@@ -89,53 +68,14 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   protected cloneInternal<T extends NDArray>(ndarray: T): T {
-    const textureShapeRC = ndarray.getTextureShapeRC();
-    const program = this.getAndSaveProgram(
-        makeCopyProgramName(textureShapeRC, textureShapeRC, textureShapeRC),
-        () => copy_gpu.getFragmentShaderSource(
-            textureShapeRC, textureShapeRC, textureShapeRC));
-
-    const resultTexture = this.textureManager.acquireTexture(textureShapeRC);
-
-    copy_gpu.copy(
-        this.gpgpu, program, ndarray.getTexture(), textureShapeRC, [0, 0],
-        textureShapeRC, resultTexture, textureShapeRC, [0, 0], textureShapeRC);
-
-    return NDArray.make<T>(
-        ndarray.shape, {texture: resultTexture, textureShapeRC});
-  }
-
-  protected reshapeInternal<T1 extends NDArray, T2 extends NDArray>(
-      ndarray: T1, newShape: number[]): T2 {
-    let newTexShape: [number, number];
-
-    switch (newShape.length) {
-      case 0:
-        newTexShape = [1, 1];
-        break;
-      case 1:
-        newTexShape = [newShape[0], 1];
-        break;
-      case 2:
-        newTexShape = [newShape[0], newShape[1]];
-        break;
-      case 3:
-        newTexShape = [newShape[0], newShape[1] * newShape[2]];
-        break;
-      default:
-        throw Error(
-            `Reshapes into ${newShape.length}-dim ndarray is not yet ` +
-            `supported on GPU`);
-    }
-
-    const actualTexShape = ndarray.getTextureShapeRC(newTexShape);
-    let clonedArray: T1;
-    if (!util.arraysEqual(actualTexShape, newTexShape)) {
-      clonedArray = this.reshapeTexture(ndarray, newTexShape);
-    } else {
-      clonedArray = this.cloneInternal(ndarray);
-    }
-    return clonedArray.reshape<T2>(newShape);
+    const texShape = ndarray.getTextureShapeRC();
+    // Pretend the source has a logical shape that matches its texture shape.
+    const source = ndarray.as2D(texShape[0], texShape[1]);
+    // Do the same for the output.
+    const output = this.makeOutputArray(texShape) as Array2D;
+    this.copy2D(source, [0, 0], texShape, output, [0, 0], texShape);
+    // Get back to the original logical shape.
+    return output.reshape(ndarray.shape);
   }
 
   protected slice2DInternal(
@@ -155,65 +95,15 @@ export class NDArrayMathGPU extends NDArrayMath {
       sourceSizeRowCol: [number, number], dest: Array2D,
       destBeginRowCol: [number, number],
       destSizeRowCol: [number, number]): void {
-    const sourceShapeRC = source.getTextureShapeRC();
-    const destShapeRC = dest.getTextureShapeRC();
-    const program = this.getAndSaveProgram(
-        makeCopyProgramName(sourceShapeRC, sourceSizeRowCol, destSizeRowCol),
-        () => copy_gpu.getFragmentShaderSource(
-            sourceShapeRC, sourceSizeRowCol, destSizeRowCol));
-
-    copy_gpu.copy(
-        this.gpgpu, program, source.getTexture(), sourceShapeRC,
-        sourceBeginRowCol, sourceSizeRowCol, dest.getTexture(), destShapeRC,
-        destBeginRowCol, destSizeRowCol);
+    const program = new Copy2DProgram(sourceSizeRowCol[1], destSizeRowCol[1]);
+    const customSetup = program.getCustomSetupFunc(
+        sourceBeginRowCol, destBeginRowCol, destSizeRowCol);
+    this.compileAndRun(program, [source], dest, customSetup);
   }
 
   protected concat3DInternal(x1: Array3D, x2: Array3D, axis: number): Array3D {
-    const x1TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x1.shape);
-    const x2TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x2.shape);
-
-    // If the texture shapes doesn't match the shapes that shaders expect,
-    // do physical texture reshapes on the GPU.
-    const actualX1TexShape = x1.getTextureShapeRC(x1TexShapeRC);
-    let cleanupX1 = false;
-    if (!util.arraysEqual(actualX1TexShape, x1TexShapeRC)) {
-      x1 = this.reshapeTexture(x1, x1TexShapeRC);
-      cleanupX1 = true;
-    }
-    const actualX2TexShape = x2.getTextureShapeRC(x2TexShapeRC);
-    let cleanupX2 = false;
-    if (!util.arraysEqual(actualX2TexShape, x2TexShapeRC)) {
-      x2 = this.reshapeTexture(x2, x2TexShapeRC);
-      cleanupX2 = true;
-    }
-
-    const resultShapeRCD =
-        concat3d_util.computeConcat3DOutputShape(x1.shape, x2.shape, axis);
-
-    const program = this.getAndSaveProgram(
-        `${CONCAT_PROG}_${x1.shape}_${x2.shape}_${axis}`,
-        () => concat3d_gpu.getFragmentShaderSource(
-            x1.shape, x2.shape, resultShapeRCD, axis));
-
-    const resultTexShape = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-    const resultTex = this.textureManager.acquireTexture(resultTexShape);
-
-    concat3d_gpu.concat3D(
-        this.gpgpu, program, x1.getTexture(), x2.getTexture(), resultTex,
-        resultTexShape);
-
-    if (cleanupX1) {
-      x1.dispose();
-    }
-
-    if (cleanupX2) {
-      x2.dispose();
-    }
-
-    return NDArray.make<Array3D>(
-        resultShapeRCD, {texture: resultTex, textureShapeRC: resultTexShape});
+    const program = new Concat3DProgram(x1.shape, x2.shape, axis);
+    return this.compileAndRun(program, [x1, x2]);
   }
 
   protected scaledArrayAddInternal<T extends NDArray>(
@@ -235,33 +125,19 @@ export class NDArrayMathGPU extends NDArrayMath {
   }
 
   private compileAndRun<T extends NDArray, K extends NDArray>(
-      program: GPGPUProgram, inputs: T[]): K {
-    const output = this.makeOutputArray<K>(program.outputShape);
+      program: GPGPUProgram, inputs: T[], output?: K,
+      customSetup?: (gpgpu: GPGPUContext) => void): K {
+    if (output == null) {
+      output = this.makeOutputArray<K>(program.outputShape);
+    }
     const key = gpgpu_math.makeShaderKey(program, inputs, output);
     const binary = this.getAndSaveBinary(key, () => {
       return gpgpu_math.compileProgram(this.gpgpu, program, inputs, output);
     });
-    gpgpu_math.runProgram(binary, inputs, output);
+    gpgpu_math.runProgram(binary, inputs, output, customSetup);
     return output;
   }
 
-  private reshapeTexture<T extends NDArray>(a: T, newTextureShape: [
-    number, number
-  ]): T {
-    const aTexShape = a.getTextureShapeRC();
-
-    const program = this.getAndSaveProgram(
-        RESHAPE_PROG, () => reshape_gpu.getFragmentShaderSource());
-
-    const resultTexture = this.textureManager.acquireTexture(newTextureShape);
-    reshape_gpu.reshape(
-        this.gpgpu, program, a.getTexture(), aTexShape[0], aTexShape[1],
-        resultTexture, newTextureShape[0], newTextureShape[1]);
-
-    return NDArray.make<T>(
-        a.shape, {texture: resultTexture, textureShapeRC: newTextureShape});
-  }
-
   protected matMulInternal(
       a: Array2D, b: Array2D, aOrientation: MatrixOrientation,
       bOrientation: MatrixOrientation): Array2D {
@@ -277,92 +153,26 @@ export class NDArrayMathGPU extends NDArrayMath {
 
   protected batchNormalization3DInternal(
       x: Array3D, mean: Array3D|Array1D, variance: Array3D|Array1D,
-      varianceEpsilon: number, scale?: Array3D|Array1D,
+      varianceEpsilon = 0.000001, scale?: Array3D|Array1D,
       offset?: Array3D|Array1D): Array3D {
-    const xTexShape = x.getTextureShapeRC();
-
-    let cleanupMean = false;
-    const preferredMeanTexShape: [number, number] =
-        mean.rank === 1 ? [1, mean.size] : xTexShape;
-    let meanTexShape = mean.getTextureShapeRC(preferredMeanTexShape);
-    if (!util.arraysEqual(meanTexShape, preferredMeanTexShape)) {
-      mean = this.reshapeTexture(mean, preferredMeanTexShape);
-      meanTexShape = preferredMeanTexShape;
-      cleanupMean = true;
-    }
+    const inputs = [x, mean, variance];
 
-    let cleanupVariance = false;
-    const preferredVarianceTexShape: [number, number] =
-        variance.rank === 1 ? [1, variance.size] : xTexShape;
-    let varianceTexShape = variance.getTextureShapeRC(preferredMeanTexShape);
-    if (!util.arraysEqual(varianceTexShape, preferredVarianceTexShape)) {
-      variance = this.reshapeTexture(variance, preferredVarianceTexShape);
-      varianceTexShape = preferredVarianceTexShape;
-      cleanupVariance = true;
-    }
-
-    let scaleTexShape: [number, number]|null = null;
-    let cleanupScale = false;
-    if (scale != null) {
-      const preferredScaleTexShape: [number, number] =
-          scale.rank === 1 ? [1, scale.size] : xTexShape;
-
-      scaleTexShape = scale.getTextureShapeRC(preferredScaleTexShape);
-      if (!util.arraysEqual(scaleTexShape, preferredScaleTexShape)) {
-        scale = this.reshapeTexture(scale, preferredScaleTexShape);
-        scaleTexShape = preferredScaleTexShape;
-        cleanupScale = true;
-      }
-    }
-
-    let offsetTexShape: [number, number]|null = null;
-    let cleanupOffset = false;
+    let offsetShape = null;
     if (offset != null) {
-      const preferredOffsetTexShape: [number, number] =
-          offset.rank === 1 ? [1, offset.size] : xTexShape;
-
-      offsetTexShape = offset.getTextureShapeRC(preferredOffsetTexShape);
-      if (!util.arraysEqual(offsetTexShape, preferredOffsetTexShape)) {
-        offset = this.reshapeTexture(offset, preferredOffsetTexShape);
-        offsetTexShape = preferredOffsetTexShape;
-        cleanupOffset = true;
-      }
+      offsetShape = offset.shape;
+      inputs.push(offset);
     }
 
-    const resultTexShape: [number, number] = x.getTextureShapeRC();
-
-    const program = this.getAndSaveProgram(
-        `${BATCHNORM_PROG}_${xTexShape}_${meanTexShape}_${varianceTexShape}_` +
-            `${scaleTexShape!}_${offsetTexShape!}_${varianceEpsilon}`,
-        () => batchnorm_gpu.getFragmentShaderSource(
-            xTexShape, meanTexShape, varianceTexShape, offsetTexShape,
-            scaleTexShape, varianceEpsilon));
-
-    const resultTexture = this.textureManager.acquireTexture(resultTexShape);
-
-    batchnorm_gpu.batchNormalization(
-        this.gpgpu, program, x.getTexture(), xTexShape, mean.getTexture(),
-        meanTexShape, variance.getTexture(), varianceTexShape,
-        offset != null ? offset.getTexture() : null,
-        offset != null ? offsetTexShape : null,
-        scale != null ? scale.getTexture() : null,
-        scale != null ? scaleTexShape : null, resultTexture, resultTexShape);
-
-    if (cleanupMean) {
-      mean.dispose();
-    }
-    if (cleanupVariance) {
-      variance.dispose();
-    }
-    if (cleanupScale) {
-      scale!.dispose();
-    }
-    if (cleanupOffset) {
-      offset!.dispose();
+    let scaleShape = null;
+    if (scale != null) {
+      scaleShape = scale.shape;
+      inputs.push(scale);
     }
 
-    return NDArray.make<Array3D>(
-        x.shape, {texture: resultTexture, textureShapeRC: resultTexShape});
+    const program = new BatchNormProgram(
+        x.shape, mean.shape, variance.shape, offsetShape, scaleShape,
+        varianceEpsilon);
+    return this.compileAndRun(program, inputs);
   }
 
   protected switchDimInternal<T extends NDArray>(a: T, newDim: number[]): T {
@@ -538,31 +348,19 @@ export class NDArrayMathGPU extends NDArrayMath {
 
     const maxPoolBackPropProgram =
         new MaxPool2DBackpropProgram(dy.shape, fSize, origStride, origPad);
-    return this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]);
+
+    const result =
+        this.compileAndRun(maxPoolBackPropProgram, [dy, maxPoolPositions]);
+    maxPoolPositions.dispose();
+    return result as Array3D;
   }
 
   protected resizeBilinear3DInternal(
       x: Array3D, newShape2D: [number, number],
       alignCorners: boolean): Array3D {
-    const programKey =
-        [RESIZE_BILINEAR_PROG, x.shape, newShape2D, alignCorners].join('_');
-
-    const newShapeRCD: [number, number, number] =
-        [newShape2D[0], newShape2D[1], x.shape[2]];
-    const resultTexShape = conv_util.computeTexShapeFrom3D(newShapeRCD);
-
-    const program = this.getAndSaveProgram(
-        programKey,
-        () => resize_bilinear_gpu.getFragmentShaderSource(
-            x.shape, newShape2D, alignCorners));
-
-    const resultTexture = this.textureManager.acquireTexture(resultTexShape);
-
-    resize_bilinear_gpu.resizeBilinear(
-        this.gpgpu, program, x.getTexture(), resultTexture, resultTexShape);
-
-    return NDArray.make<Array3D>(
-        newShapeRCD, {texture: resultTexture, textureShapeRC: resultTexShape});
+    const program =
+        new ResizeBilinear3DProgram(x.shape, newShape2D, alignCorners);
+    return this.compileAndRun(program, [x]);
   }
 
   private getAndSaveBinary(key: string, getBinary: () => GPGPUBinary):
@@ -573,25 +371,11 @@ export class NDArrayMathGPU extends NDArrayMath {
     return this.binaryCache[key];
   }
 
-  private getAndSaveProgram(programKey: string, getShaderSource: () => string):
-      WebGLProgram {
-    if (!(programKey in this.programCache)) {
-      this.programCache[programKey] =
-          this.gpgpu.createProgram(getShaderSource());
-    }
-    return this.programCache[programKey];
-  }
-
   getTextureManager(): TextureManager {
     return this.textureManager;
   }
 
   dispose() {
-    for (const programKey in this.programCache) {
-      if (this.programCache.hasOwnProperty(programKey)) {
-        this.gpgpu.deleteProgram(this.programCache[programKey]);
-      }
-    }
     for (const key in this.binaryCache) {
       this.gpgpu.deleteProgram(this.binaryCache[key].webGLProgram);
     }
diff --git a/src/math/ndarray.ts b/src/math/ndarray.ts
index e0104f0342..6c97994d00 100644
--- a/src/math/ndarray.ts
+++ b/src/math/ndarray.ts
@@ -135,8 +135,8 @@ export class NDArray {
         return new Array3D(shape as [number, number, number], data) as any;
       case 4:
         return new Array4D(
-                   // tslint:disable-next-line:no-any
-                   shape as [number, number, number, number], data) as any;
+            // tslint:disable-next-line:no-any
+            shape as [number, number, number, number], data) as any;
       default:
         // tslint:disable-next-line:no-any
         return new NDArray(shape, data) as any;
diff --git a/src/math/webgl/batchnorm_gpu.ts b/src/math/webgl/batchnorm_gpu.ts
index 6a93267a97..504ab05af3 100644
--- a/src/math/webgl/batchnorm_gpu.ts
+++ b/src/math/webgl/batchnorm_gpu.ts
@@ -13,119 +13,50 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    xTexShapeRC: [number, number], meanTexShapeRC: [number, number],
-    varianceTexShapeRC: [number, number],
-    offsetTexShapeRC: [number, number]|null,
-    scaleTexShapeRC?: [number, number]|null, varianceEpsilon = 0.001): string {
-  let offsetSamplerSnippet = '';
-  let offsetShapeInitializationSnippet = '';
-  let offsetCoordsSnippet = '';
-  let offsetUVSnippet = '';
-  let offsetValueSnippet = '';
-  let offsetOperationSnippet = '0.0';
-
-  let scaleSamplerSnippet = '';
-  let scaleShapeInitializationSnippet = '';
-  let scaleCoordsSnippet = '';
-  let scaleUVSnippet = '';
-  let scaleValueSnippet = '';
-  let scaleOperationSnippet = '';
-
-  if (offsetTexShapeRC != null) {
-    offsetSamplerSnippet = 'uniform sampler2D offset;';
-    offsetShapeInitializationSnippet = `const vec2 offsetShapeCR = vec2(
-            ${offsetTexShapeRC[1]}, ${offsetTexShapeRC[0]});`;
-    offsetCoordsSnippet = 'vec2 offsetCoordsCR = mod(yTexCR, offsetShapeCR);';
-    offsetUVSnippet =
-        'vec2 offsetUV = (offsetCoordsCR + halfCR) / offsetShapeCR;';
-    offsetValueSnippet = 'float offsetValue = texture2D(offset, offsetUV).r;';
-    offsetOperationSnippet = 'offsetValue';
-  }
-
-  if (scaleTexShapeRC != null) {
-    scaleSamplerSnippet = 'uniform sampler2D scale;';
-    scaleShapeInitializationSnippet = `const vec2 scaleShapeCR = vec2(
-            ${scaleTexShapeRC[1]}, ${scaleTexShapeRC[0]});`;
-    scaleCoordsSnippet = 'vec2 scaleCoordsCR = mod(yTexCR, scaleShapeCR);';
-    scaleUVSnippet = 'vec2 scaleUV = (scaleCoordsCR + halfCR) / scaleShapeCR;';
-    scaleValueSnippet = 'float scaleValue = texture2D(scale, scaleUV).r;';
-    scaleOperationSnippet = 'inv *= scaleValue;';
+import * as util from '../../util';
+import {GPGPUProgram} from './gpgpu_math';
+
+export class BatchNormProgram implements GPGPUProgram {
+  variableNames: string[];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+  supportsBroadcasting = true;
+
+  constructor(
+      xShape: number[], meanShape: number[], varianceShape: number[],
+      offsetShape: number[]|null, scaleShape: number[]|null,
+      varianceEpsilon: number) {
+    this.variableNames = ['x', 'mean', 'variance'];
+    util.assertAndGetBroadcastedShape(xShape, meanShape);
+    util.assertAndGetBroadcastedShape(xShape, varianceShape);
+
+    let offsetSnippet = '0.0';
+    if (offsetShape != null) {
+      util.assertAndGetBroadcastedShape(xShape, offsetShape);
+      this.variableNames.push('offset');
+      offsetSnippet = 'getOffsetAtOutCoords()';
+    }
+
+    let scaleSnippet = '1.0';
+    if (scaleShape != null) {
+      util.assertAndGetBroadcastedShape(xShape, scaleShape);
+      this.variableNames.push('scale');
+      scaleSnippet = 'getScaleAtOutCoords()';
+    }
+
+    this.params = [varianceEpsilon];
+    this.outputShape = xShape;
+    this.userCode = `
+      void main() {
+        float x = getXAtOutCoords();
+        float mean = getMeanAtOutCoords();
+        float variance = getVarianceAtOutCoords();
+        float offset = ${offsetSnippet};
+        float scale = ${scaleSnippet};
+        float inv = scale / sqrt(variance + float(${varianceEpsilon}));
+        setOutput((x - mean) * inv + offset);
+      }
+    `;
   }
-
-  return `
-    precision highp float;
-    uniform sampler2D x;
-    uniform sampler2D mean;
-    uniform sampler2D variance;
-    ${offsetSamplerSnippet}
-    ${scaleSamplerSnippet}
-
-    varying vec2 resultUV;
-
-    const vec2 xShapeCR = vec2(${xTexShapeRC[1]}, ${xTexShapeRC[0]});
-    const vec2 meanShapeCR = vec2(${meanTexShapeRC[1]}, ${meanTexShapeRC[0]});
-    const vec2 varianceShapeCR = vec2(
-        ${varianceTexShapeRC[1]}, ${varianceTexShapeRC[0]});
-
-    ${offsetShapeInitializationSnippet}
-    ${scaleShapeInitializationSnippet}
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-    const float varianceEpsilon = ${varianceEpsilon};
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      vec2 meanCoordsCR = mod(yTexCR, meanShapeCR);
-      vec2 varianceCoordsCR = mod(yTexCR, varianceShapeCR);
-      ${offsetCoordsSnippet}
-      ${scaleCoordsSnippet}
-
-      vec2 meanUV = (meanCoordsCR + halfCR) / meanShapeCR;
-      vec2 varianceUV = (varianceCoordsCR + halfCR) / varianceShapeCR;
-      ${offsetUVSnippet}
-      ${scaleUVSnippet}
-
-      float xValue = texture2D(x, resultUV).r;
-      float meanValue = texture2D(mean, meanUV).r;
-      float varianceValue = texture2D(variance, varianceUV).r;
-      ${offsetValueSnippet}
-      ${scaleValueSnippet}
-
-      float inv = 1.0 / sqrt(varianceValue + varianceEpsilon);
-      ${scaleOperationSnippet}
-      float xTimesInv = xValue * inv;
-      float meanTimesInvWithOffset = ${offsetOperationSnippet}
-          - meanValue * inv;
-
-      gl_FragColor = vec4(xTimesInv + meanTimesInvWithOffset, 0, 0, 0);
-    }`;
 }
-
-export function batchNormalization(
-    gpgpu: GPGPUContext, program: WebGLProgram, x: WebGLTexture,
-    xShapeRowCol: [number, number], mean: WebGLTexture,
-    meanShapeRowCol: [number, number], variance: WebGLTexture,
-    varianceShapeRowCol: [number, number], offset: WebGLTexture|null,
-    offsetShapeRowCol: [number, number]|null, scale: WebGLTexture|null,
-    scaleShapeRowCol: [number, number]|null, result: WebGLTexture,
-    resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(x, 'x', 0);
-  gpgpu.setInputMatrixTexture(mean, 'mean', 1);
-  gpgpu.setInputMatrixTexture(variance, 'variance', 2);
-  let nextIndex = 3;
-  if (offset != null) {
-    gpgpu.setInputMatrixTexture(offset, 'offset', nextIndex);
-    nextIndex++;
-  }
-  if (scale != null) {
-    gpgpu.setInputMatrixTexture(scale, 'scale', nextIndex);
-  }
-  gpgpu.executeProgram();
-}
\ No newline at end of file
diff --git a/src/math/webgl/batchnorm_gpu_test.ts b/src/math/webgl/batchnorm_gpu_test.ts
index 7903a9e9ac..aac8f27464 100644
--- a/src/math/webgl/batchnorm_gpu_test.ts
+++ b/src/math/webgl/batchnorm_gpu_test.ts
@@ -14,88 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+import {initializeGPU, NDArray} from '../ndarray';
 
-import * as batchnorm_gpu from './batchnorm_gpu';
+import {BatchNormProgram} from './batchnorm_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('batchnorm gpu test', () => {
-  function uploadBatchNormDownload(
-      x: Float32Array, xTexShapeRowCol: [number, number], mean: Float32Array,
-      meanTexShapeRowCol: [number, number], variance: Float32Array,
-      varianceTexShapeRowCol: [number, number], offset: Float32Array|null,
-      offsetTexShapeRowCol: [number, number]|null, scale: Float32Array|null,
-      scaleTexShapeRowCol: [number, number]|null,
-      varianceEpsilon: number): Float32Array {
-    const resultTexShapeRC: [number, number] = xTexShapeRowCol;
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = batchnorm_gpu.getFragmentShaderSource(
-        xTexShapeRowCol, meanTexShapeRowCol, varianceTexShapeRowCol,
-        offsetTexShapeRowCol, scaleTexShapeRowCol, varianceEpsilon);
-
-    const program = gpgpu.createProgram(shaderSource);
-
-    const xTex =
-        gpgpu.createMatrixTexture(xTexShapeRowCol[0], xTexShapeRowCol[1]);
-    const meanTex =
-        gpgpu.createMatrixTexture(meanTexShapeRowCol[0], meanTexShapeRowCol[1]);
-    const varianceTex = gpgpu.createMatrixTexture(
-        varianceTexShapeRowCol[0], varianceTexShapeRowCol[1]);
-
-    let offsetTex = null;
-    if (offset != null) {
-      offsetTex = gpgpu.createMatrixTexture(
-          offsetTexShapeRowCol![0], offsetTexShapeRowCol![1]);
-    }
-    let scaleTex = null;
-    if (scale != null) {
-      scaleTex = gpgpu.createMatrixTexture(
-          scaleTexShapeRowCol![0], scaleTexShapeRowCol![1]);
-    }
-
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(
-        xTex, xTexShapeRowCol[0], xTexShapeRowCol[1], x);
-    gpgpu.uploadMatrixToTexture(
-        meanTex, meanTexShapeRowCol[0], meanTexShapeRowCol[1], mean);
-    gpgpu.uploadMatrixToTexture(
-        varianceTex, varianceTexShapeRowCol[0], varianceTexShapeRowCol[1],
-        variance);
-    if (offset != null) {
-      gpgpu.uploadMatrixToTexture(
-          offsetTex!, offsetTexShapeRowCol![0], offsetTexShapeRowCol![1],
-          offset);
-    }
-    if (scale != null) {
-      gpgpu.uploadMatrixToTexture(
-          scaleTex!, scaleTexShapeRowCol![0], scaleTexShapeRowCol![1], scale);
-    }
-
-    batchnorm_gpu.batchNormalization(
-        gpgpu, program, xTex, xTexShapeRowCol, meanTex, meanTexShapeRowCol,
-        varianceTex, varianceTexShapeRowCol, offsetTex, offsetTexShapeRowCol,
-        scaleTex, scaleTexShapeRowCol, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(xTex);
-    gpgpu.deleteMatrixTexture(meanTex);
-    gpgpu.deleteMatrixTexture(varianceTex);
-    if (offsetTex != null) {
-      gpgpu.deleteMatrixTexture(offsetTex);
-    }
-    if (scaleTex != null) {
-      gpgpu.deleteMatrixTexture(scaleTex);
-    }
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
 
   it('simple batchnorm, no offset or scale, 2x1x2', () => {
     const x = new Float32Array([2, 100, 4, 400]);
@@ -201,7 +127,7 @@ describe('batchnorm gpu test', () => {
     const varianceEpsilon = .001;
 
     const result = uploadBatchNormDownload(
-        x, [2, 9], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale,
+        x, [2, 3, 3], mean, [1, 3], variance, [1, 3], offset, [1, 3], scale,
         [1, 3], varianceEpsilon);
 
     const expectedResult = new Float32Array([
@@ -212,3 +138,67 @@ describe('batchnorm gpu test', () => {
     test_util.expectArraysClose(result, expectedResult, 1e-5);
   });
 });
+
+/**
+ * Test helper that runs a BatchNormProgram on the GPU and downloads the
+ * output values.
+ *
+ * Each Float32Array is wrapped in an NDArray of the matching shape. The
+ * program inputs are ordered x, mean, variance, followed by offset and then
+ * scale when those are provided.
+ *
+ * @param offset Offset values, or null to run without an offset input.
+ * @param offsetShape Shape of `offset`; required when `offset` is non-null.
+ * @param scale Scale values, or null to run without a scale input.
+ * @param scaleShape Shape of `scale`; required when `scale` is non-null.
+ * @param varianceEpsilon Passed through to the BatchNormProgram constructor.
+ * @returns The values read back from the output array.
+ * @throws Error if `offset` or `scale` is given without its shape.
+ */
+function uploadBatchNormDownload(
+    x: Float32Array, xShape: number[], mean: Float32Array, meanShape: number[],
+    variance: Float32Array, varianceShape: number[], offset: Float32Array|null,
+    offsetShape: number[]|null, scale: Float32Array|null,
+    scaleShape: number[]|null, varianceEpsilon: number): Float32Array {
+  const gpgpu = new GPGPUContext();
+  // Keep GL debug validation on, as this helper did before it was moved to
+  // the GPGPUProgram API.
+  gpgpu.enableAutomaticDebugValidation(true);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const program = new BatchNormProgram(
+      xShape, meanShape, varianceShape, offsetShape, scaleShape,
+      varianceEpsilon);
+  const xArr = NDArray.make(xShape, {values: x});
+  const meanArr = NDArray.make(meanShape, {values: mean});
+  const varianceArr = NDArray.make(varianceShape, {values: variance});
+  const inputs = [xArr, meanArr, varianceArr];
+
+  if (offset != null) {
+    if (offsetShape == null) {
+      throw new Error('offsetShape must be provided when offset is set.');
+    }
+    inputs.push(NDArray.make(offsetShape, {values: offset}));
+  }
+  if (scale != null) {
+    if (scaleShape == null) {
+      throw new Error('scaleShape must be provided when scale is set.');
+    }
+    inputs.push(NDArray.make(scaleShape, {values: scale}));
+  }
+
+  const res = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, inputs, res);
+  gpgpu_math.runProgram(binary, inputs, res);
+  const resValues = res.getValues();
+
+  // Dispose the arrays before the texture manager and context.
+  inputs.forEach(input => input.dispose());
+  res.dispose();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return resValues;
+}
diff --git a/src/math/webgl/binaryop_gpu_test.ts b/src/math/webgl/binaryop_gpu_test.ts
index dd1ad320c0..cb2b14cf16 100644
--- a/src/math/webgl/binaryop_gpu_test.ts
+++ b/src/math/webgl/binaryop_gpu_test.ts
@@ -14,13 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
+// tslint:disable-next-line:max-line-length
+import {Array1D, Array2D, Array3D, initializeGPU, NDArray, Scalar} from '../ndarray';
 
 import {BinaryOpProgram} from './binaryop_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {NDArray, Array1D, Array2D, Array3D, Scalar,
-  initializeGPU} from '../ndarray';
-import * as util from '../../util';
 import {TextureManager} from './texture_manager';
 
 describe('binaryop_gpu Add', () => {
@@ -92,7 +91,7 @@ describe('binaryop_gpu Sub', () => {
     // shape [3, 2] is not compatible with shape [3].
     const res = uploadBinaryOpDownload(a, b, '-');
     test_util.expectArraysClose(
-      res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4);
+        res, new Float32Array([0, 0, 0, -1, 4, 4, 4, 3]), 1e-4);
   });
 });
 
@@ -177,17 +176,15 @@ describe('binaryop_gpu Divide', () => {
   });
 });
 
-export function uploadBinaryOpDownload(
+function uploadBinaryOpDownload(
     a: NDArray, b: NDArray, op: '+'|'-'|'*'|'/'): Float32Array {
   const gpgpu = new GPGPUContext();
   const textureManager = new TextureManager(gpgpu);
   initializeGPU(gpgpu, textureManager);
 
-  const outShape = util.assertAndGetBroadcastedShape(a.shape, b.shape);
-  const res = NDArray.zeros(outShape);
   const program = new BinaryOpProgram(op, a.shape, b.shape);
-  const binary =
-      gpgpu_math.compileProgram(gpgpu, program, [a, b], res);
+  const res = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [a, b], res);
   gpgpu_math.runProgram(binary, [a, b], res);
 
   const resValues = res.getValues();
diff --git a/src/math/webgl/concat3d_gpu.ts b/src/math/webgl/concat3d_gpu.ts
index ebe37d7ab3..e6c6840c8a 100644
--- a/src/math/webgl/concat3d_gpu.ts
+++ b/src/math/webgl/concat3d_gpu.ts
@@ -13,62 +13,59 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as conv_util from '../conv_util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    x1ShapeRCD: [number, number, number], x2ShapeRCD: [number, number, number],
-    resultShapeRCD: [number, number, number], axis: number): string {
-  const x1TexShapeRC = conv_util.computeTexShapeFrom3D(x1ShapeRCD);
-  const x2TexShapeRC = conv_util.computeTexShapeFrom3D(x2ShapeRCD);
-
-  const yAxes = ['yR', 'yC', 'yD'];
-  const concatAxis = yAxes[axis];
-
-  return `
-    precision highp float;
-    uniform sampler2D x1;
-    uniform sampler2D x2;
-
-    const vec2 x1ShapeCR = vec2(${x1TexShapeRC[1]}, ${x1TexShapeRC[0]});
-    const vec2 x2ShapeCR = vec2(${x2TexShapeRC[1]}.0, ${x2TexShapeRC[0]}.0);
-
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, yD).
-      float yR = yTexCR.y;
-      float yC = floor(yTexCR.x / ${resultShapeRCD[2]}.0);
-      float yD = mod(yTexCR.x, ${resultShapeRCD[2]}.0);
-
-      float value = 0.0;
-
-      if (${concatAxis} < ${x1ShapeRCD[axis]}.0) {
-        // Map yR, yC, yD back to x1 coordinates.
-        vec2 x1CR = vec2(yC * ${x1ShapeRCD[2]}.0 + yD, yR);
-        vec2 x1UV = (x1CR + halfCR) / x1ShapeCR;
-        value = texture2D(x1, x1UV).r;
-      } else {
-        ${concatAxis} = ${concatAxis} - ${x1ShapeRCD[axis]}.0;
-
-        // Map yR, yC, yD back to x2 coordinates.
-        vec2 x2CR = vec2(yC * ${x2ShapeRCD[2]}.0 + yD, yR);
-        vec2 x2UV = (x2CR + halfCR) / x2ShapeCR;
-        value = texture2D(x2, x2UV).r;
+import * as concat3d_util from '../concat3d_util';
+import {GPGPUProgram} from './gpgpu_math';
+
+/**
+ * GPGPU program that concatenates two 3D inputs, `A` and `B`, along one
+ * axis.
+ *
+ * For each output coordinate the shader reads from `A` when the coordinate
+ * along the concat axis is below `x1Shape[axis]`; otherwise it subtracts
+ * `x1Shape[axis]` from that coordinate and reads from `B`.
+ */
+export class Concat3DProgram implements GPGPUProgram {
+  variableNames = ['A', 'B'];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+
+  /**
+   * @param x1Shape Shape of the first input (`A`).
+   * @param x2Shape Shape of the second input (`B`).
+   * @param axis Axis to concatenate along: 0, 1 or 2.
+   * @throws Error if `axis` is not 0, 1 or 2.
+   */
+  constructor(
+      x1Shape: [number, number, number], x2Shape: [number, number, number],
+      axis: number) {
+    const yAxes = ['yR', 'yC', 'yD'];
+    const concatAxis = yAxes[axis];
+    if (concatAxis == null) {
+      // An out-of-range axis would otherwise put the text "undefined" into
+      // the generated shader source.
+      throw new Error(`Concat3D axis must be 0, 1 or 2, got ${axis}.`);
+    }
+    this.params = [axis];
+    this.outputShape =
+        concat3d_util.computeConcat3DOutputShape(x1Shape, x2Shape, axis);
+    this.userCode = `
+      void main() {
+        vec3 coords = getOutputCoords();
+        float yR = coords.x;
+        float yC = coords.y;
+        float yD = coords.z;
+
+        float value = 0.0;
+        if (${concatAxis} < ${x1Shape[axis]}.0) {
+          value = getA(yR, yC, yD);
+        } else {
+          ${concatAxis} -= ${x1Shape[axis]}.0;
+          value = getB(yR, yC, yD);
+        }
+
+        setOutput(value);
       }
-
-      gl_FragColor = vec4(value, 0.0, 0.0, 0.0);
-    }`;
-}
-
-export function concat3D(
-    gpgpu: GPGPUContext, program: WebGLProgram, x1: WebGLTexture,
-    x2: WebGLTexture, result: WebGLTexture, resultShapeRC: [number, number]) {
-  gpgpu.setOutputMatrixTexture(result, resultShapeRC[0], resultShapeRC[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(x1, 'x1', 0);
-  gpgpu.setInputMatrixTexture(x2, 'x2', 1);
-  gpgpu.executeProgram();
+    `;
+  }
 }
diff --git a/src/math/webgl/concat3d_gpu_test.ts b/src/math/webgl/concat3d_gpu_test.ts
index 00ee4b14d0..3f96a78d8f 100644
--- a/src/math/webgl/concat3d_gpu_test.ts
+++ b/src/math/webgl/concat3d_gpu_test.ts
@@ -14,54 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
-
-import * as concat3d_gpu from './concat3d_gpu';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
+import {Concat3DProgram} from './concat3d_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
 describe('concat3d_gpu', () => {
-
-  function uploadConcat3dDownload(
-      x1: Float32Array, x2: Float32Array, x1ShapeRCD: [number, number, number],
-      x2ShapeRCD: [number, number, number], axis: number): Float32Array {
-    const x1TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x1ShapeRCD);
-    const x2TexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(x2ShapeRCD);
-
-    const resultShapeRCD = x1ShapeRCD.slice() as [number, number, number];
-    resultShapeRCD[axis] += x2ShapeRCD[axis];
-    const resultTexShapeRC = conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = concat3d_gpu.getFragmentShaderSource(
-        x1ShapeRCD, x2ShapeRCD, resultShapeRCD, axis);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const x1Tex = gpgpu.createMatrixTexture(x1TexShapeRC[0], x1TexShapeRC[1]);
-    const x2Tex = gpgpu.createMatrixTexture(x2TexShapeRC[0], x2TexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(x1Tex, x1TexShapeRC[0], x1TexShapeRC[1], x1);
-    gpgpu.uploadMatrixToTexture(x2Tex, x2TexShapeRC[0], x2TexShapeRC[1], x2);
-
-    concat3d_gpu.concat3D(
-        gpgpu, program, x1Tex, x2Tex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(x1Tex);
-    gpgpu.deleteMatrixTexture(x2Tex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
-
   it('concat axis=0', () => {
     const x1 = new Float32Array([1, 11, 111, 2, 22, 222]);
     const x2 =
@@ -103,3 +62,33 @@ describe('concat3d_gpu', () => {
         1e-6);
   });
 });
+
+/**
+ * Test helper that concatenates `a` and `b` along `axis` on the GPU with a
+ * Concat3DProgram and returns the values read back from the result array.
+ */
+function uploadConcat3dDownload(
+    a: Float32Array, b: Float32Array, aShape: [number, number, number],
+    bShape: [number, number, number], axis: number): Float32Array {
+  const gpgpu = new GPGPUContext();
+  gpgpu.enableAutomaticDebugValidation(true);
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+
+  const concatProgram = new Concat3DProgram(aShape, bShape, axis);
+  const inputs = [Array3D.new(aShape, a), Array3D.new(bShape, b)];
+  const output = NDArray.zeros(concatProgram.outputShape);
+  const binary =
+      gpgpu_math.compileProgram(gpgpu, concatProgram, inputs, output);
+  gpgpu_math.runProgram(binary, inputs, output);
+  const values = output.getValues();
+
+  // Release the arrays first, then the texture manager, program and context.
+  inputs.forEach(input => input.dispose());
+  output.dispose();
+  texManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return values;
+}
diff --git a/src/math/webgl/copy_gpu.ts b/src/math/webgl/copy_gpu.ts
index 3210644704..1ea1418c6b 100644
--- a/src/math/webgl/copy_gpu.ts
+++ b/src/math/webgl/copy_gpu.ts
@@ -14,50 +14,69 @@ limitations under the License.
 ==============================================================================*/
 
 import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    sourceShapeRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destSizeRowCol: [number, number]): string {
-  return `
-    precision highp float;
-    uniform sampler2D source;
-    uniform vec2 sourceStartCR;
-    uniform vec2 destStartCR;
-
-    const vec2 sourceShapeCR =
-      vec2(${sourceShapeRowCol[1]}, ${sourceShapeRowCol[0]});
-    const vec2 sourceSizeCR =
-      vec2(${sourceSizeRowCol[1]}, ${sourceSizeRowCol[0]});
-    const vec2 destSizeCR =
-      vec2(${destSizeRowCol[1]}, ${destSizeRowCol[0]});
-
-    void main() {
-      vec2 destOffsetCR = floor(gl_FragCoord.xy) - destStartCR;
-      float destOffsetFlat = (destOffsetCR.y * destSizeCR.x) + destOffsetCR.x;
-      vec2 sourceOffsetCR = vec2(mod(destOffsetFlat, sourceSizeCR.x),
-        floor(destOffsetFlat / sourceSizeCR.x));
-      vec2 sourceCR = sourceStartCR + sourceOffsetCR;
-      vec2 sourceUV = (sourceCR + vec2(0.5, 0.5)) / sourceShapeCR;
-      gl_FragColor = texture2D(source, sourceUV);
-    }`;
-}
-
-export function copy(
-    gpgpu: GPGPUContext, program: WebGLProgram, source: WebGLTexture,
-    sourceShapeRowCol: [number, number], sourceStartRowCol: [number, number],
-    sourceSizeRowCol: [number, number], dest: WebGLTexture,
-    destShapeRowCol: [number, number], destStartRowCol: [number, number],
-    destSizeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(dest, destShapeRowCol[0], destShapeRowCol[1]);
-  gpgpu.setOutputMatrixWriteRegion(
-      destStartRowCol[0], destSizeRowCol[0], destStartRowCol[1],
-      destSizeRowCol[1]);
-  gpgpu.setProgram(program);
-  gpgpu.setInputMatrixTexture(source, 'source', 0);
-  const sourceStartCRLoc = gpgpu.getUniformLocation('sourceStartCR');
-  gpgpu.gl.uniform2f(
-      sourceStartCRLoc, sourceStartRowCol[1], sourceStartRowCol[0]);
-  const destStartCRLoc = gpgpu.getUniformLocation('destStartCR');
-  gpgpu.gl.uniform2f(destStartCRLoc, destStartRowCol[1], destStartRowCol[0]);
-  gpgpu.executeProgram();
+import {GPGPUProgram} from './gpgpu_math';
+
+/**
+ * GPGPU program that copies values from a 2D `source` input into a region
+ * of the output.
+ *
+ * The column counts of the two regions are baked into the shader source; the
+ * start offsets are uniforms set for each run by the callback returned from
+ * `getCustomSetupFunc`.
+ */
+export class Copy2DProgram implements GPGPUProgram {
+  variableNames = ['source'];
+  params: Array<{}>;
+  outputShape: number[];
+  userCode: string;
+
+  /**
+   * @param srcNumCols Number of columns in the source region.
+   * @param destNumCols Number of columns in the destination region.
+   */
+  constructor(srcNumCols: number, destNumCols: number) {
+    // NOTE(review): outputShape is deliberately null here; presumably the
+    // output shape comes from the destination array at run time - confirm.
+    this.outputShape = null;
+    this.params = [srcNumCols, destNumCols];
+    this.userCode = `
+      uniform vec2 sourceStart;
+      uniform vec2 destStart;
+
+      void main() {
+        vec2 destCoords = getOutputCoords() - destStart;
+        float index = dot(destCoords, vec2(${destNumCols}.0, 1.0));
+        vec2 sourceCoords = sourceStart + vec2(
+          floor(index / ${srcNumCols}.0),
+          mod(index, ${srcNumCols}.0)
+        );
+        setOutput(getSource(sourceCoords.x, sourceCoords.y));
+      }
+    `;
+  }
+
+  /**
+   * Builds the setup callback to run before this program executes.
+   *
+   * The callback sets the output write region from `destStart` and
+   * `destSize` and uploads the `sourceStart` and `destStart` uniforms.
+   *
+   * @param sourceStart Start of the region to read from the source.
+   * @param destStart Start of the region to write in the output.
+   * @param destSize Size of the region to write in the output.
+   * @returns A function to pass as `customSetup` when running the program.
+   */
+  getCustomSetupFunc(
+      sourceStart: [number, number], destStart: [number, number],
+      destSize: [number, number]) {
+    return (gpgpu: GPGPUContext) => {
+      const [destRow, destCol] = destStart;
+      const [numRows, numCols] = destSize;
+      gpgpu.setOutputMatrixWriteRegion(destRow, numRows, destCol, numCols);
+      const srcStartLoc = gpgpu.getUniformLocation('sourceStart');
+      gpgpu.gl.uniform2f(srcStartLoc, sourceStart[0], sourceStart[1]);
+      const destStartLoc = gpgpu.getUniformLocation('destStart');
+      gpgpu.gl.uniform2f(destStartLoc, destRow, destCol);
+    };
+  }
 }
diff --git a/src/math/webgl/copy_gpu_test.ts b/src/math/webgl/copy_gpu_test.ts
index 6600995f3b..59904530a7 100644
--- a/src/math/webgl/copy_gpu_test.ts
+++ b/src/math/webgl/copy_gpu_test.ts
@@ -14,40 +14,42 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as copy_gpu from './copy_gpu';
+import {Array2D, initializeGPU} from '../ndarray';
+import {Copy2DProgram} from './copy_gpu';
 import {GPGPUContext} from './gpgpu_context';
+import * as gpgpu_math from './gpgpu_math';
+import {TextureManager} from './texture_manager';
 
+/**
+ * Test helper that copies a region of `srcVals` into `destVals` on the GPU
+ * with a Copy2DProgram and returns the resulting destination values.
+ */
 function uploadCopyDownload(
-    source: Float32Array, sourceShapeRowCol: [number, number],
-    sourceStartRowCol: [number, number], sourceSizeRowCol: [number, number],
-    destStartRowCol: [number, number], destSizeRowCol: [number, number],
-    dest: Float32Array, destShapeRowCol: [number, number]): Float32Array {
+    srcVals: Float32Array, srcShape: [number, number],
+    srcStart: [number, number], srcSize: [number, number],
+    destStart: [number, number], destSize: [number, number],
+    destVals: Float32Array, destShape: [number, number]): Float32Array {
   const gpgpu = new GPGPUContext();
-  const fragmentShaderSource = copy_gpu.getFragmentShaderSource(
-      sourceShapeRowCol, sourceSizeRowCol, destSizeRowCol);
-  const program = gpgpu.createProgram(fragmentShaderSource);
-
-  const sourceTex =
-      gpgpu.createMatrixTexture(sourceShapeRowCol[0], sourceShapeRowCol[1]);
-  const destTex =
-      gpgpu.createMatrixTexture(destShapeRowCol[0], destShapeRowCol[1]);
-
-  gpgpu.uploadMatrixToTexture(
-      sourceTex, sourceShapeRowCol[0], sourceShapeRowCol[1], source);
-  gpgpu.uploadMatrixToTexture(
-      destTex, destShapeRowCol[0], destShapeRowCol[1], dest);
-
-  copy_gpu.copy(
-      gpgpu, program, sourceTex, sourceShapeRowCol, sourceStartRowCol,
-      sourceSizeRowCol, destTex, destShapeRowCol, destStartRowCol,
-      destSizeRowCol);
-
-  const result = gpgpu.downloadMatrixFromTexture(
-      destTex, destShapeRowCol[0], destShapeRowCol[1]);
-
-  gpgpu.deleteMatrixTexture(sourceTex);
-  gpgpu.deleteMatrixTexture(destTex);
-  gpgpu.deleteProgram(program);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const copyProgram = new Copy2DProgram(srcSize[1], destSize[1]);
+  const srcArr = Array2D.new(srcShape, srcVals);
+  const destArr = Array2D.new(destShape, destVals);
+  const inputs = [srcArr];
+
+  const binary =
+      gpgpu_math.compileProgram(gpgpu, copyProgram, inputs, destArr);
+  // The setup callback supplies the start offsets and the write region.
+  gpgpu_math.runProgram(
+      binary, inputs, destArr,
+      copyProgram.getCustomSetupFunc(srcStart, destStart, destSize));
+  const result = destArr.getValues();
+
+  srcArr.dispose();
+  destArr.dispose();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
   return result;
@@ -157,33 +151,40 @@ describe('copy_gpu', () => {
   });
 
   it('accumulates results from previous copies into dest texture', () => {
-    const shapeRC: [number, number] = [10, 10];
-    const sizeRC: [number, number] = [10, 1];
-    const source = new Float32Array(100);
+    const matShape: [number, number] = [10, 10];
+    const columnSize: [number, number] = [10, 1];
+    const expected = new Float32Array(100);
     for (let i = 0; i < 100; ++i) {
-      source[i] = i;
+      expected[i] = i;
     }
+
     const gpgpu = new GPGPUContext();
-    const program = gpgpu.createProgram(
-        copy_gpu.getFragmentShaderSource(shapeRC, sizeRC, sizeRC));
-    const sourceTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]);
-    const destTex = gpgpu.createMatrixTexture(shapeRC[0], shapeRC[1]);
-    gpgpu.uploadMatrixToTexture(sourceTex, shapeRC[0], shapeRC[1], source);
+    const textureManager = new TextureManager(gpgpu);
+    initializeGPU(gpgpu, textureManager);
+
+    const copyProgram = new Copy2DProgram(columnSize[1], columnSize[1]);
+    const srcArr = Array2D.new(matShape, expected);
+    const destArr = Array2D.zeros(matShape);
+    const inputs = [srcArr];
+
+    const binary =
+        gpgpu_math.compileProgram(gpgpu, copyProgram, inputs, destArr);
 
     for (let i = 0; i < 10; ++i) {
-      copy_gpu.copy(
-          gpgpu, program, sourceTex, shapeRC, [0, i], sizeRC, destTex, shapeRC,
-          [0, i], sizeRC);
+      // Each pass copies column i of the source into column i of dest.
+      const colStart: [number, number] = [0, i];
+      gpgpu_math.runProgram(
+          binary, inputs, destArr,
+          copyProgram.getCustomSetupFunc(colStart, colStart, columnSize));
     }
+    const actual = destArr.getValues();
 
-    const dest =
-        gpgpu.downloadMatrixFromTexture(destTex, shapeRC[0], shapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(sourceTex);
-    gpgpu.deleteMatrixTexture(destTex);
-    gpgpu.deleteProgram(program);
+    srcArr.dispose();
+    destArr.dispose();
+    textureManager.dispose();
+    gpgpu.deleteProgram(binary.webGLProgram);
     gpgpu.dispose();
 
-    test_util.expectArraysClose(dest, source, 0);
+    test_util.expectArraysClose(actual, expected, 0);
   });
 });
diff --git a/src/math/webgl/gpgpu_math.ts b/src/math/webgl/gpgpu_math.ts
index 56ac04f08e..5784c18a3a 100644
--- a/src/math/webgl/gpgpu_math.ts
+++ b/src/math/webgl/gpgpu_math.ts
@@ -90,7 +90,8 @@ function validateBinaryAndProgram(shapeInfos: ShapeInfo[], inputs: NDArray[]) {
 }
 
 export function runProgram<T extends NDArray, K extends NDArray>(
-    binary: GPGPUBinary, inputs: T[], output: K): void {
+    binary: GPGPUBinary, inputs: T[], output: K,
+    customSetup?: (gpgpu: GPGPUContext) => void): void {
   validateBinaryAndProgram(binary.inShapeInfos, inputs);
   validateBinaryAndProgram([binary.outShapeInfo], [output]);
 
@@ -103,6 +104,9 @@ export function runProgram<T extends NDArray, K extends NDArray>(
     const tex = input.getTexture();
     gpgpu.setInputMatrixTexture(tex, binary.program.variableNames[i], i);
   });
+  if (customSetup != null) {
+    customSetup(gpgpu);
+  }
   gpgpu.executeProgram();
 }
 
diff --git a/src/math/webgl/mulbcast_gpu.ts b/src/math/webgl/mulbcast_gpu.ts
deleted file mode 100644
index 8780720d0d..0000000000
--- a/src/math/webgl/mulbcast_gpu.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    aNumRows: number, aNumCols: number, bNumRows: number, bNumCols: number,
-    resultNumRows: number, resultNumCols: number): string {
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    uniform sampler2D matrixB;
-    varying vec2 resultUV;
-
-    const vec2 aDimCR = vec2(${aNumCols}.0, ${aNumRows}.0);
-    const vec2 bDimCR = vec2(${bNumCols}.0, ${bNumRows}.0);
-    const vec2 resultDimCR = vec2(${resultNumCols}.0, ${resultNumRows}.0);
-    const vec4 halfCR = vec4(0.5, 0.5, 0.5, 0.5);
-
-    void main() {
-      vec2 resultCR = floor(resultUV * resultDimCR);
-      vec4 resultCRBroadcast = vec4(resultCR, resultCR);
-      vec4 abDimsCR = vec4(aDimCR, bDimCR);
-      vec4 abCR = mod(resultCRBroadcast, abDimsCR);
-      vec4 abCRCenters = abCR + halfCR;
-      vec4 abUV = abCRCenters / abDimsCR;
-      vec4 a = texture2D(matrixA, abUV.rg);
-      vec4 b = texture2D(matrixB, abUV.ba);
-      float product = a.r * b.r;
-      gl_FragColor = vec4(product, 0, 0, 0);
-    }`;
-}
-
-export function multiplyBroadcast(
-    gpgpu: GPGPUContext, multiplyBroadcastProgram: WebGLProgram,
-    a: WebGLTexture, aNumRows: number, aNumCols: number, b: WebGLTexture,
-    bNumRows: number, bNumCols: number, result: WebGLTexture,
-    resultNumRows: number, resultNumCols: number) {
-  gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols);
-  gpgpu.setProgram(multiplyBroadcastProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-  gpgpu.setInputMatrixTexture(b, 'matrixB', 1);
-  gpgpu.executeProgram();
-}
-
-export function uploadMultiplyBroadcastDownload(
-    a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array,
-    bNumRows: number, bNumCols: number): Float32Array {
-  const resultNumRows = Math.max(aNumRows, bNumRows);
-  const resultNumCols = Math.max(aNumCols, bNumCols);
-
-  const gpgpu = new GPGPUContext();
-  const program: WebGLProgram = gpgpu.createProgram(getFragmentShaderSource(
-      aNumRows, aNumCols, bNumRows, bNumCols, resultNumRows, resultNumCols));
-
-  const aTexture: WebGLTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-  const bTexture: WebGLTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols);
-  const resultTexture: WebGLTexture =
-      gpgpu.createMatrixTexture(resultNumRows, resultNumCols);
-
-  gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-  gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b);
-
-  multiplyBroadcast(
-      gpgpu, program, aTexture, aNumRows, aNumCols, bTexture, bNumRows,
-      bNumCols, resultTexture, resultNumRows, resultNumCols);
-
-  const result = gpgpu.downloadMatrixFromTexture(
-      resultTexture, resultNumRows, resultNumCols);
-
-  gpgpu.deleteMatrixTexture(aTexture);
-  gpgpu.deleteMatrixTexture(bTexture);
-  gpgpu.deleteMatrixTexture(resultTexture);
-  gpgpu.deleteProgram(program);
-  gpgpu.dispose();
-
-  return result;
-}
diff --git a/src/math/webgl/mulbcast_gpu_test.ts b/src/math/webgl/mulbcast_gpu_test.ts
deleted file mode 100644
index e32c50179e..0000000000
--- a/src/math/webgl/mulbcast_gpu_test.ts
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as test_util from '../../test_util';
-import * as mulbcast_gpu from './mulbcast_gpu';
-
-export function cpuMultiplyBroadcast(
-    a: Float32Array, aNumRows: number, aNumCols: number, b: Float32Array,
-    bNumRows: number, bNumCols: number): Float32Array {
-  const resultNumRows = Math.max(aNumRows, bNumRows);
-  const resultNumCols = Math.max(aNumCols, bNumCols);
-  const result = new Float32Array(resultNumRows * resultNumCols);
-  let dst = 0;
-  for (let r = 0; r < resultNumRows; ++r) {
-    for (let c = 0; c < resultNumCols; ++c) {
-      const ai = ((r % aNumRows) * aNumCols) + (c % aNumCols);
-      const bi = ((r % bNumRows) * bNumCols) + (c % bNumCols);
-      result[dst] = a[ai] * b[bi];
-      ++dst;
-    }
-  }
-  return result;
-}
-
-describe('mulbcast_gpu', () => {
-  it('returns a matrix dimensions [max(aRows, bRows), max(aCols, bCols)]',
-     () => {
-       const a = new Float32Array(13 * 100);
-       const b = new Float32Array(100 * 99);
-       const result =
-           mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 1, 100, b, 100, 1);
-       expect(result.length).toEqual(100 * 100);
-     });
-
-  it('returns [0] when A is [0], A and B same size', () => {
-    const a = new Float32Array(16 * 16);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(a, result, 0.00001);
-  });
-
-  it('returns [0] when B is [0], A and B same size', () => {
-    const a = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const b = new Float32Array(16 * 16);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(b, result, 0.00001);
-  });
-
-  it('returns A when B is [1] and matrices have the same size', () => {
-    const a = new Float32Array(16 * 16);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and matrices have the same size', () => {
-    const a = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const b = new Float32Array(16 * 16);
-    b.fill(1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 16, b, 16, 16);
-    test_util.expectArraysClose(result, a, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is narrower than B', () => {
-    const a = new Float32Array(16 * 8);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 16, 8, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is shorter than B', () => {
-    const a = new Float32Array(8 * 16);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(16 * 16, -10, 10);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 8, 16, b, 16, 16);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('returns B when A is [1] and A is smaller than B', () => {
-    const a = new Float32Array(7 * 6);
-    a.fill(1);
-    const b = test_util.randomArrayInRange(18 * 21, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 7, 6, b, 18, 21);
-    test_util.expectArraysClose(result, b, 0.00001);
-  });
-
-  it('broadcasts a smaller A [2x2] across B [4x4]', () => {
-    const a = new Float32Array([1, 0, 1, 0]);
-    const b = new Float32Array(4 * 4);
-    for (let i = 0; i < b.length; ++i) {
-      b[i] = i + 1;
-    }
-    const expected =
-        new Float32Array([1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0]);
-    const gpuResult =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 2, 2, b, 4, 4);
-    const cpuResult = cpuMultiplyBroadcast(a, 2, 2, b, 4, 4);
-    test_util.expectArraysClose(cpuResult, expected, 0.0001);
-    test_util.expectArraysClose(gpuResult, expected, 0.0001);
-  });
-
-  it('broadcasts a non-square A [3x5] across a larger B [16x16]', () => {
-    const a = test_util.randomArrayInRange(3 * 5, -1, 1);
-    const b = test_util.randomArrayInRange(16 * 16, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 3, 5, b, 16, 16);
-    test_util.expectArraysClose(
-        result, cpuMultiplyBroadcast(a, 3, 5, b, 16, 16), 0.0001);
-  });
-
-  it('broadcasts a non-square A across a larger non-square B', () => {
-    const a = test_util.randomArrayInRange(37 * 63, -1, 1);
-    const b = test_util.randomArrayInRange(128 * 150, -1, 1);
-    const result =
-        mulbcast_gpu.uploadMultiplyBroadcastDownload(a, 37, 63, b, 128, 150);
-    test_util.expectArraysClose(
-        result, cpuMultiplyBroadcast(a, 37, 63, b, 128, 150), 0.0001);
-  });
-});
diff --git a/src/math/webgl/mulmat_gpu_test.ts b/src/math/webgl/mulmat_gpu_test.ts
index f6da798467..c39526290b 100644
--- a/src/math/webgl/mulmat_gpu_test.ts
+++ b/src/math/webgl/mulmat_gpu_test.ts
@@ -15,11 +15,12 @@ limitations under the License.
 
 import * as test_util from '../../test_util';
 import {MatrixOrientation} from '../math';
-import {Array2D} from '../ndarray';
+import {Array2D, initializeGPU} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
 import {MatMulProgram} from './mulmat_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('mulmat_gpu (1x1 * 1x1)', () => {
   it('returns a 1x1 matrix', () => {
@@ -269,11 +270,11 @@ describe('mulmat_gpu (multiple matrices)', () => {
     const cArr = new Array2D(cShape, {texture: c, textureShapeRC: cShape});
     const rArr = new Array2D(rShape, {texture: r, textureShapeRC: rShape});
     const matMulProgram = new MatMulProgram(aArr.shape, bArr.shape);
-    const axbProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram,
-        [aArr, bArr], abArr);
+    const axbProgram =
+        gpgpu_math.compileProgram(gpgpu, matMulProgram, [aArr, bArr], abArr);
     const matMulProgram2 = new MatMulProgram(abArr.shape, cArr.shape);
-    const abxcProgram = gpgpu_math.compileProgram(gpgpu, matMulProgram2,
-        [abArr, cArr], rArr);
+    const abxcProgram =
+        gpgpu_math.compileProgram(gpgpu, matMulProgram2, [abArr, cArr], rArr);
 
     gpgpu.uploadMatrixToTexture(a, aShape[0], aShape[1], aData);
     gpgpu.uploadMatrixToTexture(b, bShape[0], bShape[1], bData);
@@ -335,41 +336,26 @@ export function uploadMultiplyMatrixDownload(
     bNumRows: number, bNumCols: number,
     aOrientation = MatrixOrientation.REGULAR,
     bOrientation = MatrixOrientation.REGULAR): Float32Array {
-  const outNumRows =
-      (aOrientation === MatrixOrientation.REGULAR) ? aNumRows : aNumCols;
-  const outNumCols =
-      (bOrientation === MatrixOrientation.REGULAR) ? bNumCols : bNumRows;
   const gpgpu = new GPGPUContext();
+  const texManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, texManager);
+
   const aShape: [number, number] = [aNumRows, aNumCols];
   const bShape: [number, number] = [bNumRows, bNumCols];
-  const outShape: [number, number] = [outNumRows, outNumCols];
-
-  const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-  const aArr = new Array2D(
-      aShape, {texture: aTexture, textureShapeRC: [aNumRows, aNumCols]});
-  const bTexture = gpgpu.createMatrixTexture(bNumRows, bNumCols);
-  const bArr = new Array2D(
-      bShape, {texture: bTexture, textureShapeRC: [bNumRows, bNumCols]});
-  const resultTexture: WebGLTexture =
-      gpgpu.createMatrixTexture(outNumRows, outNumCols);
-  const resArr =
-      new Array2D(outShape, {texture: resultTexture, textureShapeRC: outShape});
-
-  const program =
-      new MatMulProgram(aArr.shape, bArr.shape, aOrientation, bOrientation);
+
+  const program = new MatMulProgram(aShape, bShape, aOrientation, bOrientation);
+  const resArr = Array2D.zeros(program.outputShape as [number, number]);
+  const aArr = Array2D.new(aShape, a);
+  const bArr = Array2D.new(bShape, b);
+
   const binary =
       gpgpu_math.compileProgram(gpgpu, program, [aArr, bArr], resArr);
-  gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-  gpgpu.uploadMatrixToTexture(bTexture, bNumRows, bNumCols, b);
-
   gpgpu_math.runProgram(binary, [aArr, bArr], resArr);
+  const result = resArr.getValues();
 
-  const result =
-      gpgpu.downloadMatrixFromTexture(resultTexture, outNumRows, outNumCols);
-
-  gpgpu.deleteMatrixTexture(aTexture);
-  gpgpu.deleteMatrixTexture(bTexture);
-  gpgpu.deleteMatrixTexture(resultTexture);
+  aArr.dispose();
+  bArr.dispose();
+  texManager.dispose();
   gpgpu.deleteProgram(binary.webGLProgram);
   gpgpu.dispose();
 
diff --git a/src/math/webgl/reshape_gpu.ts b/src/math/webgl/reshape_gpu.ts
deleted file mode 100644
index a451a78134..0000000000
--- a/src/math/webgl/reshape_gpu.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import * as util from '../../util';
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(): string {
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    uniform vec2 inputDimCR;
-    uniform vec2 resultDimCR;
-    varying vec2 resultUV;
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    void main() {
-      vec2 resultCR = floor(resultUV * resultDimCR);
-      // indexInFlat = row * stride + column, where stride == numOutputColumns
-      float indexInFlat = resultCR.y * resultDimCR.x + resultCR.x;
-
-      vec2 inputCR = vec2(
-        mod(indexInFlat, inputDimCR.x), // col = indexInFlat % numInputColumns
-        floor(indexInFlat / inputDimCR.x) // row = indexInFlat / numInputColumns
-      ) + halfCR;
-
-      vec2 inputUV = inputCR / inputDimCR;
-      gl_FragColor = texture2D(matrixA, inputUV);
-    }`;
-}
-
-export function reshape(
-    gpgpu: GPGPUContext, reshapeProgram: WebGLProgram, a: WebGLTexture,
-    aNumRows: number, aNumCols: number, result: WebGLTexture,
-    resultNumRows: number, resultNumCols: number) {
-  const inputSize = aNumRows * aNumCols;
-  const outputSize = resultNumCols * resultNumRows;
-  util.assert(
-      inputSize === outputSize,
-      `The input size (${inputSize}) and output size (${outputSize}) ` +
-          `must match`);
-
-  gpgpu.setOutputMatrixTexture(result, resultNumRows, resultNumCols);
-  gpgpu.setProgram(reshapeProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-
-  const inputDimCRLocation = gpgpu.getUniformLocation('inputDimCR');
-  gpgpu.gl.uniform2f(inputDimCRLocation, aNumCols, aNumRows);
-
-  const resultDimCRLocation = gpgpu.getUniformLocation('resultDimCR');
-  gpgpu.gl.uniform2f(resultDimCRLocation, resultNumCols, resultNumRows);
-
-  gpgpu.executeProgram();
-}
diff --git a/src/math/webgl/reshape_gpu_test.ts b/src/math/webgl/reshape_gpu_test.ts
deleted file mode 100644
index 0f83a6e69e..0000000000
--- a/src/math/webgl/reshape_gpu_test.ts
+++ /dev/null
@@ -1,88 +0,0 @@
-/* Copyright 2017 Google Inc. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-import {GPGPUContext} from './gpgpu_context';
-import * as reshape_gpu from './reshape_gpu';
-
-describe('reshape_gpu', () => {
-  let gpgpu: GPGPUContext;
-
-  beforeEach(() => {
-    gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-  });
-
-  afterEach(() => {
-    gpgpu.dispose();
-  });
-
-  it('reshape a 2x3 matrix into the same size', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 2, 3);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 matrix into a column (6x1)', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 6, 1);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 matrix into a row (1x6) vector', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 1, 6);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 into a 3x2 matrix', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const result = uploadReshapeDownload(a, 2, 3, 3, 2);
-    expect(result).toEqual(a);
-  });
-
-  it('reshape a 2x3 into a 3x1 causes an error', () => {
-    const a = new Float32Array([1, 2, 3, 4, 5, 6]);
-    const f = () => {
-      uploadReshapeDownload(a, 2, 3, 3, 1);
-    };
-
-    expect(f).toThrowError();
-  });
-
-  function uploadReshapeDownload(
-      a: Float32Array, aNumRows: number, aNumCols: number,
-      resultNumRows: number, resultNumCols: number): Float32Array {
-    const program = gpgpu.createProgram(reshape_gpu.getFragmentShaderSource());
-
-    const aTexture = gpgpu.createMatrixTexture(aNumRows, aNumCols);
-    gpgpu.uploadMatrixToTexture(aTexture, aNumRows, aNumCols, a);
-
-    const resultTexture: WebGLTexture =
-        gpgpu.createMatrixTexture(resultNumRows, resultNumCols);
-
-    reshape_gpu.reshape(
-        gpgpu, program, aTexture, aNumRows, aNumCols, resultTexture,
-        resultNumRows, resultNumCols);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTexture, resultNumRows, resultNumCols);
-
-    gpgpu.deleteMatrixTexture(aTexture);
-    gpgpu.deleteMatrixTexture(resultTexture);
-    gpgpu.deleteProgram(program);
-
-    return result;
-  }
-});
diff --git a/src/math/webgl/resize_bilinear_gpu.ts b/src/math/webgl/resize_bilinear_gpu.ts
index 3491da228d..9ffb6707f7 100644
--- a/src/math/webgl/resize_bilinear_gpu.ts
+++ b/src/math/webgl/resize_bilinear_gpu.ts
@@ -13,79 +13,60 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-import * as conv_util from '../conv_util';
-
-import {GPGPUContext} from './gpgpu_context';
-
-export function getFragmentShaderSource(
-    inputShapeRCD: [number, number, number],
-    outputDimensionsRowCol: [number, number], alignCorners: boolean): string {
-  const depth = inputShapeRCD[2];
-
-  const inputTexShapeRC = conv_util.computeTexShapeFrom3D(inputShapeRCD);
-
-  const effectiveInputShapeRCD = alignCorners ?
-      [inputShapeRCD[0] - 1, inputShapeRCD[1] - 1, depth] :
-      inputShapeRCD;
-
-  const effectiveOutputShapeRCD = alignCorners ?
-      [outputDimensionsRowCol[0] - 1, outputDimensionsRowCol[1] - 1, depth] :
-      [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth];
-
-  return `
-    precision highp float;
-    uniform sampler2D matrixA;
-    varying vec2 resultUV;
-    const vec2 halfCR = vec2(0.5, 0.5);
-
-    const vec2 inputShapeCR = vec2(${inputShapeRCD[1]}, ${inputShapeRCD[0]});
-    const vec2 inputShapeTexCR = vec2(
-        ${inputTexShapeRC[1]}, ${inputTexShapeRC[0]});
-
-    const vec2 effectiveInputOverOutputRatioCR = vec2(
-        ${effectiveInputShapeRCD[1] / effectiveOutputShapeRCD[1]},
-        ${effectiveInputShapeRCD[0] / effectiveOutputShapeRCD[0]});
-
-    float sampleInput(float col, float row, float d) {
-      vec2 uv = (vec2(col * ${depth}.0 + d, row) + halfCR) / inputShapeTexCR;
-      return texture2D(matrixA, uv).r;
-    }
-
-    void main() {
-      vec2 yTexCR = floor(gl_FragCoord.xy);
-
-      // Map from 2D (yTexR, yTexC) to 3D (yR, yC, d).
-      vec2 yCR = vec2(floor(yTexCR.x / ${depth}.0), yTexCR.y);
-      float d = mod(yTexCR.x, ${depth}.0);
-
-      // Fractional source index.
-      vec2 sourceFracIndexCR = yCR * effectiveInputOverOutputRatioCR;
-
-      // Compute the four integer indices.
-      vec2 sourceFloorCR = floor(sourceFracIndexCR);
-      vec2 sourceCeilCR = min(inputShapeCR - 1.0, ceil(sourceFracIndexCR));
-
-      float topLeft = sampleInput(sourceFloorCR[0], sourceFloorCR[1], d);
-      float bottomLeft = sampleInput(sourceFloorCR[0], sourceCeilCR[1], d);
-      float topRight = sampleInput(sourceCeilCR[0], sourceFloorCR[1], d);
-      float bottomRight = sampleInput(sourceCeilCR[0], sourceCeilCR[1], d);
-
-      vec2 fracCR = sourceFracIndexCR - sourceFloorCR;
-
-      float top = topLeft + (topRight - topLeft) * fracCR[0];
-      float bottom = bottomLeft + (bottomRight - bottomLeft) * fracCR[0];
-      float newValue = top + (bottom - top) * fracCR[1];
-
-      gl_FragColor = vec4(newValue, 0.0, 0.0, 0.0);
-    }`;
-}
-
-export function resizeBilinear(
-    gpgpu: GPGPUContext, resizeBilinearProgram: WebGLProgram, a: WebGLTexture,
-    result: WebGLTexture, resultShapeRowCol: [number, number]) {
-  gpgpu.setOutputMatrixTexture(
-      result, resultShapeRowCol[0], resultShapeRowCol[1]);
-  gpgpu.setProgram(resizeBilinearProgram);
-  gpgpu.setInputMatrixTexture(a, 'matrixA', 0);
-  gpgpu.executeProgram();
+import {GPGPUProgram} from './gpgpu_math';
+
+export class ResizeBilinear3DProgram implements GPGPUProgram {
+  variableNames = ['A'];
+  params: Array<{}> = [];
+  outputShape: number[] = [];
+  userCode: string;
+
+  constructor(
+      inputShape: [number, number, number],
+      outputDimensionsRowCol: [number, number], alignCorners: boolean) {
+    const depth = inputShape[2];
+    this.outputShape =
+        [outputDimensionsRowCol[0], outputDimensionsRowCol[1], depth];
+    this.params = [alignCorners];
+
+    const effectiveInputShape = alignCorners ?
+        [inputShape[0] - 1, inputShape[1] - 1, depth] :
+        inputShape;
+
+    const effectiveOutputShape = alignCorners ?
+        [this.outputShape[0] - 1, this.outputShape[1] - 1, depth] :
+        this.outputShape;
+    this.userCode = `
+      const vec2 effectiveInputOverOutputRatioRC = vec2(
+          ${effectiveInputShape[0] / effectiveOutputShape[0]},
+          ${effectiveInputShape[1] / effectiveOutputShape[1]});
+      const vec2 inputShapeRC = vec2(${inputShape[0]}.0, ${inputShape[1]}.0);
+
+      void main() {
+        vec3 coords = getOutputCoords();
+        vec2 yRC = coords.xy;
+        float d = coords.z;
+
+        // Fractional source index.
+        vec2 sourceFracIndexRC = yRC * effectiveInputOverOutputRatioRC;
+
+        // Compute the four integer indices.
+        vec2 sourceFloorRC = floor(sourceFracIndexRC);
+        vec2 sourceCeilRC = min(inputShapeRC - 1.0, ceil(sourceFracIndexRC));
+
+        float topLeft = getA(sourceFloorRC[0], sourceFloorRC[1], d);
+        float bottomLeft = getA(sourceCeilRC[0], sourceFloorRC[1], d);
+        float topRight = getA(sourceFloorRC[0], sourceCeilRC[1], d);
+        float bottomRight = getA(sourceCeilRC[0], sourceCeilRC[1], d);
+
+        vec2 fracRC = sourceFracIndexRC - sourceFloorRC;
+
+        float top = topLeft + (topRight - topLeft) * fracRC[1];
+        float bottom = bottomLeft + (bottomRight - bottomLeft) * fracRC[1];
+        float newValue = top + (bottom - top) * fracRC[0];
+
+        setOutput(newValue);
+      }
+    `;
+  }
 }
diff --git a/src/math/webgl/resize_bilinear_gpu_test.ts b/src/math/webgl/resize_bilinear_gpu_test.ts
index 3adb55af61..9382b83db5 100644
--- a/src/math/webgl/resize_bilinear_gpu_test.ts
+++ b/src/math/webgl/resize_bilinear_gpu_test.ts
@@ -14,52 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 import * as test_util from '../../test_util';
-import * as conv_util from '../conv_util';
+import {Array3D, initializeGPU, NDArray} from '../ndarray';
 
 import {GPGPUContext} from './gpgpu_context';
-import * as resize_bilinear_gpu from './resize_bilinear_gpu';
+import * as gpgpu_math from './gpgpu_math';
+import {ResizeBilinear3DProgram} from './resize_bilinear_gpu';
+import {TextureManager} from './texture_manager';
 
 describe('resize bilinear', () => {
-  function uploadResizeBilinearDownload(
-      a: Float32Array, aShapeRowColDepth: [number, number, number],
-      outputDimensionsRowCol: [number, number],
-      alignCorners: boolean): Float32Array {
-    const aTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(aShapeRowColDepth);
-
-    const resultShapeRCD: [number, number, number] = [
-      outputDimensionsRowCol[0], outputDimensionsRowCol[1], aShapeRowColDepth[2]
-    ];
-
-    const resultTexShapeRC: [number, number] =
-        conv_util.computeTexShapeFrom3D(resultShapeRCD);
-
-    const gpgpu = new GPGPUContext();
-    gpgpu.enableAutomaticDebugValidation(true);
-
-    const shaderSource = resize_bilinear_gpu.getFragmentShaderSource(
-        aShapeRowColDepth, outputDimensionsRowCol, alignCorners);
-    const program = gpgpu.createProgram(shaderSource);
-
-    const aTex = gpgpu.createMatrixTexture(aTexShapeRC[0], aTexShapeRC[1]);
-    const resultTex =
-        gpgpu.createMatrixTexture(resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.uploadMatrixToTexture(aTex, aTexShapeRC[0], aTexShapeRC[1], a);
-
-    resize_bilinear_gpu.resizeBilinear(
-        gpgpu, program, aTex, resultTex, resultTexShapeRC);
-
-    const result = gpgpu.downloadMatrixFromTexture(
-        resultTex, resultTexShapeRC[0], resultTexShapeRC[1]);
-
-    gpgpu.deleteMatrixTexture(resultTex);
-    gpgpu.deleteMatrixTexture(aTex);
-    gpgpu.deleteProgram(program);
-    gpgpu.dispose();
-    return result;
-  }
-
   it('simple bilinear', () => {
     const a = new Float32Array([2, 2, 4, 4]);
 
@@ -123,3 +85,29 @@ describe('resize bilinear', () => {
         1e-4);
   });
 });
+
+function uploadResizeBilinearDownload(
+    a: Float32Array, aShape: [number, number, number],
+    outputDimensionsRowCol: [number, number],
+    alignCorners: boolean): Float32Array {
+  const gpgpu = new GPGPUContext();
+  gpgpu.enableAutomaticDebugValidation(true);
+  const textureManager = new TextureManager(gpgpu);
+  initializeGPU(gpgpu, textureManager);
+
+  const program =
+      new ResizeBilinear3DProgram(aShape, outputDimensionsRowCol, alignCorners);
+  const aArr = Array3D.new(aShape, a);
+  const rArr = NDArray.zeros(program.outputShape);
+  const binary = gpgpu_math.compileProgram(gpgpu, program, [aArr], rArr);
+  gpgpu_math.runProgram(binary, [aArr], rArr);
+  const result = rArr.getValues();
+
+  aArr.dispose();
+  rArr.dispose();
+  textureManager.dispose();
+  gpgpu.deleteProgram(binary.webGLProgram);
+  gpgpu.dispose();
+
+  return result;
+}
diff --git a/src/ops/reshape.ts b/src/ops/reshape.ts
index 1a90af7262..2d5204b5e6 100644
--- a/src/ops/reshape.ts
+++ b/src/ops/reshape.ts
@@ -34,9 +34,8 @@ export class Reshape<T1 extends NDArray, T2 extends NDArray> extends Operation {
   feedForward(math: NDArrayMath, inferenceArrays: TensorArrayMap) {
     const x = inferenceArrays.get(this.xTensor) as T1;
 
-    math.scope((keep) => {
-      inferenceArrays.set(
-          this.yTensor, keep(math.reshape<T1, T2>(x, this.yTensor.shape)));
+    math.scope(keep => {
+      inferenceArrays.set(this.yTensor, keep(x.reshape(this.yTensor.shape)));
     });
   }
 
@@ -45,9 +44,8 @@ export class Reshape<T1 extends NDArray, T2 extends NDArray> extends Operation {
       gradientArrays: TensorArrayMap) {
     const dy = gradientArrays.get(this.yTensor) as T2;
 
-    math.scope((keep) => {
-      gradientArrays.set(
-          this.xTensor, keep(math.reshape<T2, T1>(dy, this.xTensor.shape)));
+    math.scope(keep => {
+      gradientArrays.set(this.xTensor, keep(dy.reshape(this.xTensor.shape)));
     });
   }
 }